From ecaf68efed830ee77e09b61d487dbed2b7508174 Mon Sep 17 00:00:00 2001 From: "yusuke.shinyama.dummy" Date: Sun, 27 Apr 2008 04:34:41 +0000 Subject: [PATCH] add some restriction git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@22 1aa58f4a-7d42-0410-adbc-911cccaed67c --- lzw.py | 1 - pdf2txt.py | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lzw.py b/lzw.py index 8081a48..0dde44d 100755 --- a/lzw.py +++ b/lzw.py @@ -17,7 +17,6 @@ class LZWDecoder: return def readbits(self, bits): - bits0 = bits v = 0 while 1: # the number of remaining bits we can get from the current buffer. diff --git a/pdf2txt.py b/pdf2txt.py index b5aa353..217547d 100755 --- a/pdf2txt.py +++ b/pdf2txt.py @@ -163,11 +163,14 @@ class TextConverter(PDFDevice): # pdf2txt +class TextExtractionNotAllowed(RuntimeError): pass def pdf2txt(outfp, rsrc, fname, pages, codec, debug=0): device = TextConverter(rsrc, debug=debug) doc = PDFDocument(debug=debug) fp = file(fname) parser = PDFParser(doc, fp, debug=debug) + if not doc.is_extractable: + raise TextExtractionNotAllowed('text extraction is not allowed: %r' % fname) interpreter = PDFPageInterpreter(rsrc, device, debug=debug) outfp.write('\n') for (i,page) in enumerate(doc.get_pages(debug=debug)):