diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 2c68846..0c0b5b0 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -808,7 +808,8 @@ class PDFPageInterpreter(object): ## class PDFTextExtractionNotAllowed(PDFInterpreterError): pass -def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password=''): +def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password='', + check_extractable=True): # Create a PDF parser object associated with the file object. parser = PDFParser(fp) # Create a PDF document object that stores the document structure. @@ -820,7 +821,7 @@ def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password=''): # (If no password is set, give an empty string.) doc.initialize(password) # Check if the document allows text extraction. If not, abort. - if not doc.is_extractable: + if check_extractable and not doc.is_extractable: raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp) # Create a PDF interpreter object. interpreter = PDFPageInterpreter(rsrcmgr, device) diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py index 5960f85..1704ffa 100755 --- a/tools/pdf2txt.py +++ b/tools/pdf2txt.py @@ -87,7 +87,8 @@ def main(argv): return usage() for fname in args: fp = file(fname, 'rb') - process_pdf(rsrcmgr, device, fp, pagenos, maxpages=maxpages, password=password) + process_pdf(rsrcmgr, device, fp, pagenos, maxpages=maxpages, password=password, + check_extractable=True) fp.close() device.close() outfp.close()