check_extractable parameter added

git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@276 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-11-23 10:53:28 +00:00
parent 9f78915ea6
commit 2bf9c23801
2 changed files with 5 additions and 3 deletions

View File

@ -808,7 +808,8 @@ class PDFPageInterpreter(object):
## ##
class PDFTextExtractionNotAllowed(PDFInterpreterError): pass class PDFTextExtractionNotAllowed(PDFInterpreterError): pass
def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password=''): def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password='',
check_extractable=True):
# Create a PDF parser object associated with the file object. # Create a PDF parser object associated with the file object.
parser = PDFParser(fp) parser = PDFParser(fp)
# Create a PDF document object that stores the document structure. # Create a PDF document object that stores the document structure.
@ -820,7 +821,7 @@ def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password=''):
# (If no password is set, give an empty string.) # (If no password is set, give an empty string.)
doc.initialize(password) doc.initialize(password)
# Check if the document allows text extraction. If not, abort. # Check if the document allows text extraction. If not, abort.
if not doc.is_extractable: if check_extractable and not doc.is_extractable:
raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp) raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp)
# Create a PDF interpreter object. # Create a PDF interpreter object.
interpreter = PDFPageInterpreter(rsrcmgr, device) interpreter = PDFPageInterpreter(rsrcmgr, device)

View File

@ -87,7 +87,8 @@ def main(argv):
return usage() return usage()
for fname in args: for fname in args:
fp = file(fname, 'rb') fp = file(fname, 'rb')
process_pdf(rsrcmgr, device, fp, pagenos, maxpages=maxpages, password=password) process_pdf(rsrcmgr, device, fp, pagenos, maxpages=maxpages, password=password,
check_extractable=True)
fp.close() fp.close()
device.close() device.close()
outfp.close() outfp.close()