check_extractable parameter added
git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@276 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
9f78915ea6
commit
2bf9c23801
|
@ -808,7 +808,8 @@ class PDFPageInterpreter(object):
|
||||||
##
|
##
|
||||||
class PDFTextExtractionNotAllowed(PDFInterpreterError): pass
|
class PDFTextExtractionNotAllowed(PDFInterpreterError): pass
|
||||||
|
|
||||||
def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password=''):
|
def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password='',
|
||||||
|
check_extractable=True):
|
||||||
# Create a PDF parser object associated with the file object.
|
# Create a PDF parser object associated with the file object.
|
||||||
parser = PDFParser(fp)
|
parser = PDFParser(fp)
|
||||||
# Create a PDF document object that stores the document structure.
|
# Create a PDF document object that stores the document structure.
|
||||||
|
@ -820,7 +821,7 @@ def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password=''):
|
||||||
# (If no password is set, give an empty string.)
|
# (If no password is set, give an empty string.)
|
||||||
doc.initialize(password)
|
doc.initialize(password)
|
||||||
# Check if the document allows text extraction. If not, abort.
|
# Check if the document allows text extraction. If not, abort.
|
||||||
if not doc.is_extractable:
|
if check_extractable and not doc.is_extractable:
|
||||||
raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp)
|
raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp)
|
||||||
# Create a PDF interpreter object.
|
# Create a PDF interpreter object.
|
||||||
interpreter = PDFPageInterpreter(rsrcmgr, device)
|
interpreter = PDFPageInterpreter(rsrcmgr, device)
|
||||||
|
|
|
@ -87,7 +87,8 @@ def main(argv):
|
||||||
return usage()
|
return usage()
|
||||||
for fname in args:
|
for fname in args:
|
||||||
fp = file(fname, 'rb')
|
fp = file(fname, 'rb')
|
||||||
process_pdf(rsrcmgr, device, fp, pagenos, maxpages=maxpages, password=password)
|
process_pdf(rsrcmgr, device, fp, pagenos, maxpages=maxpages, password=password,
|
||||||
|
check_extractable=True)
|
||||||
fp.close()
|
fp.close()
|
||||||
device.close()
|
device.close()
|
||||||
outfp.close()
|
outfp.close()
|
||||||
|
|
Loading…
Reference in New Issue