API change: the PDFDocument constructor now takes a PDFParser. set_parser() is removed.
parent
557c2c72e6
commit
c926874d20
|
@ -283,8 +283,7 @@ class PDFDocument(object):
|
|||
dynamically import the data as processing goes.
|
||||
|
||||
Typical usage:
|
||||
doc = PDFDocument()
|
||||
doc.set_parser(parser)
|
||||
doc = PDFDocument(parser)
|
||||
doc.initialize(password)
|
||||
obj = doc.getobj(objid)
|
||||
|
||||
|
@ -292,7 +291,8 @@ class PDFDocument(object):
|
|||
|
||||
debug = 0
|
||||
|
||||
def __init__(self, caching=True):
|
||||
def __init__(self, parser, caching=True, fallback=True):
|
||||
"Set the document to use a given PDFParser object."
|
||||
self.caching = caching
|
||||
self.xrefs = []
|
||||
self.info = []
|
||||
|
@ -302,12 +302,8 @@ class PDFDocument(object):
|
|||
self._parser = None
|
||||
self._cached_objs = {}
|
||||
self._parsed_objs = {}
|
||||
return
|
||||
|
||||
def set_parser(self, parser, fallback=True):
|
||||
"Set the document to use a given PDFParser object."
|
||||
if self._parser: return
|
||||
self._parser = parser
|
||||
self._parser.set_document(self)
|
||||
# Retrieve the information of each header that was appended
|
||||
# (maybe multiple times) at the end of the document.
|
||||
try:
|
||||
|
|
|
@ -814,10 +814,7 @@ def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password='',
|
|||
# Create a PDF parser object associated with the file object.
|
||||
parser = PDFParser(fp)
|
||||
# Create a PDF document object that stores the document structure.
|
||||
doc = PDFDocument(caching=caching)
|
||||
# Connect the parser and document objects.
|
||||
parser.set_document(doc)
|
||||
doc.set_parser(parser)
|
||||
doc = PDFDocument(parser, caching=caching)
|
||||
# Supply the document password for initialization.
|
||||
# (If no password is set, give an empty string.)
|
||||
doc.initialize(password)
|
||||
|
|
|
@ -108,11 +108,9 @@ def dumpallobjs(out, doc, codec=None):
|
|||
# dumpoutline
|
||||
def dumpoutline(outfp, fname, objids, pagenos, password='',
|
||||
dumpall=False, codec=None):
|
||||
doc = PDFDocument()
|
||||
fp = file(fname, 'rb')
|
||||
parser = PDFParser(fp)
|
||||
parser.set_document(doc)
|
||||
doc.set_parser(parser)
|
||||
doc = PDFDocument(parser)
|
||||
doc.initialize(password)
|
||||
pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) )
|
||||
def resolve_dest(dest):
|
||||
|
@ -157,11 +155,9 @@ def dumpoutline(outfp, fname, objids, pagenos, password='',
|
|||
# dumppdf
|
||||
def dumppdf(outfp, fname, objids, pagenos, password='',
|
||||
dumpall=False, codec=None):
|
||||
doc = PDFDocument()
|
||||
fp = file(fname, 'rb')
|
||||
parser = PDFParser(fp)
|
||||
parser.set_document(doc)
|
||||
doc.set_parser(parser)
|
||||
doc = PDFDocument(parser)
|
||||
doc.initialize(password)
|
||||
if objids:
|
||||
for objid in objids:
|
||||
|
|
Loading…
Reference in New Issue