API change: the PDFDocument constructor now takes a PDFParser argument; set_parser() has been removed.
parent
557c2c72e6
commit
c926874d20
|
@ -283,8 +283,7 @@ class PDFDocument(object):
|
||||||
dynamically import the data as processing goes.
|
dynamically import the data as processing goes.
|
||||||
|
|
||||||
Typical usage:
|
Typical usage:
|
||||||
doc = PDFDocument()
|
doc = PDFDocument(parser)
|
||||||
doc.set_parser(parser)
|
|
||||||
doc.initialize(password)
|
doc.initialize(password)
|
||||||
obj = doc.getobj(objid)
|
obj = doc.getobj(objid)
|
||||||
|
|
||||||
|
@ -292,7 +291,8 @@ class PDFDocument(object):
|
||||||
|
|
||||||
debug = 0
|
debug = 0
|
||||||
|
|
||||||
def __init__(self, caching=True):
|
def __init__(self, parser, caching=True, fallback=True):
|
||||||
|
"Set the document to use a given PDFParser object."
|
||||||
self.caching = caching
|
self.caching = caching
|
||||||
self.xrefs = []
|
self.xrefs = []
|
||||||
self.info = []
|
self.info = []
|
||||||
|
@ -302,12 +302,8 @@ class PDFDocument(object):
|
||||||
self._parser = None
|
self._parser = None
|
||||||
self._cached_objs = {}
|
self._cached_objs = {}
|
||||||
self._parsed_objs = {}
|
self._parsed_objs = {}
|
||||||
return
|
|
||||||
|
|
||||||
def set_parser(self, parser, fallback=True):
|
|
||||||
"Set the document to use a given PDFParser object."
|
|
||||||
if self._parser: return
|
|
||||||
self._parser = parser
|
self._parser = parser
|
||||||
|
self._parser.set_document(self)
|
||||||
# Retrieve the information of each header that was appended
|
# Retrieve the information of each header that was appended
|
||||||
# (maybe multiple times) at the end of the document.
|
# (maybe multiple times) at the end of the document.
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -814,10 +814,7 @@ def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password='',
|
||||||
# Create a PDF parser object associated with the file object.
|
# Create a PDF parser object associated with the file object.
|
||||||
parser = PDFParser(fp)
|
parser = PDFParser(fp)
|
||||||
# Create a PDF document object that stores the document structure.
|
# Create a PDF document object that stores the document structure.
|
||||||
doc = PDFDocument(caching=caching)
|
doc = PDFDocument(parser, caching=caching)
|
||||||
# Connect the parser and document objects.
|
|
||||||
parser.set_document(doc)
|
|
||||||
doc.set_parser(parser)
|
|
||||||
# Supply the document password for initialization.
|
# Supply the document password for initialization.
|
||||||
# (If no password is set, give an empty string.)
|
# (If no password is set, give an empty string.)
|
||||||
doc.initialize(password)
|
doc.initialize(password)
|
||||||
|
|
|
@ -108,11 +108,9 @@ def dumpallobjs(out, doc, codec=None):
|
||||||
# dumpoutline
|
# dumpoutline
|
||||||
def dumpoutline(outfp, fname, objids, pagenos, password='',
|
def dumpoutline(outfp, fname, objids, pagenos, password='',
|
||||||
dumpall=False, codec=None):
|
dumpall=False, codec=None):
|
||||||
doc = PDFDocument()
|
|
||||||
fp = file(fname, 'rb')
|
fp = file(fname, 'rb')
|
||||||
parser = PDFParser(fp)
|
parser = PDFParser(fp)
|
||||||
parser.set_document(doc)
|
doc = PDFDocument(parser)
|
||||||
doc.set_parser(parser)
|
|
||||||
doc.initialize(password)
|
doc.initialize(password)
|
||||||
pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) )
|
pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) )
|
||||||
def resolve_dest(dest):
|
def resolve_dest(dest):
|
||||||
|
@ -157,11 +155,9 @@ def dumpoutline(outfp, fname, objids, pagenos, password='',
|
||||||
# dumppdf
|
# dumppdf
|
||||||
def dumppdf(outfp, fname, objids, pagenos, password='',
|
def dumppdf(outfp, fname, objids, pagenos, password='',
|
||||||
dumpall=False, codec=None):
|
dumpall=False, codec=None):
|
||||||
doc = PDFDocument()
|
|
||||||
fp = file(fname, 'rb')
|
fp = file(fname, 'rb')
|
||||||
parser = PDFParser(fp)
|
parser = PDFParser(fp)
|
||||||
parser.set_document(doc)
|
doc = PDFDocument(parser)
|
||||||
doc.set_parser(parser)
|
|
||||||
doc.initialize(password)
|
doc.initialize(password)
|
||||||
if objids:
|
if objids:
|
||||||
for objid in objids:
|
for objid in objids:
|
||||||
|
|
Loading…
Reference in New Issue