API Change: the PDFDocument constructor now takes a PDFParser. set_parser() is removed.

pull/1/head
Yusuke Shinyama 2013-10-10 18:40:06 +09:00
parent 557c2c72e6
commit c926874d20
3 changed files with 7 additions and 18 deletions

View File

@ -283,8 +283,7 @@ class PDFDocument(object):
dynamically import the data as processing goes. dynamically import the data as processing goes.
Typical usage: Typical usage:
doc = PDFDocument() doc = PDFDocument(parser)
doc.set_parser(parser)
doc.initialize(password) doc.initialize(password)
obj = doc.getobj(objid) obj = doc.getobj(objid)
@ -292,7 +291,8 @@ class PDFDocument(object):
debug = 0 debug = 0
def __init__(self, caching=True): def __init__(self, parser, caching=True, fallback=True):
"Set the document to use a given PDFParser object."
self.caching = caching self.caching = caching
self.xrefs = [] self.xrefs = []
self.info = [] self.info = []
@ -302,12 +302,8 @@ class PDFDocument(object):
self._parser = None self._parser = None
self._cached_objs = {} self._cached_objs = {}
self._parsed_objs = {} self._parsed_objs = {}
return
def set_parser(self, parser, fallback=True):
"Set the document to use a given PDFParser object."
if self._parser: return
self._parser = parser self._parser = parser
self._parser.set_document(self)
# Retrieve the information of each header that was appended # Retrieve the information of each header that was appended
# (maybe multiple times) at the end of the document. # (maybe multiple times) at the end of the document.
try: try:

View File

@ -814,10 +814,7 @@ def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password='',
# Create a PDF parser object associated with the file object. # Create a PDF parser object associated with the file object.
parser = PDFParser(fp) parser = PDFParser(fp)
# Create a PDF document object that stores the document structure. # Create a PDF document object that stores the document structure.
doc = PDFDocument(caching=caching) doc = PDFDocument(parser, caching=caching)
# Connect the parser and document objects.
parser.set_document(doc)
doc.set_parser(parser)
# Supply the document password for initialization. # Supply the document password for initialization.
# (If no password is set, give an empty string.) # (If no password is set, give an empty string.)
doc.initialize(password) doc.initialize(password)

View File

@ -108,11 +108,9 @@ def dumpallobjs(out, doc, codec=None):
# dumpoutline # dumpoutline
def dumpoutline(outfp, fname, objids, pagenos, password='', def dumpoutline(outfp, fname, objids, pagenos, password='',
dumpall=False, codec=None): dumpall=False, codec=None):
doc = PDFDocument()
fp = file(fname, 'rb') fp = file(fname, 'rb')
parser = PDFParser(fp) parser = PDFParser(fp)
parser.set_document(doc) doc = PDFDocument(parser)
doc.set_parser(parser)
doc.initialize(password) doc.initialize(password)
pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) ) pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) )
def resolve_dest(dest): def resolve_dest(dest):
@ -157,11 +155,9 @@ def dumpoutline(outfp, fname, objids, pagenos, password='',
# dumppdf # dumppdf
def dumppdf(outfp, fname, objids, pagenos, password='', def dumppdf(outfp, fname, objids, pagenos, password='',
dumpall=False, codec=None): dumpall=False, codec=None):
doc = PDFDocument()
fp = file(fname, 'rb') fp = file(fname, 'rb')
parser = PDFParser(fp) parser = PDFParser(fp)
parser.set_document(doc) doc = PDFDocument(parser)
doc.set_parser(parser)
doc.initialize(password) doc.initialize(password)
if objids: if objids:
for objid in objids: for objid in objids: