From c926874d20ede48c7ca8145524b13968d1be40e7 Mon Sep 17 00:00:00 2001 From: Yusuke Shinyama Date: Thu, 10 Oct 2013 18:40:06 +0900 Subject: [PATCH] API Change: the PDFDocument constructor now takes PDFParser. set_parser() is removed. --- pdfminer/pdfdocument.py | 12 ++++-------- pdfminer/pdfinterp.py | 5 +---- tools/dumppdf.py | 8 ++------ 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index cb26802..4de80c8 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -283,8 +283,7 @@ class PDFDocument(object): dynamically import the data as processing goes. Typical usage: - doc = PDFDocument() - doc.set_parser(parser) + doc = PDFDocument(parser) doc.initialize(password) obj = doc.getobj(objid) @@ -292,7 +291,8 @@ class PDFDocument(object): debug = 0 - def __init__(self, caching=True): + def __init__(self, parser, caching=True, fallback=True): + "Set the document to use a given PDFParser object." self.caching = caching self.xrefs = [] self.info = [] @@ -302,12 +302,8 @@ class PDFDocument(object): self._parser = None self._cached_objs = {} self._parsed_objs = {} - return - - def set_parser(self, parser, fallback=True): - "Set the document to use a given PDFParser object." - if self._parser: return self._parser = parser + self._parser.set_document(self) # Retrieve the information of each header that was appended # (maybe multiple times) at the end of the document. try: diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 350804b..59d01db 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -814,10 +814,7 @@ def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password='', # Create a PDF parser object associated with the file object. parser = PDFParser(fp) # Create a PDF document object that stores the document structure. - doc = PDFDocument(caching=caching) - # Connect the parser and document objects. 
- parser.set_document(doc) - doc.set_parser(parser) + doc = PDFDocument(parser, caching=caching) # Supply the document password for initialization. # (If no password is set, give an empty string.) doc.initialize(password) diff --git a/tools/dumppdf.py b/tools/dumppdf.py index 7b9ec57..9d28a30 100755 --- a/tools/dumppdf.py +++ b/tools/dumppdf.py @@ -108,11 +108,9 @@ def dumpallobjs(out, doc, codec=None): # dumpoutline def dumpoutline(outfp, fname, objids, pagenos, password='', dumpall=False, codec=None): - doc = PDFDocument() fp = file(fname, 'rb') parser = PDFParser(fp) - parser.set_document(doc) - doc.set_parser(parser) + doc = PDFDocument(parser) doc.initialize(password) pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) ) def resolve_dest(dest): @@ -157,11 +155,9 @@ def dumpoutline(outfp, fname, objids, pagenos, password='', # dumppdf def dumppdf(outfp, fname, objids, pagenos, password='', dumpall=False, codec=None): - doc = PDFDocument() fp = file(fname, 'rb') parser = PDFParser(fp) - parser.set_document(doc) - doc.set_parser(parser) + doc = PDFDocument(parser) doc.initialize(password) if objids: for objid in objids: