API Change: the PDFDocument constructor now takes a PDFParser. set_parser() is removed.

2013-10-10 18:40:06 +09:00 · 2013-10-10 18:40:06 +09:00 · c926874d20
parent 557c2c72e6
commit c926874d20
3 changed files with 7 additions and 18 deletions
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@ -283,8 +283,7 @@ class PDFDocument(object):
    dynamically import the data as processing goes.

    Typical usage:
-      doc = PDFDocument()
-      doc.set_parser(parser)
+      doc = PDFDocument(parser)
      doc.initialize(password)
      obj = doc.getobj(objid)
    
@ -292,7 +291,8 @@ class PDFDocument(object):

    debug = 0

-    def __init__(self, caching=True):
+    def __init__(self, parser, caching=True, fallback=True):
+        "Set the document to use a given PDFParser object."
        self.caching = caching
        self.xrefs = []
        self.info = []
@ -302,12 +302,8 @@ class PDFDocument(object):
        self._parser = None
        self._cached_objs = {}
        self._parsed_objs = {}
-        return
-
-    def set_parser(self, parser, fallback=True):
-        "Set the document to use a given PDFParser object."
-        if self._parser: return
        self._parser = parser
+        self._parser.set_document(self)
        # Retrieve the information of each header that was appended
        # (maybe multiple times) at the end of the document.
        try:
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@ -814,10 +814,7 @@ def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password='',
    # Create a PDF parser object associated with the file object.
    parser = PDFParser(fp)
    # Create a PDF document object that stores the document structure.
-    doc = PDFDocument(caching=caching)
-    # Connect the parser and document objects.
-    parser.set_document(doc)
-    doc.set_parser(parser)
+    doc = PDFDocument(parser, caching=caching)
    # Supply the document password for initialization.
    # (If no password is set, give an empty string.)
    doc.initialize(password)
--- a/tools/dumppdf.py
+++ b/tools/dumppdf.py
@ -108,11 +108,9 @@ def dumpallobjs(out, doc, codec=None):
 # dumpoutline
 def dumpoutline(outfp, fname, objids, pagenos, password='',
                dumpall=False, codec=None):
-    doc = PDFDocument()
    fp = file(fname, 'rb')
    parser = PDFParser(fp)
-    parser.set_document(doc)
-    doc.set_parser(parser)
+    doc = PDFDocument(parser)
    doc.initialize(password)
    pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) )
    def resolve_dest(dest):
@ -157,11 +155,9 @@ def dumpoutline(outfp, fname, objids, pagenos, password='',
 # dumppdf
 def dumppdf(outfp, fname, objids, pagenos, password='',
            dumpall=False, codec=None):
-    doc = PDFDocument()
    fp = file(fname, 'rb')
    parser = PDFParser(fp)
-    parser.set_document(doc)
-    doc.set_parser(parser)
+    doc = PDFDocument(parser)
    doc.initialize(password)
    if objids:
        for objid in objids: