From c926874d20ede48c7ca8145524b13968d1be40e7 Mon Sep 17 00:00:00 2001
From: Yusuke Shinyama <yusuke@shinyama.jp>
Date: Thu, 10 Oct 2013 18:40:06 +0900
Subject: [PATCH] API Change: the PDFDocument constructor now takes a PDFParser.
 set_parser() is removed.

---
 pdfminer/pdfdocument.py | 12 ++++--------
 pdfminer/pdfinterp.py   |  5 +----
 tools/dumppdf.py        |  8 ++------
 3 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py
index cb26802..4de80c8 100644
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@@ -283,8 +283,7 @@ class PDFDocument(object):
     dynamically import the data as processing goes.
 
     Typical usage:
-      doc = PDFDocument()
-      doc.set_parser(parser)
+      doc = PDFDocument(parser)
       doc.initialize(password)
       obj = doc.getobj(objid)
     
@@ -292,7 +291,8 @@ class PDFDocument(object):
 
     debug = 0
 
-    def __init__(self, caching=True):
+    def __init__(self, parser, caching=True, fallback=True):
+        "Set the document to use a given PDFParser object."
         self.caching = caching
         self.xrefs = []
         self.info = []
@@ -302,12 +302,8 @@ class PDFDocument(object):
         self._parser = None
         self._cached_objs = {}
         self._parsed_objs = {}
-        return
-
-    def set_parser(self, parser, fallback=True):
-        "Set the document to use a given PDFParser object."
-        if self._parser: return
         self._parser = parser
+        self._parser.set_document(self)
         # Retrieve the information of each header that was appended
         # (maybe multiple times) at the end of the document.
         try:
diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index 350804b..59d01db 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -814,10 +814,7 @@ def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password='',
     # Create a PDF parser object associated with the file object.
     parser = PDFParser(fp)
     # Create a PDF document object that stores the document structure.
-    doc = PDFDocument(caching=caching)
-    # Connect the parser and document objects.
-    parser.set_document(doc)
-    doc.set_parser(parser)
+    doc = PDFDocument(parser, caching=caching)
     # Supply the document password for initialization.
     # (If no password is set, give an empty string.)
     doc.initialize(password)
diff --git a/tools/dumppdf.py b/tools/dumppdf.py
index 7b9ec57..9d28a30 100755
--- a/tools/dumppdf.py
+++ b/tools/dumppdf.py
@@ -108,11 +108,9 @@ def dumpallobjs(out, doc, codec=None):
 # dumpoutline
 def dumpoutline(outfp, fname, objids, pagenos, password='',
                 dumpall=False, codec=None):
-    doc = PDFDocument()
     fp = file(fname, 'rb')
     parser = PDFParser(fp)
-    parser.set_document(doc)
-    doc.set_parser(parser)
+    doc = PDFDocument(parser)
     doc.initialize(password)
     pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) )
     def resolve_dest(dest):
@@ -157,11 +155,9 @@ def dumpoutline(outfp, fname, objids, pagenos, password='',
 # dumppdf
 def dumppdf(outfp, fname, objids, pagenos, password='',
             dumpall=False, codec=None):
-    doc = PDFDocument()
     fp = file(fname, 'rb')
     parser = PDFParser(fp)
-    parser.set_document(doc)
-    doc.set_parser(parser)
+    doc = PDFDocument(parser)
     doc.initialize(password)
     if objids:
         for objid in objids: