From 36977fbe0802b994069c800f9c55d253137c0aef Mon Sep 17 00:00:00 2001 From: speedplane Date: Tue, 11 Nov 2014 23:36:58 -0500 Subject: [PATCH] Add debug flags for much of the debug output. --- pdfminer/pdfdocument.py | 20 ++++++++++++++------ pdfminer/pdfinterp.py | 12 ++++++++---- pdfminer/pdfpage.py | 6 ++++-- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 2c3c274..66b575a 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -65,6 +65,8 @@ LITERAL_CATALOG = LIT('Catalog') ## class PDFBaseXRef(object): + debug = False + def get_trailer(self): raise NotImplementedError @@ -122,7 +124,7 @@ class PDFXRef(PDFBaseXRef): if use != b'n': continue self.offsets[objid] = (None, long(pos), int(genno)) - logging.info('xref objects: %r' % self.offsets) + if self.debug: logging.info('xref objects: %r' % self.offsets) self.load_trailer(parser) return @@ -173,7 +175,7 @@ class PDFXRefFallback(PDFXRef): if line.startswith(b'trailer'): parser.seek(pos) self.load_trailer(parser) - logging.info('trailer: %r' % self.get_trailer()) + if self.debug: logging.info('trailer: %r' % self.get_trailer()) break m = self.PDFOBJ_CUE.match(line) if not m: @@ -212,6 +214,8 @@ class PDFXRefFallback(PDFXRef): ## class PDFXRefStream(PDFBaseXRef): + debug = False + def __init__(self): self.data = None self.entlen = None @@ -238,7 +242,8 @@ class PDFXRefStream(PDFBaseXRef): self.data = stream.get_data() self.entlen = self.fl1+self.fl2+self.fl3 self.trailer = stream.attrs - logging.info('xref stream: objid=%s, fields=%d,%d,%d' % + if self.debug: + logging.info('xref stream: objid=%s, fields=%d,%d,%d' % (', '.join(map(repr, self.ranges)), self.fl1, self.fl2, self.fl3)) return @@ -761,7 +766,8 @@ class PDFDocument(object): prev = line else: raise PDFNoValidXRef('Unexpected EOF') - logging.info('xref found: pos=%r' % prev) + if self.debug: + logging.info('xref found: pos=%r' % prev) return long(prev) # read xref table @@ -773,7 +779,8 @@ class PDFDocument(object): (pos, token) = parser.nexttoken() except PSEOF: raise PDFNoValidXRef('Unexpected EOF') - logging.info('read_xref_from: start=%d, token=%r' % (start, token)) + if self.debug: + logging.info('read_xref_from: start=%d, token=%r' % (start, token)) if isinstance(token, int): # XRefStream: PDF-1.5 parser.seek(pos) @@ -787,7 +794,8 @@ class PDFDocument(object): xref.load(parser) xrefs.append(xref) trailer = xref.get_trailer() - logging.info('trailer: %r' % trailer) + if self.debug: + logging.info('trailer: %r' % trailer) if 'XRefStm' in trailer: pos = int_value(trailer['XRefStm']) self.read_xref_from(parser, pos, xrefs) diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 3b368e0..3f3f393 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -139,6 +139,8 @@ class PDFResourceManager(object): allocated multiple times. """ + debug = False + def __init__(self, caching=True): self.caching = caching self._cached_fonts = {} @@ -167,7 +169,8 @@ class PDFResourceManager(object): if objid and objid in self._cached_fonts: font = self._cached_fonts[objid] else: - logging.info('get_font: create: objid=%r, spec=%r' % (objid, spec)) + if self.debug: + logging.info('get_font: create: objid=%r, spec=%r' % (objid, spec)) if STRICT: if spec['Type'] is not LITERAL_FONT: raise PDFFontError('Type is not /Font') @@ -799,7 +802,7 @@ class PDFPageInterpreter(object): if STRICT: raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) return - logging.info('Processing xobj: %r' % xobj) + if self.debug: logging.info('Processing xobj: %r' % xobj) subtype = xobj.get('Subtype') if subtype is LITERAL_FORM and 'BBox' in xobj: interpreter = self.dup() @@ -822,7 +825,7 @@ class PDFPageInterpreter(object): return def process_page(self, page): - logging.info('Processing page: %r' % page) + if self.debug: logging.info('Processing page: %r' % page) (x0, y0, x1, y1) = page.mediabox if page.rotate == 90: ctm = (0, -1, 1, 0, -y0, x1) @@ -841,7 +844,8 @@ class PDFPageInterpreter(object): # Render the content streams. # This method may be called recursively. def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY): - logging.info('render_contents: resources=%r, streams=%r, ctm=%r' % + if self.debug: + logging.info('render_contents: resources=%r, streams=%r, ctm=%r' % (resources, streams, ctm)) self.init_resources(resources) self.init_state(ctm) diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py index fcdf17b..a48767c 100644 --- a/pdfminer/pdfpage.py +++ b/pdfminer/pdfpage.py @@ -39,6 +39,8 @@ class PDFPage(object): beads: a chain that represents natural reading order. """ + debug = False + def __init__(self, doc, pageid, attrs): """Initialize a page object. @@ -86,12 +88,12 @@ class PDFPage(object): if k in klass.INHERITABLE_ATTRS and k not in tree: tree[k] = v if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree: - logging.info('Pages: Kids=%r' % tree['Kids']) + if klass.debug: logging.info('Pages: Kids=%r' % tree['Kids']) for c in list_value(tree['Kids']): for x in search(c, tree): yield x elif tree.get('Type') is LITERAL_PAGE: - logging.info('Page: %r' % tree) + if klass.debug: logging.info('Page: %r' % tree) yield (objid, tree) pages = False if 'Pages' in document.catalog: