From 1d54ecd31c49886fd43279f6ee2cc8674eaf2d8e Mon Sep 17 00:00:00 2001 From: Friedrich Lindenberg Date: Fri, 20 May 2016 21:12:05 +0200 Subject: [PATCH] Make the logger run in a namespace. --- pdfminer/__init__.py | 12 ++++++------ pdfminer/cmapdb.py | 11 +++++++---- pdfminer/converter.py | 7 +++++-- pdfminer/lzw.py | 4 ++-- pdfminer/pdfdocument.py | 26 ++++++++++++++------------ pdfminer/pdfinterp.py | 20 +++++++++++--------- pdfminer/pdfpage.py | 8 +++++--- pdfminer/pdfparser.py | 4 +++- pdfminer/pdftypes.py | 6 ++++-- pdfminer/psparser.py | 29 ++++++++++++++++------------- tools/runapp.py | 6 +++--- 11 files changed, 76 insertions(+), 57 deletions(-) diff --git a/pdfminer/__init__.py b/pdfminer/__init__.py index e803bfe..5a8b431 100644 --- a/pdfminer/__init__.py +++ b/pdfminer/__init__.py @@ -1,6 +1,6 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -__version__ = '20160202' - -if __name__ == '__main__': - print (__version__) +#!/usr/bin/env python +# -*- coding: utf-8 -*- +__version__ = '20160202' + +if __name__ == '__main__': + print (__version__) diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index a1cc53c..52bba8b 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -31,7 +31,10 @@ from .encodingdb import name2unicode from .utils import choplist from .utils import nunpack -import six #Python 2+3 compatibility +import six #Python 2+3 compatibility + +log = logging.getLogger(__name__) + class CMapError(Exception): pass @@ -91,7 +94,7 @@ class CMap(CMapBase): return def decode(self, code): - logging.debug('decode: %r, %r', self, code) + log.debug('decode: %r, %r', self, code) d = self.code2cid for i in six.iterbytes(code): if i in d: @@ -141,7 +144,7 @@ class UnicodeMap(CMapBase): return '' % self.attrs.get('CMapName') def get_unichr(self, cid): - logging.debug('get_unichr: %r, %r', self, cid) + log.debug('get_unichr: %r, %r', self, cid) return self.cid2unichr[cid] def dump(self, out=sys.stdout): @@ -228,7 +231,7 @@ class CMapDB(object): @classmethod def _load_data(klass, name): filename = '%s.pickle.gz' % name - logging.info('loading: %r', name) + log.info('loading: %r', name) cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'), os.path.join(os.path.dirname(__file__), 'cmap'),) for directory in cmap_paths: diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 065f21e..75969b5 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -23,7 +23,10 @@ from .utils import enc from .utils import bbox2str from . import utils -import six # Python 2+3 compatibility +import six # Python 2+3 compatibility + +log = logging.getLogger(__name__) + ## PDFLayoutAnalyzer ## @@ -119,7 +122,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): return item.adv def handle_undefined_char(self, font, cid): - logging.info('undefined: %r, %r', font, cid) + log.info('undefined: %r, %r', font, cid) return '(cid:%d)' % cid def receive_layout(self, ltpage): diff --git a/pdfminer/lzw.py b/pdfminer/lzw.py index 571a894..30e2205 100644 --- a/pdfminer/lzw.py +++ b/pdfminer/lzw.py @@ -89,8 +89,8 @@ class LZWDecoder(object): # just ignore corrupt data and stop yielding there break yield x - #logging.debug('nbits=%d, code=%d, output=%r, table=%r' % - # (self.nbits, code, x, self.table[258:])) + #log.debug('nbits=%d, code=%d, output=%r, table=%r' % + # (self.nbits, code, x, self.table[258:])) return diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index fd62c82..43d6b6e 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -37,6 +37,8 @@ from .utils import nunpack from .utils import decode_text +log = logging.getLogger(__name__) + ## Exceptions ## class PDFNoValidXRef(PDFSyntaxError): @@ -127,7 +129,7 @@ class PDFXRef(PDFBaseXRef): if use != b'n': continue self.offsets[objid] = (None, long(pos) if six.PY2 else int(pos), int(genno)) - logging.info('xref objects: %r', self.offsets) + log.info('xref objects: %r', self.offsets) self.load_trailer(parser) return @@ -142,7 +144,7 @@ class PDFXRef(PDFBaseXRef): raise PDFNoValidXRef('Unexpected EOF - file corrupted') (_, dic) = x[0] self.trailer.update(dict_value(dic)) - logging.debug('trailer=%r', self.trailer) + log.debug('trailer=%r', self.trailer) return def get_trailer(self): @@ -177,7 +179,7 @@ class PDFXRefFallback(PDFXRef): if line.startswith(b'trailer'): parser.seek(pos) self.load_trailer(parser) - logging.info('trailer: %r', self.trailer) + log.info('trailer: %r', self.trailer) break if six.PY3: line=line.decode('latin-1') #default pdf encoding @@ -244,9 +246,9 @@ class PDFXRefStream(PDFBaseXRef): self.data = stream.get_data() self.entlen = self.fl1+self.fl2+self.fl3 self.trailer = stream.attrs - logging.info('xref stream: objid=%s, fields=%d,%d,%d', - ', '.join(map(repr, self.ranges)), - self.fl1, self.fl2, self.fl3) + log.info('xref stream: objid=%s, fields=%d,%d,%d', + ', '.join(map(repr, self.ranges)), + self.fl1, self.fl2, self.fl3) return def get_trailer(self): @@ -656,7 +658,7 @@ class PDFDocument(object): assert objid != 0 if not self.xrefs: raise PDFException('PDFDocument is not initialized') - logging.debug('getobj: objid=%r', objid) + log.debug('getobj: objid=%r', objid) if objid in self._cached_objs: (obj, genno) = self._cached_objs[objid] else: @@ -681,7 +683,7 @@ class PDFDocument(object): continue else: raise PDFObjectNotFound(objid) - logging.debug('register: objid=%r: %r', objid, obj) + log.debug('register: objid=%r: %r', objid, obj) if self.caching: self._cached_objs[objid] = (obj, genno) return obj @@ -754,14 +756,14 @@ class PDFDocument(object): prev = None for line in parser.revreadlines(): line = line.strip() - logging.debug('find_xref: %r', line) + log.debug('find_xref: %r', line) if line == b'startxref': break if line: prev = line else: raise PDFNoValidXRef('Unexpected EOF') - logging.info('xref found: pos=%r', prev) + log.info('xref found: pos=%r', prev) return long(prev) if six.PY2 else int(prev) # read xref table @@ -773,7 +775,7 @@ class PDFDocument(object): (pos, token) = parser.nexttoken() except PSEOF: raise PDFNoValidXRef('Unexpected EOF') - logging.info('read_xref_from: start=%d, token=%r', start, token) + log.info('read_xref_from: start=%d, token=%r', start, token) if isinstance(token, int): # XRefStream: PDF-1.5 parser.seek(pos) @@ -787,7 +789,7 @@ class PDFDocument(object): xref.load(parser) xrefs.append(xref) trailer = xref.get_trailer() - logging.info('trailer: %r', trailer) + log.info('trailer: %r', trailer) if 'XRefStm' in trailer: pos = int_value(trailer['XRefStm']) self.read_xref_from(parser, pos, xrefs) diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index ff1d072..fbc8e37 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -31,7 +31,9 @@ from .utils import choplist from .utils import mult_matrix from .utils import MATRIX_IDENTITY -import six # Python 2+3 compatibility +import six # Python 2+3 compatibility + +log = logging.getLogger(__name__) ## Exceptions ## @@ -166,7 +168,7 @@ class PDFResourceManager(object): if objid and objid in self._cached_fonts: font = self._cached_fonts[objid] else: - logging.info('get_font: create: objid=%r, spec=%r', objid, spec) + log.info('get_font: create: objid=%r, spec=%r', objid, spec) if settings.STRICT: if spec['Type'] is not LITERAL_FONT: raise PDFFontError('Type is not /Font') @@ -340,7 +342,7 @@ class PDFPageInterpreter(object): else: return PREDEFINED_COLORSPACE.get(name) for (k, v) in six.iteritems(dict_value(resources)): - logging.debug('Resource: %r: %r', k, v) + log.debug('Resource: %r: %r', k, v) if k == 'Font': for (fontid, spec) in six.iteritems(dict_value(v)): objid = None @@ -796,7 +798,7 @@ class PDFPageInterpreter(object): if settings.STRICT: raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) return - logging.info('Processing xobj: %r', xobj) + log.info('Processing xobj: %r', xobj) subtype = xobj.get('Subtype') if subtype is LITERAL_FORM and 'BBox' in xobj: interpreter = self.dup() @@ -820,7 +822,7 @@ class PDFPageInterpreter(object): return def process_page(self, page): - logging.info('Processing page: %r', page) + log.info('Processing page: %r', page) (x0, y0, x1, y1) = page.mediabox if page.rotate == 90: ctm = (0, -1, 1, 0, -y0, x1) @@ -839,8 +841,8 @@ class PDFPageInterpreter(object): # Render the content streams. # This method may be called recursively. def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY): - logging.info('render_contents: resources=%r, streams=%r, ctm=%r', - resources, streams, ctm) + log.info('render_contents: resources=%r, streams=%r, ctm=%r', + resources, streams, ctm) self.init_resources(resources) self.init_state(ctm) self.execute(list_value(streams)) @@ -865,11 +867,11 @@ class PDFPageInterpreter(object): nargs = six.get_function_code(func).co_argcount-1 if nargs: args = self.pop(nargs) - logging.debug('exec: %s %r', name, args) + log.debug('exec: %s %r', name, args) if len(args) == nargs: func(*args) else: - logging.debug('exec: %s', name) + log.debug('exec: %s', name) func() else: if settings.STRICT: diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py index 41882ea..62c70a6 100644 --- a/pdfminer/pdfpage.py +++ b/pdfminer/pdfpage.py @@ -10,7 +10,9 @@ from .pdfparser import PDFParser from .pdfdocument import PDFDocument from .pdfdocument import PDFTextExtractionNotAllowed -import six # Python 2+3 compatibility +import six # Python 2+3 compatibility + +log = logging.getLogger(__name__) # some predefined literals and keywords. LITERAL_PAGE = LIT('Page') @@ -87,12 +89,12 @@ class PDFPage(object): if k in klass.INHERITABLE_ATTRS and k not in tree: tree[k] = v if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree: - logging.info('Pages: Kids=%r', tree['Kids']) + log.info('Pages: Kids=%r', tree['Kids']) for c in list_value(tree['Kids']): for x in search(c, tree): yield x elif tree.get('Type') is LITERAL_PAGE: - logging.info('Page: %r', tree) + log.info('Page: %r', tree) yield (objid, tree) pages = False if 'Pages' in document.catalog: diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index e5202d6..d19e179 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -12,6 +12,8 @@ from .pdftypes import PDFObjRef from .pdftypes import int_value from .pdftypes import dict_value +log = logging.getLogger(__name__) + ## Exceptions ## @@ -120,7 +122,7 @@ class PDFParser(PSStackParser): data += line self.seek(pos+objlen) # XXX limit objlen not to exceed object boundary - logging.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos, objlen, dic, data[:10]) + log.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos, objlen, dic, data[:10]) obj = PDFStream(dic, data, self.doc.decipher) self.push((pos, obj)) diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index a8fc009..caad157 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -1,5 +1,6 @@ #!/usr/bin/env python import zlib +import logging from .lzw import lzwdecode from .ascii85 import ascii85decode from .ascii85 import asciihexdecode @@ -14,6 +15,8 @@ from .utils import isnumber import six #Python 2+3 compatibility +log = logging.getLogger(__name__) + LITERAL_CRYPT = LIT('Crypt') # Abbreviation of Filter names in PDF 4.8.6. "Inline Images" @@ -161,8 +164,7 @@ def dict_value(x): x = resolve1(x) if not isinstance(x, dict): if settings.STRICT: - import logging - logging.error('PDFTypeError : Dict required: %r', x) + log.error('PDFTypeError : Dict required: %r', x) raise PDFTypeError('Dict required: %r' % x) return {} return x diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index 1b17695..af9c189 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -4,10 +4,13 @@ import re import logging -import six # Python 2+3 compatibility +import six # Python 2+3 compatibility from . import settings +log = logging.getLogger(__name__) + + def bytesindex(s,i,j=None): """implements s[i], s[i:], s[i:j] for Python2 and Python3""" if i<0 : i=len(s)+i @@ -208,14 +211,14 @@ class PSBaseParser(object): if not pos: pos = self.bufpos+self.charpos self.fp.seek(pos) - logging.info('poll(%d): %r', pos, self.fp.read(n)) + log.info('poll(%d): %r', pos, self.fp.read(n)) self.fp.seek(pos0) return def seek(self, pos): """Seeks the parser to the given position. """ - logging.debug('seek: %r', pos) + log.debug('seek: %r', pos) self.fp.seek(pos) # reset the status for nextline() self.bufpos = pos @@ -265,7 +268,7 @@ class PSBaseParser(object): else: linebuf += bytesindex(self.buf,self.charpos,-1) self.charpos = len(self.buf) - logging.debug('nextline: %r, %r', linepos, linebuf) + log.debug('nextline: %r, %r', linepos, linebuf) return (linepos, linebuf) @@ -508,7 +511,7 @@ class PSBaseParser(object): self.fillbuf() self.charpos = self._parse1(self.buf, self.charpos) token = self._tokens.pop(0) - logging.debug('nexttoken: %r', token) + log.debug('nexttoken: %r', token) return token @@ -549,16 +552,16 @@ class PSStackParser(PSBaseParser): def add_results(self, *objs): try: - logging.debug('add_results: %r', objs) + log.debug('add_results: %r', objs) except: - logging.debug('add_results: (unprintable object)') + log.debug('add_results: (unprintable object)') self.results.extend(objs) return def start_type(self, pos, type): self.context.append((pos, self.curtype, self.curstack)) (self.curtype, self.curstack) = (type, []) - logging.debug('start_type: pos=%r, type=%r', pos, type) + log.debug('start_type: pos=%r, type=%r', pos, type) return def end_type(self, type): @@ -566,7 +569,7 @@ class PSStackParser(PSBaseParser): raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type)) objs = [obj for (_, obj) in self.curstack] (pos, self.curtype, self.curstack) = self.context.pop() - logging.debug('end_type: pos=%r, type=%r, objs=%r', pos, type, objs) + log.debug('end_type: pos=%r, type=%r, objs=%r', pos, type, objs) return (pos, objs) def do_keyword(self, pos, token): @@ -620,10 +623,10 @@ class PSStackParser(PSBaseParser): if settings.STRICT: raise elif isinstance(token,PSKeyword): - logging.debug('do_keyword: pos=%r, token=%r, stack=%r', pos, token, self.curstack) + log.debug('do_keyword: pos=%r, token=%r, stack=%r', pos, token, self.curstack) self.do_keyword(pos, token) else: - logging.error('unknown token: pos=%r, token=%r, stack=%r', pos, token, self.curstack) + log.error('unknown token: pos=%r, token=%r, stack=%r', pos, token, self.curstack) self.do_keyword(pos, token) raise if self.context: @@ -632,7 +635,7 @@ class PSStackParser(PSBaseParser): self.flush() obj = self.results.pop(0) try: - logging.debug('nextobject: %r', obj) + log.debug('nextobject: %r', obj) except: - logging.debug('nextobject: (unprintable object)') + log.debug('nextobject: (unprintable object)') return obj diff --git a/tools/runapp.py b/tools/runapp.py index 6bdff8b..b90f962 100755 --- a/tools/runapp.py +++ b/tools/runapp.py @@ -8,9 +8,9 @@ import sys import urllib -from httplib import responses -from BaseHTTPServer import HTTPServer -from SimpleHTTPServer import SimpleHTTPRequestHandler +from six.moves.http_client import responses +from six.moves.BaseHTTPServer import HTTPServer +from six.moves.SimpleHTTPServer import SimpleHTTPRequestHandler ## WebAppHandler ##