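Make the logger run in a namespace: each module now logs through its own module-level `log = logging.getLogger(__name__)` logger instead of calling the root `logging` functions directly.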

pull/22/head
Friedrich Lindenberg 2016-05-20 21:12:05 +02:00
parent e121f7ec46
commit 1d54ecd31c
11 changed files with 76 additions and 57 deletions

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__version__ = '20160202' __version__ = '20160202'
if __name__ == '__main__': if __name__ == '__main__':
print (__version__) print (__version__)

View File

@ -31,7 +31,10 @@ from .encodingdb import name2unicode
from .utils import choplist from .utils import choplist
from .utils import nunpack from .utils import nunpack
import six #Python 2+3 compatibility import six #Python 2+3 compatibility
log = logging.getLogger(__name__)
class CMapError(Exception): class CMapError(Exception):
pass pass
@ -91,7 +94,7 @@ class CMap(CMapBase):
return return
def decode(self, code): def decode(self, code):
logging.debug('decode: %r, %r', self, code) log.debug('decode: %r, %r', self, code)
d = self.code2cid d = self.code2cid
for i in six.iterbytes(code): for i in six.iterbytes(code):
if i in d: if i in d:
@ -141,7 +144,7 @@ class UnicodeMap(CMapBase):
return '<UnicodeMap: %s>' % self.attrs.get('CMapName') return '<UnicodeMap: %s>' % self.attrs.get('CMapName')
def get_unichr(self, cid): def get_unichr(self, cid):
logging.debug('get_unichr: %r, %r', self, cid) log.debug('get_unichr: %r, %r', self, cid)
return self.cid2unichr[cid] return self.cid2unichr[cid]
def dump(self, out=sys.stdout): def dump(self, out=sys.stdout):
@ -228,7 +231,7 @@ class CMapDB(object):
@classmethod @classmethod
def _load_data(klass, name): def _load_data(klass, name):
filename = '%s.pickle.gz' % name filename = '%s.pickle.gz' % name
logging.info('loading: %r', name) log.info('loading: %r', name)
cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'), cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'),
os.path.join(os.path.dirname(__file__), 'cmap'),) os.path.join(os.path.dirname(__file__), 'cmap'),)
for directory in cmap_paths: for directory in cmap_paths:

View File

@ -23,7 +23,10 @@ from .utils import enc
from .utils import bbox2str from .utils import bbox2str
from . import utils from . import utils
import six # Python 2+3 compatibility import six # Python 2+3 compatibility
log = logging.getLogger(__name__)
## PDFLayoutAnalyzer ## PDFLayoutAnalyzer
## ##
@ -119,7 +122,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
return item.adv return item.adv
def handle_undefined_char(self, font, cid): def handle_undefined_char(self, font, cid):
logging.info('undefined: %r, %r', font, cid) log.info('undefined: %r, %r', font, cid)
return '(cid:%d)' % cid return '(cid:%d)' % cid
def receive_layout(self, ltpage): def receive_layout(self, ltpage):

View File

@ -89,8 +89,8 @@ class LZWDecoder(object):
# just ignore corrupt data and stop yielding there # just ignore corrupt data and stop yielding there
break break
yield x yield x
#logging.debug('nbits=%d, code=%d, output=%r, table=%r' % #log.debug('nbits=%d, code=%d, output=%r, table=%r' %
# (self.nbits, code, x, self.table[258:])) # (self.nbits, code, x, self.table[258:]))
return return

View File

@ -37,6 +37,8 @@ from .utils import nunpack
from .utils import decode_text from .utils import decode_text
log = logging.getLogger(__name__)
## Exceptions ## Exceptions
## ##
class PDFNoValidXRef(PDFSyntaxError): class PDFNoValidXRef(PDFSyntaxError):
@ -127,7 +129,7 @@ class PDFXRef(PDFBaseXRef):
if use != b'n': if use != b'n':
continue continue
self.offsets[objid] = (None, long(pos) if six.PY2 else int(pos), int(genno)) self.offsets[objid] = (None, long(pos) if six.PY2 else int(pos), int(genno))
logging.info('xref objects: %r', self.offsets) log.info('xref objects: %r', self.offsets)
self.load_trailer(parser) self.load_trailer(parser)
return return
@ -142,7 +144,7 @@ class PDFXRef(PDFBaseXRef):
raise PDFNoValidXRef('Unexpected EOF - file corrupted') raise PDFNoValidXRef('Unexpected EOF - file corrupted')
(_, dic) = x[0] (_, dic) = x[0]
self.trailer.update(dict_value(dic)) self.trailer.update(dict_value(dic))
logging.debug('trailer=%r', self.trailer) log.debug('trailer=%r', self.trailer)
return return
def get_trailer(self): def get_trailer(self):
@ -177,7 +179,7 @@ class PDFXRefFallback(PDFXRef):
if line.startswith(b'trailer'): if line.startswith(b'trailer'):
parser.seek(pos) parser.seek(pos)
self.load_trailer(parser) self.load_trailer(parser)
logging.info('trailer: %r', self.trailer) log.info('trailer: %r', self.trailer)
break break
if six.PY3: if six.PY3:
line=line.decode('latin-1') #default pdf encoding line=line.decode('latin-1') #default pdf encoding
@ -244,9 +246,9 @@ class PDFXRefStream(PDFBaseXRef):
self.data = stream.get_data() self.data = stream.get_data()
self.entlen = self.fl1+self.fl2+self.fl3 self.entlen = self.fl1+self.fl2+self.fl3
self.trailer = stream.attrs self.trailer = stream.attrs
logging.info('xref stream: objid=%s, fields=%d,%d,%d', log.info('xref stream: objid=%s, fields=%d,%d,%d',
', '.join(map(repr, self.ranges)), ', '.join(map(repr, self.ranges)),
self.fl1, self.fl2, self.fl3) self.fl1, self.fl2, self.fl3)
return return
def get_trailer(self): def get_trailer(self):
@ -656,7 +658,7 @@ class PDFDocument(object):
assert objid != 0 assert objid != 0
if not self.xrefs: if not self.xrefs:
raise PDFException('PDFDocument is not initialized') raise PDFException('PDFDocument is not initialized')
logging.debug('getobj: objid=%r', objid) log.debug('getobj: objid=%r', objid)
if objid in self._cached_objs: if objid in self._cached_objs:
(obj, genno) = self._cached_objs[objid] (obj, genno) = self._cached_objs[objid]
else: else:
@ -681,7 +683,7 @@ class PDFDocument(object):
continue continue
else: else:
raise PDFObjectNotFound(objid) raise PDFObjectNotFound(objid)
logging.debug('register: objid=%r: %r', objid, obj) log.debug('register: objid=%r: %r', objid, obj)
if self.caching: if self.caching:
self._cached_objs[objid] = (obj, genno) self._cached_objs[objid] = (obj, genno)
return obj return obj
@ -754,14 +756,14 @@ class PDFDocument(object):
prev = None prev = None
for line in parser.revreadlines(): for line in parser.revreadlines():
line = line.strip() line = line.strip()
logging.debug('find_xref: %r', line) log.debug('find_xref: %r', line)
if line == b'startxref': if line == b'startxref':
break break
if line: if line:
prev = line prev = line
else: else:
raise PDFNoValidXRef('Unexpected EOF') raise PDFNoValidXRef('Unexpected EOF')
logging.info('xref found: pos=%r', prev) log.info('xref found: pos=%r', prev)
return long(prev) if six.PY2 else int(prev) return long(prev) if six.PY2 else int(prev)
# read xref table # read xref table
@ -773,7 +775,7 @@ class PDFDocument(object):
(pos, token) = parser.nexttoken() (pos, token) = parser.nexttoken()
except PSEOF: except PSEOF:
raise PDFNoValidXRef('Unexpected EOF') raise PDFNoValidXRef('Unexpected EOF')
logging.info('read_xref_from: start=%d, token=%r', start, token) log.info('read_xref_from: start=%d, token=%r', start, token)
if isinstance(token, int): if isinstance(token, int):
# XRefStream: PDF-1.5 # XRefStream: PDF-1.5
parser.seek(pos) parser.seek(pos)
@ -787,7 +789,7 @@ class PDFDocument(object):
xref.load(parser) xref.load(parser)
xrefs.append(xref) xrefs.append(xref)
trailer = xref.get_trailer() trailer = xref.get_trailer()
logging.info('trailer: %r', trailer) log.info('trailer: %r', trailer)
if 'XRefStm' in trailer: if 'XRefStm' in trailer:
pos = int_value(trailer['XRefStm']) pos = int_value(trailer['XRefStm'])
self.read_xref_from(parser, pos, xrefs) self.read_xref_from(parser, pos, xrefs)

View File

@ -31,7 +31,9 @@ from .utils import choplist
from .utils import mult_matrix from .utils import mult_matrix
from .utils import MATRIX_IDENTITY from .utils import MATRIX_IDENTITY
import six # Python 2+3 compatibility import six # Python 2+3 compatibility
log = logging.getLogger(__name__)
## Exceptions ## Exceptions
## ##
@ -166,7 +168,7 @@ class PDFResourceManager(object):
if objid and objid in self._cached_fonts: if objid and objid in self._cached_fonts:
font = self._cached_fonts[objid] font = self._cached_fonts[objid]
else: else:
logging.info('get_font: create: objid=%r, spec=%r', objid, spec) log.info('get_font: create: objid=%r, spec=%r', objid, spec)
if settings.STRICT: if settings.STRICT:
if spec['Type'] is not LITERAL_FONT: if spec['Type'] is not LITERAL_FONT:
raise PDFFontError('Type is not /Font') raise PDFFontError('Type is not /Font')
@ -340,7 +342,7 @@ class PDFPageInterpreter(object):
else: else:
return PREDEFINED_COLORSPACE.get(name) return PREDEFINED_COLORSPACE.get(name)
for (k, v) in six.iteritems(dict_value(resources)): for (k, v) in six.iteritems(dict_value(resources)):
logging.debug('Resource: %r: %r', k, v) log.debug('Resource: %r: %r', k, v)
if k == 'Font': if k == 'Font':
for (fontid, spec) in six.iteritems(dict_value(v)): for (fontid, spec) in six.iteritems(dict_value(v)):
objid = None objid = None
@ -796,7 +798,7 @@ class PDFPageInterpreter(object):
if settings.STRICT: if settings.STRICT:
raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
return return
logging.info('Processing xobj: %r', xobj) log.info('Processing xobj: %r', xobj)
subtype = xobj.get('Subtype') subtype = xobj.get('Subtype')
if subtype is LITERAL_FORM and 'BBox' in xobj: if subtype is LITERAL_FORM and 'BBox' in xobj:
interpreter = self.dup() interpreter = self.dup()
@ -820,7 +822,7 @@ class PDFPageInterpreter(object):
return return
def process_page(self, page): def process_page(self, page):
logging.info('Processing page: %r', page) log.info('Processing page: %r', page)
(x0, y0, x1, y1) = page.mediabox (x0, y0, x1, y1) = page.mediabox
if page.rotate == 90: if page.rotate == 90:
ctm = (0, -1, 1, 0, -y0, x1) ctm = (0, -1, 1, 0, -y0, x1)
@ -839,8 +841,8 @@ class PDFPageInterpreter(object):
# Render the content streams. # Render the content streams.
# This method may be called recursively. # This method may be called recursively.
def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY): def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY):
logging.info('render_contents: resources=%r, streams=%r, ctm=%r', log.info('render_contents: resources=%r, streams=%r, ctm=%r',
resources, streams, ctm) resources, streams, ctm)
self.init_resources(resources) self.init_resources(resources)
self.init_state(ctm) self.init_state(ctm)
self.execute(list_value(streams)) self.execute(list_value(streams))
@ -865,11 +867,11 @@ class PDFPageInterpreter(object):
nargs = six.get_function_code(func).co_argcount-1 nargs = six.get_function_code(func).co_argcount-1
if nargs: if nargs:
args = self.pop(nargs) args = self.pop(nargs)
logging.debug('exec: %s %r', name, args) log.debug('exec: %s %r', name, args)
if len(args) == nargs: if len(args) == nargs:
func(*args) func(*args)
else: else:
logging.debug('exec: %s', name) log.debug('exec: %s', name)
func() func()
else: else:
if settings.STRICT: if settings.STRICT:

View File

@ -10,7 +10,9 @@ from .pdfparser import PDFParser
from .pdfdocument import PDFDocument from .pdfdocument import PDFDocument
from .pdfdocument import PDFTextExtractionNotAllowed from .pdfdocument import PDFTextExtractionNotAllowed
import six # Python 2+3 compatibility import six # Python 2+3 compatibility
log = logging.getLogger(__name__)
# some predefined literals and keywords. # some predefined literals and keywords.
LITERAL_PAGE = LIT('Page') LITERAL_PAGE = LIT('Page')
@ -87,12 +89,12 @@ class PDFPage(object):
if k in klass.INHERITABLE_ATTRS and k not in tree: if k in klass.INHERITABLE_ATTRS and k not in tree:
tree[k] = v tree[k] = v
if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree: if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree:
logging.info('Pages: Kids=%r', tree['Kids']) log.info('Pages: Kids=%r', tree['Kids'])
for c in list_value(tree['Kids']): for c in list_value(tree['Kids']):
for x in search(c, tree): for x in search(c, tree):
yield x yield x
elif tree.get('Type') is LITERAL_PAGE: elif tree.get('Type') is LITERAL_PAGE:
logging.info('Page: %r', tree) log.info('Page: %r', tree)
yield (objid, tree) yield (objid, tree)
pages = False pages = False
if 'Pages' in document.catalog: if 'Pages' in document.catalog:

View File

@ -12,6 +12,8 @@ from .pdftypes import PDFObjRef
from .pdftypes import int_value from .pdftypes import int_value
from .pdftypes import dict_value from .pdftypes import dict_value
log = logging.getLogger(__name__)
## Exceptions ## Exceptions
## ##
@ -120,7 +122,7 @@ class PDFParser(PSStackParser):
data += line data += line
self.seek(pos+objlen) self.seek(pos+objlen)
# XXX limit objlen not to exceed object boundary # XXX limit objlen not to exceed object boundary
logging.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos, objlen, dic, data[:10]) log.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos, objlen, dic, data[:10])
obj = PDFStream(dic, data, self.doc.decipher) obj = PDFStream(dic, data, self.doc.decipher)
self.push((pos, obj)) self.push((pos, obj))

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import zlib import zlib
import logging
from .lzw import lzwdecode from .lzw import lzwdecode
from .ascii85 import ascii85decode from .ascii85 import ascii85decode
from .ascii85 import asciihexdecode from .ascii85 import asciihexdecode
@ -14,6 +15,8 @@ from .utils import isnumber
import six #Python 2+3 compatibility import six #Python 2+3 compatibility
log = logging.getLogger(__name__)
LITERAL_CRYPT = LIT('Crypt') LITERAL_CRYPT = LIT('Crypt')
# Abbreviation of Filter names in PDF 4.8.6. "Inline Images" # Abbreviation of Filter names in PDF 4.8.6. "Inline Images"
@ -161,8 +164,7 @@ def dict_value(x):
x = resolve1(x) x = resolve1(x)
if not isinstance(x, dict): if not isinstance(x, dict):
if settings.STRICT: if settings.STRICT:
import logging log.error('PDFTypeError : Dict required: %r', x)
logging.error('PDFTypeError : Dict required: %r', x)
raise PDFTypeError('Dict required: %r' % x) raise PDFTypeError('Dict required: %r' % x)
return {} return {}
return x return x

View File

@ -4,10 +4,13 @@
import re import re
import logging import logging
import six # Python 2+3 compatibility import six # Python 2+3 compatibility
from . import settings from . import settings
log = logging.getLogger(__name__)
def bytesindex(s,i,j=None): def bytesindex(s,i,j=None):
"""implements s[i], s[i:], s[i:j] for Python2 and Python3""" """implements s[i], s[i:], s[i:j] for Python2 and Python3"""
if i<0 : i=len(s)+i if i<0 : i=len(s)+i
@ -208,14 +211,14 @@ class PSBaseParser(object):
if not pos: if not pos:
pos = self.bufpos+self.charpos pos = self.bufpos+self.charpos
self.fp.seek(pos) self.fp.seek(pos)
logging.info('poll(%d): %r', pos, self.fp.read(n)) log.info('poll(%d): %r', pos, self.fp.read(n))
self.fp.seek(pos0) self.fp.seek(pos0)
return return
def seek(self, pos): def seek(self, pos):
"""Seeks the parser to the given position. """Seeks the parser to the given position.
""" """
logging.debug('seek: %r', pos) log.debug('seek: %r', pos)
self.fp.seek(pos) self.fp.seek(pos)
# reset the status for nextline() # reset the status for nextline()
self.bufpos = pos self.bufpos = pos
@ -265,7 +268,7 @@ class PSBaseParser(object):
else: else:
linebuf += bytesindex(self.buf,self.charpos,-1) linebuf += bytesindex(self.buf,self.charpos,-1)
self.charpos = len(self.buf) self.charpos = len(self.buf)
logging.debug('nextline: %r, %r', linepos, linebuf) log.debug('nextline: %r, %r', linepos, linebuf)
return (linepos, linebuf) return (linepos, linebuf)
@ -508,7 +511,7 @@ class PSBaseParser(object):
self.fillbuf() self.fillbuf()
self.charpos = self._parse1(self.buf, self.charpos) self.charpos = self._parse1(self.buf, self.charpos)
token = self._tokens.pop(0) token = self._tokens.pop(0)
logging.debug('nexttoken: %r', token) log.debug('nexttoken: %r', token)
return token return token
@ -549,16 +552,16 @@ class PSStackParser(PSBaseParser):
def add_results(self, *objs): def add_results(self, *objs):
try: try:
logging.debug('add_results: %r', objs) log.debug('add_results: %r', objs)
except: except:
logging.debug('add_results: (unprintable object)') log.debug('add_results: (unprintable object)')
self.results.extend(objs) self.results.extend(objs)
return return
def start_type(self, pos, type): def start_type(self, pos, type):
self.context.append((pos, self.curtype, self.curstack)) self.context.append((pos, self.curtype, self.curstack))
(self.curtype, self.curstack) = (type, []) (self.curtype, self.curstack) = (type, [])
logging.debug('start_type: pos=%r, type=%r', pos, type) log.debug('start_type: pos=%r, type=%r', pos, type)
return return
def end_type(self, type): def end_type(self, type):
@ -566,7 +569,7 @@ class PSStackParser(PSBaseParser):
raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type)) raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
objs = [obj for (_, obj) in self.curstack] objs = [obj for (_, obj) in self.curstack]
(pos, self.curtype, self.curstack) = self.context.pop() (pos, self.curtype, self.curstack) = self.context.pop()
logging.debug('end_type: pos=%r, type=%r, objs=%r', pos, type, objs) log.debug('end_type: pos=%r, type=%r, objs=%r', pos, type, objs)
return (pos, objs) return (pos, objs)
def do_keyword(self, pos, token): def do_keyword(self, pos, token):
@ -620,10 +623,10 @@ class PSStackParser(PSBaseParser):
if settings.STRICT: if settings.STRICT:
raise raise
elif isinstance(token,PSKeyword): elif isinstance(token,PSKeyword):
logging.debug('do_keyword: pos=%r, token=%r, stack=%r', pos, token, self.curstack) log.debug('do_keyword: pos=%r, token=%r, stack=%r', pos, token, self.curstack)
self.do_keyword(pos, token) self.do_keyword(pos, token)
else: else:
logging.error('unknown token: pos=%r, token=%r, stack=%r', pos, token, self.curstack) log.error('unknown token: pos=%r, token=%r, stack=%r', pos, token, self.curstack)
self.do_keyword(pos, token) self.do_keyword(pos, token)
raise raise
if self.context: if self.context:
@ -632,7 +635,7 @@ class PSStackParser(PSBaseParser):
self.flush() self.flush()
obj = self.results.pop(0) obj = self.results.pop(0)
try: try:
logging.debug('nextobject: %r', obj) log.debug('nextobject: %r', obj)
except: except:
logging.debug('nextobject: (unprintable object)') log.debug('nextobject: (unprintable object)')
return obj return obj

View File

@ -8,9 +8,9 @@
import sys import sys
import urllib import urllib
from httplib import responses from six.moves.http_client import responses
from BaseHTTPServer import HTTPServer from six.moves.BaseHTTPServer import HTTPServer
from SimpleHTTPServer import SimpleHTTPRequestHandler from six.moves.SimpleHTTPServer import SimpleHTTPRequestHandler
## WebAppHandler ## WebAppHandler
## ##