avoid string formatting when not logging

pull/2/head
cybjit 2014-09-11 23:40:18 +02:00
parent 01821c7d1e
commit 39942b6642
8 changed files with 38 additions and 38 deletions

View File

@ -91,7 +91,7 @@ class CMap(CMapBase):
return return
def decode(self, code): def decode(self, code):
logging.debug('decode: %r, %r' % (self, code)) logging.debug('decode: %r, %r', self, code)
d = self.code2cid d = self.code2cid
for i in six.iterbytes(code): for i in six.iterbytes(code):
if i in d: if i in d:
@ -141,7 +141,7 @@ class UnicodeMap(CMapBase):
return '<UnicodeMap: %s>' % self.attrs.get('CMapName') return '<UnicodeMap: %s>' % self.attrs.get('CMapName')
def get_unichr(self, cid): def get_unichr(self, cid):
logging.debug('get_unichr: %r, %r' % (self, cid)) logging.debug('get_unichr: %r, %r', self, cid)
return self.cid2unichr[cid] return self.cid2unichr[cid]
def dump(self, out=sys.stdout): def dump(self, out=sys.stdout):
@ -228,7 +228,7 @@ class CMapDB(object):
@classmethod @classmethod
def _load_data(klass, name): def _load_data(klass, name):
filename = '%s.pickle.gz' % name filename = '%s.pickle.gz' % name
logging.info('loading: %r' % name) logging.info('loading: %r', name)
cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'), cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'),
os.path.join(os.path.dirname(__file__), 'cmap'),) os.path.join(os.path.dirname(__file__), 'cmap'),)
for directory in cmap_paths: for directory in cmap_paths:

View File

@ -117,7 +117,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
return item.adv return item.adv
def handle_undefined_char(self, font, cid): def handle_undefined_char(self, font, cid):
logging.info('undefined: %r, %r' % (font, cid)) logging.info('undefined: %r, %r', font, cid)
return '(cid:%d)' % cid return '(cid:%d)' % cid
def receive_layout(self, ltpage): def receive_layout(self, ltpage):

View File

@ -127,7 +127,7 @@ class PDFXRef(PDFBaseXRef):
if use != b'n': if use != b'n':
continue continue
self.offsets[objid] = (None, long(pos) if six.PY2 else int(pos), int(genno)) self.offsets[objid] = (None, long(pos) if six.PY2 else int(pos), int(genno))
logging.info('xref objects: %r' % self.offsets) logging.info('xref objects: %r', self.offsets)
self.load_trailer(parser) self.load_trailer(parser)
return return
@ -142,7 +142,7 @@ class PDFXRef(PDFBaseXRef):
raise PDFNoValidXRef('Unexpected EOF - file corrupted') raise PDFNoValidXRef('Unexpected EOF - file corrupted')
(_, dic) = x[0] (_, dic) = x[0]
self.trailer.update(dict_value(dic)) self.trailer.update(dict_value(dic))
logging.debug('trailer=%r'%self.trailer) logging.debug('trailer=%r', self.trailer)
return return
def get_trailer(self): def get_trailer(self):
@ -177,7 +177,7 @@ class PDFXRefFallback(PDFXRef):
if line.startswith(b'trailer'): if line.startswith(b'trailer'):
parser.seek(pos) parser.seek(pos)
self.load_trailer(parser) self.load_trailer(parser)
logging.info('trailer: %r' % self.get_trailer()) logging.info('trailer: %r', self.trailer)
break break
if six.PY3: if six.PY3:
line=line.decode('latin-1') #default pdf encoding line=line.decode('latin-1') #default pdf encoding
@ -244,9 +244,9 @@ class PDFXRefStream(PDFBaseXRef):
self.data = stream.get_data() self.data = stream.get_data()
self.entlen = self.fl1+self.fl2+self.fl3 self.entlen = self.fl1+self.fl2+self.fl3
self.trailer = stream.attrs self.trailer = stream.attrs
logging.info('xref stream: objid=%s, fields=%d,%d,%d' % logging.info('xref stream: objid=%s, fields=%d,%d,%d',
(', '.join(map(repr, self.ranges)), ', '.join(map(repr, self.ranges)),
self.fl1, self.fl2, self.fl3)) self.fl1, self.fl2, self.fl3)
return return
def get_trailer(self): def get_trailer(self):
@ -655,7 +655,7 @@ class PDFDocument(object):
assert objid != 0 assert objid != 0
if not self.xrefs: if not self.xrefs:
raise PDFException('PDFDocument is not initialized') raise PDFException('PDFDocument is not initialized')
logging.debug('getobj: objid=%r' % objid) logging.debug('getobj: objid=%r', objid)
if objid in self._cached_objs: if objid in self._cached_objs:
(obj, genno) = self._cached_objs[objid] (obj, genno) = self._cached_objs[objid]
else: else:
@ -680,7 +680,7 @@ class PDFDocument(object):
continue continue
else: else:
raise PDFObjectNotFound(objid) raise PDFObjectNotFound(objid)
logging.debug('register: objid=%r: %r' % (objid, obj)) logging.debug('register: objid=%r: %r', objid, obj)
if self.caching: if self.caching:
self._cached_objs[objid] = (obj, genno) self._cached_objs[objid] = (obj, genno)
return obj return obj
@ -753,14 +753,14 @@ class PDFDocument(object):
prev = None prev = None
for line in parser.revreadlines(): for line in parser.revreadlines():
line = line.strip() line = line.strip()
logging.debug('find_xref: %r' % line) logging.debug('find_xref: %r', line)
if line == b'startxref': if line == b'startxref':
break break
if line: if line:
prev = line prev = line
else: else:
raise PDFNoValidXRef('Unexpected EOF') raise PDFNoValidXRef('Unexpected EOF')
logging.info('xref found: pos=%r' % prev) logging.info('xref found: pos=%r', prev)
return long(prev) if six.PY2 else int(prev) return long(prev) if six.PY2 else int(prev)
# read xref table # read xref table
@ -772,7 +772,7 @@ class PDFDocument(object):
(pos, token) = parser.nexttoken() (pos, token) = parser.nexttoken()
except PSEOF: except PSEOF:
raise PDFNoValidXRef('Unexpected EOF') raise PDFNoValidXRef('Unexpected EOF')
logging.info('read_xref_from: start=%d, token=%r' % (start, token)) logging.info('read_xref_from: start=%d, token=%r', start, token)
if isinstance(token, int): if isinstance(token, int):
# XRefStream: PDF-1.5 # XRefStream: PDF-1.5
parser.seek(pos) parser.seek(pos)
@ -786,7 +786,7 @@ class PDFDocument(object):
xref.load(parser) xref.load(parser)
xrefs.append(xref) xrefs.append(xref)
trailer = xref.get_trailer() trailer = xref.get_trailer()
logging.info('trailer: %r' % trailer) logging.info('trailer: %r', trailer)
if 'XRefStm' in trailer: if 'XRefStm' in trailer:
pos = int_value(trailer['XRefStm']) pos = int_value(trailer['XRefStm'])
self.read_xref_from(parser, pos, xrefs) self.read_xref_from(parser, pos, xrefs)

View File

@ -166,7 +166,7 @@ class PDFResourceManager(object):
if objid and objid in self._cached_fonts: if objid and objid in self._cached_fonts:
font = self._cached_fonts[objid] font = self._cached_fonts[objid]
else: else:
logging.info('get_font: create: objid=%r, spec=%r' % (objid, spec)) logging.info('get_font: create: objid=%r, spec=%r', objid, spec)
if STRICT: if STRICT:
if spec['Type'] is not LITERAL_FONT: if spec['Type'] is not LITERAL_FONT:
raise PDFFontError('Type is not /Font') raise PDFFontError('Type is not /Font')
@ -340,7 +340,7 @@ class PDFPageInterpreter(object):
else: else:
return PREDEFINED_COLORSPACE.get(name) return PREDEFINED_COLORSPACE.get(name)
for (k, v) in six.iteritems(dict_value(resources)): for (k, v) in six.iteritems(dict_value(resources)):
logging.debug('Resource: %r: %r' % (k, v)) logging.debug('Resource: %r: %r', k, v)
if k == 'Font': if k == 'Font':
for (fontid, spec) in six.iteritems(dict_value(v)): for (fontid, spec) in six.iteritems(dict_value(v)):
objid = None objid = None
@ -796,7 +796,7 @@ class PDFPageInterpreter(object):
if STRICT: if STRICT:
raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
return return
logging.info('Processing xobj: %r' % xobj) logging.info('Processing xobj: %r', xobj)
subtype = xobj.get('Subtype') subtype = xobj.get('Subtype')
if subtype is LITERAL_FORM and 'BBox' in xobj: if subtype is LITERAL_FORM and 'BBox' in xobj:
interpreter = self.dup() interpreter = self.dup()
@ -819,7 +819,7 @@ class PDFPageInterpreter(object):
return return
def process_page(self, page): def process_page(self, page):
logging.info('Processing page: %r' % page) logging.info('Processing page: %r', page)
(x0, y0, x1, y1) = page.mediabox (x0, y0, x1, y1) = page.mediabox
if page.rotate == 90: if page.rotate == 90:
ctm = (0, -1, 1, 0, -y0, x1) ctm = (0, -1, 1, 0, -y0, x1)
@ -838,8 +838,8 @@ class PDFPageInterpreter(object):
# Render the content streams. # Render the content streams.
# This method may be called recursively. # This method may be called recursively.
def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY): def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY):
logging.info('render_contents: resources=%r, streams=%r, ctm=%r' % logging.info('render_contents: resources=%r, streams=%r, ctm=%r',
(resources, streams, ctm)) resources, streams, ctm)
self.init_resources(resources) self.init_resources(resources)
self.init_state(ctm) self.init_state(ctm)
self.execute(list_value(streams)) self.execute(list_value(streams))
@ -864,11 +864,11 @@ class PDFPageInterpreter(object):
nargs = six.get_function_code(func).co_argcount-1 nargs = six.get_function_code(func).co_argcount-1
if nargs: if nargs:
args = self.pop(nargs) args = self.pop(nargs)
logging.debug('exec: %s %r' % (name, args)) logging.debug('exec: %s %r', name, args)
if len(args) == nargs: if len(args) == nargs:
func(*args) func(*args)
else: else:
logging.debug('exec: %s' % name) logging.debug('exec: %s', name)
func() func()
else: else:
if STRICT: if STRICT:

View File

@ -87,12 +87,12 @@ class PDFPage(object):
if k in klass.INHERITABLE_ATTRS and k not in tree: if k in klass.INHERITABLE_ATTRS and k not in tree:
tree[k] = v tree[k] = v
if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree: if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree:
logging.info('Pages: Kids=%r' % tree['Kids']) logging.info('Pages: Kids=%r', tree['Kids'])
for c in list_value(tree['Kids']): for c in list_value(tree['Kids']):
for x in search(c, tree): for x in search(c, tree):
yield x yield x
elif tree.get('Type') is LITERAL_PAGE: elif tree.get('Type') is LITERAL_PAGE:
logging.info('Page: %r' % tree) logging.info('Page: %r', tree)
yield (objid, tree) yield (objid, tree)
pages = False pages = False
if 'Pages' in document.catalog: if 'Pages' in document.catalog:

View File

@ -120,7 +120,7 @@ class PDFParser(PSStackParser):
data += line data += line
self.seek(pos+objlen) self.seek(pos+objlen)
# XXX limit objlen not to exceed object boundary # XXX limit objlen not to exceed object boundary
logging.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % (pos, objlen, dic, data[:10])) logging.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos, objlen, dic, data[:10])
obj = PDFStream(dic, data, self.doc.decipher) obj = PDFStream(dic, data, self.doc.decipher)
self.push((pos, obj)) self.push((pos, obj))

View File

@ -162,7 +162,7 @@ def dict_value(x):
if not isinstance(x, dict): if not isinstance(x, dict):
if STRICT: if STRICT:
import logging import logging
logging.error('PDFTypeError : Dict required: %r' % x) logging.error('PDFTypeError : Dict required: %r', x)
raise PDFTypeError('Dict required: %r' % x) raise PDFTypeError('Dict required: %r' % x)
return {} return {}
return x return x

View File

@ -207,14 +207,14 @@ class PSBaseParser(object):
if not pos: if not pos:
pos = self.bufpos+self.charpos pos = self.bufpos+self.charpos
self.fp.seek(pos) self.fp.seek(pos)
logging.info('poll(%d): %r' % (pos, self.fp.read(n))) logging.info('poll(%d): %r', pos, self.fp.read(n))
self.fp.seek(pos0) self.fp.seek(pos0)
return return
def seek(self, pos): def seek(self, pos):
"""Seeks the parser to the given position. """Seeks the parser to the given position.
""" """
logging.debug('seek: %r' % pos) logging.debug('seek: %r', pos)
self.fp.seek(pos) self.fp.seek(pos)
# reset the status for nextline() # reset the status for nextline()
self.bufpos = pos self.bufpos = pos
@ -264,7 +264,7 @@ class PSBaseParser(object):
else: else:
linebuf += bytesindex(self.buf,self.charpos,-1) linebuf += bytesindex(self.buf,self.charpos,-1)
self.charpos = len(self.buf) self.charpos = len(self.buf)
logging.debug('nextline: %r, %r' % (linepos, linebuf)) logging.debug('nextline: %r, %r', linepos, linebuf)
return (linepos, linebuf) return (linepos, linebuf)
@ -507,7 +507,7 @@ class PSBaseParser(object):
self.fillbuf() self.fillbuf()
self.charpos = self._parse1(self.buf, self.charpos) self.charpos = self._parse1(self.buf, self.charpos)
token = self._tokens.pop(0) token = self._tokens.pop(0)
logging.debug('nexttoken: (%r:%r)' % token) logging.debug('nexttoken: %r', token)
return token return token
@ -548,7 +548,7 @@ class PSStackParser(PSBaseParser):
def add_results(self, *objs): def add_results(self, *objs):
try: try:
logging.debug('add_results: %s' % repr(objs)) logging.debug('add_results: %r', objs)
except: except:
logging.debug('add_results: (unprintable object)') logging.debug('add_results: (unprintable object)')
self.results.extend(objs) self.results.extend(objs)
@ -557,7 +557,7 @@ class PSStackParser(PSBaseParser):
def start_type(self, pos, type): def start_type(self, pos, type):
self.context.append((pos, self.curtype, self.curstack)) self.context.append((pos, self.curtype, self.curstack))
(self.curtype, self.curstack) = (type, []) (self.curtype, self.curstack) = (type, [])
logging.debug('start_type: pos=%r, type=%r' % (pos, type)) logging.debug('start_type: pos=%r, type=%r', pos, type)
return return
def end_type(self, type): def end_type(self, type):
@ -565,7 +565,7 @@ class PSStackParser(PSBaseParser):
raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type)) raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
objs = [obj for (_, obj) in self.curstack] objs = [obj for (_, obj) in self.curstack]
(pos, self.curtype, self.curstack) = self.context.pop() (pos, self.curtype, self.curstack) = self.context.pop()
logging.debug('end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs)) logging.debug('end_type: pos=%r, type=%r, objs=%r', pos, type, objs)
return (pos, objs) return (pos, objs)
def do_keyword(self, pos, token): def do_keyword(self, pos, token):
@ -619,10 +619,10 @@ class PSStackParser(PSBaseParser):
if STRICT: if STRICT:
raise raise
elif isinstance(token,PSKeyword): elif isinstance(token,PSKeyword):
logging.debug('do_keyword: pos=%r, token=%r, stack=%r' % (pos, token, self.curstack)) logging.debug('do_keyword: pos=%r, token=%r, stack=%r', pos, token, self.curstack)
self.do_keyword(pos, token) self.do_keyword(pos, token)
else: else:
logging.error('unknown token: pos=%r, token=%r, stack=%r' % (pos, token, self.curstack)) logging.error('unknown token: pos=%r, token=%r, stack=%r', pos, token, self.curstack)
self.do_keyword(pos, token) self.do_keyword(pos, token)
raise raise
if self.context: if self.context:
@ -631,7 +631,7 @@ class PSStackParser(PSBaseParser):
self.flush() self.flush()
obj = self.results.pop(0) obj = self.results.pop(0)
try: try:
logging.debug('nextobject: %s' % repr(obj)) logging.debug('nextobject: %r', obj)
except: except:
logging.debug('nextobject: (unprintable object)') logging.debug('nextobject: (unprintable object)')
return obj return obj