Use logging module instead of print.

pull/1/head
Yusuke Shinyama 2014-06-14 12:00:49 +09:00
parent fb3f2d9629
commit 8e14ebf4e1
8 changed files with 49 additions and 41 deletions

View File

@@ -17,6 +17,7 @@ import os.path
 import gzip
 import cPickle as pickle
 import struct
+import logging
 from psparser import PSStackParser
 from psparser import PSSyntaxError, PSEOF
 from psparser import PSLiteral
@@ -84,7 +85,7 @@ class CMap(CMapBase):
 def decode(self, code):
 if self.debug:
-print >>sys.stderr, 'decode: %r, %r' % (self, code)
+logging.debug('decode: %r, %r' % (self, code))
 d = self.code2cid
 for c in code:
 c = ord(c)
@@ -136,7 +137,7 @@ class UnicodeMap(CMapBase):
 def get_unichr(self, cid):
 if self.debug:
-print >>sys.stderr, 'get_unichr: %r, %r' % (self, cid)
+logging.debug('get_unichr: %r, %r' % (self, cid))
 return self.cid2unichr[cid]
 def dump(self, out=sys.stdout):
@@ -225,7 +226,7 @@ class CMapDB(object):
 def _load_data(klass, name):
 filename = '%s.pickle.gz' % name
 if klass.debug:
-print >>sys.stderr, 'loading:', name
+logging.info('loading: %r' % name)
 cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'),
 os.path.join(os.path.dirname(__file__), 'cmap'),)
 for directory in cmap_paths:

View File

@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 import sys
+import logging
 from pdfdevice import PDFTextDevice
 from pdffont import PDFUnicodeNotDefined
 from layout import LTContainer, LTPage, LTText, LTLine, LTRect, LTCurve
@@ -104,7 +105,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
 def handle_undefined_char(self, font, cid):
 if self.debug:
-print >>sys.stderr, 'undefined: %r, %r' % (font, cid)
+logging.info('undefined: %r, %r' % (font, cid))
 return '(cid:%d)' % cid
 def receive_layout(self, ltpage):

View File

@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 import sys
+import logging
 try:
 from cStringIO import StringIO
 except ImportError:
@@ -94,7 +95,7 @@ class LZWDecoder(object):
 break
 yield x
 if self.debug:
-print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' %
+logging.debug('nbits=%d, code=%d, output=%r, table=%r' %
 (self.nbits, code, x, self.table[258:]))
 return

View File

@@ -2,6 +2,7 @@
 import sys
 import re
 import struct
+import logging
 try:
 import hashlib as md5
 except ImportError:
@@ -116,7 +117,7 @@ class PDFXRef(PDFBaseXRef):
 continue
 self.offsets[objid] = (None, long(pos), int(genno))
 if 1 <= debug:
-print >>sys.stderr, 'xref objects:', self.offsets
+logging.info('xref objects: %r' % self.offsets)
 self.load_trailer(parser)
 return
@@ -168,7 +169,7 @@ class PDFXRefFallback(PDFXRef):
 parser.seek(pos)
 self.load_trailer(parser)
 if 1 <= debug:
-print >>sys.stderr, 'trailer: %r' % self.get_trailer()
+logging.info('trailer: %r' % self.get_trailer())
 break
 m = self.PDFOBJ_CUE.match(line)
 if not m:
@@ -234,7 +235,7 @@ class PDFXRefStream(PDFBaseXRef):
 self.entlen = self.fl1+self.fl2+self.fl3
 self.trailer = stream.attrs
 if 1 <= debug:
-print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' %
+logging.info('xref stream: objid=%s, fields=%d,%d,%d' %
 (', '.join(map(repr, self.ranges)),
 self.fl1, self.fl2, self.fl3))
 return
@@ -635,7 +636,7 @@ class PDFDocument(object):
 if not self.xrefs:
 raise PDFException('PDFDocument is not initialized')
 if 2 <= self.debug:
-print >>sys.stderr, 'getobj: objid=%r' % (objid)
+logging.debug('getobj: objid=%r' % objid)
 if objid in self._cached_objs:
 (obj, genno) = self._cached_objs[objid]
 else:
@@ -661,7 +662,7 @@ class PDFDocument(object):
 else:
 raise PDFObjectNotFound(objid)
 if 2 <= self.debug:
-print >>sys.stderr, 'register: objid=%r: %r' % (objid, obj)
+logging.debug('register: objid=%r: %r' % (objid, obj))
 if self.caching:
 self._cached_objs[objid] = (obj, genno)
 return obj
@@ -735,7 +736,7 @@ class PDFDocument(object):
 for line in parser.revreadlines():
 line = line.strip()
 if 2 <= self.debug:
-print >>sys.stderr, 'find_xref: %r' % line
+logging.debug('find_xref: %r' % line)
 if line == 'startxref':
 break
 if line:
@@ -743,7 +744,7 @@ class PDFDocument(object):
 else:
 raise PDFNoValidXRef('Unexpected EOF')
 if 1 <= self.debug:
-print >>sys.stderr, 'xref found: pos=%r' % prev
+logging.info('xref found: pos=%r' % prev)
 return long(prev)
 # read xref table
@@ -755,8 +756,8 @@ class PDFDocument(object):
 (pos, token) = parser.nexttoken()
 except PSEOF:
 raise PDFNoValidXRef('Unexpected EOF')
-if 2 <= self.debug:
-print >>sys.stderr, 'read_xref_from: start=%d, token=%r' % (start, token)
+if 1 <= self.debug:
+logging.info('read_xref_from: start=%d, token=%r' % (start, token))
 if isinstance(token, int):
 # XRefStream: PDF-1.5
 parser.seek(pos)
@@ -771,7 +772,7 @@ class PDFDocument(object):
 xrefs.append(xref)
 trailer = xref.get_trailer()
 if 1 <= self.debug:
-print >>sys.stderr, 'trailer: %r' % trailer
+logging.info('trailer: %r' % trailer)
 if 'XRefStm' in trailer:
 pos = int_value(trailer['XRefStm'])
 self.read_xref_from(parser, pos, xrefs)

View File

@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 import sys
 import re
+import logging
 try:
 from cStringIO import StringIO
 except ImportError:
@@ -160,8 +161,8 @@ class PDFResourceManager(object):
 if objid and objid in self._cached_fonts:
 font = self._cached_fonts[objid]
 else:
-if 2 <= self.debug:
-print >>sys.stderr, 'get_font: create: objid=%r, spec=%r' % (objid, spec)
+if 1 <= self.debug:
+logging.info('get_font: create: objid=%r, spec=%r' % (objid, spec))
 if STRICT:
 if spec['Type'] is not LITERAL_FONT:
 raise PDFFontError('Type is not /Font')
@@ -337,7 +338,7 @@ class PDFPageInterpreter(object):
 return PREDEFINED_COLORSPACE.get(name)
 for (k, v) in dict_value(resources).iteritems():
 if 2 <= self.debug:
-print >>sys.stderr, 'Resource: %r: %r' % (k, v)
+logging.debug('Resource: %r: %r' % (k, v))
 if k == 'Font':
 for (fontid, spec) in dict_value(v).iteritems():
 objid = None
@@ -794,7 +795,7 @@ class PDFPageInterpreter(object):
 raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
 return
 if 1 <= self.debug:
-print >>sys.stderr, 'Processing xobj: %r' % xobj
+logging.info('Processing xobj: %r' % xobj)
 subtype = xobj.get('Subtype')
 if subtype is LITERAL_FORM and 'BBox' in xobj:
 interpreter = self.dup()
@@ -818,7 +819,7 @@ class PDFPageInterpreter(object):
 def process_page(self, page):
 if 1 <= self.debug:
-print >>sys.stderr, 'Processing page: %r' % page
+logging.info('Processing page: %r' % page)
 (x0, y0, x1, y1) = page.mediabox
 if page.rotate == 90:
 ctm = (0, -1, 1, 0, -y0, x1)
@@ -838,7 +839,7 @@ class PDFPageInterpreter(object):
 # This method may be called recursively.
 def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY):
 if 1 <= self.debug:
-print >>sys.stderr, ('render_contents: resources=%r, streams=%r, ctm=%r' %
+logging.info('render_contents: resources=%r, streams=%r, ctm=%r' %
 (resources, streams, ctm))
 self.init_resources(resources)
 self.init_state(ctm)
@@ -865,12 +866,12 @@ class PDFPageInterpreter(object):
 if nargs:
 args = self.pop(nargs)
 if 2 <= self.debug:
-print >>sys.stderr, 'exec: %s %r' % (name, args)
+logging.debug('exec: %s %r' % (name, args))
 if len(args) == nargs:
 func(*args)
 else:
 if 2 <= self.debug:
-print >>sys.stderr, 'exec: %s' % (name)
+logging.debug('exec: %s' % name)
 func()
 else:
 if STRICT:

View File

@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 import sys
+import logging
 from psparser import LIT
 from pdftypes import PDFObjectNotFound
 from pdftypes import resolve1
@@ -86,13 +87,13 @@ class PDFPage(object):
 tree[k] = v
 if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree:
 if 1 <= debug:
-print >>sys.stderr, 'Pages: Kids=%r' % tree['Kids']
+logging.info('Pages: Kids=%r' % tree['Kids'])
 for c in list_value(tree['Kids']):
 for x in search(c, tree):
 yield x
 elif tree.get('Type') is LITERAL_PAGE:
 if 1 <= debug:
-print >>sys.stderr, 'Page: %r' % tree
+logging.info('Page: %r' % tree)
 yield (objid, tree)
 pages = False
 if 'Pages' in document.catalog:

View File

@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 import sys
+import logging
 try:
 from cStringIO import StringIO
 except ImportError:
@@ -121,8 +122,8 @@ class PDFParser(PSStackParser):
 self.seek(pos+objlen)
 # XXX limit objlen not to exceed object boundary
 if 2 <= self.debug:
-print >>sys.stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
-(pos, objlen, dic, data[:10])
+logging.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
+(pos, objlen, dic, data[:10]))
 obj = PDFStream(dic, data, self.doc.decipher)
 self.push((pos, obj))

View File

@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 import sys
 import re
+import logging
 from utils import choplist
 STRICT = 0
@@ -184,7 +185,7 @@ class PSBaseParser(object):
 if not pos:
 pos = self.bufpos+self.charpos
 self.fp.seek(pos)
-print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
+logging.info('poll(%d): %r' % (pos, self.fp.read(n)))
 self.fp.seek(pos0)
 return
@@ -192,7 +193,7 @@ class PSBaseParser(object):
 """Seeks the parser to the given position.
 """
 if 2 <= self.debug:
-print >>sys.stderr, 'seek: %r' % pos
+logging.debug('seek: %r' % pos)
 self.fp.seek(pos)
 # reset the status for nextline()
 self.bufpos = pos
@@ -243,7 +244,7 @@ class PSBaseParser(object):
 linebuf += self.buf[self.charpos:]
 self.charpos = len(self.buf)
 if 2 <= self.debug:
-print >>sys.stderr, 'nextline: %r' % ((linepos, linebuf),)
+logging.debug('nextline: %r, %r' % (linepos, linebuf))
 return (linepos, linebuf)
 def revreadlines(self):
@@ -483,7 +484,7 @@ class PSBaseParser(object):
 self.charpos = self._parse1(self.buf, self.charpos)
 token = self._tokens.pop(0)
 if 2 <= self.debug:
-print >>sys.stderr, 'nexttoken: %r' % (token,)
+logging.debug('nexttoken: %r' % token)
 return token
@@ -524,7 +525,7 @@ class PSStackParser(PSBaseParser):
 def add_results(self, *objs):
 if 2 <= self.debug:
-print >>sys.stderr, 'add_results: %r' % (objs,)
+logging.debug('add_results: %r' % objs)
 self.results.extend(objs)
 return
@@ -532,7 +533,7 @@ class PSStackParser(PSBaseParser):
 self.context.append((pos, self.curtype, self.curstack))
 (self.curtype, self.curstack) = (type, [])
 if 2 <= self.debug:
-print >>sys.stderr, 'start_type: pos=%r, type=%r' % (pos, type)
+logging.debug('start_type: pos=%r, type=%r' % (pos, type))
 return
 def end_type(self, type):
@@ -541,7 +542,7 @@ class PSStackParser(PSBaseParser):
 objs = [obj for (_, obj) in self.curstack]
 (pos, self.curtype, self.curstack) = self.context.pop()
 if 2 <= self.debug:
-print >>sys.stderr, 'end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs)
+logging.debug('end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs))
 return (pos, objs)
 def do_keyword(self, pos, token):
@@ -596,8 +597,8 @@ class PSStackParser(PSBaseParser):
 raise
 else:
 if 2 <= self.debug:
-print >>sys.stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \
-(pos, token, self.curstack)
+logging.debug('do_keyword: pos=%r, token=%r, stack=%r' % \
+(pos, token, self.curstack))
 self.do_keyword(pos, token)
 if self.context:
 continue
@@ -605,7 +606,7 @@ class PSStackParser(PSBaseParser):
 self.flush()
 obj = self.results.pop(0)
 if 2 <= self.debug:
-print >>sys.stderr, 'nextobject: %r' % (obj,)
+logging.debug('nextobject: %r' % obj)
 return obj