Use logging module instead of print.

pull/1/head
Yusuke Shinyama 2014-06-14 12:00:49 +09:00
parent fb3f2d9629
commit 8e14ebf4e1
8 changed files with 49 additions and 41 deletions

View File

@ -17,6 +17,7 @@ import os.path
import gzip import gzip
import cPickle as pickle import cPickle as pickle
import struct import struct
import logging
from psparser import PSStackParser from psparser import PSStackParser
from psparser import PSSyntaxError, PSEOF from psparser import PSSyntaxError, PSEOF
from psparser import PSLiteral from psparser import PSLiteral
@ -84,7 +85,7 @@ class CMap(CMapBase):
def decode(self, code): def decode(self, code):
if self.debug: if self.debug:
print >>sys.stderr, 'decode: %r, %r' % (self, code) logging.debug('decode: %r, %r' % (self, code))
d = self.code2cid d = self.code2cid
for c in code: for c in code:
c = ord(c) c = ord(c)
@ -136,7 +137,7 @@ class UnicodeMap(CMapBase):
def get_unichr(self, cid): def get_unichr(self, cid):
if self.debug: if self.debug:
print >>sys.stderr, 'get_unichr: %r, %r' % (self, cid) logging.debug('get_unichr: %r, %r' % (self, cid))
return self.cid2unichr[cid] return self.cid2unichr[cid]
def dump(self, out=sys.stdout): def dump(self, out=sys.stdout):
@ -225,7 +226,7 @@ class CMapDB(object):
def _load_data(klass, name): def _load_data(klass, name):
filename = '%s.pickle.gz' % name filename = '%s.pickle.gz' % name
if klass.debug: if klass.debug:
print >>sys.stderr, 'loading:', name logging.info('loading: %r' % name)
cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'), cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'),
os.path.join(os.path.dirname(__file__), 'cmap'),) os.path.join(os.path.dirname(__file__), 'cmap'),)
for directory in cmap_paths: for directory in cmap_paths:

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
import logging
from pdfdevice import PDFTextDevice from pdfdevice import PDFTextDevice
from pdffont import PDFUnicodeNotDefined from pdffont import PDFUnicodeNotDefined
from layout import LTContainer, LTPage, LTText, LTLine, LTRect, LTCurve from layout import LTContainer, LTPage, LTText, LTLine, LTRect, LTCurve
@ -104,7 +105,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
def handle_undefined_char(self, font, cid): def handle_undefined_char(self, font, cid):
if self.debug: if self.debug:
print >>sys.stderr, 'undefined: %r, %r' % (font, cid) logging.info('undefined: %r, %r' % (font, cid))
return '(cid:%d)' % cid return '(cid:%d)' % cid
def receive_layout(self, ltpage): def receive_layout(self, ltpage):

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
import logging
try: try:
from cStringIO import StringIO from cStringIO import StringIO
except ImportError: except ImportError:
@ -94,8 +95,8 @@ class LZWDecoder(object):
break break
yield x yield x
if self.debug: if self.debug:
print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' % logging.debug('nbits=%d, code=%d, output=%r, table=%r' %
(self.nbits, code, x, self.table[258:])) (self.nbits, code, x, self.table[258:]))
return return

View File

@ -2,6 +2,7 @@
import sys import sys
import re import re
import struct import struct
import logging
try: try:
import hashlib as md5 import hashlib as md5
except ImportError: except ImportError:
@ -116,7 +117,7 @@ class PDFXRef(PDFBaseXRef):
continue continue
self.offsets[objid] = (None, long(pos), int(genno)) self.offsets[objid] = (None, long(pos), int(genno))
if 1 <= debug: if 1 <= debug:
print >>sys.stderr, 'xref objects:', self.offsets logging.info('xref objects: %r' % self.offsets)
self.load_trailer(parser) self.load_trailer(parser)
return return
@ -168,7 +169,7 @@ class PDFXRefFallback(PDFXRef):
parser.seek(pos) parser.seek(pos)
self.load_trailer(parser) self.load_trailer(parser)
if 1 <= debug: if 1 <= debug:
print >>sys.stderr, 'trailer: %r' % self.get_trailer() logging.info('trailer: %r' % self.get_trailer())
break break
m = self.PDFOBJ_CUE.match(line) m = self.PDFOBJ_CUE.match(line)
if not m: if not m:
@ -234,9 +235,9 @@ class PDFXRefStream(PDFBaseXRef):
self.entlen = self.fl1+self.fl2+self.fl3 self.entlen = self.fl1+self.fl2+self.fl3
self.trailer = stream.attrs self.trailer = stream.attrs
if 1 <= debug: if 1 <= debug:
print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' % logging.info('xref stream: objid=%s, fields=%d,%d,%d' %
(', '.join(map(repr, self.ranges)), (', '.join(map(repr, self.ranges)),
self.fl1, self.fl2, self.fl3)) self.fl1, self.fl2, self.fl3))
return return
def get_trailer(self): def get_trailer(self):
@ -635,7 +636,7 @@ class PDFDocument(object):
if not self.xrefs: if not self.xrefs:
raise PDFException('PDFDocument is not initialized') raise PDFException('PDFDocument is not initialized')
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'getobj: objid=%r' % (objid) logging.debug('getobj: objid=%r' % objid)
if objid in self._cached_objs: if objid in self._cached_objs:
(obj, genno) = self._cached_objs[objid] (obj, genno) = self._cached_objs[objid]
else: else:
@ -661,7 +662,7 @@ class PDFDocument(object):
else: else:
raise PDFObjectNotFound(objid) raise PDFObjectNotFound(objid)
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'register: objid=%r: %r' % (objid, obj) logging.debug('register: objid=%r: %r' % (objid, obj))
if self.caching: if self.caching:
self._cached_objs[objid] = (obj, genno) self._cached_objs[objid] = (obj, genno)
return obj return obj
@ -735,7 +736,7 @@ class PDFDocument(object):
for line in parser.revreadlines(): for line in parser.revreadlines():
line = line.strip() line = line.strip()
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'find_xref: %r' % line logging.debug('find_xref: %r' % line)
if line == 'startxref': if line == 'startxref':
break break
if line: if line:
@ -743,7 +744,7 @@ class PDFDocument(object):
else: else:
raise PDFNoValidXRef('Unexpected EOF') raise PDFNoValidXRef('Unexpected EOF')
if 1 <= self.debug: if 1 <= self.debug:
print >>sys.stderr, 'xref found: pos=%r' % prev logging.info('xref found: pos=%r' % prev)
return long(prev) return long(prev)
# read xref table # read xref table
@ -755,8 +756,8 @@ class PDFDocument(object):
(pos, token) = parser.nexttoken() (pos, token) = parser.nexttoken()
except PSEOF: except PSEOF:
raise PDFNoValidXRef('Unexpected EOF') raise PDFNoValidXRef('Unexpected EOF')
if 2 <= self.debug: if 1 <= self.debug:
print >>sys.stderr, 'read_xref_from: start=%d, token=%r' % (start, token) logging.info('read_xref_from: start=%d, token=%r' % (start, token))
if isinstance(token, int): if isinstance(token, int):
# XRefStream: PDF-1.5 # XRefStream: PDF-1.5
parser.seek(pos) parser.seek(pos)
@ -771,7 +772,7 @@ class PDFDocument(object):
xrefs.append(xref) xrefs.append(xref)
trailer = xref.get_trailer() trailer = xref.get_trailer()
if 1 <= self.debug: if 1 <= self.debug:
print >>sys.stderr, 'trailer: %r' % trailer logging.info('trailer: %r' % trailer)
if 'XRefStm' in trailer: if 'XRefStm' in trailer:
pos = int_value(trailer['XRefStm']) pos = int_value(trailer['XRefStm'])
self.read_xref_from(parser, pos, xrefs) self.read_xref_from(parser, pos, xrefs)

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
import re import re
import logging
try: try:
from cStringIO import StringIO from cStringIO import StringIO
except ImportError: except ImportError:
@ -160,8 +161,8 @@ class PDFResourceManager(object):
if objid and objid in self._cached_fonts: if objid and objid in self._cached_fonts:
font = self._cached_fonts[objid] font = self._cached_fonts[objid]
else: else:
if 2 <= self.debug: if 1 <= self.debug:
print >>sys.stderr, 'get_font: create: objid=%r, spec=%r' % (objid, spec) logging.info('get_font: create: objid=%r, spec=%r' % (objid, spec))
if STRICT: if STRICT:
if spec['Type'] is not LITERAL_FONT: if spec['Type'] is not LITERAL_FONT:
raise PDFFontError('Type is not /Font') raise PDFFontError('Type is not /Font')
@ -337,7 +338,7 @@ class PDFPageInterpreter(object):
return PREDEFINED_COLORSPACE.get(name) return PREDEFINED_COLORSPACE.get(name)
for (k, v) in dict_value(resources).iteritems(): for (k, v) in dict_value(resources).iteritems():
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'Resource: %r: %r' % (k, v) logging.debug('Resource: %r: %r' % (k, v))
if k == 'Font': if k == 'Font':
for (fontid, spec) in dict_value(v).iteritems(): for (fontid, spec) in dict_value(v).iteritems():
objid = None objid = None
@ -794,7 +795,7 @@ class PDFPageInterpreter(object):
raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
return return
if 1 <= self.debug: if 1 <= self.debug:
print >>sys.stderr, 'Processing xobj: %r' % xobj logging.info('Processing xobj: %r' % xobj)
subtype = xobj.get('Subtype') subtype = xobj.get('Subtype')
if subtype is LITERAL_FORM and 'BBox' in xobj: if subtype is LITERAL_FORM and 'BBox' in xobj:
interpreter = self.dup() interpreter = self.dup()
@ -818,7 +819,7 @@ class PDFPageInterpreter(object):
def process_page(self, page): def process_page(self, page):
if 1 <= self.debug: if 1 <= self.debug:
print >>sys.stderr, 'Processing page: %r' % page logging.info('Processing page: %r' % page)
(x0, y0, x1, y1) = page.mediabox (x0, y0, x1, y1) = page.mediabox
if page.rotate == 90: if page.rotate == 90:
ctm = (0, -1, 1, 0, -y0, x1) ctm = (0, -1, 1, 0, -y0, x1)
@ -838,8 +839,8 @@ class PDFPageInterpreter(object):
# This method may be called recursively. # This method may be called recursively.
def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY): def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY):
if 1 <= self.debug: if 1 <= self.debug:
print >>sys.stderr, ('render_contents: resources=%r, streams=%r, ctm=%r' % logging.info('render_contents: resources=%r, streams=%r, ctm=%r' %
(resources, streams, ctm)) (resources, streams, ctm))
self.init_resources(resources) self.init_resources(resources)
self.init_state(ctm) self.init_state(ctm)
self.execute(list_value(streams)) self.execute(list_value(streams))
@ -865,12 +866,12 @@ class PDFPageInterpreter(object):
if nargs: if nargs:
args = self.pop(nargs) args = self.pop(nargs)
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'exec: %s %r' % (name, args) logging.debug('exec: %s %r' % (name, args))
if len(args) == nargs: if len(args) == nargs:
func(*args) func(*args)
else: else:
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'exec: %s' % (name) logging.debug('exec: %s' % name)
func() func()
else: else:
if STRICT: if STRICT:

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
import logging
from psparser import LIT from psparser import LIT
from pdftypes import PDFObjectNotFound from pdftypes import PDFObjectNotFound
from pdftypes import resolve1 from pdftypes import resolve1
@ -86,13 +87,13 @@ class PDFPage(object):
tree[k] = v tree[k] = v
if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree: if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree:
if 1 <= debug: if 1 <= debug:
print >>sys.stderr, 'Pages: Kids=%r' % tree['Kids'] logging.info('Pages: Kids=%r' % tree['Kids'])
for c in list_value(tree['Kids']): for c in list_value(tree['Kids']):
for x in search(c, tree): for x in search(c, tree):
yield x yield x
elif tree.get('Type') is LITERAL_PAGE: elif tree.get('Type') is LITERAL_PAGE:
if 1 <= debug: if 1 <= debug:
print >>sys.stderr, 'Page: %r' % tree logging.info('Page: %r' % tree)
yield (objid, tree) yield (objid, tree)
pages = False pages = False
if 'Pages' in document.catalog: if 'Pages' in document.catalog:

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
import logging
try: try:
from cStringIO import StringIO from cStringIO import StringIO
except ImportError: except ImportError:
@ -121,8 +122,8 @@ class PDFParser(PSStackParser):
self.seek(pos+objlen) self.seek(pos+objlen)
# XXX limit objlen not to exceed object boundary # XXX limit objlen not to exceed object boundary
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \ logging.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
(pos, objlen, dic, data[:10]) (pos, objlen, dic, data[:10]))
obj = PDFStream(dic, data, self.doc.decipher) obj = PDFStream(dic, data, self.doc.decipher)
self.push((pos, obj)) self.push((pos, obj))

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
import re import re
import logging
from utils import choplist from utils import choplist
STRICT = 0 STRICT = 0
@ -184,7 +185,7 @@ class PSBaseParser(object):
if not pos: if not pos:
pos = self.bufpos+self.charpos pos = self.bufpos+self.charpos
self.fp.seek(pos) self.fp.seek(pos)
print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n)) logging.info('poll(%d): %r' % (pos, self.fp.read(n)))
self.fp.seek(pos0) self.fp.seek(pos0)
return return
@ -192,7 +193,7 @@ class PSBaseParser(object):
"""Seeks the parser to the given position. """Seeks the parser to the given position.
""" """
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'seek: %r' % pos logging.debug('seek: %r' % pos)
self.fp.seek(pos) self.fp.seek(pos)
# reset the status for nextline() # reset the status for nextline()
self.bufpos = pos self.bufpos = pos
@ -243,7 +244,7 @@ class PSBaseParser(object):
linebuf += self.buf[self.charpos:] linebuf += self.buf[self.charpos:]
self.charpos = len(self.buf) self.charpos = len(self.buf)
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'nextline: %r' % ((linepos, linebuf),) logging.debug('nextline: %r, %r' % (linepos, linebuf))
return (linepos, linebuf) return (linepos, linebuf)
def revreadlines(self): def revreadlines(self):
@ -483,7 +484,7 @@ class PSBaseParser(object):
self.charpos = self._parse1(self.buf, self.charpos) self.charpos = self._parse1(self.buf, self.charpos)
token = self._tokens.pop(0) token = self._tokens.pop(0)
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'nexttoken: %r' % (token,) logging.debug('nexttoken: %r' % (token,))
return token return token
@ -524,7 +525,7 @@ class PSStackParser(PSBaseParser):
def add_results(self, *objs): def add_results(self, *objs):
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'add_results: %r' % (objs,) logging.debug('add_results: %r' % (objs,))
self.results.extend(objs) self.results.extend(objs)
return return
@ -532,7 +533,7 @@ class PSStackParser(PSBaseParser):
self.context.append((pos, self.curtype, self.curstack)) self.context.append((pos, self.curtype, self.curstack))
(self.curtype, self.curstack) = (type, []) (self.curtype, self.curstack) = (type, [])
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'start_type: pos=%r, type=%r' % (pos, type) logging.debug('start_type: pos=%r, type=%r' % (pos, type))
return return
def end_type(self, type): def end_type(self, type):
@ -541,7 +542,7 @@ class PSStackParser(PSBaseParser):
objs = [obj for (_, obj) in self.curstack] objs = [obj for (_, obj) in self.curstack]
(pos, self.curtype, self.curstack) = self.context.pop() (pos, self.curtype, self.curstack) = self.context.pop()
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs) logging.debug('end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs))
return (pos, objs) return (pos, objs)
def do_keyword(self, pos, token): def do_keyword(self, pos, token):
@ -596,8 +597,8 @@ class PSStackParser(PSBaseParser):
raise raise
else: else:
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \ logging.debug('do_keyword: pos=%r, token=%r, stack=%r' % \
(pos, token, self.curstack) (pos, token, self.curstack))
self.do_keyword(pos, token) self.do_keyword(pos, token)
if self.context: if self.context:
continue continue
@ -605,7 +606,7 @@ class PSStackParser(PSBaseParser):
self.flush() self.flush()
obj = self.results.pop(0) obj = self.results.pop(0)
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'nextobject: %r' % (obj,) logging.debug('nextobject: %r' % (obj,))
return obj return obj