diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 64994ab..9f39997 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -17,6 +17,7 @@ import os.path import gzip import cPickle as pickle import struct +import logging from psparser import PSStackParser from psparser import PSSyntaxError, PSEOF from psparser import PSLiteral @@ -84,7 +85,7 @@ class CMap(CMapBase): def decode(self, code): if self.debug: - print >>sys.stderr, 'decode: %r, %r' % (self, code) + logging.debug('decode: %r, %r' % (self, code)) d = self.code2cid for c in code: c = ord(c) @@ -136,7 +137,7 @@ class UnicodeMap(CMapBase): def get_unichr(self, cid): if self.debug: - print >>sys.stderr, 'get_unichr: %r, %r' % (self, cid) + logging.debug('get_unichr: %r, %r' % (self, cid)) return self.cid2unichr[cid] def dump(self, out=sys.stdout): @@ -225,7 +226,7 @@ class CMapDB(object): def _load_data(klass, name): filename = '%s.pickle.gz' % name if klass.debug: - print >>sys.stderr, 'loading:', name + logging.info('loading: %r' % name) cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'), os.path.join(os.path.dirname(__file__), 'cmap'),) for directory in cmap_paths: diff --git a/pdfminer/converter.py b/pdfminer/converter.py index e0d487c..aacd4f0 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -1,5 +1,6 @@ #!/usr/bin/env python import sys +import logging from pdfdevice import PDFTextDevice from pdffont import PDFUnicodeNotDefined from layout import LTContainer, LTPage, LTText, LTLine, LTRect, LTCurve @@ -104,7 +105,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): def handle_undefined_char(self, font, cid): if self.debug: - print >>sys.stderr, 'undefined: %r, %r' % (font, cid) + logging.info('undefined: %r, %r' % (font, cid)) return '(cid:%d)' % cid def receive_layout(self, ltpage): diff --git a/pdfminer/lzw.py b/pdfminer/lzw.py index 29e1795..4c96c4e 100644 --- a/pdfminer/lzw.py +++ b/pdfminer/lzw.py @@ -1,5 +1,6 @@ #!/usr/bin/env python import sys +import logging 
try: from cStringIO import StringIO except ImportError: @@ -94,8 +95,8 @@ class LZWDecoder(object): break yield x if self.debug: - print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' % - (self.nbits, code, x, self.table[258:])) + logging.debug('nbits=%d, code=%d, output=%r, table=%r' % + (self.nbits, code, x, self.table[258:])) return diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 73c4002..2f8bb72 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -2,6 +2,7 @@ import sys import re import struct +import logging try: import hashlib as md5 except ImportError: @@ -116,7 +117,7 @@ class PDFXRef(PDFBaseXRef): continue self.offsets[objid] = (None, long(pos), int(genno)) if 1 <= debug: - print >>sys.stderr, 'xref objects:', self.offsets + logging.info('xref objects: %r' % self.offsets) self.load_trailer(parser) return @@ -168,7 +169,7 @@ class PDFXRefFallback(PDFXRef): parser.seek(pos) self.load_trailer(parser) if 1 <= debug: - print >>sys.stderr, 'trailer: %r' % self.get_trailer() + logging.info('trailer: %r' % self.get_trailer()) break m = self.PDFOBJ_CUE.match(line) if not m: @@ -234,9 +235,9 @@ class PDFXRefStream(PDFBaseXRef): self.entlen = self.fl1+self.fl2+self.fl3 self.trailer = stream.attrs if 1 <= debug: - print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' % - (', '.join(map(repr, self.ranges)), - self.fl1, self.fl2, self.fl3)) + logging.info('xref stream: objid=%s, fields=%d,%d,%d' % + (', '.join(map(repr, self.ranges)), + self.fl1, self.fl2, self.fl3)) return def get_trailer(self): @@ -635,7 +636,7 @@ class PDFDocument(object): if not self.xrefs: raise PDFException('PDFDocument is not initialized') if 2 <= self.debug: - print >>sys.stderr, 'getobj: objid=%r' % (objid) + logging.debug('getobj: objid=%r' % objid) if objid in self._cached_objs: (obj, genno) = self._cached_objs[objid] else: @@ -661,7 +662,7 @@ class PDFDocument(object): else: raise PDFObjectNotFound(objid) if 2 <= self.debug: - print 
>>sys.stderr, 'register: objid=%r: %r' % (objid, obj) + logging.debug('register: objid=%r: %r' % (objid, obj)) if self.caching: self._cached_objs[objid] = (obj, genno) return obj @@ -735,7 +736,7 @@ class PDFDocument(object): for line in parser.revreadlines(): line = line.strip() if 2 <= self.debug: - print >>sys.stderr, 'find_xref: %r' % line + logging.debug('find_xref: %r' % line) if line == 'startxref': break if line: @@ -743,7 +744,7 @@ class PDFDocument(object): else: raise PDFNoValidXRef('Unexpected EOF') if 1 <= self.debug: - print >>sys.stderr, 'xref found: pos=%r' % prev + logging.info('xref found: pos=%r' % prev) return long(prev) # read xref table @@ -755,8 +756,8 @@ class PDFDocument(object): (pos, token) = parser.nexttoken() except PSEOF: raise PDFNoValidXRef('Unexpected EOF') - if 2 <= self.debug: - print >>sys.stderr, 'read_xref_from: start=%d, token=%r' % (start, token) + if 1 <= self.debug: + logging.info('read_xref_from: start=%d, token=%r' % (start, token)) if isinstance(token, int): # XRefStream: PDF-1.5 parser.seek(pos) @@ -771,7 +772,7 @@ class PDFDocument(object): xrefs.append(xref) trailer = xref.get_trailer() if 1 <= self.debug: - print >>sys.stderr, 'trailer: %r' % trailer + logging.info('trailer: %r' % trailer) if 'XRefStm' in trailer: pos = int_value(trailer['XRefStm']) self.read_xref_from(parser, pos, xrefs) diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 4b7a6b0..d31c0c8 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import sys import re +import logging try: from cStringIO import StringIO except ImportError: @@ -160,8 +161,8 @@ class PDFResourceManager(object): if objid and objid in self._cached_fonts: font = self._cached_fonts[objid] else: - if 2 <= self.debug: - print >>sys.stderr, 'get_font: create: objid=%r, spec=%r' % (objid, spec) + if 1 <= self.debug: + logging.info('get_font: create: objid=%r, spec=%r' % (objid, spec)) if STRICT: if spec['Type'] is not 
LITERAL_FONT: raise PDFFontError('Type is not /Font') @@ -337,7 +338,7 @@ class PDFPageInterpreter(object): return PREDEFINED_COLORSPACE.get(name) for (k, v) in dict_value(resources).iteritems(): if 2 <= self.debug: - print >>sys.stderr, 'Resource: %r: %r' % (k, v) + logging.debug('Resource: %r: %r' % (k, v)) if k == 'Font': for (fontid, spec) in dict_value(v).iteritems(): objid = None @@ -794,7 +795,7 @@ class PDFPageInterpreter(object): raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) return if 1 <= self.debug: - print >>sys.stderr, 'Processing xobj: %r' % xobj + logging.info('Processing xobj: %r' % xobj) subtype = xobj.get('Subtype') if subtype is LITERAL_FORM and 'BBox' in xobj: interpreter = self.dup() @@ -818,7 +819,7 @@ class PDFPageInterpreter(object): def process_page(self, page): if 1 <= self.debug: - print >>sys.stderr, 'Processing page: %r' % page + logging.info('Processing page: %r' % page) (x0, y0, x1, y1) = page.mediabox if page.rotate == 90: ctm = (0, -1, 1, 0, -y0, x1) @@ -838,8 +839,8 @@ class PDFPageInterpreter(object): # This method may be called recursively. 
def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY): if 1 <= self.debug: - print >>sys.stderr, ('render_contents: resources=%r, streams=%r, ctm=%r' % - (resources, streams, ctm)) + logging.info('render_contents: resources=%r, streams=%r, ctm=%r' % + (resources, streams, ctm)) self.init_resources(resources) self.init_state(ctm) self.execute(list_value(streams)) @@ -865,12 +866,12 @@ class PDFPageInterpreter(object): if nargs: args = self.pop(nargs) if 2 <= self.debug: - print >>sys.stderr, 'exec: %s %r' % (name, args) + logging.debug('exec: %s %r' % (name, args)) if len(args) == nargs: func(*args) else: if 2 <= self.debug: - print >>sys.stderr, 'exec: %s' % (name) + logging.debug('exec: %s' % name) func() else: if STRICT: diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py index 154bbc3..65251b7 100644 --- a/pdfminer/pdfpage.py +++ b/pdfminer/pdfpage.py @@ -1,5 +1,6 @@ #!/usr/bin/env python import sys +import logging from psparser import LIT from pdftypes import PDFObjectNotFound from pdftypes import resolve1 @@ -86,13 +87,13 @@ class PDFPage(object): tree[k] = v if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree: if 1 <= debug: - print >>sys.stderr, 'Pages: Kids=%r' % tree['Kids'] + logging.info('Pages: Kids=%r' % tree['Kids']) for c in list_value(tree['Kids']): for x in search(c, tree): yield x elif tree.get('Type') is LITERAL_PAGE: if 1 <= debug: - print >>sys.stderr, 'Page: %r' % tree + logging.info('Page: %r' % tree) yield (objid, tree) pages = False if 'Pages' in document.catalog: diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index 32d1220..7b40898 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -1,5 +1,6 @@ #!/usr/bin/env python import sys +import logging try: from cStringIO import StringIO except ImportError: @@ -121,8 +122,8 @@ class PDFParser(PSStackParser): self.seek(pos+objlen) # XXX limit objlen not to exceed object boundary if 2 <= self.debug: - print >>sys.stderr, 'Stream: pos=%d, objlen=%d, 
dic=%r, data=%r...' % \ - (pos, objlen, dic, data[:10]) + logging.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \ + (pos, objlen, dic, data[:10])) obj = PDFStream(dic, data, self.doc.decipher) self.push((pos, obj)) diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index 2858553..0d05319 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import sys import re +import logging from utils import choplist STRICT = 0 @@ -184,7 +185,7 @@ class PSBaseParser(object): if not pos: pos = self.bufpos+self.charpos self.fp.seek(pos) - print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n)) + logging.info('poll(%d): %r' % (pos, self.fp.read(n))) self.fp.seek(pos0) return @@ -192,7 +193,7 @@ class PSBaseParser(object): """Seeks the parser to the given position. """ if 2 <= self.debug: - print >>sys.stderr, 'seek: %r' % pos + logging.debug('seek: %r' % pos) self.fp.seek(pos) # reset the status for nextline() self.bufpos = pos @@ -243,7 +244,7 @@ class PSBaseParser(object): linebuf += self.buf[self.charpos:] self.charpos = len(self.buf) if 2 <= self.debug: - print >>sys.stderr, 'nextline: %r' % ((linepos, linebuf),) + logging.debug('nextline: %r, %r' % (linepos, linebuf)) return (linepos, linebuf) def revreadlines(self): @@ -483,7 +484,7 @@ class PSBaseParser(object): self.charpos = self._parse1(self.buf, self.charpos) token = self._tokens.pop(0) if 2 <= self.debug: - print >>sys.stderr, 'nexttoken: %r' % (token,) + logging.debug('nexttoken: %r' % (token,)) return token @@ -524,7 +525,7 @@ class PSStackParser(PSBaseParser): def add_results(self, *objs): if 2 <= self.debug: - print >>sys.stderr, 'add_results: %r' % (objs,) + logging.debug('add_results: %r' % (objs,)) self.results.extend(objs) return @@ -532,7 +533,7 @@ class PSStackParser(PSBaseParser): self.context.append((pos, self.curtype, self.curstack)) (self.curtype, self.curstack) = (type, []) if 2 <= self.debug: - print >>sys.stderr, 'start_type: pos=%r, type=%r' % 
(pos, type) + logging.debug('start_type: pos=%r, type=%r' % (pos, type)) return def end_type(self, type): @@ -541,7 +542,7 @@ class PSStackParser(PSBaseParser): objs = [obj for (_, obj) in self.curstack] (pos, self.curtype, self.curstack) = self.context.pop() if 2 <= self.debug: - print >>sys.stderr, 'end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs) + logging.debug('end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs)) return (pos, objs) def do_keyword(self, pos, token): @@ -596,8 +597,8 @@ class PSStackParser(PSBaseParser): raise else: if 2 <= self.debug: - print >>sys.stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \ - (pos, token, self.curstack) + logging.debug('do_keyword: pos=%r, token=%r, stack=%r' % \ + (pos, token, self.curstack)) self.do_keyword(pos, token) if self.context: continue @@ -605,7 +606,7 @@ class PSStackParser(PSBaseParser): self.flush() obj = self.results.pop(0) if 2 <= self.debug: - print >>sys.stderr, 'nextobject: %r' % (obj,) + logging.debug('nextobject: %r' % (obj,)) return obj