From 846cd1818654634ae4a3e787eaad9b06c7c569c2 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 2 Sep 2014 15:49:46 +0200 Subject: [PATCH] Python 3.4 support --- pdfminer/__init__.py | 2 +- pdfminer/cmapdb.py | 6 ++---- pdfminer/pdfdocument.py | 20 ++++++++------------ pdfminer/pdfinterp.py | 13 ++++--------- pdfminer/pdftypes.py | 2 ++ pdfminer/psparser.py | 25 +++++++++++++++++++------ 6 files changed, 36 insertions(+), 32 deletions(-) diff --git a/pdfminer/__init__.py b/pdfminer/__init__.py index a7bc049..be82f84 100644 --- a/pdfminer/__init__.py +++ b/pdfminer/__init__.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -__version__ = '20140328' +__version__ = '20140829' if __name__ == '__main__': print (__version__) diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index dbe9871..69d9de2 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -90,8 +90,7 @@ class CMap(CMapBase): return def decode(self, code): - if self.debug: - logging.debug('decode: %r, %r' % (self, code)) + logging.debug('decode: %r, %r' % (self, code)) d = self.code2cid for c in code: c = ord(c) @@ -142,8 +141,7 @@ class UnicodeMap(CMapBase): return '' % self.attrs.get('CMapName') def get_unichr(self, cid): - if self.debug: - logging.debug('get_unichr: %r, %r' % (self, cid)) + logging.debug('get_unichr: %r, %r' % (self, cid)) return self.cid2unichr[cid] def dump(self, out=sys.stdout): diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 7bc3074..a9fb717 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -93,7 +93,7 @@ class PDFXRef(PDFBaseXRef): return '' % (self.offsets.keys()) def load(self, parser): - while 1: + while True: try: (pos, line) = parser.nextline() if not line.strip(): @@ -134,7 +134,7 @@ class PDFXRef(PDFBaseXRef): def load_trailer(self, parser): try: (_, kwd) = parser.nexttoken() - assert kwd.name == 'trailer' + assert kwd is KWD(b'trailer') (_, dic) = parser.nextobject() except PSEOF: x = parser.pop(1) @@ -142,6 +142,7 @@ class 
PDFXRef(PDFBaseXRef): raise PDFNoValidXRef('Unexpected EOF - file corrupted') (_, dic) = x[0] self.trailer.update(dict_value(dic)) + logging.debug('trailer=%r'%self.trailer) return def get_trailer(self): @@ -535,8 +536,6 @@ class PDFDocument(object): if SHA256 is not None: security_handler_registry[5] = PDFStandardSecurityHandlerV5 - debug = 0 - def __init__(self, parser, password=b'', caching=True, fallback=True): "Set the document to use a given PDFParser object." self.caching = caching @@ -557,7 +556,7 @@ class PDFDocument(object): pos = self.find_xref(parser) self.read_xref_from(parser, pos, self.xrefs) except PDFNoValidXRef: - fallback = True + pass # fallback = True if fallback: parser.fallback = True xref = PDFXRefFallback() @@ -646,7 +645,7 @@ class PDFDocument(object): raise PDFSyntaxError('objid mismatch: %r=%r' % (objid1, objid)) (_, genno) = self._parser.nexttoken() # genno (_, kwd) = self._parser.nexttoken() - if kwd.name !='obj': + if kwd != KWD(b'obj'): raise PDFSyntaxError('Invalid object spec: offset=%r' % pos) (_, obj) = self._parser.nextobject() return obj @@ -656,8 +655,7 @@ class PDFDocument(object): assert objid != 0 if not self.xrefs: raise PDFException('PDFDocument is not initialized') - if self.debug: - logging.debug('getobj: objid=%r' % objid) + logging.debug('getobj: objid=%r' % objid) if objid in self._cached_objs: (obj, genno) = self._cached_objs[objid] else: @@ -682,8 +680,7 @@ class PDFDocument(object): continue else: raise PDFObjectNotFound(objid) - if self.debug: - logging.debug('register: objid=%r: %r' % (objid, obj)) + logging.debug('register: objid=%r: %r' % (objid, obj)) if self.caching: self._cached_objs[objid] = (obj, genno) return obj @@ -756,8 +753,7 @@ class PDFDocument(object): prev = None for line in parser.revreadlines(): line = line.strip() - if self.debug: - logging.debug('find_xref: %r' % line) + logging.debug('find_xref: %r' % line) if line == b'startxref': break if line: diff --git a/pdfminer/pdfinterp.py 
b/pdfminer/pdfinterp.py index 7220049..1469a6b 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -302,8 +302,6 @@ class PDFContentParser(PSStackParser): ## class PDFPageInterpreter(object): - debug = 0 - def __init__(self, rsrcmgr, device): self.rsrcmgr = rsrcmgr self.device = device @@ -334,8 +332,7 @@ class PDFPageInterpreter(object): else: return PREDEFINED_COLORSPACE.get(name) for (k, v) in six.iteritems(dict_value(resources)): - if self.debug: - logging.debug('Resource: %r: %r' % (k, v)) + logging.debug('Resource: %r: %r' % (k, v)) if k == 'Font': for (fontid, spec) in dict_value(v).iteritems(): objid = None @@ -856,16 +853,14 @@ class PDFPageInterpreter(object): method = 'do_%s' % name.replace('*', '_a').replace('"', '_w').replace("'", '_q') if hasattr(self, method): func = getattr(self, method) - nargs = func.func_code.co_argcount-1 + nargs = six.get_function_code(func).co_argcount-1 if nargs: args = self.pop(nargs) - if self.debug: - logging.debug('exec: %s %r' % (name, args)) + logging.debug('exec: %s %r' % (name, args)) if len(args) == nargs: func(*args) else: - if self.debug: - logging.debug('exec: %s' % name) + logging.debug('exec: %s' % name) func() else: if STRICT: diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index 10c0777..0bb942c 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -160,6 +160,8 @@ def dict_value(x): x = resolve1(x) if not isinstance(x, dict): if STRICT: + import logging + logging.error('PDFTypeError : Dict required: %r' % x) raise PDFTypeError('Dict required: %r' % x) return {} return x diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index b37359e..880d4aa 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -22,7 +22,7 @@ def bytes(s,i,j=None): from .utils import choplist -STRICT = 0 +STRICT = True ## PS Exceptions ## @@ -143,17 +143,30 @@ def literal_name(x): if STRICT: raise PSTypeError('Literal required: %r' % x) else: - return str(x) - return x.name - + name=x + else: 
+ name=x.name + if six.PY3: + try: + name = str(name,'utf-8') + except (TypeError, UnicodeDecodeError): + pass + return name def keyword_name(x): if not isinstance(x, PSKeyword): if STRICT: raise PSTypeError('Keyword required: %r' % x) else: - return str(x) - return x.name + name=x + else: + name=x.name + if six.PY3: + try: + name = str(name,'utf-8') + except (TypeError, UnicodeDecodeError): + pass + return name ## PSBaseParser