Python 3.4 support

2014-09-02 15:49:46 +02:00 · 2014-09-02 15:49:46 +02:00 · 846cd18186
parent faea7291a8
commit 846cd18186
6 changed files with 36 additions and 32 deletions
--- a/pdfminer/__init__.py
+++ b/pdfminer/__init__.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-__version__ = '20140328'
+__version__ = '20140829'
 if __name__ == '__main__':
    print (__version__)
--- a/pdfminer/cmapdb.py
+++ b/pdfminer/cmapdb.py
@@ -90,8 +90,7 @@ class CMap(CMapBase):
        return
    def decode(self, code):
-        if self.debug:
-            logging.debug('decode: %r, %r' % (self, code))
+        logging.debug('decode: %r, %r' % (self, code))
        d = self.code2cid
        for c in code:
            c = ord(c)
@@ -142,8 +141,7 @@ class UnicodeMap(CMapBase):
        return '<UnicodeMap: %s>' % self.attrs.get('CMapName')
    def get_unichr(self, cid):
-        if self.debug:
-            logging.debug('get_unichr: %r, %r' % (self, cid))
+        logging.debug('get_unichr: %r, %r' % (self, cid))
        return self.cid2unichr[cid]
    def dump(self, out=sys.stdout):
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@@ -93,7 +93,7 @@ class PDFXRef(PDFBaseXRef):
        return '<PDFXRef: offsets=%r>' % (self.offsets.keys())
    def load(self, parser):
-        while 1:
+        while True:
            try:
                (pos, line) = parser.nextline()
                if not line.strip():
@@ -134,7 +134,7 @@ class PDFXRef(PDFBaseXRef):
    def load_trailer(self, parser):
        try:
            (_, kwd) = parser.nexttoken()
-            assert kwd.name == 'trailer'
+            assert kwd is KWD(b'trailer')
            (_, dic) = parser.nextobject()
        except PSEOF:
            x = parser.pop(1)
@@ -142,6 +142,7 @@ class PDFXRef(PDFBaseXRef):
                raise PDFNoValidXRef('Unexpected EOF - file corrupted')
            (_, dic) = x[0]
        self.trailer.update(dict_value(dic))
        logging.debug('trailer=%r'%self.trailer)
        return
    def get_trailer(self):
@@ -535,8 +536,6 @@ class PDFDocument(object):
        if SHA256 is not None:
            security_handler_registry[5] = PDFStandardSecurityHandlerV5
    debug = 0
    def __init__(self, parser, password=b'', caching=True, fallback=True):
        "Set the document to use a given PDFParser object."
        self.caching = caching
@@ -557,7 +556,7 @@ class PDFDocument(object):
            pos = self.find_xref(parser)
            self.read_xref_from(parser, pos, self.xrefs)
        except PDFNoValidXRef:
-            fallback = True
+            pass # fallback = True
        if fallback:
            parser.fallback = True
            xref = PDFXRefFallback()
@@ -646,7 +645,7 @@ class PDFDocument(object):
            raise PDFSyntaxError('objid mismatch: %r=%r' % (objid1, objid))
        (_, genno) = self._parser.nexttoken()  # genno
        (_, kwd) = self._parser.nexttoken()
-        if kwd.name !='obj':
+        if kwd != KWD(b'obj'):
            raise PDFSyntaxError('Invalid object spec: offset=%r' % pos)
        (_, obj) = self._parser.nextobject()
        return obj
@@ -656,8 +655,7 @@ class PDFDocument(object):
        assert objid != 0
        if not self.xrefs:
            raise PDFException('PDFDocument is not initialized')
-        if self.debug:
-            logging.debug('getobj: objid=%r' % objid)
+        logging.debug('getobj: objid=%r' % objid)
        if objid in self._cached_objs:
            (obj, genno) = self._cached_objs[objid]
        else:
@@ -682,8 +680,7 @@ class PDFDocument(object):
                    continue
            else:
                raise PDFObjectNotFound(objid)
-            if self.debug:
-                logging.debug('register: objid=%r: %r' % (objid, obj))
+            logging.debug('register: objid=%r: %r' % (objid, obj))
            if self.caching:
                self._cached_objs[objid] = (obj, genno)
        return obj
@@ -756,8 +753,7 @@ class PDFDocument(object):
        prev = None
        for line in parser.revreadlines():
            line = line.strip()
-            if self.debug:
-                logging.debug('find_xref: %r' % line)
+            logging.debug('find_xref: %r' % line)
            if line == b'startxref':
                break
            if line:
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -302,8 +302,6 @@ class PDFContentParser(PSStackParser):
 ##
 class PDFPageInterpreter(object):
    debug = 0
    def __init__(self, rsrcmgr, device):
        self.rsrcmgr = rsrcmgr
        self.device = device
@@ -334,8 +332,7 @@ class PDFPageInterpreter(object):
            else:
                return PREDEFINED_COLORSPACE.get(name)
        for (k, v) in six.iteritems(dict_value(resources)):
-            if self.debug:
-                logging.debug('Resource: %r: %r' % (k, v))
+            logging.debug('Resource: %r: %r' % (k, v))
            if k == 'Font':
                for (fontid, spec) in dict_value(v).iteritems():
                    objid = None
@@ -856,16 +853,14 @@ class PDFPageInterpreter(object):
                method = 'do_%s' % name.replace('*', '_a').replace('"', '_w').replace("'", '_q')
                if hasattr(self, method):
                    func = getattr(self, method)
-                    nargs = func.func_code.co_argcount-1
+                    nargs = six.get_function_code(func).co_argcount-1
                    if nargs:
                        args = self.pop(nargs)
-                        if self.debug:
-                            logging.debug('exec: %s %r' % (name, args))
+                        logging.debug('exec: %s %r' % (name, args))
                        if len(args) == nargs:
                            func(*args)
                    else:
-                        if self.debug:
-                            logging.debug('exec: %s' % name)
+                        logging.debug('exec: %s' % name)
                        func()
                else:
                    if STRICT:
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@@ -160,6 +160,8 @@ def dict_value(x):
    x = resolve1(x)
    if not isinstance(x, dict):
        if STRICT:
            import logging
            logging.error('PDFTypeError : Dict required: %r' % x)
            raise PDFTypeError('Dict required: %r' % x)
        return {}
    return x
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@@ -22,7 +22,7 @@ def bytes(s,i,j=None):
 from .utils import choplist
-STRICT = 0
+STRICT = True
 ##  PS Exceptions
 ##
@@ -143,17 +143,30 @@ def literal_name(x):
        if STRICT:
            raise PSTypeError('Literal required: %r' % x)
        else:
-            return str(x)
+            name=x
-    return x.name
+    else:
-
+        name=x.name
        if six.PY3:
            try:
                name = str(name,'utf-8')
            except:
                pass
    return name
 def keyword_name(x):
    if not isinstance(x, PSKeyword):
        if STRICT:
            raise PSTypeError('Keyword required: %r' % x)
        else:
-            return str(x)
+            name=x
-    return x.name
+    else:
        name=x.name
        if six.PY3:
            try:
                name = str(name,'utf-8')
            except:
                pass
    return name
 ##  PSBaseParser