Miscellaneous bug fixes (#47)

* utils.decode_text: fix "TypeError: ord() expected string of length 1, but int found"; fixes https://github.com/goulu/pdfminer/issues/24
* pdfinterp.execute: don't assume that every keyword name can be decoded as utf-8; fixes "'str' does not support the buffer interface", https://github.com/goulu/pdfminer/issues/23
* default settings.STRICT to False, for compatibility with the original pdfminer
* PDFCIDFont: handle font registry/orderings that may be PDFObjRefs
* utils.nunpack: handle 8-byte integers
2017-02-06 05:57:01 -08:00 · 2017-02-06 05:57:01 -08:00 · 9439a3a31a
parent fd63dbf62e
commit 9439a3a31a
4 changed files with 8 additions and 9 deletions
--- a/pdfminer/pdffont.py
+++ b/pdfminer/pdffont.py
@ -640,8 +640,8 @@ class PDFCIDFont(PDFFont):
                raise PDFFontError('BaseFont is missing')
            self.basefont = 'unknown'
        self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
-        self.cidcoding = '%s-%s' % (self.cidsysteminfo.get('Registry', b'unknown').decode("latin1"),
-                                    self.cidsysteminfo.get('Ordering', b'unknown').decode("latin1"))
+        self.cidcoding = '%s-%s' % (resolve1(self.cidsysteminfo.get('Registry', b'unknown')).decode("latin1"),
+                                    resolve1(self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1"))
        try:
            name = literal_name(spec['Encoding'])
        except KeyError:
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@ -160,10 +160,7 @@ def keyword_name(x):
    else:
        name=x.name
        if six.PY3:
-            try:
-                name = str(name,'utf-8')
-            except:
-                pass
+            name = str(name,'utf-8','ignore')
    return name


--- a/pdfminer/settings.py
+++ b/pdfminer/settings.py
@ -1,4 +1,4 @@
-STRICT = True
+STRICT = False

 try:
    from django.conf import settings
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@ -211,7 +211,7 @@ def choplist(n, seq):

 # nunpack
 def nunpack(s, default=0):
-    """Unpacks 1 to 4 byte integers (big endian)."""
+    """Unpacks 1 to 4 or 8 byte integers (big endian)."""
    l = len(s)
    if not l:
        return default
@ -223,6 +223,8 @@ def nunpack(s, default=0):
        return struct.unpack('>L', b'\x00'+s)[0]
    elif l == 4:
        return struct.unpack('>L', s)[0]
+    elif l == 8:
+        return struct.unpack('>Q', s)[0]
    else:
        raise TypeError('invalid length: %d' % l)

@ -269,7 +271,7 @@ def decode_text(s):
    if s.startswith(b'\xfe\xff'):
        return six.text_type(s[2:], 'utf-16be', 'ignore')
    else:
-        return ''.join(PDFDocEncoding[ord(c)] for c in s)
+        return ''.join(PDFDocEncoding[c] for c in s)


 # enc