Miscellaneous bug fixes (#47)
* utils.decode_text: fix "TypeError: ord() expected string of length 1, but int found"; fixes https://github.com/goulu/pdfminer/issues/24
* pdfinterp.execute: don't assume that every keyword name can be decoded as utf-8; fixes "'str' does not support the buffer interface", https://github.com/goulu/pdfminer/issues/23
* default settings.STRICT to False, for compatibility with the original pdfminer
* PDFCIDFont: handle font registry/orderings that may be PDFObjRefs
* utils.nunpack: handle 8-byte integers
pull/55/head
parent
fd63dbf62e
commit
9439a3a31a
|
@ -640,8 +640,8 @@ class PDFCIDFont(PDFFont):
|
|||
raise PDFFontError('BaseFont is missing')
|
||||
self.basefont = 'unknown'
|
||||
self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
|
||||
self.cidcoding = '%s-%s' % (self.cidsysteminfo.get('Registry', b'unknown').decode("latin1"),
|
||||
self.cidsysteminfo.get('Ordering', b'unknown').decode("latin1"))
|
||||
self.cidcoding = '%s-%s' % (resolve1(self.cidsysteminfo.get('Registry', b'unknown')).decode("latin1"),
|
||||
resolve1(self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1"))
|
||||
try:
|
||||
name = literal_name(spec['Encoding'])
|
||||
except KeyError:
|
||||
|
|
|
@ -160,10 +160,7 @@ def keyword_name(x):
|
|||
else:
|
||||
name=x.name
|
||||
if six.PY3:
|
||||
try:
|
||||
name = str(name,'utf-8')
|
||||
except:
|
||||
pass
|
||||
name = str(name,'utf-8','ignore')
|
||||
return name
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
STRICT = True
|
||||
STRICT = False
|
||||
|
||||
try:
|
||||
from django.conf import settings
|
||||
|
|
|
@ -211,7 +211,7 @@ def choplist(n, seq):
|
|||
|
||||
# nunpack
|
||||
def nunpack(s, default=0):
|
||||
"""Unpacks 1 to 4 byte integers (big endian)."""
|
||||
"""Unpacks 1 to 4 or 8 byte integers (big endian)."""
|
||||
l = len(s)
|
||||
if not l:
|
||||
return default
|
||||
|
@ -223,6 +223,8 @@ def nunpack(s, default=0):
|
|||
return struct.unpack('>L', b'\x00'+s)[0]
|
||||
elif l == 4:
|
||||
return struct.unpack('>L', s)[0]
|
||||
elif l == 8:
|
||||
return struct.unpack('>Q', s)[0]
|
||||
else:
|
||||
raise TypeError('invalid length: %d' % l)
|
||||
|
||||
|
@ -269,7 +271,7 @@ def decode_text(s):
|
|||
if s.startswith(b'\xfe\xff'):
|
||||
return six.text_type(s[2:], 'utf-16be', 'ignore')
|
||||
else:
|
||||
return ''.join(PDFDocEncoding[ord(c)] for c in s)
|
||||
return ''.join(PDFDocEncoding[c] for c in s)
|
||||
|
||||
|
||||
# enc
|
||||
|
|
Loading…
Reference in New Issue