Miscellaneous bug fixes (#47)
* utils.decode_text: fix "TypeError: ord() expected string of length 1, but int found"; fixes https://github.com/goulu/pdfminer/issues/24
* pdfinterp.execute: don't assume that every keyword name can be decoded as utf-8; fixes "'str' does not support the buffer interface", https://github.com/goulu/pdfminer/issues/23
* default settings.STRICT to False, for compatibility with the original pdfminer
* PDFCIDFont: handle font registry/orderings that may be PDFObjRefs
* utils.nunpack: handle 8-byte integers
pull/55/head
parent
fd63dbf62e
commit
9439a3a31a
|
@ -640,8 +640,8 @@ class PDFCIDFont(PDFFont):
|
||||||
raise PDFFontError('BaseFont is missing')
|
raise PDFFontError('BaseFont is missing')
|
||||||
self.basefont = 'unknown'
|
self.basefont = 'unknown'
|
||||||
self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
|
self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
|
||||||
self.cidcoding = '%s-%s' % (self.cidsysteminfo.get('Registry', b'unknown').decode("latin1"),
|
self.cidcoding = '%s-%s' % (resolve1(self.cidsysteminfo.get('Registry', b'unknown')).decode("latin1"),
|
||||||
self.cidsysteminfo.get('Ordering', b'unknown').decode("latin1"))
|
resolve1(self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1"))
|
||||||
try:
|
try:
|
||||||
name = literal_name(spec['Encoding'])
|
name = literal_name(spec['Encoding'])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
|
|
@ -160,10 +160,7 @@ def keyword_name(x):
|
||||||
else:
|
else:
|
||||||
name=x.name
|
name=x.name
|
||||||
if six.PY3:
|
if six.PY3:
|
||||||
try:
|
name = str(name,'utf-8','ignore')
|
||||||
name = str(name,'utf-8')
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
STRICT = True
|
STRICT = False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
|
@ -211,7 +211,7 @@ def choplist(n, seq):
|
||||||
|
|
||||||
# nunpack
|
# nunpack
|
||||||
def nunpack(s, default=0):
|
def nunpack(s, default=0):
|
||||||
"""Unpacks 1 to 4 byte integers (big endian)."""
|
"""Unpacks 1 to 4 or 8 byte integers (big endian)."""
|
||||||
l = len(s)
|
l = len(s)
|
||||||
if not l:
|
if not l:
|
||||||
return default
|
return default
|
||||||
|
@ -223,6 +223,8 @@ def nunpack(s, default=0):
|
||||||
return struct.unpack('>L', b'\x00'+s)[0]
|
return struct.unpack('>L', b'\x00'+s)[0]
|
||||||
elif l == 4:
|
elif l == 4:
|
||||||
return struct.unpack('>L', s)[0]
|
return struct.unpack('>L', s)[0]
|
||||||
|
elif l == 8:
|
||||||
|
return struct.unpack('>Q', s)[0]
|
||||||
else:
|
else:
|
||||||
raise TypeError('invalid length: %d' % l)
|
raise TypeError('invalid length: %d' % l)
|
||||||
|
|
||||||
|
@ -269,7 +271,7 @@ def decode_text(s):
|
||||||
if s.startswith(b'\xfe\xff'):
|
if s.startswith(b'\xfe\xff'):
|
||||||
return six.text_type(s[2:], 'utf-16be', 'ignore')
|
return six.text_type(s[2:], 'utf-16be', 'ignore')
|
||||||
else:
|
else:
|
||||||
return ''.join(PDFDocEncoding[ord(c)] for c in s)
|
return ''.join(PDFDocEncoding[c] for c in s)
|
||||||
|
|
||||||
|
|
||||||
# enc
|
# enc
|
||||||
|
|
Loading…
Reference in New Issue