diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 00361fa..ae18f0b 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -640,8 +640,8 @@ class PDFCIDFont(PDFFont): raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {})) - self.cidcoding = '%s-%s' % (self.cidsysteminfo.get('Registry', b'unknown').decode("latin1"), - self.cidsysteminfo.get('Ordering', b'unknown').decode("latin1")) + self.cidcoding = '%s-%s' % (resolve1(self.cidsysteminfo.get('Registry', b'unknown')).decode("latin1"), + resolve1(self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1")) try: name = literal_name(spec['Encoding']) except KeyError: diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index 644a8c0..6ebd583 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -160,10 +160,7 @@ def keyword_name(x): else: name=x.name if six.PY3: - try: - name = str(name,'utf-8') - except: - pass + name = str(name,'utf-8','ignore') return name diff --git a/pdfminer/settings.py b/pdfminer/settings.py index 344d066..2dd99c0 100644 --- a/pdfminer/settings.py +++ b/pdfminer/settings.py @@ -1,4 +1,4 @@ -STRICT = True +STRICT = False try: from django.conf import settings diff --git a/pdfminer/utils.py b/pdfminer/utils.py index a6ccabe..50e6447 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -211,7 +211,7 @@ def choplist(n, seq): # nunpack def nunpack(s, default=0): - """Unpacks 1 to 4 byte integers (big endian).""" + """Unpacks 1 to 4 or 8 byte integers (big endian).""" l = len(s) if not l: return default @@ -223,6 +223,8 @@ def nunpack(s, default=0): return struct.unpack('>L', b'\x00'+s)[0] elif l == 4: return struct.unpack('>L', s)[0] + elif l == 8: + return struct.unpack('>Q', s)[0] else: raise TypeError('invalid length: %d' % l) @@ -269,7 +271,7 @@ def decode_text(s): if s.startswith(b'\xfe\xff'): return six.text_type(s[2:], 'utf-16be', 'ignore') else: - return ''.join(PDFDocEncoding[ord(c)] for c in s) + return ''.join(PDFDocEncoding[c] for c in s) # enc