From a6f31a713dc662234fe60d1e52d282a795984da7 Mon Sep 17 00:00:00 2001 From: cybjit Date: Sun, 7 Sep 2014 18:41:04 +0200 Subject: [PATCH] cmap bytes and decode --- pdfminer/cmapdb.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 6dee4a7..e964b8f 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -180,11 +180,11 @@ class FileUnicodeMap(UnicodeMap): if isinstance(code, PSLiteral): # Interpret as an Adobe glyph name. self.cid2unichr[cid] = name2unicode(code.name) - elif isinstance(code, str): + elif isinstance(code, bytes): # Interpret as UTF-16BE. - self.cid2unichr[cid] = unicode(code, 'UTF-16BE', 'ignore') + self.cid2unichr[cid] = code.decode('UTF-16BE', 'ignore') elif isinstance(code, int): - self.cid2unichr[cid] = unichr(code) + self.cid2unichr[cid] = six.unichr(code) else: raise TypeError(code) return @@ -379,7 +379,7 @@ class CMapParser(PSStackParser): if token is self.KEYWORD_ENDBFRANGE: objs = [obj for (__, obj) in self.popall()] for (s, e, code) in choplist(3, objs): - if (not isinstance(s, str) or not isinstance(e, str) or + if (not isinstance(s, bytes) or not isinstance(e, bytes) or len(s) != len(e)): continue s1 = nunpack(s) @@ -404,7 +404,7 @@ class CMapParser(PSStackParser): if token is self.KEYWORD_ENDBFCHAR: objs = [obj for (__, obj) in self.popall()] for (cid, code) in choplist(2, objs): - if isinstance(cid, str) and isinstance(code, str): + if isinstance(cid, bytes) and isinstance(code, bytes): self.cmap.add_cid2unichr(nunpack(cid), code) return