conv_cmap py3 compat

pull/2/head
cybjit 2014-09-11 23:34:09 +02:00
parent cba5a42ba8
commit ed13f7c47d
2 changed files with 14 additions and 9 deletions

View File

@@ -3,7 +3,7 @@
PACKAGE=pdfminer PACKAGE=pdfminer
PYTHON=python2 PYTHON=python
GIT=git GIT=git
RM=rm -f RM=rm -f
CP=cp -f CP=cp -f

View File

@@ -4,6 +4,8 @@ try:
import cPickle as pickle import cPickle as pickle
except ImportError: except ImportError:
import pickle as pickle import pickle as pickle
import codecs
import six
## CMapConverter ## CMapConverter
@@ -56,6 +58,7 @@ class CMapConverter(object):
def put(dmap, code, cid, force=False): def put(dmap, code, cid, force=False):
for b in code[:-1]: for b in code[:-1]:
if six.PY2:
b = ord(b) b = ord(b)
if b in dmap: if b in dmap:
dmap = dmap[b] dmap = dmap[b]
@@ -63,7 +66,9 @@ class CMapConverter(object):
d = {} d = {}
dmap[b] = d dmap[b] = d
dmap = d dmap = d
b = ord(code[-1]) b = code[-1]
if six.PY2:
b = ord(b)
if force or ((b not in dmap) or dmap[b] == cid): if force or ((b not in dmap) or dmap[b] == cid):
dmap[b] = cid dmap[b] = cid
return return
@@ -83,8 +88,8 @@ class CMapConverter(object):
return return
def pick(unimap): def pick(unimap):
chars = unimap.items() chars = list(unimap.items())
chars.sort(key=(lambda (c,n):(n,-ord(c))), reverse=True) chars.sort(key=(lambda x:(x[1],-ord(x[0]))), reverse=True)
(c,_) = chars[0] (c,_) = chars[0]
return c return c
@@ -103,7 +108,7 @@ class CMapConverter(object):
if vertical: if vertical:
code = code[:-1] code = code[:-1]
try: try:
code = code.decode('hex') code = codecs.decode(code, 'hex_codec')
except: except:
code = chr(int(code, 16)) code = chr(int(code, 16))
if vertical: if vertical:
@@ -138,7 +143,7 @@ class CMapConverter(object):
IS_VERTICAL=self.is_vertical.get(enc, False), IS_VERTICAL=self.is_vertical.get(enc, False),
CODE2CID=self.code2cid.get(enc), CODE2CID=self.code2cid.get(enc),
) )
fp.write(pickle.dumps(data)) fp.write(pickle.dumps(data, 2))
return return
def dump_unicodemap(self, fp): def dump_unicodemap(self, fp):
@@ -146,7 +151,7 @@ class CMapConverter(object):
CID2UNICHR_H=self.cid2unichr_h, CID2UNICHR_H=self.cid2unichr_h,
CID2UNICHR_V=self.cid2unichr_v, CID2UNICHR_V=self.cid2unichr_v,
) )
fp.write(pickle.dumps(data)) fp.write(pickle.dumps(data, 2))
return return
# main # main
@@ -175,7 +180,7 @@ def main(argv):
converter = CMapConverter(enc2codec) converter = CMapConverter(enc2codec)
for path in args: for path in args:
print ('reading: %r...' % path) print ('reading: %r...' % path)
fp = file(path) fp = open(path)
converter.load(fp) converter.load(fp)
fp.close() fp.close()