fixed: encoding problem with vertical characters.

pull/1/head
Yusuke Shinyama 2013-10-22 18:44:40 +09:00
parent e927bd307e
commit 8a70a9f657
1 changed file with 8 additions and 9 deletions

View File

@@ -47,6 +47,7 @@ class CMapConverter(object):
if not line: continue if not line: continue
values = line.split('\t') values = line.split('\t')
if encs is None: if encs is None:
assert values[0] == 'CID'
encs = values encs = values
continue continue
@@ -84,17 +85,13 @@ class CMapConverter(object):
(c,_) = chars[0] (c,_) = chars[0]
return c return c
cid = None cid = int(values[0])
unimap_h = {} unimap_h = {}
unimap_v = {} unimap_v = {}
for (enc,value) in zip(encs, values): for (enc,value) in zip(encs, values):
if enc == 'CID': if enc == 'CID': continue
cid = int(value) if value == '*': continue
continue
assert cid is not None
if value == '*':
continue
# hcodes, vcodes: encoded bytes for each writing mode. # hcodes, vcodes: encoded bytes for each writing mode.
hcodes = [] hcodes = []
vcodes = [] vcodes = []
@@ -124,11 +121,13 @@ class CMapConverter(object):
for code in hcodes: for code in hcodes:
put(hmap, code, cid) put(hmap, code, cid)
put(vmap, code, cid) put(vmap, code, cid)
# Determine the "most popular" candidate. # Determine the "most popular" candidate.
if unimap_h: if unimap_h:
self.cid2unichr_h[cid] = pick(unimap_h) self.cid2unichr_h[cid] = pick(unimap_h)
if unimap_v or unimap_h:
self.cid2unichr_v[cid] = pick(unimap_v or unimap_h) self.cid2unichr_v[cid] = pick(unimap_v or unimap_h)
return return
def dump_cmap(self, fp, enc): def dump_cmap(self, fp, enc):