Fix glyphlist bug (caused by my misunderstanding of the spec).

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@237 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-08-26 15:02:46 +00:00
parent 055b4861af
commit 4554705881
7 changed files with 4320 additions and 4299 deletions

View File

@ -163,7 +163,7 @@ class FileUnicodeMap(UnicodeMap):
assert isinstance(cid, int)
if isinstance(code, PSLiteral):
# Interpret as an Adobe glyph name.
self.cid2unichr[cid] = unichr(name2unicode(code.name))
self.cid2unichr[cid] = name2unicode(code.name)
elif isinstance(code, str):
# Interpret as UTF-16BE.
self.cid2unichr[cid] = unicode(code, 'UTF-16BE', 'ignore')

View File

@ -2,7 +2,7 @@
import re
from psparser import PSLiteral
from glyphlist import charname2unicode
from glyphlist import glyphname2unicode
from latin_enc import ENCODING
@ -11,11 +11,11 @@ from latin_enc import ENCODING
STRIP_NAME = re.compile(r'[0-9]+')
def name2unicode(name):
"""Converts Adobe glyph names to Unicode numbers."""
if name in charname2unicode:
return charname2unicode[name]
if name in glyphname2unicode:
return glyphname2unicode[name]
m = STRIP_NAME.search(name)
if not m: raise KeyError(name)
return int(m.group(0))
return unichr(int(m.group(0)))
## EncodingDB
@ -27,7 +27,7 @@ class EncodingDB(object):
win2unicode = {}
pdf2unicode = {}
for (name,std,mac,win,pdf) in ENCODING:
c = unichr(name2unicode(name))
c = name2unicode(name)
if std: std2unicode[std] = c
if mac: mac2unicode[mac] = c
if win: win2unicode[win] = c
@ -51,7 +51,7 @@ class EncodingDB(object):
cid = x
elif isinstance(x, PSLiteral):
try:
cid2unicode[cid] = unichr(name2unicode(x.name))
cid2unicode[cid] = name2unicode(x.name)
except KeyError:
pass
cid += 1

File diff suppressed because it is too large Load Diff

View File

@ -212,6 +212,7 @@ class LTChar(LTItem, LTText):
self.adv = font.char_width(cid) * fontsize * scaling
try:
text = font.to_unichr(cid)
assert isinstance(text, unicode), text
except PDFUnicodeNotDefined:
text = '?'
(a,b,c,d,e,f) = self.matrix

View File

@ -159,7 +159,7 @@ class TagExtractor(PDFDevice):
def begin_tag(self, tag, props=None):
s = ''
if props:
if isinstance(props, dict):
s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
in sorted(props.iteritems()) )
self.outfp.write('<%s%s>' % (enc(tag.name), s))

View File

@ -1,8 +1,6 @@
#!/usr/bin/env python
import sys
import fileinput
stdout = sys.stdout
stderr = sys.stderr
def main(argv):
fonts = {}

24
tools/conv_glyphlist.py Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env python
import sys
import fileinput
def main(argv):
    """Convert an Adobe glyph list into Python source for glyphname2unicode.

    Reads the files named in argv[1:] (standard input when none are given)
    and prints Python source text to standard output.

    Blank lines and lines starting with '#' are copied through unchanged.
    Every other line must have the form ``name;CODE[ CODE ...]``; it becomes
    one dict entry mapping the glyph name to a unicode string literal built
    from one unicode escape per CODE.

    The dict is opened at the first data line and closed at the first
    blank/comment line that follows the entries, or at end of input if no
    such line exists.  Data lines appearing after the dict has been closed
    are still emitted as entries (unchanged from the original behaviour).

    argv: command-line style list; argv[0] is ignored.
    Returns 0.
    Raises ValueError if a data line does not contain exactly one ';'.
    """
    # Single-argument print(...) is valid on both Python 2 and Python 3 and
    # produces identical output; print('') stands in for a bare `print`.
    # state: 0 = dict not opened yet, 1 = inside the dict, 2 = dict closed.
    state = 0
    # Honour the argv parameter instead of relying on fileinput's implicit
    # sys.argv lookup; an empty list still makes fileinput read stdin.
    stream = fileinput.input(argv[1:])
    try:
        for line in stream:
            line = line.strip()
            if not line or line.startswith('#'):
                if state == 1:
                    # First blank/comment line after the entries ends the dict.
                    state = 2
                    print('}')
                    print('')
                print(line)
                continue
            if state == 0:
                print('')
                print('glyphname2unicode = {')
                state = 1
            (name, x) = line.split(';')
            # split() with no argument tolerates repeated spaces, which would
            # otherwise yield empty codes and a malformed escape sequence.
            codes = x.split()
            escaped = ''.join('\\u%s' % code for code in codes)
            print(" %r: u'%s'," % (name, escaped))
    finally:
        stream.close()
    if state == 1:
        # Input ended right after the last entry: close the dict so the
        # generated module is still valid Python.
        print('}')
    return 0
# When executed as a script, hand the command line to main() and use its
# return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))