Fix glyphlist bug (caused by my misunderstanding of the spec).

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@237 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-08-26 15:02:46 +00:00
parent 055b4861af
commit 4554705881
7 changed files with 4320 additions and 4299 deletions

View File

@ -163,7 +163,7 @@ class FileUnicodeMap(UnicodeMap):
assert isinstance(cid, int) assert isinstance(cid, int)
if isinstance(code, PSLiteral): if isinstance(code, PSLiteral):
# Interpret as an Adobe glyph name. # Interpret as an Adobe glyph name.
self.cid2unichr[cid] = unichr(name2unicode(code.name)) self.cid2unichr[cid] = name2unicode(code.name)
elif isinstance(code, str): elif isinstance(code, str):
# Interpret as UTF-16BE. # Interpret as UTF-16BE.
self.cid2unichr[cid] = unicode(code, 'UTF-16BE', 'ignore') self.cid2unichr[cid] = unicode(code, 'UTF-16BE', 'ignore')

View File

@ -2,7 +2,7 @@
import re import re
from psparser import PSLiteral from psparser import PSLiteral
from glyphlist import charname2unicode from glyphlist import glyphname2unicode
from latin_enc import ENCODING from latin_enc import ENCODING
@ -11,11 +11,11 @@ from latin_enc import ENCODING
STRIP_NAME = re.compile(r'[0-9]+') STRIP_NAME = re.compile(r'[0-9]+')
def name2unicode(name): def name2unicode(name):
"""Converts Adobe glyph names to Unicode numbers.""" """Converts Adobe glyph names to Unicode numbers."""
if name in charname2unicode: if name in glyphname2unicode:
return charname2unicode[name] return glyphname2unicode[name]
m = STRIP_NAME.search(name) m = STRIP_NAME.search(name)
if not m: raise KeyError(name) if not m: raise KeyError(name)
return int(m.group(0)) return unichr(int(m.group(0)))
## EncodingDB ## EncodingDB
@ -27,7 +27,7 @@ class EncodingDB(object):
win2unicode = {} win2unicode = {}
pdf2unicode = {} pdf2unicode = {}
for (name,std,mac,win,pdf) in ENCODING: for (name,std,mac,win,pdf) in ENCODING:
c = unichr(name2unicode(name)) c = name2unicode(name)
if std: std2unicode[std] = c if std: std2unicode[std] = c
if mac: mac2unicode[mac] = c if mac: mac2unicode[mac] = c
if win: win2unicode[win] = c if win: win2unicode[win] = c
@ -51,7 +51,7 @@ class EncodingDB(object):
cid = x cid = x
elif isinstance(x, PSLiteral): elif isinstance(x, PSLiteral):
try: try:
cid2unicode[cid] = unichr(name2unicode(x.name)) cid2unicode[cid] = name2unicode(x.name)
except KeyError: except KeyError:
pass pass
cid += 1 cid += 1

File diff suppressed because it is too large Load Diff

View File

@ -212,6 +212,7 @@ class LTChar(LTItem, LTText):
self.adv = font.char_width(cid) * fontsize * scaling self.adv = font.char_width(cid) * fontsize * scaling
try: try:
text = font.to_unichr(cid) text = font.to_unichr(cid)
assert isinstance(text, unicode), text
except PDFUnicodeNotDefined: except PDFUnicodeNotDefined:
text = '?' text = '?'
(a,b,c,d,e,f) = self.matrix (a,b,c,d,e,f) = self.matrix

View File

@ -159,7 +159,7 @@ class TagExtractor(PDFDevice):
def begin_tag(self, tag, props=None): def begin_tag(self, tag, props=None):
s = '' s = ''
if props: if isinstance(props, dict):
s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v) s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
in sorted(props.iteritems()) ) in sorted(props.iteritems()) )
self.outfp.write('<%s%s>' % (enc(tag.name), s)) self.outfp.write('<%s%s>' % (enc(tag.name), s))

View File

@ -1,8 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
import fileinput import fileinput
stdout = sys.stdout
stderr = sys.stderr
def main(argv): def main(argv):
fonts = {} fonts = {}

24
tools/conv_glyphlist.py Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env python
import sys
import fileinput
def main(argv):
    """Convert a glyph-list text file into a ``glyphname2unicode`` dict literal.

    Input lines are read with ``fileinput`` from the files named in
    ``argv[1:]`` (standard input when none are given).  Blank lines and
    lines starting with ``#`` are echoed unchanged.  Every other line must
    have the form ``name;XXXX[ XXXX ...]`` and is written as one dict entry
    mapping the name to a ``u'\\uXXXX...'`` literal.

    The generated Python source is written to standard output.

    Raises:
        ValueError: if a data line does not contain exactly one ``;``.
    """
    # sys.stdout.write emits the same bytes as the print statement did and
    # behaves the same on Python 2 and Python 3.
    out = sys.stdout.write
    # 0: table not started, 1: inside the dict literal, 2: dict literal closed.
    state = 0
    # Falling back to None keeps fileinput's default (sys.argv[1:]) when the
    # caller passes an empty or missing argv.
    for line in fileinput.input(argv[1:] if argv else None):
        line = line.strip()
        if not line or line.startswith('#'):
            if state == 1:
                # First blank/comment line after the entries closes the table.
                state = 2
                out('}\n')
                out('\n')
            out(line + '\n')
            continue
        if state == 0:
            # First data line opens the table.
            out('\n')
            out('glyphname2unicode = {\n')
            state = 1
        (name, x) = line.split(';')
        codes = x.split(' ')
        out(' %r: u\'%s\',\n'
            % (name, ''.join('\\u%s' % code for code in codes)))
    if state == 1:
        # Input ended on a data line: close the dict so the generated
        # module is still valid Python.
        out('}\n')
if __name__ == '__main__': sys.exit(main(sys.argv))