Fix glyphlist bug (caused by my misunderstanding of the spec).
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@237 1aa58f4a-7d42-0410-adbc-911cccaed67cpull/1/head
parent
055b4861af
commit
4554705881
|
@ -163,7 +163,7 @@ class FileUnicodeMap(UnicodeMap):
|
|||
assert isinstance(cid, int)
|
||||
if isinstance(code, PSLiteral):
|
||||
# Interpret as an Adobe glyph name.
|
||||
self.cid2unichr[cid] = unichr(name2unicode(code.name))
|
||||
self.cid2unichr[cid] = name2unicode(code.name)
|
||||
elif isinstance(code, str):
|
||||
# Interpret as UTF-16BE.
|
||||
self.cid2unichr[cid] = unicode(code, 'UTF-16BE', 'ignore')
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
import re
|
||||
from psparser import PSLiteral
|
||||
from glyphlist import charname2unicode
|
||||
from glyphlist import glyphname2unicode
|
||||
from latin_enc import ENCODING
|
||||
|
||||
|
||||
|
@ -11,11 +11,11 @@ from latin_enc import ENCODING
|
|||
STRIP_NAME = re.compile(r'[0-9]+')
|
||||
def name2unicode(name):
    """Converts an Adobe glyph name to a Unicode string.

    The name is first looked up in the ``glyphname2unicode`` table. If it
    is not listed there, the first run of decimal digits found in the name
    is taken as a code point and converted with ``unichr``.

    NOTE(review): ``glyphname2unicode`` values are presumably unicode
    strings (a glyph name may map to more than one code point), so both
    return paths yield text rather than a bare integer -- confirm against
    the generated glyphlist module.

    Raises:
      KeyError: the name is not in the table and contains no digits.
    """
    if name in glyphname2unicode:
        return glyphname2unicode[name]
    m = STRIP_NAME.search(name)
    if not m:
        raise KeyError(name)
    # Fallback for names that only embed a numeric code.
    return unichr(int(m.group(0)))
|
||||
|
||||
|
||||
## EncodingDB
|
||||
|
@ -27,7 +27,7 @@ class EncodingDB(object):
|
|||
win2unicode = {}
|
||||
pdf2unicode = {}
|
||||
for (name,std,mac,win,pdf) in ENCODING:
|
||||
c = unichr(name2unicode(name))
|
||||
c = name2unicode(name)
|
||||
if std: std2unicode[std] = c
|
||||
if mac: mac2unicode[mac] = c
|
||||
if win: win2unicode[win] = c
|
||||
|
@ -51,7 +51,7 @@ class EncodingDB(object):
|
|||
cid = x
|
||||
elif isinstance(x, PSLiteral):
|
||||
try:
|
||||
cid2unicode[cid] = unichr(name2unicode(x.name))
|
||||
cid2unicode[cid] = name2unicode(x.name)
|
||||
except KeyError:
|
||||
pass
|
||||
cid += 1
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -212,6 +212,7 @@ class LTChar(LTItem, LTText):
|
|||
self.adv = font.char_width(cid) * fontsize * scaling
|
||||
try:
|
||||
text = font.to_unichr(cid)
|
||||
assert isinstance(text, unicode), text
|
||||
except PDFUnicodeNotDefined:
|
||||
text = '?'
|
||||
(a,b,c,d,e,f) = self.matrix
|
||||
|
|
|
@ -159,7 +159,7 @@ class TagExtractor(PDFDevice):
|
|||
|
||||
def begin_tag(self, tag, props=None):
|
||||
s = ''
|
||||
if props:
|
||||
if isinstance(props, dict):
|
||||
s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
|
||||
in sorted(props.iteritems()) )
|
||||
self.outfp.write('<%s%s>' % (enc(tag.name), s))
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
import sys
|
||||
import fileinput
|
||||
stdout = sys.stdout
|
||||
stderr = sys.stderr
|
||||
|
||||
def main(argv):
|
||||
fonts = {}
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env python
|
||||
import sys
|
||||
import fileinput
|
||||
|
||||
def main(argv):
    """Convert an Adobe glyph list into a Python ``glyphname2unicode`` dict.

    Reads the files named in ``argv[1:]`` (standard input when none are
    given) and writes Python source to standard output:

    * blank lines and ``#`` comment lines are copied through unchanged;
    * each ``name;CODE[ CODE...]`` line becomes one dict entry mapping the
      glyph name to a unicode literal built from the listed hex codes.

    The dict is opened at the first data line and closed at the first
    blank/comment line that follows the data, or at end of input if the
    data runs to the last line.

    Args:
      argv: command-line style argument list; ``argv[0]`` is ignored.
    """
    write = sys.stdout.write
    # 0: dict not opened yet, 1: inside the dict, 2: dict already closed.
    state = 0
    for line in fileinput.input(argv[1:]):
        line = line.strip()
        if not line or line.startswith('#'):
            if state == 1:
                state = 2
                write('}\n\n')
            write(line + '\n')
            continue
        if state == 0:
            write('\nglyphname2unicode = {\n')
            state = 1
        (name, x) = line.split(';')
        # split() tolerates repeated spaces, which would otherwise
        # produce an invalid bare "\u" escape in the output.
        codes = x.split()
        write(' %r: u\'%s\',\n'
              % (name, ''.join('\\u%s' % code for code in codes)))
    if state == 1:
        # Input ended on a data line: close the dict so the generated
        # module is still valid Python.
        write('}\n')
|
||||
|
||||
if __name__ == '__main__': sys.exit(main(sys.argv))
|
Loading…
Reference in New Issue