Fix glyphlist bug (caused by my misunderstanding of the spec).
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@237 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
055b4861af
commit
4554705881
|
@ -163,7 +163,7 @@ class FileUnicodeMap(UnicodeMap):
|
||||||
assert isinstance(cid, int)
|
assert isinstance(cid, int)
|
||||||
if isinstance(code, PSLiteral):
|
if isinstance(code, PSLiteral):
|
||||||
# Interpret as an Adobe glyph name.
|
# Interpret as an Adobe glyph name.
|
||||||
self.cid2unichr[cid] = unichr(name2unicode(code.name))
|
self.cid2unichr[cid] = name2unicode(code.name)
|
||||||
elif isinstance(code, str):
|
elif isinstance(code, str):
|
||||||
# Interpret as UTF-16BE.
|
# Interpret as UTF-16BE.
|
||||||
self.cid2unichr[cid] = unicode(code, 'UTF-16BE', 'ignore')
|
self.cid2unichr[cid] = unicode(code, 'UTF-16BE', 'ignore')
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from psparser import PSLiteral
|
from psparser import PSLiteral
|
||||||
from glyphlist import charname2unicode
|
from glyphlist import glyphname2unicode
|
||||||
from latin_enc import ENCODING
|
from latin_enc import ENCODING
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,11 +11,11 @@ from latin_enc import ENCODING
|
||||||
STRIP_NAME = re.compile(r'[0-9]+')
|
STRIP_NAME = re.compile(r'[0-9]+')
|
||||||
def name2unicode(name):
|
def name2unicode(name):
|
||||||
"""Converts Adobe glyph names to Unicode numbers."""
|
"""Converts Adobe glyph names to Unicode numbers."""
|
||||||
if name in charname2unicode:
|
if name in glyphname2unicode:
|
||||||
return charname2unicode[name]
|
return glyphname2unicode[name]
|
||||||
m = STRIP_NAME.search(name)
|
m = STRIP_NAME.search(name)
|
||||||
if not m: raise KeyError(name)
|
if not m: raise KeyError(name)
|
||||||
return int(m.group(0))
|
return unichr(int(m.group(0)))
|
||||||
|
|
||||||
|
|
||||||
## EncodingDB
|
## EncodingDB
|
||||||
|
@ -27,7 +27,7 @@ class EncodingDB(object):
|
||||||
win2unicode = {}
|
win2unicode = {}
|
||||||
pdf2unicode = {}
|
pdf2unicode = {}
|
||||||
for (name,std,mac,win,pdf) in ENCODING:
|
for (name,std,mac,win,pdf) in ENCODING:
|
||||||
c = unichr(name2unicode(name))
|
c = name2unicode(name)
|
||||||
if std: std2unicode[std] = c
|
if std: std2unicode[std] = c
|
||||||
if mac: mac2unicode[mac] = c
|
if mac: mac2unicode[mac] = c
|
||||||
if win: win2unicode[win] = c
|
if win: win2unicode[win] = c
|
||||||
|
@ -51,7 +51,7 @@ class EncodingDB(object):
|
||||||
cid = x
|
cid = x
|
||||||
elif isinstance(x, PSLiteral):
|
elif isinstance(x, PSLiteral):
|
||||||
try:
|
try:
|
||||||
cid2unicode[cid] = unichr(name2unicode(x.name))
|
cid2unicode[cid] = name2unicode(x.name)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
cid += 1
|
cid += 1
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -212,6 +212,7 @@ class LTChar(LTItem, LTText):
|
||||||
self.adv = font.char_width(cid) * fontsize * scaling
|
self.adv = font.char_width(cid) * fontsize * scaling
|
||||||
try:
|
try:
|
||||||
text = font.to_unichr(cid)
|
text = font.to_unichr(cid)
|
||||||
|
assert isinstance(text, unicode), text
|
||||||
except PDFUnicodeNotDefined:
|
except PDFUnicodeNotDefined:
|
||||||
text = '?'
|
text = '?'
|
||||||
(a,b,c,d,e,f) = self.matrix
|
(a,b,c,d,e,f) = self.matrix
|
||||||
|
|
|
@ -159,7 +159,7 @@ class TagExtractor(PDFDevice):
|
||||||
|
|
||||||
def begin_tag(self, tag, props=None):
|
def begin_tag(self, tag, props=None):
|
||||||
s = ''
|
s = ''
|
||||||
if props:
|
if isinstance(props, dict):
|
||||||
s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
|
s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
|
||||||
in sorted(props.iteritems()) )
|
in sorted(props.iteritems()) )
|
||||||
self.outfp.write('<%s%s>' % (enc(tag.name), s))
|
self.outfp.write('<%s%s>' % (enc(tag.name), s))
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import sys
|
import sys
|
||||||
import fileinput
|
import fileinput
|
||||||
stdout = sys.stdout
|
|
||||||
stderr = sys.stderr
|
|
||||||
|
|
||||||
def main(argv):
|
def main(argv):
|
||||||
fonts = {}
|
fonts = {}
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
import fileinput
|
||||||
|
|
||||||
|
def main(argv):
|
||||||
|
state = 0
|
||||||
|
for line in fileinput.input():
|
||||||
|
line = line.strip()
|
||||||
|
if not line or line.startswith('#'):
|
||||||
|
if state == 1:
|
||||||
|
state = 2
|
||||||
|
print '}'
|
||||||
|
print
|
||||||
|
print line
|
||||||
|
continue
|
||||||
|
if state == 0:
|
||||||
|
print
|
||||||
|
print 'glyphname2unicode = {'
|
||||||
|
state = 1
|
||||||
|
(name,x) = line.split(';')
|
||||||
|
codes = x.split(' ')
|
||||||
|
print ' %r: u\'%s\',' % (name, ''.join( '\\u%s' % code for code in codes ))
|
||||||
|
|
||||||
|
if __name__ == '__main__': sys.exit(main(sys.argv))
|
Loading…
Reference in New Issue