386 lines
9.2 KiB
Python
386 lines
9.2 KiB
Python
#!/usr/bin/env python
|
|
import sys
|
|
stderr = sys.stderr
|
|
from struct import pack, unpack
|
|
from utils import choplist, nunpack
|
|
from psparser import PSException, PSSyntaxError, PSTypeError, \
|
|
PSLiteral, PSKeyword, literal_name, keyword_name, \
|
|
PSStackParser
|
|
try:
|
|
import cdb
|
|
except ImportError:
|
|
import pycdb as cdb
|
|
|
|
|
|
## CMap
|
|
##
|
|
class CMap:

    """In-memory character map between byte codes and CIDs.

    code2cid maps a code string to an integer CID, cid2code maps an
    integer CID to a code string, and attrs holds the values collected
    from "def" operators while a CMap file is parsed.
    """

    def __init__(self, debug=0):
        self.debug = debug
        self.code2cid = {}
        self.cid2code = {}
        self.attrs = {}
        return

    def __repr__(self):
        return '<CMap: %s>' % self.attrs.get('CMapName')

    def update(self, code2cid=None, cid2code=None):
        """Merge the given mappings into this map and return self."""
        if code2cid:
            self.code2cid.update(code2cid)
        if cid2code:
            self.cid2code.update(cid2code)
        return self

    def copycmap(self, cmap):
        """Copy every code2cid and cid2code entry of cmap into this map.

        Returns self so that calls can be chained.
        """
        self.code2cid.update(cmap.getall_code2cid())
        self.cid2code.update(cmap.getall_cid2code())
        return self

    def register_code2cid(self, code, cid):
        """Record that the code string maps to the integer cid."""
        assert isinstance(code, str)
        assert isinstance(cid, int)
        self.code2cid[code] = cid
        return self

    def register_cid2code(self, cid, code):
        """Record that the integer cid maps to code.

        A PSLiteral code is treated as a glyph name: it is looked up in
        charname2unicode and stored as a big-endian 16-bit string.
        An unknown glyph name raises KeyError.
        """
        from glyphlist import charname2unicode
        assert isinstance(cid, int)
        if isinstance(code, PSLiteral):
            code = pack('>H', charname2unicode[code.name])
        self.cid2code[cid] = code
        return self

    def decode(self, bytes):
        """Yield the CIDs for the codes found in the string bytes.

        Each character is first tried as a one-character code. If that
        fails it is held back and tried again together with the next
        character as a two-character code; a pair that is not
        registered either is dropped silently.
        """
        if self.debug:
            print >>stderr, 'decode: %r, %r' % (self, bytes)
        x = ''
        for c in bytes:
            if x:
                if x+c in self.code2cid:
                    yield self.code2cid[x+c]
                # The pending character is consumed whether or not the
                # pair matched.
                x = ''
            elif c in self.code2cid:
                yield self.code2cid[c]
            else:
                x = c
        return

    def is_vertical(self):
        """Return True if the WMode attribute selects vertical writing."""
        # attrs values are stored exactly as the parser produced them,
        # so WMode can be the integer 1 as well as the string '1'.
        return self.attrs.get('WMode', '0') in (1, '1')

    def tocid(self, code):
        """Return the CID registered for code, or None."""
        return self.code2cid.get(code)

    def tocode(self, cid):
        """Return the code registered for cid, or None."""
        return self.cid2code.get(cid)

    def getall_attrs(self):
        """Iterate over the (name, value) attribute pairs."""
        return self.attrs.iteritems()

    def getall_code2cid(self):
        """Iterate over the (code, cid) pairs."""
        return self.code2cid.iteritems()

    def getall_cid2code(self):
        """Iterate over the (cid, code) pairs."""
        return self.cid2code.iteritems()
|
|
|
|
|
|
## CDBCMap
|
|
##
|
|
class CDBCMap(CMap):

    """CMap backed by a cdb database file.

    Records whose key starts with 'c' map a code to a CID packed as a
    big-endian unsigned long; records whose key starts with 'i' map a
    packed CID to the raw code; keys starting with '/' hold attributes.
    Lookups made through decode() are cached in self.code2cid.
    """

    def __init__(self, cdbname, debug=0):
        CMap.__init__(self, debug=debug)
        self.cdbname = cdbname
        self.db = cdb.init(cdbname)
        return

    def __repr__(self):
        return '<CDBCMap: %s (%r)>' % (self.db['/CMapName'], self.cdbname)

    def tocid(self, code):
        """Return the integer CID stored for code, or None."""
        k = 'c'+code
        if not self.db.has_key(k):
            return None
        # unpack() always returns a tuple; hand back the bare integer so
        # the result matches CMap.tocid().
        (cid,) = unpack('>L', self.db[k])
        return cid

    def tocode(self, cid):
        """Return the raw code stored for the integer cid, or None."""
        k = 'i'+pack('>L', cid)
        if not self.db.has_key(k):
            return None
        return self.db[k]

    def is_vertical(self):
        """Return a true value if the stored '/WMode' record equals '1'."""
        return (self.db.has_key('/WMode') and
                self.db['/WMode'] == '1')

    def getall(self, c):
        """Yield (key without prefix, unpacked integer value) pairs.

        Walks the database with db.each() and keeps the records whose
        key starts with c. The value is decoded as a big-endian
        unsigned long, so this only suits records stored that way
        (the 'c' records).
        """
        while 1:
            x = self.db.each()
            if not x: break
            (k,v) = x
            if k.startswith(c):
                yield (k[1:], unpack('>L', v)[0])
        return

    def getall_attrs(self):
        """Yield (name, value) pairs for the '/' attribute records."""
        while 1:
            x = self.db.each()
            if not x: break
            (k,v) = x
            if k.startswith('/'):
                # NOTE(review): eval() runs whatever expression the
                # database holds; this is only safe for trusted .cdb
                # files.
                yield (k[1:], eval(v)[0])
        return

    def getall_cid2code(self):
        """Yield (cid, code) pairs from the 'i' records.

        For these records the packed integer is in the key and the raw
        code is the value (see tocode), so the key is unpacked and the
        value is passed through unchanged.
        """
        while 1:
            x = self.db.each()
            if not x: break
            (k,v) = x
            if k.startswith('i'):
                (cid,) = unpack('>L', k[1:])
                yield (cid, v)
        return

    def getall_code2cid(self):
        """Yield (code, cid) pairs from the 'c' records."""
        return self.getall('c')

    def decode(self, bytes):
        """Yield the CIDs for the codes found in the string bytes.

        Works like CMap.decode(), but a code missing from the in-memory
        table is also looked up in the database; a hit is cached in
        self.code2cid before it is yielded.
        """
        if self.debug:
            print >>stderr, 'decode: %r, %r' % (self, bytes)
        x = ''
        for c in bytes:
            if x:
                if x+c in self.code2cid:
                    yield self.code2cid[x+c]
                elif self.db.has_key('c'+x+c):
                    (dest,) = unpack('>L', self.db['c'+x+c])
                    self.code2cid[x+c] = dest
                    yield dest
                # The pending character is consumed whether or not the
                # pair matched.
                x = ''
            elif c in self.code2cid:
                yield self.code2cid[c]
            elif self.db.has_key('c'+c):
                (dest,) = unpack('>L', self.db['c'+c])
                self.code2cid[c] = dest
                yield dest
            else:
                x = c
        return
|
|
|
|
|
|
## CMapDB
|
|
##
|
|
class CMapDB:

    """Class-level registry that loads CMaps by name and caches them."""

    # Alternative names, mapped to the name actually looked up.
    CMAP_ALIAS = {
    }

    debug = 0
    dirname = None
    cdbdirname = None
    # Cache of already loaded maps, keyed by (de-aliased) CMap name.
    cmapdb = {}

    @classmethod
    def initialize(klass, dirname, cdbdirname=None, debug=0):
        """Set the directories searched by get_cmap().

        dirname holds plain CMap files; cdbdirname holds the
        '<name>.cmap.cdb' files and defaults to dirname.
        """
        klass.dirname = dirname
        klass.cdbdirname = cdbdirname or dirname
        klass.debug = debug
        return

    @classmethod
    def get_cmap(klass, cmapname):
        """Return the CMap called cmapname, loading it on first use.

        A '<cmapname>.cmap.cdb' file in cdbdirname is preferred and
        opened as a CDBCMap; otherwise the file named cmapname in
        dirname is parsed into a CMap. The result is cached.

        Raises KeyError if neither file exists.
        """
        import os.path
        cmapname = klass.CMAP_ALIAS.get(cmapname, cmapname)
        if cmapname in klass.cmapdb:
            return klass.cmapdb[cmapname]
        fname = os.path.join(klass.dirname, cmapname)
        cdbname = os.path.join(klass.cdbdirname, cmapname+'.cmap.cdb')
        if os.path.exists(cdbname):
            if 1 <= klass.debug:
                print >>stderr, 'Opening: CDBCMap %r...' % cdbname
            cmap = CDBCMap(cdbname)
        elif os.path.exists(fname):
            if 1 <= klass.debug:
                print >>stderr, 'Reading: CMap %r...' % fname
            cmap = CMap()
            fp = open(fname)
            try:
                CMapParser(cmap, fp).parse()
            finally:
                # Close the file even when parsing fails.
                fp.close()
        else:
            raise KeyError(cmapname)
        klass.cmapdb[cmapname] = cmap
        return cmap
|
|
|
|
|
|
## CMapParser
|
|
##
|
|
class CMapParser(PSStackParser):

    """Stack parser that fills a CMap object from a CMap file."""

    # Operators that merely open a section: the operand stack is
    # cleared so that the matching end operator sees only its own data.
    _SECTION_OPENERS = (
        'begincodespacerange',
        'begincidrange',
        'begincidchar',
        'beginbfrange',
        'beginbfchar',
        'beginnotdefrange',
        )

    def __init__(self, cmap, fp, debug=0):
        PSStackParser.__init__(self, fp, debug=debug)
        self.cmap = cmap
        self.in_cmap = False
        return

    def do_token(self, _, token):
        """Handle one keyword token.

        Everything outside a begincmap/endcmap pair is ignored. Inside
        it, "def" stores an attribute, "usecmap" copies another CMap,
        and the end operators of the cidrange, cidchar, bfrange and
        bfchar sections register the collected operands with self.cmap.
        """
        name = token.name

        if name == 'begincmap':
            self.in_cmap = True
            self.popall()
            return
        if name == 'endcmap':
            self.in_cmap = False
            return
        if not self.in_cmap:
            return

        if name == 'def':
            try:
                (key, value) = self.pop(2)
                self.cmap.attrs[literal_name(key)] = value
            except PSSyntaxError:
                pass
            return

        if name == 'usecmap':
            try:
                (basename,) = self.pop(1)
                self.cmap.copycmap(CMapDB.get_cmap(literal_name(basename)))
            except PSSyntaxError:
                pass
            return

        if name in self._SECTION_OPENERS:
            self.popall()
            return

        if name == 'endcodespacerange':
            # Code space ranges are only reported, never stored.
            if 1 <= self.debug:
                print >>stderr, 'codespace: %r' % self.partobj
            self.popall()
            return

        if name == 'endcidrange':
            # Operands come in (first code, last code, first cid)
            # triples.
            for (start, end, cid) in choplist(3, self.partobj):
                assert isinstance(start, str)
                assert isinstance(end, str)
                assert isinstance(cid, int)
                assert len(start) == len(end)
                head = start[:-4]
                assert head == end[:-4]
                # Only the last (at most four) characters vary in a
                # range.
                tail = start[-4:]
                first = nunpack(tail)
                last = nunpack(end[-4:])
                width = len(tail)
                assert first <= last
                for offset in xrange(last-first+1):
                    packed = pack('>L', first+offset)
                    self.cmap.register_code2cid(head+packed[-width:],
                                                cid+offset)
            self.popall()
            return

        if name == 'endcidchar':
            # NOTE(review): the second operand is registered as the
            # code and the first is unpacked into the cid, and both
            # must be strings; confirm this operand order against the
            # CMap specification.
            for (first, second) in choplist(2, self.partobj):
                assert isinstance(second, str)
                assert isinstance(first, str)
                self.cmap.register_code2cid(second, nunpack(first))
            self.popall()
            return

        if name == 'endbfrange':
            # Operands come in (first source, last source, destination)
            # triples; the destination is either a list with one entry
            # per source value or a single starting code.
            for (start, end, code) in choplist(3, self.partobj):
                assert isinstance(start, str)
                assert isinstance(end, str)
                assert len(start) == len(end)
                first = nunpack(start)
                last = nunpack(end)
                assert first <= last
                count = last-first+1
                if isinstance(code, list):
                    for offset in xrange(count):
                        self.cmap.register_cid2code(first+offset,
                                                    code[offset])
                else:
                    tail = code[-4:]
                    base = nunpack(tail)
                    head = code[:-4]
                    width = len(tail)
                    for offset in xrange(count):
                        packed = pack('>L', base+offset)
                        self.cmap.register_cid2code(first+offset,
                                                    head+packed[-width:])
            self.popall()
            return

        if name == 'endbfchar':
            for (cid, code) in choplist(2, self.partobj):
                assert isinstance(cid, str)
                assert isinstance(code, str)
                self.cmap.register_cid2code(nunpack(cid), code)
            self.popall()
            return

        if name == 'endnotdefrange':
            # Notdef ranges are only reported, never stored.
            if 1 <= self.debug:
                print >>stderr, 'notdefrange: %r' % self.partobj
            self.popall()
            return

        return
|
|
|
|
|
|
## FontMetricsDB
|
|
##
|
|
class FontMetricsDB:

    """Lookup of font metrics by font name."""

    # Imported in the class body so the table becomes a class attribute.
    from fontmetrics import FONT_METRICS

    @classmethod
    def get_metrics(cls, fontname):
        """Return the FONT_METRICS entry for fontname.

        Raises KeyError if the font name is not in the table.
        """
        metrics = cls.FONT_METRICS[fontname]
        return metrics
|
|
|
|
|
|
## EncodingDB
|
|
##
|
|
class EncodingDB:

    """Tables that map character codes of named encodings to unicode."""

    # Imported in the class body so both tables become class attributes.
    from glyphlist import charname2unicode
    from latin_enc import ENCODING

    # One code -> unicode character table per encoding, filled from the
    # (glyph name, std, mac, win, pdf) rows of ENCODING. A false code
    # means the glyph has no slot in that encoding.
    std2unicode = {}
    mac2unicode = {}
    win2unicode = {}
    pdf2unicode = {}
    for (name,std,mac,win,pdf) in ENCODING:
        c = unichr(charname2unicode[name])
        if std:
            std2unicode[std] = c
        if mac:
            mac2unicode[mac] = c
        if win:
            win2unicode[win] = c
        if pdf:
            pdf2unicode[pdf] = c

    encodings = {
        'StandardEncoding': std2unicode,
        'MacRomanEncoding': mac2unicode,
        'WinAnsiEncoding': win2unicode,
        'PDFDocEncoding': pdf2unicode,
        }

    @classmethod
    def get_encoding(klass, name, diff=None):
        """Return the code -> unicode table for the encoding called name.

        An unknown name falls back to the standard encoding table.
        Without diff the shared class-level table itself is returned.
        With diff a copy is returned with the differences applied: an
        integer item sets the current code, and each PSLiteral item
        assigns its glyph to the current code and advances it by one.
        A glyph name missing from charname2unicode leaves the table
        unchanged but still advances the code; items of any other type
        are ignored.
        """
        base = klass.encodings.get(name, klass.std2unicode)
        if not diff:
            return base
        table = base.copy()
        cid = 0
        for item in diff:
            if isinstance(item, int):
                cid = item
                continue
            if not isinstance(item, PSLiteral):
                continue
            try:
                codepoint = EncodingDB.charname2unicode[item.name]
            except KeyError:
                pass
            else:
                table[cid] = unichr(codepoint)
            cid += 1
        return table
|