pdfminer.six/tools/conv_cmap.py

62 lines
1.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python
import sys, os.path
# Module-level alias for the standard error stream; the status messages
# printed by the functions in this file are written to it.
stderr = sys.stderr
def dumpcdb(cmap, cdbfile, verbose=1):
    """Dump the contents of *cmap* into the CDB file *cdbfile*.

    The ``cdb`` module is used when it can be imported; otherwise
    ``pdfminer.pycdb`` is used in its place.

    Three families of records are added, told apart by the first
    character of the key:

      '/' + name          -> repr() of the attribute value
      'c' + code          -> the CID packed with struct format '>L'
      'i' + packed CID    -> the code

    cmap:    object providing getall_attrs(), getall_code2cid() and
             getall_cid2code(), each yielding pairs.
    cdbfile: path of the CDB file to create; cdbfile + '.tmp' is passed
             to cdbmake as its second argument (presumably a temporary
             file name -- confirm against the cdb module in use).
    verbose: when true, a 'Writing: ...' line is sent to stderr.
    """
    import struct
    try:
        import cdb
    except ImportError:
        import pdfminer.pycdb as cdb

    maker = cdb.cdbmake(cdbfile, cdbfile+'.tmp')
    if verbose:
        stderr.write(('Writing: %r...' % cdbfile) + '\n')

    # Attributes of the CMap, stored as their repr() text.
    for (name, value) in cmap.getall_attrs():
        maker.add('/'+name, repr(value))

    # Forward mapping: character code -> CID.
    for (code, cid) in cmap.getall_code2cid():
        packed_cid = struct.pack('>L', cid)
        maker.add('c'+code, packed_cid)

    # Reverse mapping: CID -> character code.
    for (cid, code) in cmap.getall_cid2code():
        packed_cid = struct.pack('>L', cid)
        maker.add('i'+packed_cid, code)

    maker.finish()
def convert_cmap(files, cmapdir, cdbcmapdir, force=False):
    """Convert each CMap named in *files* into a '.cmap.cdb' file.

    files:      paths of CMap files; any path ending in '.upr' is ignored.
                Only the base name of each path is used, as the CMap name.
    cmapdir:    directory handed to CMapDB.initialize().
    cdbcmapdir: directory in which '<cmapname>.cmap.cdb' is written.
    force:      when false, an output file that already exists is left
                alone and a 'Skipping: ...' line is sent to stderr.
    """
    from pdfminer.cmap import CMapDB
    CMapDB.initialize(cmapdir)
    for path in files:
        if path.endswith('.upr'):
            continue
        cmapname = os.path.basename(path)
        target = os.path.join(cdbcmapdir, cmapname+'.cmap.cdb')
        # The existence check is only made when force is false.
        if force or not os.path.exists(target):
            stderr.write(('Reading: %r...' % path) + '\n')
            dumpcdb(CMapDB.get_cmap(cmapname), target)
        else:
            stderr.write(('Skipping: %r' % target) + '\n')
def main(argv):
    """Command-line entry point: convert CMap files into CDB files.

    usage: argv[0] [-c cmapdir] [-C cdbcmapdir] [-f] file ...

      -c cmapdir     directory passed to convert_cmap as the CMap
                     directory (default 'CMap')
      -C cdbcmapdir  directory passed to convert_cmap as the output
                     directory (default 'CDBCMap')
      -f             force conversion even if the output already exists

    Returns 100 after printing the usage line when the options cannot be
    parsed or when no file arguments are given; otherwise returns the
    result of convert_cmap().

    Raises ValueError when cmapdir or cdbcmapdir is not a directory.
    """
    import getopt

    def usage():
        # Parenthesised form prints the same text on Python 2 and 3.
        print('usage: %s [-c cmapdir] [-C cdbcmapdir] [-f] file ...' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'c:C:f')
    except getopt.GetoptError:
        return usage()
    if not args:
        # Stop here: without the return, the usage text was printed and
        # processing carried on with an empty file list.
        return usage()

    cmapdir = 'CMap'
    cdbcmapdir = 'CDBCMap'
    force = False
    for (k, v) in opts:
        if k == '-f':
            force = True
        elif k == '-c':
            cmapdir = v
        elif k == '-C':
            cdbcmapdir = v

    if not os.path.isdir(cmapdir):
        raise ValueError('not directory: %r' % cmapdir)
    if not os.path.isdir(cdbcmapdir):
        raise ValueError('not directory: %r' % cdbcmapdir)
    return convert_cmap(args, cmapdir, cdbcmapdir, force=force)
# When run as a script, the value returned by main() becomes the
# process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))