pdfminer.six/pdfminer/cmapdb.py

430 lines
12 KiB
Python
Raw Normal View History

""" Adobe character mapping (CMap) support.
CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).
More information is available on the Adobe website:
http://opensource.adobe.com/wiki/display/cmap/CMap+Resources
"""
import sys
import os
import os.path
import gzip
2014-06-25 10:55:41 +00:00
try:
import cPickle as pickle
except ImportError:
import pickle as pickle
2011-03-02 14:43:03 +00:00
import struct
2014-06-14 03:00:49 +00:00
import logging
2014-06-26 09:12:39 +00:00
from .psparser import PSStackParser
from .psparser import PSSyntaxError
from .psparser import PSEOF
from .psparser import PSLiteral
from .psparser import literal_name
2014-06-30 10:15:21 +00:00
from .psparser import KWD
2014-06-26 09:12:39 +00:00
from .encodingdb import name2unicode
from .utils import choplist
from .utils import nunpack
Enforce pep8 coding-style (#345) * Code Refractor: Use code-style enforcement #312 * Add flake8 to travis-ci * Remove python 2 3 comment on six library. 891 errors > 870 errors. * Remove class and functions comments that consist of just the name. 870 errors > 855 errors. * Fix flake8 errors in pdftypes.py. 855 errors > 833 errors. * Moving flake8 testing from .travis.yml to tox.ini to ensure local testing before commiting * Cleanup pdfinterp.py and add documentation from PDF Reference * Cleanup pdfpage.py * Cleanup pdffont.py * Clean psparser.py * Cleanup high_level.py * Cleanup layout.py * Cleanup pdfparser.py * Cleanup pdfcolor.py * Cleanup rijndael.py * Cleanup converter.py * Rename klass to cls if it is the class variable, to be more consistent with standard practice * Cleanup cmap.py * Cleanup pdfdevice.py * flake8 ignore fontmetrics.py * Cleanup test_pdfminer_psparser.py * Fix flake8 in pdfdocument.py; 339 errors to go * Fix flake8 utils.py; 326 errors togo * pep8 correction for few files in /tools/ 328 > 160 to go (#342) * pep8 correction for few files in /tools/ 328 > 160 to go * pep8 correction: 160 > 5 to go * Fix ascii85.py errors * Fix error in getting index from target that does not exists * Remove commented print lines * Fix flake8 error in pdfinterp.py * Fix python2 specific error by removing argument from print statement * Ignore invalid python2 syntax * Update contributing.md * Added changelog * Remove unused import Co-authored-by: Fakabbir Amin <f4amin@gmail.com>
2019-12-29 20:20:20 +00:00
import six
2016-05-20 19:12:05 +00:00
log = logging.getLogger(__name__)
2013-11-07 08:35:04 +00:00
class CMapError(Exception):
pass
class CMapBase(object):
debug = 0
def __init__(self, **kwargs):
self.attrs = kwargs.copy()
return
def is_vertical(self):
return self.attrs.get('WMode', 0) != 0
def set_attr(self, k, v):
self.attrs[k] = v
return
def add_code2cid(self, code, cid):
return
def add_cid2unichr(self, cid, code):
return
def use_cmap(self, cmap):
return
class CMap(CMapBase):
def __init__(self, **kwargs):
CMapBase.__init__(self, **kwargs)
self.code2cid = {}
return
def __repr__(self):
return '<CMap: %s>' % self.attrs.get('CMapName')
def use_cmap(self, cmap):
assert isinstance(cmap, CMap), str(type(cmap))
2013-11-07 08:35:04 +00:00
def copy(dst, src):
for (k, v) in six.iteritems(src):
if isinstance(v, dict):
d = {}
dst[k] = d
copy(d, v)
else:
dst[k] = v
copy(self.code2cid, cmap.code2cid)
return
def decode(self, code):
2016-05-20 19:12:05 +00:00
log.debug('decode: %r, %r', self, code)
d = self.code2cid
2014-09-03 13:26:08 +00:00
for i in six.iterbytes(code):
2014-09-11 21:32:43 +00:00
if i in d:
d = d[i]
if isinstance(d, int):
yield d
d = self.code2cid
else:
d = self.code2cid
return
def dump(self, out=sys.stdout, code2cid=None, code=None):
if code2cid is None:
code2cid = self.code2cid
code = ()
for (k, v) in sorted(six.iteritems(code2cid)):
c = code+(k,)
if isinstance(v, int):
2013-11-07 08:35:04 +00:00
out.write('code %r = cid %d\n' % (c, v))
else:
self.dump(out=out, code2cid=v, code=c)
return
2013-11-07 07:14:53 +00:00
class IdentityCMap(CMapBase):
def decode(self, code):
2013-11-26 12:35:16 +00:00
n = len(code)//2
if n:
2011-03-02 14:43:03 +00:00
return struct.unpack('>%dH' % n, code)
else:
return ()
2013-11-07 07:14:53 +00:00
class IdentityCMapByte(IdentityCMap):
def decode(self, code):
n = len(code)
if n:
return struct.unpack('>%dB' % n, code)
else:
return ()
2019-08-20 11:18:40 +00:00
class UnicodeMap(CMapBase):
def __init__(self, **kwargs):
CMapBase.__init__(self, **kwargs)
self.cid2unichr = {}
return
def __repr__(self):
return '<UnicodeMap: %s>' % self.attrs.get('CMapName')
def get_unichr(self, cid):
2016-05-20 19:12:05 +00:00
log.debug('get_unichr: %r, %r', self, cid)
return self.cid2unichr[cid]
def dump(self, out=sys.stdout):
for (k, v) in sorted(six.iteritems(self.cid2unichr)):
2013-11-07 08:35:04 +00:00
out.write('cid %d = unicode %r\n' % (k, v))
return
class FileCMap(CMap):
def add_code2cid(self, code, cid):
Enforce pep8 coding-style (#345) * Code Refractor: Use code-style enforcement #312 * Add flake8 to travis-ci * Remove python 2 3 comment on six library. 891 errors > 870 errors. * Remove class and functions comments that consist of just the name. 870 errors > 855 errors. * Fix flake8 errors in pdftypes.py. 855 errors > 833 errors. * Moving flake8 testing from .travis.yml to tox.ini to ensure local testing before commiting * Cleanup pdfinterp.py and add documentation from PDF Reference * Cleanup pdfpage.py * Cleanup pdffont.py * Clean psparser.py * Cleanup high_level.py * Cleanup layout.py * Cleanup pdfparser.py * Cleanup pdfcolor.py * Cleanup rijndael.py * Cleanup converter.py * Rename klass to cls if it is the class variable, to be more consistent with standard practice * Cleanup cmap.py * Cleanup pdfdevice.py * flake8 ignore fontmetrics.py * Cleanup test_pdfminer_psparser.py * Fix flake8 in pdfdocument.py; 339 errors to go * Fix flake8 utils.py; 326 errors togo * pep8 correction for few files in /tools/ 328 > 160 to go (#342) * pep8 correction for few files in /tools/ 328 > 160 to go * pep8 correction: 160 > 5 to go * Fix ascii85.py errors * Fix error in getting index from target that does not exists * Remove commented print lines * Fix flake8 error in pdfinterp.py * Fix python2 specific error by removing argument from print statement * Ignore invalid python2 syntax * Update contributing.md * Added changelog * Remove unused import Co-authored-by: Fakabbir Amin <f4amin@gmail.com>
2019-12-29 20:20:20 +00:00
assert isinstance(code, str) and isinstance(cid, int),\
str((type(code), type(cid)))
d = self.code2cid
for c in code[:-1]:
c = ord(c)
if c in d:
d = d[c]
else:
t = {}
d[c] = t
2013-11-07 08:35:04 +00:00
d = t
c = ord(code[-1])
d[c] = cid
return
class FileUnicodeMap(UnicodeMap):
2013-11-07 07:14:53 +00:00
def add_cid2unichr(self, cid, code):
assert isinstance(cid, int), str(type(cid))
if isinstance(code, PSLiteral):
# Interpret as an Adobe glyph name.
self.cid2unichr[cid] = name2unicode(code.name)
2014-09-07 16:41:04 +00:00
elif isinstance(code, bytes):
# Interpret as UTF-16BE.
2014-09-07 16:41:04 +00:00
self.cid2unichr[cid] = code.decode('UTF-16BE', 'ignore')
elif isinstance(code, int):
2014-09-07 16:41:04 +00:00
self.cid2unichr[cid] = six.unichr(code)
else:
raise TypeError(code)
return
class PyCMap(CMap):
def __init__(self, name, module):
CMap.__init__(self, CMapName=name)
self.code2cid = module.CODE2CID
if module.IS_VERTICAL:
self.attrs['WMode'] = 1
return
class PyUnicodeMap(UnicodeMap):
2013-11-07 07:14:53 +00:00
def __init__(self, name, module, vertical):
UnicodeMap.__init__(self, CMapName=name)
if vertical:
self.cid2unichr = module.CID2UNICHR_V
self.attrs['WMode'] = 1
else:
self.cid2unichr = module.CID2UNICHR_H
return
class CMapDB(object):
_cmap_cache = {}
_umap_cache = {}
2013-11-07 07:14:53 +00:00
2013-11-07 08:35:04 +00:00
class CMapNotFound(CMapError):
pass
@classmethod
Enforce pep8 coding-style (#345) * Code Refractor: Use code-style enforcement #312 * Add flake8 to travis-ci * Remove python 2 3 comment on six library. 891 errors > 870 errors. * Remove class and functions comments that consist of just the name. 870 errors > 855 errors. * Fix flake8 errors in pdftypes.py. 855 errors > 833 errors. * Moving flake8 testing from .travis.yml to tox.ini to ensure local testing before commiting * Cleanup pdfinterp.py and add documentation from PDF Reference * Cleanup pdfpage.py * Cleanup pdffont.py * Clean psparser.py * Cleanup high_level.py * Cleanup layout.py * Cleanup pdfparser.py * Cleanup pdfcolor.py * Cleanup rijndael.py * Cleanup converter.py * Rename klass to cls if it is the class variable, to be more consistent with standard practice * Cleanup cmap.py * Cleanup pdfdevice.py * flake8 ignore fontmetrics.py * Cleanup test_pdfminer_psparser.py * Fix flake8 in pdfdocument.py; 339 errors to go * Fix flake8 utils.py; 326 errors togo * pep8 correction for few files in /tools/ 328 > 160 to go (#342) * pep8 correction for few files in /tools/ 328 > 160 to go * pep8 correction: 160 > 5 to go * Fix ascii85.py errors * Fix error in getting index from target that does not exists * Remove commented print lines * Fix flake8 error in pdfinterp.py * Fix python2 specific error by removing argument from print statement * Ignore invalid python2 syntax * Update contributing.md * Added changelog * Remove unused import Co-authored-by: Fakabbir Amin <f4amin@gmail.com>
2019-12-29 20:20:20 +00:00
def _load_data(cls, name):
name = name.replace("\0", "")
filename = '%s.pickle.gz' % name
2016-05-20 19:12:05 +00:00
log.info('loading: %r', name)
2013-11-07 08:35:04 +00:00
cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'),
os.path.join(os.path.dirname(__file__), 'cmap'),)
2011-07-31 08:05:07 +00:00
for directory in cmap_paths:
path = os.path.join(directory, filename)
if os.path.exists(path):
gzfile = gzip.open(path)
try:
return type(str(name), (), pickle.loads(gzfile.read()))
finally:
gzfile.close()
else:
raise CMapDB.CMapNotFound(name)
@classmethod
Enforce pep8 coding-style (#345) * Code Refractor: Use code-style enforcement #312 * Add flake8 to travis-ci * Remove python 2 3 comment on six library. 891 errors > 870 errors. * Remove class and functions comments that consist of just the name. 870 errors > 855 errors. * Fix flake8 errors in pdftypes.py. 855 errors > 833 errors. * Moving flake8 testing from .travis.yml to tox.ini to ensure local testing before commiting * Cleanup pdfinterp.py and add documentation from PDF Reference * Cleanup pdfpage.py * Cleanup pdffont.py * Clean psparser.py * Cleanup high_level.py * Cleanup layout.py * Cleanup pdfparser.py * Cleanup pdfcolor.py * Cleanup rijndael.py * Cleanup converter.py * Rename klass to cls if it is the class variable, to be more consistent with standard practice * Cleanup cmap.py * Cleanup pdfdevice.py * flake8 ignore fontmetrics.py * Cleanup test_pdfminer_psparser.py * Fix flake8 in pdfdocument.py; 339 errors to go * Fix flake8 utils.py; 326 errors togo * pep8 correction for few files in /tools/ 328 > 160 to go (#342) * pep8 correction for few files in /tools/ 328 > 160 to go * pep8 correction: 160 > 5 to go * Fix ascii85.py errors * Fix error in getting index from target that does not exists * Remove commented print lines * Fix flake8 error in pdfinterp.py * Fix python2 specific error by removing argument from print statement * Ignore invalid python2 syntax * Update contributing.md * Added changelog * Remove unused import Co-authored-by: Fakabbir Amin <f4amin@gmail.com>
2019-12-29 20:20:20 +00:00
def get_cmap(cls, name):
if name == 'Identity-H':
return IdentityCMap(WMode=0)
elif name == 'Identity-V':
return IdentityCMap(WMode=1)
elif name == 'OneByteIdentityH':
return IdentityCMapByte(WMode=0)
elif name == 'OneByteIdentityV':
return IdentityCMapByte(WMode=1)
try:
Enforce pep8 coding-style (#345) * Code Refractor: Use code-style enforcement #312 * Add flake8 to travis-ci * Remove python 2 3 comment on six library. 891 errors > 870 errors. * Remove class and functions comments that consist of just the name. 870 errors > 855 errors. * Fix flake8 errors in pdftypes.py. 855 errors > 833 errors. * Moving flake8 testing from .travis.yml to tox.ini to ensure local testing before commiting * Cleanup pdfinterp.py and add documentation from PDF Reference * Cleanup pdfpage.py * Cleanup pdffont.py * Clean psparser.py * Cleanup high_level.py * Cleanup layout.py * Cleanup pdfparser.py * Cleanup pdfcolor.py * Cleanup rijndael.py * Cleanup converter.py * Rename klass to cls if it is the class variable, to be more consistent with standard practice * Cleanup cmap.py * Cleanup pdfdevice.py * flake8 ignore fontmetrics.py * Cleanup test_pdfminer_psparser.py * Fix flake8 in pdfdocument.py; 339 errors to go * Fix flake8 utils.py; 326 errors togo * pep8 correction for few files in /tools/ 328 > 160 to go (#342) * pep8 correction for few files in /tools/ 328 > 160 to go * pep8 correction: 160 > 5 to go * Fix ascii85.py errors * Fix error in getting index from target that does not exists * Remove commented print lines * Fix flake8 error in pdfinterp.py * Fix python2 specific error by removing argument from print statement * Ignore invalid python2 syntax * Update contributing.md * Added changelog * Remove unused import Co-authored-by: Fakabbir Amin <f4amin@gmail.com>
2019-12-29 20:20:20 +00:00
return cls._cmap_cache[name]
except KeyError:
pass
Enforce pep8 coding-style (#345) * Code Refractor: Use code-style enforcement #312 * Add flake8 to travis-ci * Remove python 2 3 comment on six library. 891 errors > 870 errors. * Remove class and functions comments that consist of just the name. 870 errors > 855 errors. * Fix flake8 errors in pdftypes.py. 855 errors > 833 errors. * Moving flake8 testing from .travis.yml to tox.ini to ensure local testing before commiting * Cleanup pdfinterp.py and add documentation from PDF Reference * Cleanup pdfpage.py * Cleanup pdffont.py * Clean psparser.py * Cleanup high_level.py * Cleanup layout.py * Cleanup pdfparser.py * Cleanup pdfcolor.py * Cleanup rijndael.py * Cleanup converter.py * Rename klass to cls if it is the class variable, to be more consistent with standard practice * Cleanup cmap.py * Cleanup pdfdevice.py * flake8 ignore fontmetrics.py * Cleanup test_pdfminer_psparser.py * Fix flake8 in pdfdocument.py; 339 errors to go * Fix flake8 utils.py; 326 errors togo * pep8 correction for few files in /tools/ 328 > 160 to go (#342) * pep8 correction for few files in /tools/ 328 > 160 to go * pep8 correction: 160 > 5 to go * Fix ascii85.py errors * Fix error in getting index from target that does not exists * Remove commented print lines * Fix flake8 error in pdfinterp.py * Fix python2 specific error by removing argument from print statement * Ignore invalid python2 syntax * Update contributing.md * Added changelog * Remove unused import Co-authored-by: Fakabbir Amin <f4amin@gmail.com>
2019-12-29 20:20:20 +00:00
data = cls._load_data(name)
cls._cmap_cache[name] = cmap = PyCMap(name, data)
return cmap
@classmethod
Enforce pep8 coding-style (#345) * Code Refractor: Use code-style enforcement #312 * Add flake8 to travis-ci * Remove python 2 3 comment on six library. 891 errors > 870 errors. * Remove class and functions comments that consist of just the name. 870 errors > 855 errors. * Fix flake8 errors in pdftypes.py. 855 errors > 833 errors. * Moving flake8 testing from .travis.yml to tox.ini to ensure local testing before commiting * Cleanup pdfinterp.py and add documentation from PDF Reference * Cleanup pdfpage.py * Cleanup pdffont.py * Clean psparser.py * Cleanup high_level.py * Cleanup layout.py * Cleanup pdfparser.py * Cleanup pdfcolor.py * Cleanup rijndael.py * Cleanup converter.py * Rename klass to cls if it is the class variable, to be more consistent with standard practice * Cleanup cmap.py * Cleanup pdfdevice.py * flake8 ignore fontmetrics.py * Cleanup test_pdfminer_psparser.py * Fix flake8 in pdfdocument.py; 339 errors to go * Fix flake8 utils.py; 326 errors togo * pep8 correction for few files in /tools/ 328 > 160 to go (#342) * pep8 correction for few files in /tools/ 328 > 160 to go * pep8 correction: 160 > 5 to go * Fix ascii85.py errors * Fix error in getting index from target that does not exists * Remove commented print lines * Fix flake8 error in pdfinterp.py * Fix python2 specific error by removing argument from print statement * Ignore invalid python2 syntax * Update contributing.md * Added changelog * Remove unused import Co-authored-by: Fakabbir Amin <f4amin@gmail.com>
2019-12-29 20:20:20 +00:00
def get_unicode_map(cls, name, vertical=False):
try:
Enforce pep8 coding-style (#345) * Code Refractor: Use code-style enforcement #312 * Add flake8 to travis-ci * Remove python 2 3 comment on six library. 891 errors > 870 errors. * Remove class and functions comments that consist of just the name. 870 errors > 855 errors. * Fix flake8 errors in pdftypes.py. 855 errors > 833 errors. * Moving flake8 testing from .travis.yml to tox.ini to ensure local testing before commiting * Cleanup pdfinterp.py and add documentation from PDF Reference * Cleanup pdfpage.py * Cleanup pdffont.py * Clean psparser.py * Cleanup high_level.py * Cleanup layout.py * Cleanup pdfparser.py * Cleanup pdfcolor.py * Cleanup rijndael.py * Cleanup converter.py * Rename klass to cls if it is the class variable, to be more consistent with standard practice * Cleanup cmap.py * Cleanup pdfdevice.py * flake8 ignore fontmetrics.py * Cleanup test_pdfminer_psparser.py * Fix flake8 in pdfdocument.py; 339 errors to go * Fix flake8 utils.py; 326 errors togo * pep8 correction for few files in /tools/ 328 > 160 to go (#342) * pep8 correction for few files in /tools/ 328 > 160 to go * pep8 correction: 160 > 5 to go * Fix ascii85.py errors * Fix error in getting index from target that does not exists * Remove commented print lines * Fix flake8 error in pdfinterp.py * Fix python2 specific error by removing argument from print statement * Ignore invalid python2 syntax * Update contributing.md * Added changelog * Remove unused import Co-authored-by: Fakabbir Amin <f4amin@gmail.com>
2019-12-29 20:20:20 +00:00
return cls._umap_cache[name][vertical]
except KeyError:
pass
Enforce pep8 coding-style (#345) * Code Refractor: Use code-style enforcement #312 * Add flake8 to travis-ci * Remove python 2 3 comment on six library. 891 errors > 870 errors. * Remove class and functions comments that consist of just the name. 870 errors > 855 errors. * Fix flake8 errors in pdftypes.py. 855 errors > 833 errors. * Moving flake8 testing from .travis.yml to tox.ini to ensure local testing before commiting * Cleanup pdfinterp.py and add documentation from PDF Reference * Cleanup pdfpage.py * Cleanup pdffont.py * Clean psparser.py * Cleanup high_level.py * Cleanup layout.py * Cleanup pdfparser.py * Cleanup pdfcolor.py * Cleanup rijndael.py * Cleanup converter.py * Rename klass to cls if it is the class variable, to be more consistent with standard practice * Cleanup cmap.py * Cleanup pdfdevice.py * flake8 ignore fontmetrics.py * Cleanup test_pdfminer_psparser.py * Fix flake8 in pdfdocument.py; 339 errors to go * Fix flake8 utils.py; 326 errors togo * pep8 correction for few files in /tools/ 328 > 160 to go (#342) * pep8 correction for few files in /tools/ 328 > 160 to go * pep8 correction: 160 > 5 to go * Fix ascii85.py errors * Fix error in getting index from target that does not exists * Remove commented print lines * Fix flake8 error in pdfinterp.py * Fix python2 specific error by removing argument from print statement * Ignore invalid python2 syntax * Update contributing.md * Added changelog * Remove unused import Co-authored-by: Fakabbir Amin <f4amin@gmail.com>
2019-12-29 20:20:20 +00:00
data = cls._load_data('to-unicode-%s' % name)
cls._umap_cache[name] = [PyUnicodeMap(name, data, v)
for v in (False, True)]
return cls._umap_cache[name][vertical]
class CMapParser(PSStackParser):
def __init__(self, cmap, fp):
PSStackParser.__init__(self, fp)
self.cmap = cmap
2013-10-17 12:40:43 +00:00
# some ToUnicode maps don't have "begincmap" keyword.
self._in_cmap = True
return
def run(self):
try:
self.nextobject()
except PSEOF:
pass
return
2014-06-30 10:15:21 +00:00
KEYWORD_BEGINCMAP = KWD(b'begincmap')
KEYWORD_ENDCMAP = KWD(b'endcmap')
KEYWORD_USECMAP = KWD(b'usecmap')
KEYWORD_DEF = KWD(b'def')
KEYWORD_BEGINCODESPACERANGE = KWD(b'begincodespacerange')
KEYWORD_ENDCODESPACERANGE = KWD(b'endcodespacerange')
KEYWORD_BEGINCIDRANGE = KWD(b'begincidrange')
KEYWORD_ENDCIDRANGE = KWD(b'endcidrange')
KEYWORD_BEGINCIDCHAR = KWD(b'begincidchar')
KEYWORD_ENDCIDCHAR = KWD(b'endcidchar')
KEYWORD_BEGINBFRANGE = KWD(b'beginbfrange')
KEYWORD_ENDBFRANGE = KWD(b'endbfrange')
KEYWORD_BEGINBFCHAR = KWD(b'beginbfchar')
KEYWORD_ENDBFCHAR = KWD(b'endbfchar')
KEYWORD_BEGINNOTDEFRANGE = KWD(b'beginnotdefrange')
KEYWORD_ENDNOTDEFRANGE = KWD(b'endnotdefrange')
def do_keyword(self, pos, token):
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_BEGINCMAP:
self._in_cmap = True
self.popall()
return
2014-06-30 10:15:21 +00:00
elif token is self.KEYWORD_ENDCMAP:
self._in_cmap = False
return
2013-11-07 08:35:04 +00:00
if not self._in_cmap:
return
#
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_DEF:
try:
2013-11-07 08:35:04 +00:00
((_, k), (_, v)) = self.pop(2)
self.cmap.set_attr(literal_name(k), v)
except PSSyntaxError:
pass
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_USECMAP:
try:
2013-11-07 08:35:04 +00:00
((_, cmapname),) = self.pop(1)
self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
except PSSyntaxError:
pass
except CMapDB.CMapNotFound:
pass
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_BEGINCODESPACERANGE:
self.popall()
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_ENDCODESPACERANGE:
self.popall()
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_BEGINCIDRANGE:
self.popall()
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_ENDCIDRANGE:
2013-11-07 08:35:04 +00:00
objs = [obj for (__, obj) in self.popall()]
for (s, e, cid) in choplist(3, objs):
if (not isinstance(s, str) or not isinstance(e, str) or
2013-11-07 08:35:04 +00:00
not isinstance(cid, int) or len(s) != len(e)):
continue
sprefix = s[:-4]
eprefix = e[:-4]
2013-11-07 08:35:04 +00:00
if sprefix != eprefix:
continue
svar = s[-4:]
evar = e[-4:]
s1 = nunpack(svar)
e1 = nunpack(evar)
vlen = len(svar)
2014-09-07 16:36:12 +00:00
for i in range(e1-s1+1):
2013-11-07 08:35:04 +00:00
x = sprefix+struct.pack('>L', s1+i)[-vlen:]
self.cmap.add_code2cid(x, cid+i)
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_BEGINCIDCHAR:
self.popall()
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_ENDCIDCHAR:
2013-11-07 08:35:04 +00:00
objs = [obj for (__, obj) in self.popall()]
for (cid, code) in choplist(2, objs):
if isinstance(code, str) and isinstance(cid, str):
self.cmap.add_code2cid(code, nunpack(cid))
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_BEGINBFRANGE:
self.popall()
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_ENDBFRANGE:
2013-11-07 08:35:04 +00:00
objs = [obj for (__, obj) in self.popall()]
for (s, e, code) in choplist(3, objs):
2014-09-07 16:41:04 +00:00
if (not isinstance(s, bytes) or not isinstance(e, bytes) or
2013-11-07 08:35:04 +00:00
len(s) != len(e)):
Enforce pep8 coding-style (#345) * Code Refractor: Use code-style enforcement #312 * Add flake8 to travis-ci * Remove python 2 3 comment on six library. 891 errors > 870 errors. * Remove class and functions comments that consist of just the name. 870 errors > 855 errors. * Fix flake8 errors in pdftypes.py. 855 errors > 833 errors. * Moving flake8 testing from .travis.yml to tox.ini to ensure local testing before commiting * Cleanup pdfinterp.py and add documentation from PDF Reference * Cleanup pdfpage.py * Cleanup pdffont.py * Clean psparser.py * Cleanup high_level.py * Cleanup layout.py * Cleanup pdfparser.py * Cleanup pdfcolor.py * Cleanup rijndael.py * Cleanup converter.py * Rename klass to cls if it is the class variable, to be more consistent with standard practice * Cleanup cmap.py * Cleanup pdfdevice.py * flake8 ignore fontmetrics.py * Cleanup test_pdfminer_psparser.py * Fix flake8 in pdfdocument.py; 339 errors to go * Fix flake8 utils.py; 326 errors togo * pep8 correction for few files in /tools/ 328 > 160 to go (#342) * pep8 correction for few files in /tools/ 328 > 160 to go * pep8 correction: 160 > 5 to go * Fix ascii85.py errors * Fix error in getting index from target that does not exists * Remove commented print lines * Fix flake8 error in pdfinterp.py * Fix python2 specific error by removing argument from print statement * Ignore invalid python2 syntax * Update contributing.md * Added changelog * Remove unused import Co-authored-by: Fakabbir Amin <f4amin@gmail.com>
2019-12-29 20:20:20 +00:00
continue
s1 = nunpack(s)
e1 = nunpack(e)
if isinstance(code, list):
2014-09-07 16:36:12 +00:00
for i in range(e1-s1+1):
self.cmap.add_cid2unichr(s1+i, code[i])
else:
var = code[-4:]
base = nunpack(var)
prefix = code[:-4]
vlen = len(var)
2014-09-07 16:36:12 +00:00
for i in range(e1-s1+1):
2013-11-07 08:35:04 +00:00
x = prefix+struct.pack('>L', base+i)[-vlen:]
self.cmap.add_cid2unichr(s1+i, x)
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_BEGINBFCHAR:
self.popall()
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_ENDBFCHAR:
2013-11-07 08:35:04 +00:00
objs = [obj for (__, obj) in self.popall()]
for (cid, code) in choplist(2, objs):
2014-09-07 16:41:04 +00:00
if isinstance(cid, bytes) and isinstance(code, bytes):
self.cmap.add_cid2unichr(nunpack(cid), code)
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_BEGINNOTDEFRANGE:
self.popall()
return
2014-06-30 10:15:21 +00:00
if token is self.KEYWORD_ENDNOTDEFRANGE:
self.popall()
return
self.push((pos, token))
return
2013-11-07 08:35:04 +00:00
def main(argv):
args = argv[1:]
for fname in args:
fp = open(fname, 'rb')
cmap = FileUnicodeMap()
CMapParser(cmap, fp).run()
fp.close()
cmap.dump()
return
2019-08-20 11:18:40 +00:00
2013-11-07 08:35:04 +00:00
if __name__ == '__main__':
sys.exit(main(sys.argv))