Raise a `KeyError` with a useful message if `name2unicode()` cannot convert a glyph name to a unicode character. Callers use this message to log a debug statement.

pull/263/head
Pieter Marsman 2019-07-16 08:52:24 +02:00
parent 0fb83366b6
commit 6f362f53fe
2 changed files with 24 additions and 10 deletions

View File

@ -9,6 +9,8 @@ from .psparser import PSLiteral
HEXADECIMAL = re.compile(r'[0-9a-fA-F]+') HEXADECIMAL = re.compile(r'[0-9a-fA-F]+')
log = logging.getLogger(__name__)
def name2unicode(name): def name2unicode(name):
"""Converts Adobe glyph names to Unicode numbers. """Converts Adobe glyph names to Unicode numbers.
@ -32,22 +34,32 @@ def name2unicode(name):
elif name.startswith('uni'): elif name.startswith('uni'):
name_without_uni = name.strip('uni') name_without_uni = name.strip('uni')
if HEXADECIMAL.match(name_without_uni) and len(name_without_uni) % 4 == 0: if HEXADECIMAL.match(name_without_uni) and len(name_without_uni) % 4 == 0:
unicode_digits = [int(name_without_uni[i:i + 4], base=16) for i in range(0, len(name_without_uni), 4)] unicode_digits = [int(name_without_uni[i:i + 4], base=16) for i in range(0, len(name_without_uni), 4)]
if any([55295 < digit < 57344 for digit in unicode_digits]): for digit in unicode_digits:
raise KeyError raise_key_error_for_invalid_unicode(digit)
characters = map(six.unichr, unicode_digits) characters = map(six.unichr, unicode_digits)
return ''.join(characters) return ''.join(characters)
elif name.startswith('u'): elif name.startswith('u'):
name_without_u = name.strip('u') name_without_u = name.strip('u')
if HEXADECIMAL.match(name_without_u) and 4 <= len(name_without_u) <= 6: if HEXADECIMAL.match(name_without_u) and 4 <= len(name_without_u) <= 6:
unicode_digit = int(name_without_u, base=16) unicode_digit = int(name_without_u, base=16)
if 55295 < unicode_digit < 57344: raise_key_error_for_invalid_unicode(unicode_digit)
raise KeyError
return six.unichr(unicode_digit) return six.unichr(unicode_digit)
raise KeyError raise KeyError('Could not convert unicode name "%s" to character because it does not match specification' % name)
def raise_key_error_for_invalid_unicode(unicode_digit):
    """Reject values that fall inside the UTF-16 surrogate block.

    Values D800 through DFFF are used for surrogate pairs in UTF-16, so they
    are not accepted as unicode values here.

    :param unicode_digit: numeric unicode value to check
    :raises KeyError: if the value lies in the range D800 through DFFF
    """
    last_before_surrogates = 0xD7FF  # 55295
    first_after_surrogates = 0xE000  # 57344
    # Guard clause: anything outside the open interval is acceptable.
    if not (last_before_surrogates < unicode_digit < first_after_surrogates):
        return
    raise KeyError('Unicode digit %d is invalid because it is in the range D800 through DFFF' % unicode_digit)
class EncodingDB(object): class EncodingDB(object):
@ -86,7 +98,7 @@ class EncodingDB(object):
elif isinstance(x, PSLiteral): elif isinstance(x, PSLiteral):
try: try:
cid2unicode[cid] = name2unicode(x.name) cid2unicode[cid] = name2unicode(x.name)
except KeyError: except KeyError as e:
pass log.debug(str(e))
cid += 1 cid += 1
return cid2unicode return cid2unicode

View File

@ -1,4 +1,4 @@
import logging
import struct import struct
import sys import sys
from io import BytesIO from io import BytesIO
@ -31,6 +31,8 @@ from .utils import choplist
from .utils import isnumber from .utils import isnumber
from .utils import nunpack from .utils import nunpack
log = logging.getLogger(__name__)
def get_widths(seq): def get_widths(seq):
widths = {} widths = {}
@ -124,8 +126,8 @@ class Type1FontHeaderParser(PSStackParser):
break break
try: try:
self._cid2unicode[cid] = name2unicode(name) self._cid2unicode[cid] = name2unicode(name)
except KeyError: except KeyError as e:
pass log.debug(str(e))
return self._cid2unicode return self._cid2unicode
def do_keyword(self, pos, token): def do_keyword(self, pos, token):