Change the implementation of name2unicode so that it follows the Adobe Glyph List specification (while additionally allowing lowercase hexadecimal digits)

Pieter Marsman 2019-07-14 15:16:42 +02:00
parent 5d7ac7e88a
commit f0392f8049
1 changed files with 41 additions and 16 deletions

View File

@ -1,28 +1,53 @@
import re
from .psparser import PSLiteral
import six # Python 2+3 compatibility
from .glyphlist import glyphname2unicode
from .latin_enc import ENCODING
from .psparser import PSLiteral
import six # Python 2+3 compatibility
# Matches a run of one or more decimal digits.
STRIP_NAME = re.compile(r'[0-9]+')
# Matches a run of one or more hexadecimal digits, upper or lower case.
# NOTE: the pattern is not anchored, so ``HEXADECIMAL.match(s)`` only proves
# that ``s`` *starts* with hex digits, not that it consists solely of them.
HEXADECIMAL = re.compile(r'[0-9a-fA-F]+')
## name2unicode
def name2unicode(name):
"""Converts Adobe glyph names to Unicode numbers."""
if name in glyphname2unicode:
return glyphname2unicode[name]
m =
if not m:
raise KeyError(name)
return six.unichr(int(
def name2unicode(name: str):
    """Convert an Adobe glyph name to the Unicode string it denotes.

    The name is processed in these steps:

    1. Everything from the first full stop (U+002E) onwards is dropped.
    2. The remainder is split on underscores; when there is more than one
       component, each component is converted on its own and the results
       are concatenated.
    3. A single component is looked up in ``glyphname2unicode``.
    4. Otherwise, ``uni`` followed by a multiple of four hexadecimal digits
       is read as a sequence of 4-digit code points.
    5. Otherwise, ``u`` followed by four to six hexadecimal digits is read
       as a single code point.

    Hexadecimal digits are accepted in either case.

    :param name: glyph name to convert
    :returns: the unicode string for the name, or an empty string if the
        name cannot be mapped (unknown name, malformed digits, a surrogate
        code point, or a value above U+10FFFF)
    """
    full_stop = u'\u002E'
    name = name.split(full_stop)[0]
    components = name.split('_')
    if len(components) > 1:
        return ''.join(map(name2unicode, components))

    if name in glyphname2unicode:
        return glyphname2unicode[name]

    def is_hexadecimal(digits):
        # HEXADECIMAL is unanchored, so also require that the match spans
        # the whole string; otherwise 'uni00ZZ' would reach int() and crash.
        match = HEXADECIMAL.match(digits)
        return match is not None and match.end() == len(digits)

    def is_scalar_value(code_point):
        # Surrogates (D800-DFFF) are not characters, and values above
        # 10FFFF would make six.unichr raise ValueError.
        if 0xD800 <= code_point <= 0xDFFF:
            return False
        return code_point <= 0x10FFFF

    if name.startswith('uni'):
        # Slice the prefix off; str.strip('uni') would remove any of the
        # characters 'u', 'n', 'i' from both ends instead.
        hex_digits = name[len('uni'):]
        if is_hexadecimal(hex_digits) and len(hex_digits) % 4 == 0:
            code_points = [
                int(hex_digits[start:start + 4], 16)
                for start in range(0, len(hex_digits), 4)
            ]
            if all(is_scalar_value(point) for point in code_points):
                return ''.join(map(six.unichr, code_points))
    elif name.startswith('u'):
        hex_digits = name[len('u'):]
        if is_hexadecimal(hex_digits) and 4 <= len(hex_digits) <= 6:
            code_point = int(hex_digits, 16)
            if is_scalar_value(code_point):
                return six.unichr(code_point)

    return ''
## EncodingDB
class EncodingDB(object):
std2unicode = {}