Add lowercase adobe glyph name tests

2019-07-14 15:20:25 +02:00 · 2019-07-14 15:20:25 +02:00 · fdb7e54862
parent 33cc9861ae
commit fdb7e54862
1 changed files with 49 additions and 3 deletions
--- a/tests/test_encodingdb.py
+++ b/tests/test_encodingdb.py
@@ -1,5 +1,8 @@
 """
 Tests based on the Adobe Glyph List Specification (https://github.com/adobe-type-tools/agl-specification#2-the-mapping)
+
+While not in the specification, lowercase hexadecimal glyph names often occur in PDFs. Therefore lowercase variants
+of the unit tests are added.
 """
 from pdfminer.encodingdb import name2unicode

@@ -14,14 +17,28 @@ def test_name2unicode_uni():
    assert u'\u013B' == name2unicode('uni013B')


+def test_name2unicode_uni_lowercase():
+    """The component "uni013b" (lowercase variant of "uni013B") maps to the string U+013B"""
+    assert u'\u013B' == name2unicode('uni013b')
+
+
 def test_name2unicode_uni_with_sequence_of_digits():
    """The name "uni20AC0308" has a single component, which is mapped to the string U+20AC U+0308"""
    assert u'\u20AC\u0308' == name2unicode('uni20AC0308')


+def test_name2unicode_uni_with_sequence_of_digits_lowercase():
+    """The name "uni20ac0308" has a single component, which is mapped to the string U+20AC U+0308"""
+    assert u'\u20AC\u0308' == name2unicode('uni20ac0308')
+
+
 def test_name2unicode_uni_empty_string():
-    """The name "uni20ac" has a single component, which is mapped to an empty string"""
-    assert u'' == name2unicode('uni20ac')
+    """The name "uni20ac" has a single component, which is mapped to a €.
+
+    According to the specification this should be mapped to an empty string, but we also want to support lowercase
+    hexadecimals
+    """
+    assert u'€' == name2unicode('uni20ac')


 def test_name2unicode_uni_empty_string_long():
@@ -34,24 +51,53 @@ def test_name2unicode_uni_empty_string_long():
    assert u'' == name2unicode('uniD801DC0C')


+def test_name2unicode_uni_empty_string_long_lowercase():
+    """The name "uniD801DC0C" has a single component, which is mapped to an empty string
+
+    Neither D801 nor DC0C are in the appropriate set. This form cannot be used to map to the character which is
+    expressed as D801 DC0C in UTF-16, specifically U+1040C. This character can be correctly mapped by using the
+    glyph name "u1040C."""
+    assert u'' == name2unicode('uniD801DC0C')
+
+
 def test_name2unicode_uni_pua():
    """"Ogoneksmall" and "uniF6FB" both map to the string that corresponds to U+F6FB."""
    assert u'\uF6FB' == name2unicode('uniF6FB')


+def test_name2unicode_uni_pua_lowercase():
+    """"Ogoneksmall" and the lowercase name "unif6fb" both map to the string that corresponds to U+F6FB."""
+    assert u'\uF6FB' == name2unicode('unif6fb')
+
+
 def test_name2unicode_u_with_4_digits():
    """The components "Lcommaaccent," "uni013B," and "u013B" all map to the string U+013B"""
    assert u'\u013B' == name2unicode('u013B')


+def test_name2unicode_u_with_4_digits_lowercase():
+    """The component "u013b" (lowercase variant of "u013B") maps to the string U+013B"""
+    assert u'\u013B' == name2unicode('u013b')
+
+
 def test_name2unicode_u_with_5_digits():
    """The name "u1040C" has a single component, which is mapped to the string U+1040C"""
    assert u'\U0001040C' == name2unicode('u1040C')


+def test_name2unicode_u_with_5_digits_lowercase():
+    """The name "u1040c" has a single component, which is mapped to the string U+1040C"""
+    assert u'\U0001040C' == name2unicode('u1040c')
+
+
 def test_name2unicode_multiple_components():
    """The name "Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the string U+013B U+20AC U+0308 U+1040C"""
-    assert u'\u013B\u20AC\U0001040C' == name2unicode('Lcommaaccent_uni20AC0308_u1040C.alternate')
+    assert u'\u013B\u20AC\u0308\U0001040C' == name2unicode('Lcommaaccent_uni20AC0308_u1040C.alternate')
+
+
+def test_name2unicode_multiple_components_lowercase():
+    """The name "Lcommaaccent_uni20ac0308_u1040c.alternate" is mapped to the string U+013B U+20AC U+0308 U+1040C"""
+    assert u'\u013B\u20AC\u0308\U0001040C' == name2unicode('Lcommaaccent_uni20ac0308_u1040c.alternate')


 def test_name2unicode_foo():