Add some (failing) unittests for name2unicode based on the examples in the Adobe Glyph List Specification
parent
127d587431
commit
ec5218a05f
|
@ -0,0 +1,69 @@
|
||||||
|
"""
|
||||||
|
Tests based on the Adobe Glyph List Specification (https://github.com/adobe-type-tools/agl-specification#2-the-mapping)
|
||||||
|
"""
|
||||||
|
from pdfminer.encodingdb import name2unicode
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_name_in_agl():
    """A glyph name that is listed in the AGL maps to its AGL code point.

    "Lcommaaccent" consists of a single component, and the AGL maps that
    component to the string U+013B.
    """
    glyph_name = 'Lcommaaccent'
    expected = u'\u013B'
    assert name2unicode(glyph_name) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni():
    """A "uni" prefix followed by four hex digits maps to that code point.

    The components "Lcommaaccent", "uni013B" and "u013B" all map to the
    string U+013B; this test covers the "uni013B" spelling.
    """
    glyph_name = 'uni013B'
    expected = u'\u013B'
    assert name2unicode(glyph_name) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni_with_sequence_of_digits():
    """A "uni" prefix may carry several groups of four hex digits.

    "uni20AC0308" consists of a single component, which is mapped to the
    two-character string U+20AC U+0308.
    """
    glyph_name = 'uni20AC0308'
    expected = u'\u20AC\u0308'
    assert name2unicode(glyph_name) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni_empty_string():
    """The single-component name "uni20ac" is mapped to an empty string.

    Note that the hexadecimal digits in this name are lowercase, unlike
    the "uni20AC..." names used by the other tests in this module.
    """
    glyph_name = 'uni20ac'
    expected = u''
    assert name2unicode(glyph_name) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni_empty_string_long():
    """The single-component name "uniD801DC0C" is mapped to an empty string.

    Neither D801 nor DC0C is in the set of values the "uni" form accepts.
    The form therefore cannot be used to reach the character whose UTF-16
    encoding is D801 DC0C, namely U+1040C. That character can be mapped
    correctly by using the glyph name "u1040C" instead.
    """
    glyph_name = 'uniD801DC0C'
    expected = u''
    assert name2unicode(glyph_name) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_uni_pua():
    """The "uni" form can address the code point U+F6FB.

    Both "Ogoneksmall" and "uniF6FB" map to the string that corresponds to
    U+F6FB; this test covers the "uniF6FB" spelling.
    """
    glyph_name = 'uniF6FB'
    expected = u'\uF6FB'
    assert name2unicode(glyph_name) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_u_with_4_digits():
    """A "u" prefix followed by four hex digits maps to that code point.

    The components "Lcommaaccent", "uni013B" and "u013B" all map to the
    string U+013B; this test covers the "u013B" spelling.
    """
    glyph_name = 'u013B'
    expected = u'\u013B'
    assert name2unicode(glyph_name) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_u_with_5_digits():
    """A "u" prefix followed by five hex digits maps to that code point.

    "u1040C" consists of a single component, which is mapped to the string
    U+1040C.
    """
    glyph_name = 'u1040C'
    expected = u'\U0001040C'
    assert name2unicode(glyph_name) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_multiple_components():
    """A name with several components maps to the concatenated results.

    "Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the string
    U+013B U+20AC U+0308 U+1040C: the components are separated by
    underscores and the ".alternate" suffix does not contribute.
    """
    # "uni20AC0308" contributes two characters (U+20AC and U+0308), so the
    # expected string has four code points in total.
    expected = u'\u013B\u20AC\u0308\U0001040C'
    assert expected == name2unicode('Lcommaaccent_uni20AC0308_u1040C.alternate')
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_foo():
    """A name that matches no mapping rule maps to an empty string.

    'foo' is not in the AGL and does not start with a 'u', so it is mapped
    to an empty string.
    """
    glyph_name = 'foo'
    expected = u''
    assert name2unicode(glyph_name) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_notdef():
    """The special name ".notdef" maps to an empty string.

    ".notdef" is reduced to an empty string (step 1 of the mapping) and
    that empty string is then mapped to an empty string (step 3).
    """
    glyph_name = '.notdef'
    expected = u''
    assert name2unicode(glyph_name) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_name2unicode_pua_ogoneksmall():
    """The glyph name "Ogoneksmall" maps to the code point U+F6FB.

    Both "Ogoneksmall" and "uniF6FB" map to the string that corresponds to
    U+F6FB; this test covers the "Ogoneksmall" spelling.
    """
    glyph_name = 'Ogoneksmall'
    expected = u'\uF6FB'
    assert name2unicode(glyph_name) == expected
|
Loading…
Reference in New Issue