Catch ValueError when converting font encoding differences to characters (#389)

* Catch ValueError when calling `name2unicode` when a Unicode value cannot be parsed
* Add test for catching ValueError and KeyError when font encoding differences are invalid
* Add line to CHANGELOG.md
2020-03-16 20:12:45 +01:00 · 2020-03-16 20:12:45 +01:00 · 9d7fe2d9ee
parent a087d6dfc8
commit 9d7fe2d9ee
3 changed files with 14 additions and 2 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ## [Unreleased]
 ### Fixed
 - Ignore ValueError when converting font encoding differences ([#389](https://github.com/pdfminer/pdfminer.six/pull/389))
 - Grouping of text lines outside of parent container bounding box ([#386](https://github.com/pdfminer/pdfminer.six/pull/386))
 ## [20200124] - 2020-01-24
--- a/pdfminer/encodingdb.py
+++ b/pdfminer/encodingdb.py
@@ -106,7 +106,7 @@ class EncodingDB:
                elif isinstance(x, PSLiteral):
                    try:
                        cid2unicode[cid] = name2unicode(x.name)
-                    except KeyError as e:
+                    except (KeyError, ValueError) as e:
                        log.debug(str(e))
                    cid += 1
        return cid2unicode
--- a/tests/test_encodingdb.py
+++ b/tests/test_encodingdb.py
@@ -6,7 +6,8 @@ Therefore lowercase unittest variants are added.
 """
 from nose.tools import assert_raises
-from pdfminer.encodingdb import name2unicode
+from pdfminer.encodingdb import name2unicode, EncodingDB
 from pdfminer.psparser import PSLiteral
 def test_name2unicode_name_in_agl():
@@ -145,3 +146,12 @@ def test_name2unicode_pua_ogoneksmall():
 def test_name2unicode_overflow_error():
    assert_raises(KeyError, name2unicode, '226215240241240240240240')
 def test_get_encoding_with_invalid_differences():
    """Invalid differences should be silently ignored
    Regression test for https://github.com/pdfminer/pdfminer.six/issues/385
    """
    invalid_differences = [PSLiteral('ubuntu'), PSLiteral('1234')]
    EncodingDB.get_encoding('StandardEncoding', invalid_differences)