Catch ValueError when converting font encoding differences to characters (#389)

* Catch the ValueError raised by `name2unicode` when a unicode value cannot be parsed

* Add test for catching ValueError and KeyError when font encoding differences are invalid

* Added line to CHANGELOG.md
pull/393/head
Pieter Marsman 2020-03-16 20:12:45 +01:00 committed by GitHub
parent a087d6dfc8
commit 9d7fe2d9ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 14 additions and 2 deletions

View File

@@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [Unreleased] ## [Unreleased]
### Fixed ### Fixed
- Ignore ValueError when converting font encoding differences ([#389](https://github.com/pdfminer/pdfminer.six/pull/389))
- Grouping of text lines outside of parent container bounding box ([#386](https://github.com/pdfminer/pdfminer.six/pull/386)) - Grouping of text lines outside of parent container bounding box ([#386](https://github.com/pdfminer/pdfminer.six/pull/386))
## [20200124] - 2020-01-24 ## [20200124] - 2020-01-24

View File

@@ -106,7 +106,7 @@ class EncodingDB:
elif isinstance(x, PSLiteral): elif isinstance(x, PSLiteral):
try: try:
cid2unicode[cid] = name2unicode(x.name) cid2unicode[cid] = name2unicode(x.name)
except KeyError as e: except (KeyError, ValueError) as e:
log.debug(str(e)) log.debug(str(e))
cid += 1 cid += 1
return cid2unicode return cid2unicode

View File

@@ -6,7 +6,8 @@ Therefore lowercase unittest variants are added.
""" """
from nose.tools import assert_raises from nose.tools import assert_raises
from pdfminer.encodingdb import name2unicode from pdfminer.encodingdb import name2unicode, EncodingDB
from pdfminer.psparser import PSLiteral
def test_name2unicode_name_in_agl(): def test_name2unicode_name_in_agl():
@@ -145,3 +146,12 @@ def test_name2unicode_pua_ogoneksmall():
def test_name2unicode_overflow_error(): def test_name2unicode_overflow_error():
assert_raises(KeyError, name2unicode, '226215240241240240240240') assert_raises(KeyError, name2unicode, '226215240241240240240240')
def test_get_encoding_with_invalid_differences():
"""Invalid differences should be silently ignored
Regression test for https://github.com/pdfminer/pdfminer.six/issues/385
"""
invalid_differences = [PSLiteral('ubuntu'), PSLiteral('1234')]
EncodingDB.get_encoding('StandardEncoding', invalid_differences)