From fa400431f571de9197ab72ffc5dd9f7d76826474 Mon Sep 17 00:00:00 2001 From: Fakabbir Amin Date: Wed, 17 Jul 2019 11:38:00 +0530 Subject: [PATCH] Adds Test, Removes Unnecessary Assumptions --- pdfminer/pdffont.py | 24 ++++------------- tests/test_pdfencoding.py | 56 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 19 deletions(-) create mode 100644 tests/test_pdfencoding.py diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index f5b8942..a09c5c4 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -128,14 +128,7 @@ class Type1FontHeaderParser(PSStackParser): NIBBLES = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', 'e', 'e-', None, '-') -CMAP_ENCODER = { - 'DLIdent-H': 'Identity-H', - 'OneByteIdentityH': 'Identity-H', - 'Identity-H': 'Identity-H', - 'DLIdent-V': 'Identity-V', - 'OneByteIdentityV': 'Identity-V', - 'Identity-V': 'Identity-V' -} +IDENTITY_ENCODER = ('Identity-H', 'Identity-V') ## CFFFont ## (Format specified in Adobe Technical Note: #5176 @@ -724,21 +717,14 @@ class PDFCIDFont(PDFFont): cmap_name = 'unknown' if type(cmap_name) is PDFStream: if 'CMapName' in cmap_name: - cmap_key = cmap_name.get('CMapName').cmap_name - try: - cmap_name = CMAP_ENCODER[cmap_key] - except: - cmap_name = cmap_key - raise PDFFontError('Unidentified encoding mentioned. %s is not supported' % cmap_name) + cmap_name = cmap_name.get('CMapName').name else: if strict: - raise PDFFontError('Encoding is unspecified') + raise PDFFontError('CMapName unspecified for encoding') cmap_name = 'unknown' - try: + if cmap_name in IDENTITY_ENCODER: self.cmap = CMapDB.get_cmap(cmap_name) - except CMapDB.CMapNotFound as e: - if strict: - raise PDFFontError(e) + else: self.cmap = CMap() def __repr__(self): diff --git a/tests/test_pdfencoding.py b/tests/test_pdfencoding.py new file mode 100644 index 0000000..4725615 --- /dev/null +++ b/tests/test_pdfencoding.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python + +# -*- coding: utf-8 -*- + +import nose, logging, os +from pdfminer.cmapdb import IdentityCMap, CMap +from pdfminer.pdffont import PDFCIDFont +from pdfminer.pdftypes import PDFStream +from pdfminer.psparser import PSLiteral + +# 'DLIdent-H': 'Identity-H', +# 'OneByteIdentityH': 'Identity-H', +# 'Identity-H': 'Identity-H', +# 'DLIdent-V': 'Identity-V', +# 'OneByteIdentityV': 'Identity-V', +# 'Identity-V': 'Identity-V' + +class TestPDFEncoding(): + + def test_cmapname_onebyteidentityV(self): + stream = PDFStream({'CMapName': PSLiteral('OneByteIdentityV')}, '') + spec = {'Encoding': stream} + font = PDFCIDFont(None, spec) + assert isinstance(font.cmap, CMap) + + def test_cmapname_onebyteidentityH(self): + stream = PDFStream({'CMapName': PSLiteral('OneByteIdentityH')}, '') + spec = {'Encoding': stream} + font = PDFCIDFont(None, spec) + assert isinstance(font.cmap, CMap) + + def test_cmapname_V(self): + stream = PDFStream({'CMapName': PSLiteral('V')}, '') + spec = {'Encoding': stream} + font = PDFCIDFont(None, spec) + assert isinstance(font.cmap, CMap) + + def test_cmapname_H(self): + stream = PDFStream({'CMapName': PSLiteral('H')}, '') + spec = {'Encoding': stream} + font = PDFCIDFont(None, spec) + assert isinstance(font.cmap, CMap) + + def test_encoding_identityH(self): + spec = {'Encoding': PSLiteral('Identity-H')} + font = PDFCIDFont(None, spec) + assert isinstance(font.cmap, IdentityCMap) + + def test_encoding_identityV(self): + spec = {'Encoding': PSLiteral('Identity-V')} + font = PDFCIDFont(None, spec) + assert isinstance(font.cmap, IdentityCMap) + + +if __name__ == '__main__': + nose.runmodule()