diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 83110e7..1681a8d 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -1,5 +1,3 @@ - - """ Adobe character mapping (CMap) support. CMaps provide the mapping between character codes and Unicode @@ -40,8 +38,6 @@ class CMapError(Exception): pass -## CMapBase -## class CMapBase(object): debug = 0 @@ -67,8 +63,6 @@ class CMapBase(object): return -## CMap -## class CMap(CMapBase): def __init__(self, **kwargs): @@ -119,8 +113,6 @@ class CMap(CMapBase): return -## IdentityCMap -## class IdentityCMap(CMapBase): def decode(self, code): @@ -140,8 +132,7 @@ class IdentityCMapByte(IdentityCMap): else: return () -## UnicodeMap -## + class UnicodeMap(CMapBase): def __init__(self, **kwargs): @@ -162,8 +153,6 @@ class UnicodeMap(CMapBase): return -## FileCMap -## class FileCMap(CMap): def add_code2cid(self, code, cid): @@ -182,8 +171,6 @@ class FileCMap(CMap): return -## FileUnicodeMap -## class FileUnicodeMap(UnicodeMap): def add_cid2unichr(self, cid, code): @@ -201,8 +188,6 @@ class FileUnicodeMap(UnicodeMap): return -## PyCMap -## class PyCMap(CMap): def __init__(self, name, module): @@ -213,8 +198,6 @@ class PyCMap(CMap): return -## PyUnicodeMap -## class PyUnicodeMap(UnicodeMap): def __init__(self, name, module, vertical): @@ -227,8 +210,6 @@ class PyUnicodeMap(UnicodeMap): return -## CMapDB -## class CMapDB(object): _cmap_cache = {} @@ -284,8 +265,6 @@ class CMapDB(object): return umaps[vertical] -## CMapParser -## class CMapParser(PSStackParser): def __init__(self, cmap, fp): @@ -373,7 +352,6 @@ class CMapParser(PSStackParser): s1 = nunpack(svar) e1 = nunpack(evar) vlen = len(svar) - #assert s1 <= e1, str((s1, e1)) for i in range(e1-s1+1): x = sprefix+struct.pack('>L', s1+i)[-vlen:] self.cmap.add_code2cid(x, cid+i) @@ -400,7 +378,6 @@ class CMapParser(PSStackParser): continue s1 = nunpack(s) e1 = nunpack(e) - #assert s1 <= e1, str((s1, e1)) if isinstance(code, list): for i in range(e1-s1+1): self.cmap.add_cid2unichr(s1+i, code[i]) @@ -435,17 +412,16 @@ class CMapParser(PSStackParser): return -# test def main(argv): args = argv[1:] for fname in args: fp = open(fname, 'rb') cmap = FileUnicodeMap() - #cmap = FileCMap() CMapParser(cmap, fp).run() fp.close() cmap.dump() return + if __name__ == '__main__': sys.exit(main(sys.argv)) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 4bfd6ed..e94b383 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -35,7 +35,6 @@ from .utils import nunpack log = logging.getLogger(__name__) - def get_widths(seq): widths = {} r = [] @@ -54,10 +53,6 @@ def get_widths(seq): widths[i] = w r = [] return widths -#assert get_widths([1]) == {} -#assert get_widths([1,2,3]) == {1:3, 2:3} -#assert get_widths([1,[2,3],6,[7,8]]) == {1:2,2:3, 6:7,7:8} - def get_widths2(seq): widths = {} @@ -77,13 +72,8 @@ def get_widths2(seq): widths[i] = (w, (vx, vy)) r = [] return widths -#assert get_widths2([1]) == {} -#assert get_widths2([1,2,3,4,5]) == {1:(3, (4,5)), 2:(3, (4,5))} -#assert get_widths2([1,[2,3,4,5],6,[7,8,9]]) == {1:(2, (3,4)), 6:(7, (8,9))} -## FontMetricsDB -## class FontMetricsDB(object): @classmethod @@ -91,8 +81,6 @@ class FontMetricsDB(object): return FONT_METRICS[fontname] -## Type1FontHeaderParser -## class Type1FontHeaderParser(PSStackParser): KEYWORD_BEGIN = KWD(b'begin') @@ -142,6 +130,10 @@ class Type1FontHeaderParser(PSStackParser): NIBBLES = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', 'e', 'e-', None, '-') +""" +Note: DLIdent-* isn't found in PDF Reference but is been kept as +it is harmless and have possibility of been a type. (induced from bug report/PR) +""" IDENTITY_ENCODER = {'Identity-H':'Identity-H', 'Identity-V':'Identity-V', 'DLIdent-H':'Identity-H', @@ -150,10 +142,6 @@ IDENTITY_ENCODER = {'Identity-H':'Identity-H', 'OneByteIdentityV':'OneByteIdentityV', } -## CFFFont -## (Format specified in Adobe Technical Note: #5176 -## "The Compact Font Format Specification") -## def getdict(data): d = {} fp = BytesIO(data) @@ -281,6 +269,7 @@ class CFFFont(object): 'Light', 'Medium', 'Regular', 'Roman', 'Semibold', ) + class INDEX(object): def __init__(self, fp): @@ -381,9 +370,6 @@ class CFFFont(object): assert False, str(('Unhandled', format)) else: raise ValueError('unsupported charset format: %r' % format) - #print self.code2gid - #print self.name2gid - #assert 0 return def getstr(self, sid): @@ -392,8 +378,6 @@ class CFFFont(object): return self.string_index[sid-len(self.STANDARD_STRINGS)] -## TrueTypeFont -## class TrueTypeFont(object): class CMapNotFound(Exception): @@ -479,8 +463,6 @@ class TrueTypeFont(object): return unicode_map -## Fonts -## class PDFFontError(PDFException): pass @@ -492,7 +474,6 @@ LITERAL_STANDARD_ENCODING = LIT('StandardEncoding') LITERAL_TYPE1C = LIT('Type1C') -# PDFFont class PDFFont(object): def __init__(self, descriptor, widths, default_width=None): @@ -557,7 +538,6 @@ class PDFFont(object): return sum(self.char_width(cid) for cid in self.decode(s)) -# PDFSimpleFont class PDFSimpleFont(PDFFont): def __init__(self, descriptor, widths, spec): @@ -594,7 +574,6 @@ class PDFSimpleFont(PDFFont): raise PDFUnicodeNotDefined(None, cid) -# PDFType1Font class PDFType1Font(PDFSimpleFont): def __init__(self, rsrcmgr, spec): @@ -626,14 +605,12 @@ class PDFType1Font(PDFSimpleFont): return '' % self.basefont -# PDFTrueTypeFont class PDFTrueTypeFont(PDFType1Font): def __repr__(self): return '' % self.basefont -# PDFType3Font class PDFType3Font(PDFSimpleFont): def __init__(self, rsrcmgr, spec): @@ -656,7 +633,6 @@ class PDFType3Font(PDFSimpleFont): return '' -# PDFCIDFont class PDFCIDFont(PDFFont): def __init__(self, rsrcmgr, spec, strict=settings.STRICT): @@ -721,9 +697,9 @@ class PDFCIDFont(PDFFont): """ For certain PDFs, Encoding Type isn't mentioned as an attribute of Encoding but as an attribute of CMapName, where CMapName is an - attribure of spec['Encoding']. - The horizaontal/vertical modes are mentioned with diffrent name - such as 'DLIdent-H/V','OneByteIdentityH/V','Identity-H/V' + attribute of spec['Encoding']. + The horizontal/vertical modes are mentioned with different name + such as 'DLIdent-H/V','OneByteIdentityH/V','Identity-H/V'. """ try: spec_encoding = spec['Encoding'] @@ -771,16 +747,14 @@ class PDFCIDFont(PDFFont): except KeyError: raise PDFUnicodeNotDefined(self.cidcoding, cid) - -# main def main(argv): for fname in argv[1:]: fp = open(fname, 'rb') - #font = TrueTypeFont(fname, fp) font = CFFFont(fname, fp) print (font) fp.close() return + if __name__ == '__main__': sys.exit(main(sys.argv))