commit
48b25939c2
|
@ -16,6 +16,7 @@ from . import settings
|
||||||
from .psparser import PSLiteral
|
from .psparser import PSLiteral
|
||||||
from .psparser import literal_name
|
from .psparser import literal_name
|
||||||
from .pdftypes import PDFException
|
from .pdftypes import PDFException
|
||||||
|
from .pdftypes import PDFStream
|
||||||
from .pdftypes import resolve1
|
from .pdftypes import resolve1
|
||||||
from .pdftypes import int_value
|
from .pdftypes import int_value
|
||||||
from .pdftypes import num_value
|
from .pdftypes import num_value
|
||||||
|
@ -127,7 +128,7 @@ class Type1FontHeaderParser(PSStackParser):
|
||||||
|
|
||||||
|
|
||||||
NIBBLES = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', 'e', 'e-', None, '-')
|
NIBBLES = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', 'e', 'e-', None, '-')
|
||||||
|
IDENTITY_ENCODER = ('Identity-H', 'Identity-V')
|
||||||
|
|
||||||
## CFFFont
|
## CFFFont
|
||||||
## (Format specified in Adobe Technical Note: #5176
|
## (Format specified in Adobe Technical Note: #5176
|
||||||
|
@ -648,18 +649,8 @@ class PDFCIDFont(PDFFont):
|
||||||
self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
|
self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
|
||||||
self.cidcoding = '%s-%s' % (resolve1(self.cidsysteminfo.get('Registry', b'unknown')).decode("latin1"),
|
self.cidcoding = '%s-%s' % (resolve1(self.cidsysteminfo.get('Registry', b'unknown')).decode("latin1"),
|
||||||
resolve1(self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1"))
|
resolve1(self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1"))
|
||||||
try:
|
self.cmap = self.get_cmap_from_spec(spec, strict)
|
||||||
name = literal_name(spec['Encoding'])
|
|
||||||
except KeyError:
|
|
||||||
if strict:
|
|
||||||
raise PDFFontError('Encoding is unspecified')
|
|
||||||
name = 'unknown'
|
|
||||||
try:
|
|
||||||
self.cmap = CMapDB.get_cmap(name)
|
|
||||||
except CMapDB.CMapNotFound as e:
|
|
||||||
if strict:
|
|
||||||
raise PDFFontError(e)
|
|
||||||
self.cmap = CMap()
|
|
||||||
try:
|
try:
|
||||||
descriptor = dict_value(spec['FontDescriptor'])
|
descriptor = dict_value(spec['FontDescriptor'])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
@ -706,6 +697,36 @@ class PDFCIDFont(PDFFont):
|
||||||
PDFFont.__init__(self, descriptor, widths, default_width=default_width)
|
PDFFont.__init__(self, descriptor, widths, default_width=default_width)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
def get_cmap_from_spec(self, spec, strict):
|
||||||
|
"""
|
||||||
|
For certain PDFs, Encoding Type isn't mentioned as an attribute of
|
||||||
|
Encoding but as an attribute of CMapName, where CMapName is an
|
||||||
|
attribure of spec['Encoding'].
|
||||||
|
The horizaontal/vertical modes are mentioned with diffrent name
|
||||||
|
such as 'DLIdent-H/V','OneByteIdentityH/V','Identity-H/V'
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
spec_encoding = spec['Encoding']
|
||||||
|
if hasattr(spec_encoding, 'name'):
|
||||||
|
cmap_name = literal_name(spec['Encoding'])
|
||||||
|
else:
|
||||||
|
cmap_name = literal_name(spec_encoding['CMapName'])
|
||||||
|
except KeyError:
|
||||||
|
if strict:
|
||||||
|
raise PDFFontError('Encoding is unspecified')
|
||||||
|
cmap_name = 'unknown'
|
||||||
|
if type(cmap_name) is PDFStream:
|
||||||
|
if 'CMapName' in cmap_name:
|
||||||
|
cmap_name = cmap_name.get('CMapName').name
|
||||||
|
else:
|
||||||
|
if strict:
|
||||||
|
raise PDFFontError('CMapName unspecified for encoding')
|
||||||
|
cmap_name = 'unknown'
|
||||||
|
if cmap_name in IDENTITY_ENCODER:
|
||||||
|
return CMapDB.get_cmap(cmap_name)
|
||||||
|
else:
|
||||||
|
return CMap()
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return '<PDFCIDFont: basefont=%r, cidcoding=%r>' % (self.basefont, self.cidcoding)
|
return '<PDFCIDFont: basefont=%r, cidcoding=%r>' % (self.basefont, self.cidcoding)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import nose, logging, os
|
||||||
|
from pdfminer.cmapdb import IdentityCMap, CMap
|
||||||
|
from pdfminer.pdffont import PDFCIDFont
|
||||||
|
from pdfminer.pdftypes import PDFStream
|
||||||
|
from pdfminer.psparser import PSLiteral
|
||||||
|
|
||||||
|
class TestPDFEncoding():
|
||||||
|
|
||||||
|
def test_cmapname_onebyteidentityV(self):
|
||||||
|
stream = PDFStream({'CMapName': PSLiteral('OneByteIdentityV')}, '')
|
||||||
|
spec = {'Encoding': stream}
|
||||||
|
font = PDFCIDFont(None, spec)
|
||||||
|
assert isinstance(font.cmap, CMap)
|
||||||
|
|
||||||
|
def test_cmapname_onebyteidentityH(self):
|
||||||
|
stream = PDFStream({'CMapName': PSLiteral('OneByteIdentityH')}, '')
|
||||||
|
spec = {'Encoding': stream}
|
||||||
|
font = PDFCIDFont(None, spec)
|
||||||
|
assert isinstance(font.cmap, CMap)
|
||||||
|
|
||||||
|
def test_cmapname_V(self):
|
||||||
|
stream = PDFStream({'CMapName': PSLiteral('V')}, '')
|
||||||
|
spec = {'Encoding': stream}
|
||||||
|
font = PDFCIDFont(None, spec)
|
||||||
|
assert isinstance(font.cmap, CMap)
|
||||||
|
|
||||||
|
def test_cmapname_H(self):
|
||||||
|
stream = PDFStream({'CMapName': PSLiteral('H')}, '')
|
||||||
|
spec = {'Encoding': stream}
|
||||||
|
font = PDFCIDFont(None, spec)
|
||||||
|
assert isinstance(font.cmap, CMap)
|
||||||
|
|
||||||
|
def test_encoding_identityH(self):
|
||||||
|
spec = {'Encoding': PSLiteral('Identity-H')}
|
||||||
|
font = PDFCIDFont(None, spec)
|
||||||
|
assert isinstance(font.cmap, IdentityCMap)
|
||||||
|
|
||||||
|
def test_encoding_identityV(self):
|
||||||
|
spec = {'Encoding': PSLiteral('Identity-V')}
|
||||||
|
font = PDFCIDFont(None, spec)
|
||||||
|
assert isinstance(font.cmap, IdentityCMap)
|
||||||
|
|
||||||
|
def test_encoding_identityH_as_PSLiteral_stream(self):
|
||||||
|
stream = PDFStream({'CMapName':PSLiteral('Identity-H')}, '')
|
||||||
|
spec = {'Encoding': stream}
|
||||||
|
font = PDFCIDFont(None, spec)
|
||||||
|
assert isinstance(font.cmap, IdentityCMap)
|
||||||
|
|
||||||
|
def test_encoding_identityV_as_PSLiteral_stream(self):
|
||||||
|
stream = PDFStream({'CMapName':PSLiteral('Identity-V')}, '')
|
||||||
|
spec = {'Encoding': stream}
|
||||||
|
font = PDFCIDFont(None, spec)
|
||||||
|
assert isinstance(font.cmap, IdentityCMap)
|
||||||
|
|
||||||
|
def test_encoding_identityH_as_stream(self):
|
||||||
|
stream = PDFStream({'CMapName':'Identity-H'}, '')
|
||||||
|
spec = {'Encoding': stream}
|
||||||
|
font = PDFCIDFont(None, spec)
|
||||||
|
assert isinstance(font.cmap, IdentityCMap)
|
||||||
|
|
||||||
|
def test_encoding_identityV_as_stream(self):
|
||||||
|
stream = PDFStream({'CMapName':'Identity-V'}, '')
|
||||||
|
spec = {'Encoding': stream}
|
||||||
|
font = PDFCIDFont(None, spec)
|
||||||
|
assert isinstance(font.cmap, IdentityCMap)
|
||||||
|
|
||||||
|
def test_font_without_spec(self):
|
||||||
|
font = PDFCIDFont(None, {})
|
||||||
|
assert isinstance(font.cmap, CMap)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
nose.runmodule()
|
Loading…
Reference in New Issue