diff --git a/pdfminer/glyphlist.py b/pdfminer/glyphlist.py index b4b449c..10e1008 100644 --- a/pdfminer/glyphlist.py +++ b/pdfminer/glyphlist.py @@ -7,7 +7,7 @@ Unicode characters instead of using decimal/hex character code. The following data was taken by - $ wget http://www.adobe.com/devnet/opentype/archives/glyphlist.txt + $ wget https://partners.adobe.com/public/developer/en/opentype/glyphlist.txt $ python tools/conv_glyphlist.py glyphlist.txt > glyphlist.py """ diff --git a/pdfminer/latin_enc.py b/pdfminer/latin_enc.py index 41d219c..bb0c1eb 100644 --- a/pdfminer/latin_enc.py +++ b/pdfminer/latin_enc.py @@ -162,6 +162,7 @@ ENCODING = [ ('mu', None, 181, 181, 181), ('multiply', None, None, 215, 215), ('n', 110, 110, 110, 110), + ('nbspace', None, 202, 160, None), ('nine', 57, 57, 57, 57), ('ntilde', None, 150, 241, 241), ('numbersign', 35, 35, 35, 35), diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index caad157..54acfaa 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -229,7 +229,7 @@ class PDFStream(PDFObject): if not isinstance(filters, list): filters = [filters] if not isinstance(params, list): - params = [params] + params = [params] * len(filters) return zip(filters, params) def decode(self): diff --git a/pdfminer/utils.py b/pdfminer/utils.py index e5bd6bf..e2638d4 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -46,8 +46,9 @@ def compatible_encode_method(bytesorstring, encoding='utf-8', erraction='ignore' def apply_png_predictor(pred, colors, columns, bitspercomponent, data): if bitspercomponent != 8: # unsupported - raise ValueError(bitspercomponent) - nbytes = colors*columns*bitspercomponent//8 + raise ValueError("Unsupported `bitspercomponent': %d" % + bitspercomponent) + nbytes = colors * columns * bitspercomponent // 8 i = 0 buf = b'' line0 = b'\x00' * columns @@ -86,7 +87,7 @@ def apply_png_predictor(pred, colors, columns, bitspercomponent, data): line2 += six.int2byte(c) else: # unsupported - raise ValueError(ft) + raise ValueError("Unsupported predictor value: %d" % ft) buf += line2 line0 = line2 return buf diff --git a/setup.py b/setup.py index 7f465a5..2859607 100644 --- a/setup.py +++ b/setup.py @@ -4,15 +4,19 @@ from setuptools import setup from pdfminer import __version__ import sys +requires = ['six', 'pycrypto'] +if sys.version_info >= (3, 0): + requires.append('chardet') + setup( name='pdfminer.six', version=__version__, - packages=['pdfminer',], + packages=['pdfminer'], package_data={'pdfminer': ['cmap/*.pickle.gz']}, - install_requires=['six', 'chardet'] if sys.version_info >= (3, 0) else ['six'], + install_requires=requires, description='PDF parser and analyzer', long_description='''fork of PDFMiner using six for Python 2+3 compatibility - + PDFMiner is a tool for extracting information from PDF documents. Unlike other PDF-related tools, it focuses entirely on getting and analyzing text data. PDFMiner allows to obtain