From 1dbe9ff7e7a71b0eaad39963ea8514b88d26c7c6 Mon Sep 17 00:00:00 2001 From: Ashley Blackmore Date: Wed, 18 Feb 2015 18:35:53 +0100 Subject: [PATCH 1/4] Update setup.py Install missing pycrypto lib --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index c9962fe..51779e7 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,9 @@ PDF parser that can be used for other purposes instead of text analysis.''', author='Yusuke Shinyama', author_email='yusuke at cs dot nyu dot edu', url='http://euske.github.io/pdfminer/index.html', + install_requires=[ + 'pycrypto', + ], packages=[ 'pdfminer', ], From 9af4fe85e1427ec12be57d4ec7604a1973d26288 Mon Sep 17 00:00:00 2001 From: Pablo Castellano Date: Sun, 14 Jun 2015 17:01:03 +0200 Subject: [PATCH 2/4] README: Changed line about Python 3 support --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 30aa5db..82a0d35 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ Features How to Install -------------- - * Install Python 2.6 or newer. (**Python 3 is not supported.**) + * Install Python 2.6 or newer. (**For Python 3 support have a look at [pdfminer.six](https://github.com/goulu/pdfminer)**). * Download the source code. * Unpack it. 
* Run `setup.py`: From 63c9378b8b2f9d9d09c4686cf654ef68294e7764 Mon Sep 17 00:00:00 2001 From: Ivan Pozdeev Date: Mon, 10 Aug 2015 03:14:51 +0300 Subject: [PATCH 3/4] make ValueError's descriptive --- pdfminer/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pdfminer/utils.py b/pdfminer/utils.py index b53c1c1..307c5e7 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -11,7 +11,7 @@ from sys import maxint as INF def apply_png_predictor(pred, colors, columns, bitspercomponent, data): if bitspercomponent != 8: # unsupported - raise ValueError(bitspercomponent) + raise ValueError("Unsupported `bitspercomponent': %d"%bitspercomponent) nbytes = colors*columns*bitspercomponent//8 i = 0 buf = b'' @@ -43,7 +43,7 @@ def apply_png_predictor(pred, colors, columns, bitspercomponent, data): line2 += chr(c) else: # unsupported - raise ValueError(ft) + raise ValueError("Unsupported predictor value: %d"%ft) buf += line2 line0 = line2 return buf From 63bb3caec28113354afb23739a400ea2f3a6aff1 Mon Sep 17 00:00:00 2001 From: lucanaso Date: Wed, 9 Dec 2015 16:47:32 +0100 Subject: [PATCH 4/4] Fixed for rendering non breaking spaces (cid:160) As stated in the PDF specification ISO 32000-1, table in Annex D.2 Latin Character Set and Encodings pages 653 to 656 (available here: http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf): "The SPACE character shall also be encoded as 312 in MacRomanEncoding and as 240 in WinAnsiEncoding. This duplicate code shall signify a nonbreaking space; it shall be typographically the same as (U+003A) SPACE." The duplicate key was missing, therefore PDFMiner was returning the string "(cid:160)". This fix adds the duplicate key in latin_enc.py. glyphlist.py does not need to be modified as it already contains a key for non-breaking space https://github.com/lucanaso/pdfminer/blob/master/pdfminer/glyphlist.py#L2755. 
--- pdfminer/latin_enc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pdfminer/latin_enc.py b/pdfminer/latin_enc.py index 41d219c..52dadc1 100644 --- a/pdfminer/latin_enc.py +++ b/pdfminer/latin_enc.py @@ -162,6 +162,7 @@ ENCODING = [ ('mu', None, 181, 181, 181), ('multiply', None, None, 215, 215), ('n', 110, 110, 110, 110), + ('nbspace', None, 202, 160, None), ('nine', 57, 57, 57, 57), ('ntilde', None, 150, 241, 241), ('numbersign', 35, 35, 35, 35),