From 63bb3caec28113354afb23739a400ea2f3a6aff1 Mon Sep 17 00:00:00 2001 From: lucanaso Date: Wed, 9 Dec 2015 16:47:32 +0100 Subject: [PATCH] Fix rendering of non-breaking spaces (cid:160) As stated in the PDF specification ISO 32000-1, in the table in Annex D.2 "Latin Character Set and Encodings", pages 653 to 656 (available here: http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf): "The SPACE character shall also be encoded as 312 in MacRomanEncoding and as 240 in WinAnsiEncoding. This duplicate code shall signify a nonbreaking space; it shall be typographically the same as (U+003A) SPACE." (Note: the codes in that table are octal, i.e. 312 octal = 202 decimal and 240 octal = 160 decimal; also, SPACE is U+0020, whereas U+003A is COLON.) The duplicate key was missing, therefore PDFMiner was returning the string "(cid:160)". This fix adds the duplicate key in latin_enc.py. glyphlist.py does not need to be modified, as it already contains a key for the non-breaking space (see https://github.com/lucanaso/pdfminer/blob/master/pdfminer/glyphlist.py#L2755). --- pdfminer/latin_enc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pdfminer/latin_enc.py b/pdfminer/latin_enc.py index 41d219c..52dadc1 100644 --- a/pdfminer/latin_enc.py +++ b/pdfminer/latin_enc.py @@ -162,6 +162,7 @@ ENCODING = [ ('mu', None, 181, 181, 181), ('multiply', None, None, 215, 215), ('n', 110, 110, 110, 110), + ('nbspace', None, 202, 160, None), ('nine', 57, 57, 57, 57), ('ntilde', None, 150, 241, 241), ('numbersign', 35, 35, 35, 35),