From 2ede124142a73175ae5dd4e4cf334bbbcc831a0a Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sat, 3 Nov 2018 22:52:29 -0700 Subject: [PATCH] Interpret font Descent as a negative number even if specified as positive The PDF RM specifies that Descent should be negative. Fonts that claim to have a positive Descent (not that it would make sense) always seem to be wrong about this claim. --- pdfminer/pdffont.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index feb8557..ea75b34 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -488,6 +488,13 @@ class PDFFont(object): self.leading = num_value(descriptor.get('Leading', 0)) self.bbox = list_value(descriptor.get('FontBBox', (0, 0, 0, 0))) self.hscale = self.vscale = .001 + + # PDF RM 9.8.1 specifies /Descent should always be a negative number. + # PScript5.dll seems to produce Descent with a positive number, but + # text analysis will be wrong if this is taken as correct. So force + # descent to negative. + if self.descent > 0: + self.descent = -self.descent return def __repr__(self): @@ -503,9 +510,11 @@ class PDFFont(object): return bytearray(bytes) # map(ord, bytes) def get_ascent(self): + """Ascent above the baseline, in text space units""" return self.ascent * self.vscale def get_descent(self): + """Descent below the baseline, in text space units; always negative""" return self.descent * self.vscale def get_width(self):