From 3f18a74e9cdc6398d3d5a6827175cb551b509316 Mon Sep 17 00:00:00 2001 From: "yusuke.shinyama.dummy" Date: Mon, 7 Sep 2009 14:25:15 +0000 Subject: [PATCH] fontsize now referring to bbox git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@131 1aa58f4a-7d42-0410-adbc-911cccaed67c --- pdfminer/converter.py | 10 +++++++--- pdfminer/layout.py | 17 ++++++++++++++--- pdfminer/pdffont.py | 7 ++++++- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 4bc08c8..e9c2d79 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -3,7 +3,7 @@ import sys from pdfminer.pdfdevice import PDFDevice, PDFTextDevice from pdfminer.pdffont import PDFUnicodeNotDefined from pdfminer.layout import LayoutContainer, LTPage, LTText, LTLine, LTRect, LTFigure, LTTextItem, LTTextBox, LTTextLine -from pdfminer.utils import apply_matrix_pt, enc +from pdfminer.utils import apply_matrix_pt, mult_matrix, enc ## TagExtractor @@ -96,7 +96,7 @@ class PDFPageAggregator(PDFTextDevice): def begin_figure(self, name, bbox, matrix): self.stack.append(self.cur_item) - self.cur_item = LTFigure(name, bbox, matrix) + self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm)) return def end_figure(self, _): @@ -173,7 +173,7 @@ class SGMLConverter(PDFConverter): elif isinstance(item, LTRect): self.outfp.write('' % (item.linewidth, item.get_bbox())) elif isinstance(item, LTFigure): - self.outfp.write('
\n' % (item.id)) + self.outfp.write('
\n' % (item.id, item.get_bbox())) for child in item: render(child) self.outfp.write('
\n') @@ -259,6 +259,10 @@ class HTMLConverter(PDFConverter): self.write_rect('blue', 1, item.x0, self.yoffset-item.y1, item.width, item.height) for child in item: render(child) + elif isinstance(item, LTFigure): + self.write_rect('green', 1, item.x0, self.yoffset-item.y1, item.width, item.height) + for child in item: + render(child) return page = PDFConverter.end_page(self, page) render(page) diff --git a/pdfminer/layout.py b/pdfminer/layout.py index 7ea0c8a..bbbda88 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -1,6 +1,6 @@ #!/usr/bin/env python import sys -from pdfminer.utils import apply_matrix_norm, bsearch +from pdfminer.utils import apply_matrix_norm, apply_matrix_pt, bsearch INF = sys.maxint @@ -271,7 +271,8 @@ class LTTextItem(LayoutItem, LTText): self.text = ''.join( char for (char,_) in chars ) adv = sum( font.char_width(cid) for (_,cid) in chars ) adv = (adv * fontsize + len(chars)*charspace) * scaling - size = (font.get_ascent() - font.get_descent()) * fontsize + #size = (font.get_ascent() - font.get_descent()) * fontsize + size = font.get_size() * fontsize if not self.vertical: # horizontal text self.vertical = False @@ -319,8 +320,18 @@ class LTTextItem(LayoutItem, LTText): class LTFigure(LayoutContainer): def __init__(self, id, bbox, matrix): - LayoutContainer.__init__(self, id, bbox) + (x,y,w,h) = bbox + x0 = y0 = INF + x1 = y1 = -INF + for (p,q) in ((x,y),(x+w,y),(x,y+h),(x+w,y+h)): + (p,q) = apply_matrix_pt(matrix, (p,q)) + x0 = min(x0, p) + x1 = max(x1, p) + y0 = min(y0, q) + y1 = max(y1, q) + bbox = (x0,y0,x1,y1) self.matrix = matrix + LayoutContainer.__init__(self, id, bbox) return def __repr__(self): diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 5cfa663..e737e40 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -330,6 +330,9 @@ class PDFFont(object): self.default_width = default_width or descriptor.get('MissingWidth', 0) self.leading = num_value(descriptor.get('Leading', 0)) self.bbox = 
list_value(descriptor.get('FontBBox', (0,0,0,0))) + self.size = self.bbox[3]-self.bbox[1] + if self.size == 0: + self.size = self.ascent - self.descent self.hscale = self.vscale = .001 return @@ -349,7 +352,9 @@ class PDFFont(object): return self.ascent * self.vscale def get_descent(self): return self.descent * self.vscale - + def get_size(self): + return self.size * self.vscale + def char_width(self, cid): return self.widths.get(cid, self.default_width) * self.hscale