Font size is now derived from the font's bounding box (FontBBox)

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@131 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2009-09-07 14:25:15 +00:00
parent 68e02b57af
commit 3f18a74e9c
3 changed files with 27 additions and 7 deletions

View File

@ -3,7 +3,7 @@ import sys
from pdfminer.pdfdevice import PDFDevice, PDFTextDevice from pdfminer.pdfdevice import PDFDevice, PDFTextDevice
from pdfminer.pdffont import PDFUnicodeNotDefined from pdfminer.pdffont import PDFUnicodeNotDefined
from pdfminer.layout import LayoutContainer, LTPage, LTText, LTLine, LTRect, LTFigure, LTTextItem, LTTextBox, LTTextLine from pdfminer.layout import LayoutContainer, LTPage, LTText, LTLine, LTRect, LTFigure, LTTextItem, LTTextBox, LTTextLine
from pdfminer.utils import apply_matrix_pt, enc from pdfminer.utils import apply_matrix_pt, mult_matrix, enc
## TagExtractor ## TagExtractor
@ -96,7 +96,7 @@ class PDFPageAggregator(PDFTextDevice):
def begin_figure(self, name, bbox, matrix): def begin_figure(self, name, bbox, matrix):
self.stack.append(self.cur_item) self.stack.append(self.cur_item)
self.cur_item = LTFigure(name, bbox, matrix) self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm))
return return
def end_figure(self, _): def end_figure(self, _):
@ -173,7 +173,7 @@ class SGMLConverter(PDFConverter):
elif isinstance(item, LTRect): elif isinstance(item, LTRect):
self.outfp.write('<rect linewidth="%d" bbox="%s" />' % (item.linewidth, item.get_bbox())) self.outfp.write('<rect linewidth="%d" bbox="%s" />' % (item.linewidth, item.get_bbox()))
elif isinstance(item, LTFigure): elif isinstance(item, LTFigure):
self.outfp.write('<figure id="%s">\n' % (item.id)) self.outfp.write('<figure id="%s" bbox="%s">\n' % (item.id, item.get_bbox()))
for child in item: for child in item:
render(child) render(child)
self.outfp.write('</figure>\n') self.outfp.write('</figure>\n')
@ -259,6 +259,10 @@ class HTMLConverter(PDFConverter):
self.write_rect('blue', 1, item.x0, self.yoffset-item.y1, item.width, item.height) self.write_rect('blue', 1, item.x0, self.yoffset-item.y1, item.width, item.height)
for child in item: for child in item:
render(child) render(child)
elif isinstance(item, LTFigure):
self.write_rect('green', 1, item.x0, self.yoffset-item.y1, item.width, item.height)
for child in item:
render(child)
return return
page = PDFConverter.end_page(self, page) page = PDFConverter.end_page(self, page)
render(page) render(page)

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
from pdfminer.utils import apply_matrix_norm, bsearch from pdfminer.utils import apply_matrix_norm, apply_matrix_pt, bsearch
INF = sys.maxint INF = sys.maxint
@ -271,7 +271,8 @@ class LTTextItem(LayoutItem, LTText):
self.text = ''.join( char for (char,_) in chars ) self.text = ''.join( char for (char,_) in chars )
adv = sum( font.char_width(cid) for (_,cid) in chars ) adv = sum( font.char_width(cid) for (_,cid) in chars )
adv = (adv * fontsize + len(chars)*charspace) * scaling adv = (adv * fontsize + len(chars)*charspace) * scaling
size = (font.get_ascent() - font.get_descent()) * fontsize #size = (font.get_ascent() - font.get_descent()) * fontsize
size = font.get_size() * fontsize
if not self.vertical: if not self.vertical:
# horizontal text # horizontal text
self.vertical = False self.vertical = False
@ -319,8 +320,18 @@ class LTTextItem(LayoutItem, LTText):
class LTFigure(LayoutContainer): class LTFigure(LayoutContainer):
def __init__(self, id, bbox, matrix): def __init__(self, id, bbox, matrix):
LayoutContainer.__init__(self, id, bbox) (x,y,w,h) = bbox
x0 = y0 = INF
x1 = y1 = -INF
for (p,q) in ((x,y),(x+w,y),(x,y+h),(x+w,y+h)):
(p,q) = apply_matrix_pt(matrix, (p,q))
x0 = min(x0, p)
x1 = max(x1, p)
y0 = min(y0, q)
y1 = max(y1, q)
bbox = (x0,y0,x1,y1)
self.matrix = matrix self.matrix = matrix
LayoutContainer.__init__(self, id, bbox)
return return
def __repr__(self): def __repr__(self):

View File

@ -330,6 +330,9 @@ class PDFFont(object):
self.default_width = default_width or descriptor.get('MissingWidth', 0) self.default_width = default_width or descriptor.get('MissingWidth', 0)
self.leading = num_value(descriptor.get('Leading', 0)) self.leading = num_value(descriptor.get('Leading', 0))
self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0))) self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0)))
self.size = self.bbox[3]-self.bbox[1]
if self.size == 0:
self.size = self.ascent - self.descent
self.hscale = self.vscale = .001 self.hscale = self.vscale = .001
return return
@ -349,6 +352,8 @@ class PDFFont(object):
return self.ascent * self.vscale return self.ascent * self.vscale
def get_descent(self): def get_descent(self):
return self.descent * self.vscale return self.descent * self.vscale
def get_size(self):
    """Return the font's vertical extent, scaled by ``self.vscale``.

    The extent is the height of the font bounding box
    (``bbox[3] - bbox[1]``).  When that height is zero -- which is what
    the ``(0,0,0,0)`` default yields if the descriptor has no FontBBox --
    fall back to ``ascent - descent`` so callers multiplying by the font
    size do not end up with zero-height text.  This is the same fallback
    the constructor applies when it computes ``self.size``.
    """
    height = self.bbox[3] - self.bbox[1]
    if height == 0:
        # Degenerate or missing FontBBox: use the ascent/descent metrics.
        height = self.ascent - self.descent
    return height * self.vscale
def char_width(self, cid): def char_width(self, cid):
return self.widths.get(cid, self.default_width) * self.hscale return self.widths.get(cid, self.default_width) * self.hscale