fontsize now referring to bbox
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@131 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
68e02b57af
commit
3f18a74e9c
|
@ -3,7 +3,7 @@ import sys
|
||||||
from pdfminer.pdfdevice import PDFDevice, PDFTextDevice
|
from pdfminer.pdfdevice import PDFDevice, PDFTextDevice
|
||||||
from pdfminer.pdffont import PDFUnicodeNotDefined
|
from pdfminer.pdffont import PDFUnicodeNotDefined
|
||||||
from pdfminer.layout import LayoutContainer, LTPage, LTText, LTLine, LTRect, LTFigure, LTTextItem, LTTextBox, LTTextLine
|
from pdfminer.layout import LayoutContainer, LTPage, LTText, LTLine, LTRect, LTFigure, LTTextItem, LTTextBox, LTTextLine
|
||||||
from pdfminer.utils import apply_matrix_pt, enc
|
from pdfminer.utils import apply_matrix_pt, mult_matrix, enc
|
||||||
|
|
||||||
|
|
||||||
## TagExtractor
|
## TagExtractor
|
||||||
|
@ -96,7 +96,7 @@ class PDFPageAggregator(PDFTextDevice):
|
||||||
|
|
||||||
def begin_figure(self, name, bbox, matrix):
|
def begin_figure(self, name, bbox, matrix):
|
||||||
self.stack.append(self.cur_item)
|
self.stack.append(self.cur_item)
|
||||||
self.cur_item = LTFigure(name, bbox, matrix)
|
self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm))
|
||||||
return
|
return
|
||||||
|
|
||||||
def end_figure(self, _):
|
def end_figure(self, _):
|
||||||
|
@ -173,7 +173,7 @@ class SGMLConverter(PDFConverter):
|
||||||
elif isinstance(item, LTRect):
|
elif isinstance(item, LTRect):
|
||||||
self.outfp.write('<rect linewidth="%d" bbox="%s" />' % (item.linewidth, item.get_bbox()))
|
self.outfp.write('<rect linewidth="%d" bbox="%s" />' % (item.linewidth, item.get_bbox()))
|
||||||
elif isinstance(item, LTFigure):
|
elif isinstance(item, LTFigure):
|
||||||
self.outfp.write('<figure id="%s">\n' % (item.id))
|
self.outfp.write('<figure id="%s" bbox="%s">\n' % (item.id, item.get_bbox()))
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
self.outfp.write('</figure>\n')
|
self.outfp.write('</figure>\n')
|
||||||
|
@ -259,6 +259,10 @@ class HTMLConverter(PDFConverter):
|
||||||
self.write_rect('blue', 1, item.x0, self.yoffset-item.y1, item.width, item.height)
|
self.write_rect('blue', 1, item.x0, self.yoffset-item.y1, item.width, item.height)
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
|
elif isinstance(item, LTFigure):
|
||||||
|
self.write_rect('green', 1, item.x0, self.yoffset-item.y1, item.width, item.height)
|
||||||
|
for child in item:
|
||||||
|
render(child)
|
||||||
return
|
return
|
||||||
page = PDFConverter.end_page(self, page)
|
page = PDFConverter.end_page(self, page)
|
||||||
render(page)
|
render(page)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import sys
|
import sys
|
||||||
from pdfminer.utils import apply_matrix_norm, bsearch
|
from pdfminer.utils import apply_matrix_norm, apply_matrix_pt, bsearch
|
||||||
INF = sys.maxint
|
INF = sys.maxint
|
||||||
|
|
||||||
|
|
||||||
|
@ -271,7 +271,8 @@ class LTTextItem(LayoutItem, LTText):
|
||||||
self.text = ''.join( char for (char,_) in chars )
|
self.text = ''.join( char for (char,_) in chars )
|
||||||
adv = sum( font.char_width(cid) for (_,cid) in chars )
|
adv = sum( font.char_width(cid) for (_,cid) in chars )
|
||||||
adv = (adv * fontsize + len(chars)*charspace) * scaling
|
adv = (adv * fontsize + len(chars)*charspace) * scaling
|
||||||
size = (font.get_ascent() - font.get_descent()) * fontsize
|
#size = (font.get_ascent() - font.get_descent()) * fontsize
|
||||||
|
size = font.get_size() * fontsize
|
||||||
if not self.vertical:
|
if not self.vertical:
|
||||||
# horizontal text
|
# horizontal text
|
||||||
self.vertical = False
|
self.vertical = False
|
||||||
|
@ -319,8 +320,18 @@ class LTTextItem(LayoutItem, LTText):
|
||||||
class LTFigure(LayoutContainer):
|
class LTFigure(LayoutContainer):
|
||||||
|
|
||||||
def __init__(self, id, bbox, matrix):
|
def __init__(self, id, bbox, matrix):
|
||||||
LayoutContainer.__init__(self, id, bbox)
|
(x,y,w,h) = bbox
|
||||||
|
x0 = y0 = INF
|
||||||
|
x1 = y1 = -INF
|
||||||
|
for (p,q) in ((x,y),(x+w,y),(x,y+h),(x+w,y+h)):
|
||||||
|
(p,q) = apply_matrix_pt(matrix, (p,q))
|
||||||
|
x0 = min(x0, p)
|
||||||
|
x1 = max(x1, p)
|
||||||
|
y0 = min(y0, q)
|
||||||
|
y1 = max(y1, q)
|
||||||
|
bbox = (x0,y0,x1,y1)
|
||||||
self.matrix = matrix
|
self.matrix = matrix
|
||||||
|
LayoutContainer.__init__(self, id, bbox)
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
|
|
@ -330,6 +330,9 @@ class PDFFont(object):
|
||||||
self.default_width = default_width or descriptor.get('MissingWidth', 0)
|
self.default_width = default_width or descriptor.get('MissingWidth', 0)
|
||||||
self.leading = num_value(descriptor.get('Leading', 0))
|
self.leading = num_value(descriptor.get('Leading', 0))
|
||||||
self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0)))
|
self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0)))
|
||||||
|
self.size = self.bbox[3]-self.bbox[1]
|
||||||
|
if self.size == 0:
|
||||||
|
self.size = self.ascent - self.descent
|
||||||
self.hscale = self.vscale = .001
|
self.hscale = self.vscale = .001
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -349,6 +352,8 @@ class PDFFont(object):
|
||||||
return self.ascent * self.vscale
|
return self.ascent * self.vscale
|
||||||
def get_descent(self):
|
def get_descent(self):
|
||||||
return self.descent * self.vscale
|
return self.descent * self.vscale
|
||||||
|
def get_size(self):
    """Return the font's nominal height, scaled by ``self.vscale``.

    The height is taken from the vertical extent of the font bounding
    box (``self.bbox[3] - self.bbox[1]``).  When that extent is zero --
    which is what the ``(0,0,0,0)`` default used for a missing
    ``FontBBox`` entry produces -- fall back to ``ascent - descent``,
    mirroring the fallback the constructor applies when it computes
    ``self.size``.  Without the fallback every character of such a font
    would be laid out with zero height.
    """
    height = self.bbox[3] - self.bbox[1]
    if height == 0:
        # Degenerate or absent bounding box: use the ascent/descent span.
        height = self.ascent - self.descent
    return height * self.vscale
|
||||||
|
|
||||||
def char_width(self, cid):
|
def char_width(self, cid):
|
||||||
return self.widths.get(cid, self.default_width) * self.hscale
|
return self.widths.get(cid, self.default_width) * self.hscale
|
||||||
|
|
Loading…
Reference in New Issue