html layout output fix
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@267 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
1904b61355
commit
edbd3764a7
|
@ -189,6 +189,20 @@ class TextConverter(PDFConverter):
|
||||||
##
|
##
|
||||||
class HTMLConverter(PDFConverter):
|
class HTMLConverter(PDFConverter):
|
||||||
|
|
||||||
|
RECT_COLORS = {
|
||||||
|
'char': 'green',
|
||||||
|
'figure': 'yellow',
|
||||||
|
'textline': 'magenta',
|
||||||
|
'polygon': 'black',
|
||||||
|
'textbox': 'cyan',
|
||||||
|
'textgroup': 'red',
|
||||||
|
'page': 'gray',
|
||||||
|
}
|
||||||
|
TEXT_COLORS = {
|
||||||
|
'char': 'black',
|
||||||
|
'textbox': 'black',
|
||||||
|
}
|
||||||
|
|
||||||
def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
|
def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
|
||||||
scale=1, showpageno=True, pagepad=50, outdir=None):
|
scale=1, showpageno=True, pagepad=50, outdir=None):
|
||||||
PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams)
|
PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams)
|
||||||
|
@ -204,25 +218,29 @@ class HTMLConverter(PDFConverter):
|
||||||
return
|
return
|
||||||
|
|
||||||
def write_rect(self, color, width, x, y, w, h):
|
def write_rect(self, color, width, x, y, w, h):
|
||||||
self.outfp.write('<span style="position:absolute; border: %s %dpx solid; '
|
color = self.RECT_COLORS.get(color)
|
||||||
'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' %
|
if color is not None:
|
||||||
(color, width,
|
self.outfp.write('<span style="position:absolute; border: %s %dpx solid; '
|
||||||
x*self.scale, (self.yoffset-y)*self.scale,
|
'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' %
|
||||||
w*self.scale, h*self.scale))
|
(color, width,
|
||||||
|
x*self.scale, (self.yoffset-y)*self.scale,
|
||||||
|
w*self.scale, h*self.scale))
|
||||||
return
|
return
|
||||||
|
|
||||||
def write_text(self, text, x, y, size):
|
def write_text(self, color, text, x, y, size):
|
||||||
self.outfp.write('<span style="position:absolute; left:%dpx; top:%dpx; font-size:%dpx;">' %
|
color = self.TEXT_COLORS.get(color)
|
||||||
(x*self.scale, (self.yoffset-y)*self.scale, size*self.scale))
|
if color is not None:
|
||||||
self.write(text)
|
self.outfp.write('<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;">' %
|
||||||
self.outfp.write('</span>\n')
|
(color, x*self.scale, (self.yoffset-y)*self.scale, size*self.scale))
|
||||||
|
self.write(text)
|
||||||
|
self.outfp.write('</span>\n')
|
||||||
return
|
return
|
||||||
|
|
||||||
def receive_layout(self, ltpage):
|
def receive_layout(self, ltpage):
|
||||||
def render(item):
|
def render(item):
|
||||||
if isinstance(item, LTPage):
|
if isinstance(item, LTPage):
|
||||||
self.yoffset += item.y1
|
self.yoffset += item.y1
|
||||||
self.write_rect('gray', 1, item.x0, item.y1, item.width, item.height)
|
self.write_rect('page', 1, item.x0, item.y1, item.width, item.height)
|
||||||
if self.showpageno:
|
if self.showpageno:
|
||||||
self.outfp.write('<div style="position:absolute; top:%dpx;">' %
|
self.outfp.write('<div style="position:absolute; top:%dpx;">' %
|
||||||
((self.yoffset-item.y1)*self.scale))
|
((self.yoffset-item.y1)*self.scale))
|
||||||
|
@ -230,23 +248,21 @@ class HTMLConverter(PDFConverter):
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
elif isinstance(item, LTChar):
|
elif isinstance(item, LTChar):
|
||||||
self.write_text(item.text, item.x0, item.y1, item.get_size())
|
self.write_rect('char', 1, item.x0, item.y1, item.width, item.height)
|
||||||
if self.debug:
|
self.write_text('char', item.text, item.x0, item.y1, item.size)
|
||||||
self.write_rect('green', 1, item.x0, item.y1, item.width, item.height)
|
|
||||||
elif isinstance(item, LTPolygon):
|
elif isinstance(item, LTPolygon):
|
||||||
self.write_rect('black', 1, item.x0, item.y1, item.width, item.height)
|
self.write_rect('polygon', 1, item.x0, item.y1, item.width, item.height)
|
||||||
elif isinstance(item, LTTextLine):
|
elif isinstance(item, LTTextLine):
|
||||||
self.write_rect('magenta', 1, item.x0, item.y1, item.width, item.height)
|
self.write_rect('textline', 1, item.x0, item.y1, item.width, item.height)
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
elif isinstance(item, LTTextBox):
|
elif isinstance(item, LTTextBox):
|
||||||
self.write_rect('cyan', 1, item.x0, item.y1, item.width, item.height)
|
self.write_rect('textbox', 1, item.x0, item.y1, item.width, item.height)
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
if self.debug:
|
self.write_text('textbox', str(item.index+1), item.x0, item.y1, 20)
|
||||||
self.write_text(str(item.index+1), item.x0, item.y1, 20)
|
|
||||||
elif isinstance(item, LTFigure):
|
elif isinstance(item, LTFigure):
|
||||||
self.write_rect('yellow', 1, item.x0, item.y1, item.width, item.height)
|
self.write_rect('figure', 1, item.x0, item.y1, item.width, item.height)
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
elif isinstance(item, LTImage):
|
elif isinstance(item, LTImage):
|
||||||
|
@ -262,7 +278,7 @@ class HTMLConverter(PDFConverter):
|
||||||
if self.debug and ltpage.layout:
|
if self.debug and ltpage.layout:
|
||||||
def show_layout(item):
|
def show_layout(item):
|
||||||
if isinstance(item, LTTextGroup):
|
if isinstance(item, LTTextGroup):
|
||||||
self.write_rect('red', 1, item.x0, item.y1, item.width, item.height)
|
self.write_rect('textgroup', 1, item.x0, item.y1, item.width, item.height)
|
||||||
for child in item:
|
for child in item:
|
||||||
show_layout(child)
|
show_layout(child)
|
||||||
return
|
return
|
||||||
|
@ -326,7 +342,7 @@ class XMLConverter(PDFConverter):
|
||||||
self.outfp.write('</textbox>\n')
|
self.outfp.write('</textbox>\n')
|
||||||
elif isinstance(item, LTChar):
|
elif isinstance(item, LTChar):
|
||||||
self.outfp.write('<text font="%s" bbox="%s" size="%.3f">' %
|
self.outfp.write('<text font="%s" bbox="%s" size="%.3f">' %
|
||||||
(enc(item.font.fontname), bbox2str(item.bbox), item.get_size()))
|
(enc(item.font.fontname), bbox2str(item.bbox), item.size))
|
||||||
self.write(item.text)
|
self.write(item.text)
|
||||||
self.outfp.write('</text>\n')
|
self.outfp.write('</text>\n')
|
||||||
elif isinstance(item, LTText):
|
elif isinstance(item, LTText):
|
||||||
|
|
|
@ -225,6 +225,10 @@ class LTChar(LTItem, LTText):
|
||||||
bbox = (apply_matrix_pt(self.matrix, bll) +
|
bbox = (apply_matrix_pt(self.matrix, bll) +
|
||||||
apply_matrix_pt(self.matrix, bur))
|
apply_matrix_pt(self.matrix, bur))
|
||||||
LTItem.__init__(self, bbox)
|
LTItem.__init__(self, bbox)
|
||||||
|
if self.font.is_vertical():
|
||||||
|
self.size = self.width
|
||||||
|
else:
|
||||||
|
self.size = self.height
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
@ -236,9 +240,6 @@ class LTChar(LTItem, LTText):
|
||||||
else:
|
else:
|
||||||
return '<char %r>' % self.text
|
return '<char %r>' % self.text
|
||||||
|
|
||||||
def get_size(self):
|
|
||||||
return max(self.width, self.height)
|
|
||||||
|
|
||||||
def is_compatible(self, obj):
|
def is_compatible(self, obj):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue