code cleanup and more debugging options

pull/1/head
Yusuke Shinyama 2011-02-14 23:42:05 +09:00
parent f00f1dbd04
commit 7dbb664db3
2 changed files with 25 additions and 19 deletions

View File

@@ -199,21 +199,24 @@ class HTMLConverter(PDFConverter):
RECT_COLORS = { RECT_COLORS = {
#'char': 'green', #'char': 'green',
#'figure': 'yellow', 'figure': 'yellow',
#'textline': 'magenta', 'textline': 'magenta',
#'textbox': 'cyan', 'textbox': 'cyan',
#'textgroup': 'red', 'textgroup': 'red',
'polygon': 'black', 'polygon': 'black',
'page': 'gray', 'page': 'gray',
} }
TEXT_COLORS = { TEXT_COLORS = {
#'textbox': 'blue', 'textbox': 'blue',
'char': 'black', 'char': 'black',
} }
def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None, def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
scale=1, fontscale=0.7, layoutmode='normal', showpageno=True, pagemargin=50, scale=1, fontscale=0.7, layoutmode='normal', showpageno=True,
outdir=None): pagemargin=50, outdir=None,
rect_colors={'polygon':'black', 'page':'gray'},
text_colors={'char':'black'}):
PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams) PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams)
self.scale = scale self.scale = scale
self.fontscale = fontscale self.fontscale = fontscale
@@ -221,9 +224,12 @@ class HTMLConverter(PDFConverter):
self.showpageno = showpageno self.showpageno = showpageno
self.pagemargin = pagemargin self.pagemargin = pagemargin
self.outdir = outdir self.outdir = outdir
self.yoffset = self.pagemargin self.rect_colors = rect_colors
self.rect_colors = self.RECT_COLORS self.text_colors = text_colors
self.text_colors = self.TEXT_COLORS if self.debug:
self.rect_colors.update(self.RECT_COLORS)
self.text_colors.update(self.TEXT_COLORS)
self._yoffset = self.pagemargin
self._font = None self._font = None
self._fontstack = [] self._fontstack = []
self.write_header() self.write_header()
@@ -255,7 +261,7 @@ class HTMLConverter(PDFConverter):
self.write('<span style="position:absolute; border: %s %dpx solid; ' self.write('<span style="position:absolute; border: %s %dpx solid; '
'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' % 'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' %
(color, borderwidth, (color, borderwidth,
x*self.scale, (self.yoffset-y)*self.scale, x*self.scale, (self._yoffset-y)*self.scale,
w*self.scale, h*self.scale)) w*self.scale, h*self.scale))
return return
@@ -269,7 +275,7 @@ class HTMLConverter(PDFConverter):
self.write('<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" ' self.write('<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" '
'width="%d" height="%d" />\n' % 'width="%d" height="%d" />\n' %
(enc(name), borderwidth, (enc(name), borderwidth,
x*self.scale, (self.yoffset-y)*self.scale, x*self.scale, (self._yoffset-y)*self.scale,
w*self.scale, h*self.scale)) w*self.scale, h*self.scale))
return return
@@ -277,7 +283,7 @@ class HTMLConverter(PDFConverter):
color = self.text_colors.get(color) color = self.text_colors.get(color)
if color is not None: if color is not None:
self.write('<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;">' % self.write('<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;">' %
(color, x*self.scale, (self.yoffset-y)*self.scale, size*self.scale*self.fontscale)) (color, x*self.scale, (self._yoffset-y)*self.scale, size*self.scale*self.fontscale))
self.write_text(text) self.write_text(text)
self.write('</span>\n') self.write('</span>\n')
return return
@@ -288,7 +294,7 @@ class HTMLConverter(PDFConverter):
self.write('<div style="position:absolute; border: %s %dpx solid; writing-mode:%s; ' self.write('<div style="position:absolute; border: %s %dpx solid; writing-mode:%s; '
'left:%dpx; top:%dpx; width:%dpx; height:%dpx;">' % 'left:%dpx; top:%dpx; width:%dpx; height:%dpx;">' %
(color, borderwidth, writing_mode, (color, borderwidth, writing_mode,
x*self.scale, (self.yoffset-y)*self.scale, x*self.scale, (self._yoffset-y)*self.scale,
w*self.scale, h*self.scale)) w*self.scale, h*self.scale))
return return
@@ -323,11 +329,11 @@ class HTMLConverter(PDFConverter):
return return
def render(item): def render(item):
if isinstance(item, LTPage): if isinstance(item, LTPage):
self.yoffset += item.y1 self._yoffset += item.y1
self.place_border('page', 1, item) self.place_border('page', 1, item)
if self.showpageno: if self.showpageno:
self.write('<div style="position:absolute; top:%dpx;">' % self.write('<div style="position:absolute; top:%dpx;">' %
((self.yoffset-item.y1)*self.scale)) ((self._yoffset-item.y1)*self.scale))
self.write('<a name="%s">Page %s</a></div>\n' % (item.pageid, item.pageid)) self.write('<a name="%s">Page %s</a></div>\n' % (item.pageid, item.pageid))
for child in item: for child in item:
render(child) render(child)
@@ -373,7 +379,7 @@ class HTMLConverter(PDFConverter):
self.write_text(item.text) self.write_text(item.text)
return return
render(ltpage) render(ltpage)
self.yoffset += self.pagemargin self._yoffset += self.pagemargin
return return
def close(self): def close(self):

View File

@@ -55,10 +55,10 @@ def main(argv):
elif k == '-c': codec = v elif k == '-c': codec = v
elif k == '-s': scale = float(v) elif k == '-s': scale = float(v)
# #
#PDFDocument.debug = debug
#PDFParser.debug = debug
CMapDB.debug = debug CMapDB.debug = debug
PDFResourceManager.debug = debug PDFResourceManager.debug = debug
PDFDocument.debug = debug
PDFParser.debug = debug
PDFPageInterpreter.debug = debug PDFPageInterpreter.debug = debug
PDFDevice.debug = debug PDFDevice.debug = debug
# #