code cleanup
parent: cd412308bd
commit: b2d13db29a
|
@ -17,9 +17,9 @@ class PDFLayoutAnalyzer(PDFTextDevice):
|
||||||
|
|
||||||
def __init__(self, rsrcmgr, pageno=1, laparams=None):
|
def __init__(self, rsrcmgr, pageno=1, laparams=None):
|
||||||
PDFTextDevice.__init__(self, rsrcmgr)
|
PDFTextDevice.__init__(self, rsrcmgr)
|
||||||
self.laparams = laparams
|
|
||||||
self.pageno = pageno
|
self.pageno = pageno
|
||||||
self.stack = []
|
self.laparams = laparams
|
||||||
|
self._stack = []
|
||||||
return
|
return
|
||||||
|
|
||||||
def begin_page(self, page, ctm):
|
def begin_page(self, page, ctm):
|
||||||
|
@ -31,7 +31,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
|
||||||
return
|
return
|
||||||
|
|
||||||
def end_page(self, page):
|
def end_page(self, page):
|
||||||
assert not self.stack
|
assert not self._stack
|
||||||
assert isinstance(self.cur_item, LTPage)
|
assert isinstance(self.cur_item, LTPage)
|
||||||
if self.laparams is not None:
|
if self.laparams is not None:
|
||||||
self.cur_item.analyze(self.laparams)
|
self.cur_item.analyze(self.laparams)
|
||||||
|
@ -40,7 +40,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
|
||||||
return
|
return
|
||||||
|
|
||||||
def begin_figure(self, name, bbox, matrix):
|
def begin_figure(self, name, bbox, matrix):
|
||||||
self.stack.append(self.cur_item)
|
self._stack.append(self.cur_item)
|
||||||
self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm))
|
self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -49,7 +49,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
|
||||||
assert isinstance(self.cur_item, LTFigure)
|
assert isinstance(self.cur_item, LTFigure)
|
||||||
if self.laparams is not None:
|
if self.laparams is not None:
|
||||||
self.cur_item.analyze(self.laparams)
|
self.cur_item.analyze(self.laparams)
|
||||||
self.cur_item = self.stack.pop()
|
self.cur_item = self._stack.pop()
|
||||||
self.cur_item.add(fig)
|
self.cur_item.add(fig)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -222,6 +222,8 @@ class HTMLConverter(PDFConverter):
|
||||||
self.pagemargin = pagemargin
|
self.pagemargin = pagemargin
|
||||||
self.outdir = outdir
|
self.outdir = outdir
|
||||||
self.yoffset = self.pagemargin
|
self.yoffset = self.pagemargin
|
||||||
|
self.rect_colors = self.RECT_COLORS
|
||||||
|
self.text_colors = self.TEXT_COLORS
|
||||||
self._font = None
|
self._font = None
|
||||||
self._fontstack = []
|
self._fontstack = []
|
||||||
self.write_header()
|
self.write_header()
|
||||||
|
@ -248,7 +250,7 @@ class HTMLConverter(PDFConverter):
|
||||||
return
|
return
|
||||||
|
|
||||||
def place_rect(self, color, borderwidth, x, y, w, h):
|
def place_rect(self, color, borderwidth, x, y, w, h):
|
||||||
color = self.RECT_COLORS.get(color)
|
color = self.rect_colors.get(color)
|
||||||
if color is not None:
|
if color is not None:
|
||||||
self.write('<span style="position:absolute; border: %s %dpx solid; '
|
self.write('<span style="position:absolute; border: %s %dpx solid; '
|
||||||
'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' %
|
'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' %
|
||||||
|
@ -272,7 +274,7 @@ class HTMLConverter(PDFConverter):
|
||||||
return
|
return
|
||||||
|
|
||||||
def place_text(self, color, text, x, y, size):
|
def place_text(self, color, text, x, y, size):
|
||||||
color = self.TEXT_COLORS.get(color)
|
color = self.text_colors.get(color)
|
||||||
if color is not None:
|
if color is not None:
|
||||||
self.write('<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;">' %
|
self.write('<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;">' %
|
||||||
(color, x*self.scale, (self.yoffset-y)*self.scale, size*self.scale*self.fontscale))
|
(color, x*self.scale, (self.yoffset-y)*self.scale, size*self.scale*self.fontscale))
|
||||||
|
|
|
@ -119,12 +119,13 @@ class PDFTextDevice(PDFDevice):
|
||||||
##
|
##
|
||||||
class TagExtractor(PDFDevice):
|
class TagExtractor(PDFDevice):
|
||||||
|
|
||||||
def __init__(self, rsrcmgr, outfp, codec='utf-8'):
|
def __init__(self, rsrcmgr, outfp, codec='utf-8', debug=0):
|
||||||
PDFDevice.__init__(self, rsrcmgr)
|
PDFDevice.__init__(self, rsrcmgr)
|
||||||
self.outfp = outfp
|
self.outfp = outfp
|
||||||
self.codec = codec
|
self.codec = codec
|
||||||
|
self.debug = debug
|
||||||
self.pageno = 0
|
self.pageno = 0
|
||||||
self.stack = []
|
self._stack = []
|
||||||
return
|
return
|
||||||
|
|
||||||
def render_string(self, textstate, seq):
|
def render_string(self, textstate, seq):
|
||||||
|
@ -158,16 +159,16 @@ class TagExtractor(PDFDevice):
|
||||||
s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
|
s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
|
||||||
in sorted(props.iteritems()) )
|
in sorted(props.iteritems()) )
|
||||||
self.outfp.write('<%s%s>' % (enc(tag.name), s))
|
self.outfp.write('<%s%s>' % (enc(tag.name), s))
|
||||||
self.stack.append(tag)
|
self._stack.append(tag)
|
||||||
return
|
return
|
||||||
|
|
||||||
def end_tag(self):
|
def end_tag(self):
|
||||||
assert self.stack
|
assert self._stack
|
||||||
tag = self.stack.pop(-1)
|
tag = self._stack.pop(-1)
|
||||||
self.outfp.write('</%s>' % enc(tag.name))
|
self.outfp.write('</%s>' % enc(tag.name))
|
||||||
return
|
return
|
||||||
|
|
||||||
def do_tag(self, tag, props=None):
|
def do_tag(self, tag, props=None):
|
||||||
self.begin_tag(tag, props)
|
self.begin_tag(tag, props)
|
||||||
self.stack.pop(-1)
|
self._stack.pop(-1)
|
||||||
return
|
return
|
||||||
|
|
|
@ -191,10 +191,14 @@ class ObjIdRange(object):
|
||||||
##
|
##
|
||||||
class Plane(object):
|
class Plane(object):
|
||||||
|
|
||||||
def __init__(self, objs):
|
def __init__(self, objs=None):
|
||||||
self._idxs = {}
|
self._idxs = {}
|
||||||
self._xobjs = []
|
self._xobjs = []
|
||||||
self._yobjs = []
|
self._yobjs = []
|
||||||
|
if objs is not None:
|
||||||
|
for obj in objs:
|
||||||
|
self.add(obj)
|
||||||
|
self.finish()
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
|
Loading…
Reference in New Issue