code cleanup

pull/1/head
Yusuke Shinyama 2011-05-14 15:51:40 +09:00
parent 038ce4cd0c
commit 0c41b8348e
2 changed files with 36 additions and 22 deletions

View File

@ -329,11 +329,11 @@ class HTMLConverter(PDFConverter):
return
def receive_layout(self, ltpage):
def show_layout(item):
def show_group(item):
if isinstance(item, LTTextGroup):
self.place_border('textgroup', 1, item)
for child in item:
show_layout(child)
show_group(child)
return
def render(item):
if isinstance(item, LTPage):
@ -345,8 +345,9 @@ class HTMLConverter(PDFConverter):
self.write('<a name="%s">Page %s</a></div>\n' % (item.pageid, item.pageid))
for child in item:
render(child)
if item.layout:
show_layout(item.layout)
if item.groups is not None:
for group in item.groups:
show_group(group)
elif isinstance(item, LTCurve):
self.place_border('curve', 1, item)
elif isinstance(item, LTFigure):
@ -419,14 +420,14 @@ class XMLConverter(PDFConverter):
return
def receive_layout(self, ltpage):
def show_layout(item):
def show_group(item):
if isinstance(item, LTTextBox):
self.outfp.write('<textbox id="%d" bbox="%s" />\n' %
(item.index, bbox2str(item.bbox)))
elif isinstance(item, LTTextGroup):
self.outfp.write('<textgroup bbox="%s">\n' % bbox2str(item.bbox))
for child in item:
show_layout(child)
show_group(child)
self.outfp.write('</textgroup>\n')
return
def render(item):
@ -435,9 +436,10 @@ class XMLConverter(PDFConverter):
(item.pageid, bbox2str(item.bbox), item.rotate))
for child in item:
render(child)
if item.layout:
if item.groups is not None:
self.outfp.write('<layout>\n')
show_layout(item.layout)
for group in item.groups:
show_group(group)
self.outfp.write('</layout>\n')
self.outfp.write('</page>\n')
elif isinstance(item, LTLine):

View File

@ -4,6 +4,24 @@ from utils import INF, Plane, get_bound, uniq, csort, fsplit
from utils import bbox2str, matrix2str, apply_matrix_pt
## IndexAssigner
##
class IndexAssigner(object):
    """Assign consecutive ``index`` values to the text boxes of a layout tree.

    The running counter is stored on the instance, so the same assigner
    can be run over several trees in turn and the numbering continues
    from where the previous call stopped.
    """

    def __init__(self, index=0):
        """Start numbering at *index* (0 by default)."""
        self.index = index
        return

    def run(self, obj):
        """Walk *obj* depth-first and number every LTTextBox found.

        An LTTextBox receives the current counter value as ``obj.index``
        and the counter is then advanced by one.  An LTTextGroup is
        iterated and each child is processed recursively.  Objects of
        any other type are left untouched.
        """
        if isinstance(obj, LTTextBox):
            obj.index = self.index
            self.index += 1
        elif isinstance(obj, LTTextGroup):
            for x in obj:
                # Recurse into the child, not the group itself: passing
                # ``obj`` again would re-enter this branch forever (until
                # the recursion limit) and no box would ever be numbered.
                self.run(x)
        return
## LAParams
##
class LAParams(object):
@ -438,7 +456,7 @@ class LTLayoutContainer(LTContainer):
def __init__(self, bbox):
LTContainer.__init__(self, bbox)
self.layout = None
self.groups = None
return
def analyze(self, laparams):
@ -455,20 +473,14 @@ class LTLayoutContainer(LTContainer):
obj.analyze(laparams)
textboxes = list(self.get_textboxes(laparams, textlines))
assert len(textlines) == sum( len(box._objs) for box in textboxes )
top = self.group_textboxes(laparams, textboxes)
top.analyze(laparams)
def assign_index(obj, i):
if isinstance(obj, LTTextBox):
obj.index = i
i += 1
elif isinstance(obj, LTTextGroup):
for x in obj:
i = assign_index(x, i)
return i
assign_index(top, 0)
groups = self.group_textboxes(laparams, textboxes)
assigner = IndexAssigner()
for group in groups:
group.analyze(laparams)
assigner.run(group)
textboxes.sort(key=lambda box:box.index)
self._objs = textboxes + otherobjs + empties
self.layout = top
self.groups = groups
return
def get_textlines(self, laparams, objs):
@ -614,7 +626,7 @@ class LTLayoutContainer(LTContainer):
dists.sort()
plane.add(group)
assert len(plane) == 1
return list(plane)[0]
return list(plane)
## LTFigure