code cleanup
parent
038ce4cd0c
commit
0c41b8348e
|
@ -329,11 +329,11 @@ class HTMLConverter(PDFConverter):
|
|||
return
|
||||
|
||||
def receive_layout(self, ltpage):
|
||||
def show_layout(item):
|
||||
def show_group(item):
|
||||
if isinstance(item, LTTextGroup):
|
||||
self.place_border('textgroup', 1, item)
|
||||
for child in item:
|
||||
show_layout(child)
|
||||
show_group(child)
|
||||
return
|
||||
def render(item):
|
||||
if isinstance(item, LTPage):
|
||||
|
@ -345,8 +345,9 @@ class HTMLConverter(PDFConverter):
|
|||
self.write('<a name="%s">Page %s</a></div>\n' % (item.pageid, item.pageid))
|
||||
for child in item:
|
||||
render(child)
|
||||
if item.layout:
|
||||
show_layout(item.layout)
|
||||
if item.groups is not None:
|
||||
for group in item.groups:
|
||||
show_group(group)
|
||||
elif isinstance(item, LTCurve):
|
||||
self.place_border('curve', 1, item)
|
||||
elif isinstance(item, LTFigure):
|
||||
|
@ -419,14 +420,14 @@ class XMLConverter(PDFConverter):
|
|||
return
|
||||
|
||||
def receive_layout(self, ltpage):
|
||||
def show_layout(item):
|
||||
def show_group(item):
|
||||
if isinstance(item, LTTextBox):
|
||||
self.outfp.write('<textbox id="%d" bbox="%s" />\n' %
|
||||
(item.index, bbox2str(item.bbox)))
|
||||
elif isinstance(item, LTTextGroup):
|
||||
self.outfp.write('<textgroup bbox="%s">\n' % bbox2str(item.bbox))
|
||||
for child in item:
|
||||
show_layout(child)
|
||||
show_group(child)
|
||||
self.outfp.write('</textgroup>\n')
|
||||
return
|
||||
def render(item):
|
||||
|
@ -435,9 +436,10 @@ class XMLConverter(PDFConverter):
|
|||
(item.pageid, bbox2str(item.bbox), item.rotate))
|
||||
for child in item:
|
||||
render(child)
|
||||
if item.layout:
|
||||
if item.groups is not None:
|
||||
self.outfp.write('<layout>\n')
|
||||
show_layout(item.layout)
|
||||
for group in item.groups:
|
||||
show_group(group)
|
||||
self.outfp.write('</layout>\n')
|
||||
self.outfp.write('</page>\n')
|
||||
elif isinstance(item, LTLine):
|
||||
|
|
|
@ -4,6 +4,24 @@ from utils import INF, Plane, get_bound, uniq, csort, fsplit
|
|||
from utils import bbox2str, matrix2str, apply_matrix_pt
|
||||
|
||||
|
||||
## IndexAssigner
|
||||
##
|
||||
class IndexAssigner(object):
    """Hand out consecutive ``index`` values to text boxes in a layout tree.

    The counter lives on the instance, so calling run() several times
    (e.g. once per top-level group) keeps numbering where the previous
    call stopped instead of restarting.
    """

    def __init__(self, index=0):
        """Create an assigner whose first assigned index is *index*."""
        # Next value to assign; advanced by run() for every text box seen.
        self.index = index
        return

    def run(self, obj):
        """Assign indices to *obj* and everything nested inside it.

        An LTTextBox receives the current counter value in ``obj.index``
        and the counter is incremented.  An LTTextGroup is iterated and
        each child is processed recursively, in iteration order.  Any
        other object is left untouched.
        """
        if isinstance(obj, LTTextBox):
            obj.index = self.index
            self.index += 1
        elif isinstance(obj, LTTextGroup):
            for x in obj:
                # Descend into the child.  Recursing on obj itself would
                # never terminate and would leave every box unnumbered.
                self.run(x)
        return
|
||||
|
||||
|
||||
## LAParams
|
||||
##
|
||||
class LAParams(object):
|
||||
|
@ -438,7 +456,7 @@ class LTLayoutContainer(LTContainer):
|
|||
|
||||
def __init__(self, bbox):
|
||||
LTContainer.__init__(self, bbox)
|
||||
self.layout = None
|
||||
self.groups = None
|
||||
return
|
||||
|
||||
def analyze(self, laparams):
|
||||
|
@ -455,20 +473,14 @@ class LTLayoutContainer(LTContainer):
|
|||
obj.analyze(laparams)
|
||||
textboxes = list(self.get_textboxes(laparams, textlines))
|
||||
assert len(textlines) == sum( len(box._objs) for box in textboxes )
|
||||
top = self.group_textboxes(laparams, textboxes)
|
||||
top.analyze(laparams)
|
||||
def assign_index(obj, i):
|
||||
if isinstance(obj, LTTextBox):
|
||||
obj.index = i
|
||||
i += 1
|
||||
elif isinstance(obj, LTTextGroup):
|
||||
for x in obj:
|
||||
i = assign_index(x, i)
|
||||
return i
|
||||
assign_index(top, 0)
|
||||
groups = self.group_textboxes(laparams, textboxes)
|
||||
assigner = IndexAssigner()
|
||||
for group in groups:
|
||||
group.analyze(laparams)
|
||||
assigner.run(group)
|
||||
textboxes.sort(key=lambda box:box.index)
|
||||
self._objs = textboxes + otherobjs + empties
|
||||
self.layout = top
|
||||
self.groups = groups
|
||||
return
|
||||
|
||||
def get_textlines(self, laparams, objs):
|
||||
|
@ -614,7 +626,7 @@ class LTLayoutContainer(LTContainer):
|
|||
dists.sort()
|
||||
plane.add(group)
|
||||
assert len(plane) == 1
|
||||
return list(plane)[0]
|
||||
return list(plane)
|
||||
|
||||
|
||||
## LTFigure
|
||||
|
|
Loading…
Reference in New Issue