code cleanup
parent
038ce4cd0c
commit
0c41b8348e
|
@ -329,11 +329,11 @@ class HTMLConverter(PDFConverter):
|
||||||
return
|
return
|
||||||
|
|
||||||
def receive_layout(self, ltpage):
|
def receive_layout(self, ltpage):
|
||||||
def show_layout(item):
|
def show_group(item):
|
||||||
if isinstance(item, LTTextGroup):
|
if isinstance(item, LTTextGroup):
|
||||||
self.place_border('textgroup', 1, item)
|
self.place_border('textgroup', 1, item)
|
||||||
for child in item:
|
for child in item:
|
||||||
show_layout(child)
|
show_group(child)
|
||||||
return
|
return
|
||||||
def render(item):
|
def render(item):
|
||||||
if isinstance(item, LTPage):
|
if isinstance(item, LTPage):
|
||||||
|
@ -345,8 +345,9 @@ class HTMLConverter(PDFConverter):
|
||||||
self.write('<a name="%s">Page %s</a></div>\n' % (item.pageid, item.pageid))
|
self.write('<a name="%s">Page %s</a></div>\n' % (item.pageid, item.pageid))
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
if item.layout:
|
if item.groups is not None:
|
||||||
show_layout(item.layout)
|
for group in item.groups:
|
||||||
|
show_group(group)
|
||||||
elif isinstance(item, LTCurve):
|
elif isinstance(item, LTCurve):
|
||||||
self.place_border('curve', 1, item)
|
self.place_border('curve', 1, item)
|
||||||
elif isinstance(item, LTFigure):
|
elif isinstance(item, LTFigure):
|
||||||
|
@ -419,14 +420,14 @@ class XMLConverter(PDFConverter):
|
||||||
return
|
return
|
||||||
|
|
||||||
def receive_layout(self, ltpage):
|
def receive_layout(self, ltpage):
|
||||||
def show_layout(item):
|
def show_group(item):
|
||||||
if isinstance(item, LTTextBox):
|
if isinstance(item, LTTextBox):
|
||||||
self.outfp.write('<textbox id="%d" bbox="%s" />\n' %
|
self.outfp.write('<textbox id="%d" bbox="%s" />\n' %
|
||||||
(item.index, bbox2str(item.bbox)))
|
(item.index, bbox2str(item.bbox)))
|
||||||
elif isinstance(item, LTTextGroup):
|
elif isinstance(item, LTTextGroup):
|
||||||
self.outfp.write('<textgroup bbox="%s">\n' % bbox2str(item.bbox))
|
self.outfp.write('<textgroup bbox="%s">\n' % bbox2str(item.bbox))
|
||||||
for child in item:
|
for child in item:
|
||||||
show_layout(child)
|
show_group(child)
|
||||||
self.outfp.write('</textgroup>\n')
|
self.outfp.write('</textgroup>\n')
|
||||||
return
|
return
|
||||||
def render(item):
|
def render(item):
|
||||||
|
@ -435,9 +436,10 @@ class XMLConverter(PDFConverter):
|
||||||
(item.pageid, bbox2str(item.bbox), item.rotate))
|
(item.pageid, bbox2str(item.bbox), item.rotate))
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
if item.layout:
|
if item.groups is not None:
|
||||||
self.outfp.write('<layout>\n')
|
self.outfp.write('<layout>\n')
|
||||||
show_layout(item.layout)
|
for group in item.groups:
|
||||||
|
show_group(group)
|
||||||
self.outfp.write('</layout>\n')
|
self.outfp.write('</layout>\n')
|
||||||
self.outfp.write('</page>\n')
|
self.outfp.write('</page>\n')
|
||||||
elif isinstance(item, LTLine):
|
elif isinstance(item, LTLine):
|
||||||
|
|
|
@ -4,6 +4,24 @@ from utils import INF, Plane, get_bound, uniq, csort, fsplit
|
||||||
from utils import bbox2str, matrix2str, apply_matrix_pt
|
from utils import bbox2str, matrix2str, apply_matrix_pt
|
||||||
|
|
||||||
|
|
||||||
|
## IndexAssigner
|
||||||
|
##
|
||||||
|
class IndexAssigner(object):
    """Hand out consecutive integer indices to text boxes in a layout tree.

    The counter lives on the instance, so several trees can be numbered
    one after another with a single assigner and the indices keep
    increasing across calls to run().
    """

    def __init__(self, index=0):
        """Create an assigner whose first assigned index is `index`."""
        # Next index to be assigned; advanced by run() for every text box.
        self.index = index
        return

    def run(self, obj):
        """Assign indices to `obj` and everything nested inside it.

        An LTTextBox receives the current counter value in its `index`
        attribute and the counter is incremented.  An LTTextGroup is
        iterated and each child is processed recursively, in iteration
        order.  Any other object is left untouched.
        """
        if isinstance(obj, LTTextBox):
            obj.index = self.index
            self.index += 1
        elif isinstance(obj, LTTextGroup):
            for x in obj:
                # Descend into the child.  Calling run(obj) here would
                # re-enter the same group forever and never reach a box.
                self.run(x)
        return
|
||||||
|
|
||||||
|
|
||||||
## LAParams
|
## LAParams
|
||||||
##
|
##
|
||||||
class LAParams(object):
|
class LAParams(object):
|
||||||
|
@ -438,7 +456,7 @@ class LTLayoutContainer(LTContainer):
|
||||||
|
|
||||||
def __init__(self, bbox):
|
def __init__(self, bbox):
|
||||||
LTContainer.__init__(self, bbox)
|
LTContainer.__init__(self, bbox)
|
||||||
self.layout = None
|
self.groups = None
|
||||||
return
|
return
|
||||||
|
|
||||||
def analyze(self, laparams):
|
def analyze(self, laparams):
|
||||||
|
@ -455,20 +473,14 @@ class LTLayoutContainer(LTContainer):
|
||||||
obj.analyze(laparams)
|
obj.analyze(laparams)
|
||||||
textboxes = list(self.get_textboxes(laparams, textlines))
|
textboxes = list(self.get_textboxes(laparams, textlines))
|
||||||
assert len(textlines) == sum( len(box._objs) for box in textboxes )
|
assert len(textlines) == sum( len(box._objs) for box in textboxes )
|
||||||
top = self.group_textboxes(laparams, textboxes)
|
groups = self.group_textboxes(laparams, textboxes)
|
||||||
top.analyze(laparams)
|
assigner = IndexAssigner()
|
||||||
def assign_index(obj, i):
|
for group in groups:
|
||||||
if isinstance(obj, LTTextBox):
|
group.analyze(laparams)
|
||||||
obj.index = i
|
assigner.run(group)
|
||||||
i += 1
|
|
||||||
elif isinstance(obj, LTTextGroup):
|
|
||||||
for x in obj:
|
|
||||||
i = assign_index(x, i)
|
|
||||||
return i
|
|
||||||
assign_index(top, 0)
|
|
||||||
textboxes.sort(key=lambda box:box.index)
|
textboxes.sort(key=lambda box:box.index)
|
||||||
self._objs = textboxes + otherobjs + empties
|
self._objs = textboxes + otherobjs + empties
|
||||||
self.layout = top
|
self.groups = groups
|
||||||
return
|
return
|
||||||
|
|
||||||
def get_textlines(self, laparams, objs):
|
def get_textlines(self, laparams, objs):
|
||||||
|
@ -614,7 +626,7 @@ class LTLayoutContainer(LTContainer):
|
||||||
dists.sort()
|
dists.sort()
|
||||||
plane.add(group)
|
plane.add(group)
|
||||||
assert len(plane) == 1
|
assert len(plane) == 1
|
||||||
return list(plane)[0]
|
return list(plane)
|
||||||
|
|
||||||
|
|
||||||
## LTFigure
|
## LTFigure
|
||||||
|
|
Loading…
Reference in New Issue