Fixed: crash with negative layout bbox.

pull/1/head
Yusuke Shinyama 2013-11-09 15:10:14 +09:00
parent ab5cdd8642
commit c8b6d4112a
3 changed files with 14 additions and 10 deletions

View File

@ -472,7 +472,8 @@ class LTLayoutContainer(LTContainer):
self.groups = None self.groups = None
return return
def get_textlines(self, laparams, objs): # group_objects: group text object to textlines.
def group_objects(self, laparams, objs):
obj0 = None obj0 = None
line = None line = None
for obj1 in objs: for obj1 in objs:
@ -538,13 +539,14 @@ class LTLayoutContainer(LTContainer):
yield line yield line
return return
def get_textboxes(self, laparams, lines): # group_textlines: group neighboring lines to textboxes.
def group_textlines(self, laparams, lines):
plane = Plane(self.bbox) plane = Plane(self.bbox)
plane.extend(lines) plane.extend(lines)
boxes = {} boxes = {}
for line in lines: for line in lines:
neighbors = line.find_neighbors(plane, laparams.line_margin) neighbors = line.find_neighbors(plane, laparams.line_margin)
assert line in neighbors, line if line not in neighbors: continue
members = [] members = []
for obj1 in neighbors: for obj1 in neighbors:
members.append(obj1) members.append(obj1)
@ -559,6 +561,7 @@ class LTLayoutContainer(LTContainer):
boxes[obj] = box boxes[obj] = box
done = set() done = set()
for line in lines: for line in lines:
if line not in boxes: continue
box = boxes[line] box = boxes[line]
if box in done: if box in done:
continue continue
@ -567,6 +570,7 @@ class LTLayoutContainer(LTContainer):
yield box yield box
return return
# group_textboxes: group textboxes hierarchically.
def group_textboxes(self, laparams, boxes): def group_textboxes(self, laparams, boxes):
assert boxes assert boxes
@ -633,18 +637,16 @@ class LTLayoutContainer(LTContainer):
def analyze(self, laparams): def analyze(self, laparams):
# textobjs is a list of LTChar objects, i.e. # textobjs is a list of LTChar objects, i.e.
# it has all the individual characters in the page. # it has all the individual characters in the page.
(textobjs, otherobjs) = fsplit(lambda obj: isinstance(obj, LTChar), self._objs) (textobjs, otherobjs) = fsplit(lambda obj: isinstance(obj, LTChar), self)
for obj in otherobjs: for obj in otherobjs:
obj.analyze(laparams) obj.analyze(laparams)
if not textobjs: if not textobjs:
return return
textlines = list(self.get_textlines(laparams, textobjs)) textlines = list(self.group_objects(laparams, textobjs))
assert len(textobjs) <= sum(len(line._objs) for line in textlines)
(empties, textlines) = fsplit(lambda obj: obj.is_empty(), textlines) (empties, textlines) = fsplit(lambda obj: obj.is_empty(), textlines)
for obj in empties: for obj in empties:
obj.analyze(laparams) obj.analyze(laparams)
textboxes = list(self.get_textboxes(laparams, textlines)) textboxes = list(self.group_textlines(laparams, textlines))
assert len(textlines) == sum(len(box._objs) for box in textboxes)
if textboxes: if textboxes:
self.groups = self.group_textboxes(laparams, textboxes) self.groups = self.group_textboxes(laparams, textboxes)
assigner = IndexAssigner() assigner = IndexAssigner()

View File

@ -259,6 +259,8 @@ class Plane(object):
return obj in self._objs return obj in self._objs
def _getrange(self, (x0, y0, x1, y1)): def _getrange(self, (x0, y0, x1, y1)):
if (x1 <= self.x0 or self.x1 <= x0 or
y1 <= self.y0 or self.y1 <= y0): return
x0 = max(self.x0, x0) x0 = max(self.x0, x0)
y0 = max(self.y0, y0) y0 = max(self.y0, y0)
x1 = min(self.x1, x1) x1 = min(self.x1, x1)

View File

@ -3750,14 +3750,14 @@
</textgroup> </textgroup>
</textgroup> </textgroup>
<textgroup bbox="313.198,119.636,540.091,607.829"> <textgroup bbox="313.198,119.636,540.091,607.829">
<textgroup bbox="313.198,336.837,540.091,607.829">
<textbox id="11" bbox="313.198,513.386,540.091,607.829" /> <textbox id="11" bbox="313.198,513.386,540.091,607.829" />
<textgroup bbox="313.198,119.636,540.091,512.573">
<textbox id="12" bbox="313.198,336.837,540.091,512.573" /> <textbox id="12" bbox="313.198,336.837,540.091,512.573" />
</textgroup>
<textbox id="13" bbox="313.198,119.636,540.091,336.023" /> <textbox id="13" bbox="313.198,119.636,540.091,336.023" />
</textgroup> </textgroup>
</textgroup> </textgroup>
</textgroup> </textgroup>
</textgroup>
</layout> </layout>
</page> </page>
</pages> </pages>