Added: a simpler ordering mode when 1<F.

pull/55/head
Yusuke Shinyama 2016-09-26 18:06:34 +09:00
parent 44977b6726
commit 8150458718
2 changed files with 11 additions and 3 deletions

View File

@ -9,7 +9,7 @@
<div align=right class=lastmod> <div align=right class=lastmod>
<!-- hhmts start --> <!-- hhmts start -->
Last Modified: Wed Jun 25 10:27:52 UTC 2014 Last Modified: Mon Sep 26 09:04:15 UTC 2016
<!-- hhmts end --> <!-- hhmts end -->
</div> </div>
@ -268,6 +268,7 @@ are M = 2.0, L = 0.5, and W = 0.1, respectively.
<dd> Specifies how much a horizontal and vertical position of a text matters <dd> Specifies how much a horizontal and vertical position of a text matters
when determining a text order. The value should be within the range of when determining a text order. The value should be within the range of
-1.0 (only horizontal position matters) to +1.0 (only vertical position matters). -1.0 (only horizontal position matters) to +1.0 (only vertical position matters).
When this value is outside that range (e.g. +2), a simpler ordering rule is used instead.
The default value is 0.5. The default value is 0.5.
<p> <p>
<dt> <code>-C</code> <dt> <code>-C</code>

View File

@ -676,13 +676,20 @@ class LTLayoutContainer(LTContainer):
for obj in empties: for obj in empties:
obj.analyze(laparams) obj.analyze(laparams)
textboxes = list(self.group_textlines(laparams, textlines)) textboxes = list(self.group_textlines(laparams, textlines))
if textboxes: if -1 <= laparams.boxes_flow and laparams.boxes_flow <= +1 and textboxes:
self.groups = self.group_textboxes(laparams, textboxes) self.groups = self.group_textboxes(laparams, textboxes)
assigner = IndexAssigner() assigner = IndexAssigner()
for group in self.groups: for group in self.groups:
group.analyze(laparams) group.analyze(laparams)
assigner.run(group) assigner.run(group)
textboxes.sort(key=lambda box: box.index) textboxes.sort(key=lambda box: box.index)
else:
def getkey(box):
if isinstance(box, LTTextBoxVertical):
return (0, -box.x1, box.y0)
else:
return (1, box.y0, box.x0)
textboxes.sort(key=getkey)
self._objs = textboxes + otherobjs + empties self._objs = textboxes + otherobjs + empties
return return
@ -725,4 +732,4 @@ class LTPage(LTLayoutContainer):
def __repr__(self): def __repr__(self):
return ('<%s(%r) %s rotate=%r>' % return ('<%s(%r) %s rotate=%r>' %
(self.__class__.__name__, self.pageid, (self.__class__.__name__, self.pageid,
bbox2str(self.bbox), self.rotate)) bbox2str(self.bbox), self.rotate))