diff --git a/docs/index.html b/docs/index.html index 837b7e1..1e13cf8 100644 --- a/docs/index.html +++ b/docs/index.html @@ -9,7 +9,7 @@
-Last Modified: Wed Jun 25 10:27:52 UTC 2014 +Last Modified: Mon Sep 26 09:04:15 UTC 2016
@@ -268,6 +268,7 @@ are M = 2.0, L = 0.5, and W = 0.1, respectively.
Specifies how much a horizontal and vertical position of a text matters when determining a text order. The value should be within the range of -1.0 (only horizontal position matters) to +1.0 (only vertical position matters). +When this value is outside this range (e.g. +2), a simpler ordering rule is used. The default value is 0.5.

-C diff --git a/pdfminer/layout.py b/pdfminer/layout.py index 9426ad3..6477eff 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -676,13 +676,20 @@ class LTLayoutContainer(LTContainer): for obj in empties: obj.analyze(laparams) textboxes = list(self.group_textlines(laparams, textlines)) - if textboxes: + if -1 <= laparams.boxes_flow and laparams.boxes_flow <= +1 and textboxes: self.groups = self.group_textboxes(laparams, textboxes) assigner = IndexAssigner() for group in self.groups: group.analyze(laparams) assigner.run(group) textboxes.sort(key=lambda box: box.index) + else: + def getkey(box): + if isinstance(box, LTTextBoxVertical): + return (0, -box.x1, box.y0) + else: + return (1, box.y0, box.x0) + textboxes.sort(key=getkey) self._objs = textboxes + otherobjs + empties return @@ -725,4 +732,4 @@ class LTPage(LTLayoutContainer): def __repr__(self): return ('<%s(%r) %s rotate=%r>' % (self.__class__.__name__, self.pageid, - bbox2str(self.bbox), self.rotate)) \ No newline at end of file + bbox2str(self.bbox), self.rotate))