diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index b963443..c7ba6b5 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -329,11 +329,11 @@ class HTMLConverter(PDFConverter):
return
def receive_layout(self, ltpage):
- def show_layout(item):
+ def show_group(item):
if isinstance(item, LTTextGroup):
self.place_border('textgroup', 1, item)
for child in item:
- show_layout(child)
+ show_group(child)
return
def render(item):
if isinstance(item, LTPage):
@@ -345,8 +345,9 @@ class HTMLConverter(PDFConverter):
self.write('Page %s\n' % (item.pageid, item.pageid))
for child in item:
render(child)
- if item.layout:
- show_layout(item.layout)
+ if item.groups is not None:
+ for group in item.groups:
+ show_group(group)
elif isinstance(item, LTCurve):
self.place_border('curve', 1, item)
elif isinstance(item, LTFigure):
@@ -419,14 +420,14 @@ class XMLConverter(PDFConverter):
return
def receive_layout(self, ltpage):
- def show_layout(item):
+ def show_group(item):
if isinstance(item, LTTextBox):
self.outfp.write('\n' %
(item.index, bbox2str(item.bbox)))
elif isinstance(item, LTTextGroup):
self.outfp.write('\n' % bbox2str(item.bbox))
for child in item:
- show_layout(child)
+ show_group(child)
self.outfp.write('\n')
return
def render(item):
@@ -435,9 +436,10 @@ class XMLConverter(PDFConverter):
(item.pageid, bbox2str(item.bbox), item.rotate))
for child in item:
render(child)
- if item.layout:
+ if item.groups is not None:
self.outfp.write('\n')
- show_layout(item.layout)
+ for group in item.groups:
+ show_group(group)
self.outfp.write('\n')
self.outfp.write('\n')
elif isinstance(item, LTLine):
diff --git a/pdfminer/layout.py b/pdfminer/layout.py
index db4a33c..4096330 100644
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@@ -4,6 +4,24 @@ from utils import INF, Plane, get_bound, uniq, csort, fsplit
from utils import bbox2str, matrix2str, apply_matrix_pt
+##  IndexAssigner
+##
+class IndexAssigner(object):
+    """Hands out consecutive index values to LTTextBox objects, descending through LTTextGroup containers."""
+    def __init__(self, index=0):
+        self.index = index  # next index value to assign
+        return
+
+    def run(self, obj):  # index obj itself, or every text box found beneath it
+        if isinstance(obj, LTTextBox):
+            obj.index = self.index
+            self.index += 1
+        elif isinstance(obj, LTTextGroup):
+            for x in obj:
+                self.run(x)  # recurse into the child; passing obj again would never terminate
+        return
+
+
## LAParams
##
class LAParams(object):
@@ -438,7 +456,7 @@ class LTLayoutContainer(LTContainer):
def __init__(self, bbox):
LTContainer.__init__(self, bbox)
- self.layout = None
+ self.groups = None
return
def analyze(self, laparams):
@@ -455,20 +473,14 @@ class LTLayoutContainer(LTContainer):
obj.analyze(laparams)
textboxes = list(self.get_textboxes(laparams, textlines))
assert len(textlines) == sum( len(box._objs) for box in textboxes )
- top = self.group_textboxes(laparams, textboxes)
- top.analyze(laparams)
- def assign_index(obj, i):
- if isinstance(obj, LTTextBox):
- obj.index = i
- i += 1
- elif isinstance(obj, LTTextGroup):
- for x in obj:
- i = assign_index(x, i)
- return i
- assign_index(top, 0)
+ groups = self.group_textboxes(laparams, textboxes)
+ assigner = IndexAssigner()
+ for group in groups:
+ group.analyze(laparams)
+ assigner.run(group)
textboxes.sort(key=lambda box:box.index)
self._objs = textboxes + otherobjs + empties
- self.layout = top
+ self.groups = groups
return
def get_textlines(self, laparams, objs):
@@ -614,7 +626,7 @@ class LTLayoutContainer(LTContainer):
dists.sort()
plane.add(group)
assert len(plane) == 1
- return list(plane)[0]
+ return list(plane)
## LTFigure