From eabe72ee634b8b215becc8b84b8e84d5fdecfbf9 Mon Sep 17 00:00:00 2001 From: Yusuke Shinyama Date: Wed, 9 Oct 2013 22:13:22 +0900 Subject: [PATCH] Prevent crash with empty layout box. --- pdfminer/layout.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pdfminer/layout.py b/pdfminer/layout.py index 7e0f312..ecb5c6b 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -556,10 +556,12 @@ class LTLayoutContainer(LTContainer): box = boxes[line] if box in done: continue done.add(box) - yield box + if not box.is_empty(): + yield box return def group_textboxes(self, laparams, boxes): + assert boxes def dist(obj1, obj2): """A distance function between two TextBoxes. @@ -632,12 +634,13 @@ class LTLayoutContainer(LTContainer): obj.analyze(laparams) textboxes = list(self.get_textboxes(laparams, textlines)) assert len(textlines) == sum( len(box._objs) for box in textboxes ) - self.groups = self.group_textboxes(laparams, textboxes) - assigner = IndexAssigner() - for group in self.groups: - group.analyze(laparams) - assigner.run(group) - textboxes.sort(key=lambda box:box.index) + if textboxes: + self.groups = self.group_textboxes(laparams, textboxes) + assigner = IndexAssigner() + for group in self.groups: + group.analyze(laparams) + assigner.run(group) + textboxes.sort(key=lambda box:box.index) self._objs = textboxes + otherobjs + empties return