Plane: take a bounding box at construction and clamp grid ranges to it.

Summary of the hunks below:
  * utils.Plane.__init__ now takes `bbox` (unpacked into x0, y0, x1, y1)
    instead of an optional `objs` iterable; it no longer adds objects itself.
  * utils.Plane.extend(objs) is new and calls add() for each object.
  * utils.Plane._getrange clamps the requested rectangle to the plane's
    bbox before enumerating grid cells.
  * Two call sites in layout.LTLayoutContainer switch from Plane(objs) to
    Plane(self.bbox) followed by plane.extend(objs).

NOTE(review): the constructor change is not backward compatible; any
Plane(objs) caller outside these hunks would need the same update - confirm.
NOTE(review): indentation and blank context lines were reconstructed from
the hunk line counts and Python block structure; if the patch does not apply
cleanly, retry with whitespace-insensitive matching.

diff --git a/pdfminer/layout.py b/pdfminer/layout.py
index 640c87e..af79a85 100644
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@@ -534,7 +534,8 @@ class LTLayoutContainer(LTContainer):
         return
 
     def get_textboxes(self, laparams, lines):
-        plane = Plane(lines)
+        plane = Plane(self.bbox)
+        plane.extend(lines)
         boxes = {}
         for line in lines:
             neighbors = line.find_neighbors(plane, laparams.line_margin)
@@ -596,7 +597,8 @@ class LTLayoutContainer(LTContainer):
                 obj2 = boxes[j]
                 dists.append((0, dist(obj1, obj2), obj1, obj2))
         dists.sort()
-        plane = Plane(boxes)
+        plane = Plane(self.bbox)
+        plane.extend(boxes)
         while dists:
             (c,d,obj1,obj2) = dists.pop(0)
             if c == 0 and isany(obj1, obj2):
diff --git a/pdfminer/utils.py b/pdfminer/utils.py
index 2769e20..670bca4 100644
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@@ -221,13 +221,11 @@ def matrix2str((a,b,c,d,e,f)):
 ##
 class Plane(object):
 
-    def __init__(self, objs=None, gridsize=50):
+    def __init__(self, bbox, gridsize=50):
         self._objs = set()
         self._grid = {}
         self.gridsize = gridsize
-        if objs is not None:
-            for obj in objs:
-                self.add(obj)
+        (self.x0, self.y0, self.x1, self.y1) = bbox
         return
 
     def __repr__(self):
@@ -243,10 +241,20 @@ class Plane(object):
         return obj in self._objs
 
     def _getrange(self, (x0,y0,x1,y1)):
+        x0 = max(self.x0, x0)
+        y0 = max(self.y0, y0)
+        x1 = min(self.x1, x1)
+        y1 = min(self.y1, y1)
         for y in drange(y0, y1, self.gridsize):
             for x in drange(x0, x1, self.gridsize):
                 yield (x,y)
         return
+
+    # extend(objs)
+    def extend(self, objs):
+        for obj in objs:
+            self.add(obj)
+        return
 
     # add(obj): place an object.
     def add(self, obj):