From b8d516fc52296d3971c46e7cdbd473d3866b7143 Mon Sep 17 00:00:00 2001 From: Yusuke Shinyama Date: Sat, 14 May 2011 14:16:40 +0900 Subject: [PATCH] extended Plane class. --- pdfminer/utils.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/pdfminer/utils.py b/pdfminer/utils.py index 0d531c3..2892963 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -196,7 +196,7 @@ class ObjIdRange(object): ## Plane ## -## A data structure for objects placed on a plane. +## A set-like data structure for objects placed on a plane. ## Can efficiently find objects in a certain rectangular area. ## It maintains two parallel lists of objects, each of ## which is sorted by its x or y coordinate. @@ -204,7 +204,8 @@ class ObjIdRange(object): class Plane(object): def __init__(self, objs=None, gridsize=50): - self._objs = {} + self._objs = set() + self._grid = {} self.gridsize = gridsize if objs is not None: for obj in objs: @@ -214,6 +215,15 @@ class Plane(object): def __repr__(self): return ('' % list(self)) + def __iter__(self): + return iter(self._objs) + + def __len__(self): + return len(self._objs) + + def __contains__(self, obj): + return obj in self._objs + def _getrange(self, (x0,y0,x1,y1)): for y in drange(y0, y1, self.gridsize): for x in drange(x0, x1, self.gridsize): @@ -223,35 +233,38 @@ class Plane(object): # add(obj): place an object. def add(self, obj): for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)): - if k not in self._objs: + if k not in self._grid: r = [] - self._objs[k] = r + self._grid[k] = r else: - r = self._objs[k] + r = self._grid[k] r.append(obj) + self._objs.add(obj) return # remove(obj): displace an object. def remove(self, obj): for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)): try: - self._objs[k].remove(obj) + self._grid[k].remove(obj) except (KeyError, ValueError): pass + self._objs.remove(obj) return # find(): finds objects that are in a certain area. def find(self, (x0,y0,x1,y1)): r = set() + done = set() for k in self._getrange((x0,y0,x1,y1)): - if k not in self._objs: continue - for obj in self._objs[k]: - if obj in r: continue - r.add(obj) + if k not in self._grid: continue + for obj in self._grid[k]: + if obj in done: continue + done.add(obj) if (obj.x1 <= x0 or x1 <= obj.x0 or obj.y1 <= y0 or y1 <= obj.y0): continue - yield obj - return + r.add(obj) + return r # create_bmp