layout analysis improved

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@268 1aa58f4a-7d42-0410-adbc-911cccaed67c
2010-11-09 10:40:05 +00:00 · 2010-11-09 10:40:05 +00:00 · 9584845358
parent edbd3764a7
commit 9584845358
3 changed files with 224 additions and 209 deletions
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@ -27,28 +27,26 @@ class PDFLayoutAnalyzer(PDFTextDevice):
        (x0,y0) = apply_matrix_pt(ctm, (x0,y0))
        (x1,y1) = apply_matrix_pt(ctm, (x1,y1))
        mediabox = (0, 0, abs(x0-x1), abs(y0-y1))
-        self.cur_item = LTPage(self.pageno, mediabox)
+        self.cur_item = LTPage(self.pageno, mediabox, laparams=self.laparams)
        return

    def end_page(self, page):
        assert not self.stack
        assert isinstance(self.cur_item, LTPage)
-        self.cur_item.fixate()
-        self.cur_item.analyze(self.laparams)
+        self.cur_item.finish()
        self.pageno += 1
        self.receive_layout(self.cur_item)
        return

    def begin_figure(self, name, bbox, matrix):
        self.stack.append(self.cur_item)
-        self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm))
+        self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm), laparams=self.laparams)
        return

    def end_figure(self, _):
        fig = self.cur_item
        assert isinstance(self.cur_item, LTFigure)
-        self.cur_item.fixate()
-        self.cur_item.analyze(self.laparams)
+        self.cur_item.finish()
        self.cur_item = self.stack.pop()
        self.cur_item.add(fig)
        return
@ -175,7 +173,7 @@ class TextConverter(PDFConverter):
                for child in item:
                    render(child)
            elif isinstance(item, LTText):
-                self.write(item.get_text())
+                self.write(item.text)
            if isinstance(item, LTTextBox):
                self.write('\n')
        if self.showpageno:
@ -190,17 +188,17 @@ class TextConverter(PDFConverter):
 class HTMLConverter(PDFConverter):

    RECT_COLORS = {
-        'char': 'green',
-        'figure': 'yellow',
-        'textline': 'magenta',
-        'polygon': 'black',
+        #'char': 'green',
+        #'figure': 'yellow',
+        #'textline': 'magenta',
        'textbox': 'cyan',
        'textgroup': 'red',
+        'polygon': 'black',
        'page': 'gray',
        }
    TEXT_COLORS = {
+        'textbox': 'blue',
        'char': 'black',
-        'textbox': 'black',
        }

    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
@ -275,7 +273,7 @@ class HTMLConverter(PDFConverter):
                                      item.width*self.scale, item.height*self.scale))
            return
        render(ltpage)
-        if self.debug and ltpage.layout:
+        if ltpage.layout:
            def show_layout(item):
                if isinstance(item, LTTextGroup):
                    self.write_rect('textgroup', 1, item.x0, item.y1, item.width, item.height)
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@ -1,7 +1,7 @@
 #!/usr/bin/env python2
 import sys
 from utils import apply_matrix_pt, get_bound, INF
-from utils import bsearch, bbox2str, matrix2str
+from utils import bsearch, bbox2str, matrix2str, Plane
 from pdffont import PDFUnicodeNotDefined


@ -55,8 +55,6 @@ class LTItem(object):
                (self.__class__.__name__, bbox2str(self.bbox)))

    def set_bbox(self, (x0,y0,x1,y1)):
-        if x1 < x0: (x0,x1) = (x1,x0)
-        if y1 < y0: (y0,y1) = (y1,y0)
        self.x0 = x0
        self.y0 = y0
        self.x1 = x1
@ -169,10 +167,7 @@ class LTText(object):

    def __repr__(self):
        return ('<%s %r>' %
-                (self.__class__.__name__, self.get_text()))
-
-    def get_text(self):
-        return self.text
+                (self.__class__.__name__, self.text))


 ##  LTAnon
@ -222,9 +217,13 @@ class LTChar(LTItem, LTText):
            bur = (self.adv, ty+height)
        (a,b,c,d,e,f) = self.matrix
        self.upright = (0 < a*d*scaling and b*c <= 0)
-        bbox = (apply_matrix_pt(self.matrix, bll) +
-                apply_matrix_pt(self.matrix, bur))
-        LTItem.__init__(self, bbox)
+        (x0,y0) = apply_matrix_pt(self.matrix, bll)
+        (x1,y1) = apply_matrix_pt(self.matrix, bur)
+        if x1 < x0:
+            (x0,x1) = (x1,x0)
+        if y1 < y0:
+            (y0,y1) = (y1,y0)
+        LTItem.__init__(self, (x0,y0,x1,y1))
        if self.font.is_vertical():
            self.size = self.width
        else:
@ -236,11 +235,12 @@ class LTChar(LTItem, LTText):
            return ('<%s %s matrix=%s font=%r fontsize=%.1f adv=%s text=%r>' %
                    (self.__class__.__name__, bbox2str(self.bbox), 
                     matrix2str(self.matrix), self.font, self.fontsize,
-                     self.adv, self.get_text()))
+                     self.adv, self.text))
        else:
            return '<char %r>' % self.text

    def is_compatible(self, obj):
+        """Returns True if two characters can coexist in the same line."""
        return True

    
@ -248,81 +248,80 @@ class LTChar(LTItem, LTText):
 ##
 class LTContainer(LTItem):

-    def __init__(self, objs=None, bbox=(0,0,0,0)):
+    def __init__(self, bbox):
        LTItem.__init__(self, bbox)
-        if objs:
-            self._objs = objs[:]
-        else:
-            self._objs = []
+        self._objs = []
        return

    def __iter__(self):
-        return iter(self.get_objs())
+        return iter(self._objs)

    def __len__(self):
-        return len(self.get_objs())
+        return len(self._objs)

    def add(self, obj):
        self._objs.append(obj)
        return

-    def merge(self, container):
-        self._objs.extend(container._objs)
+    def extend(self, objs):
+        for obj in objs:
+            self.add(obj)
        return

-    def get_objs(self):
-        return self._objs

-    # fixate(): determines its boundery.
-    def fixate(self):
-        if not self.width and self._objs:
-            (bx0, by0, bx1, by1) = (INF, INF, -INF, -INF)
-            for obj in self._objs:
-                bx0 = min(bx0, obj.x0)
-                by0 = min(by0, obj.y0)
-                bx1 = max(bx1, obj.x1)
-                by1 = max(by1, obj.y1)
-            self.set_bbox((bx0, by0, bx1, by1))
+##  LTExpandableContainer
+##
+class LTExpandableContainer(LTContainer):
+
+    def __init__(self):
+        LTContainer.__init__(self, (+INF,+INF,-INF,-INF))
        return

+    def add(self, obj):
+        LTContainer.add(self, obj)
+        self.set_bbox((min(self.x0, obj.x0), min(self.y0, obj.y0),
+                       max(self.x1, obj.x1), max(self.y1, obj.y1)))
+        return
+
+    def finish(self):
+        return self
+

 ##  LTTextLine
 ##
-class LTTextLine(LTContainer, LTText):
+class LTTextLine(LTExpandableContainer, LTText):

-    def __init__(self, laparams=None):
-        self.laparams = laparams
-        LTContainer.__init__(self)
+    def __init__(self, word_margin):
+        LTExpandableContainer.__init__(self)
+        self.word_margin = word_margin
        return

    def __repr__(self):
        return ('<%s %s %r>' %
-                (self.__class__.__name__, bbox2str(self.bbox),
-                 self.get_text()))
+                (self.__class__.__name__, bbox2str(self.bbox), self.text))

-    def get_text(self):
-        return ''.join( obj.text for obj in self.get_objs() if isinstance(obj, LTText) )
+    def finish(self):
+        LTContainer.add(self, LTAnon('\n'))
+        self.text = ''.join( obj.text for obj in self if isinstance(obj, LTText) )
+        return LTExpandableContainer.finish(self)

    def find_neighbors(self, plane, ratio):
        raise NotImplementedError

 class LTTextLineHorizontal(LTTextLine):

-    def get_objs(self):
-        if self.laparams is None:
-            for obj in self._objs:
-                yield obj
-            return
-        word_margin = self.laparams.word_margin
-        x1 = INF
-        for obj in csort(self._objs, key=lambda obj: obj.x0):
-            if isinstance(obj, LTChar) and word_margin:
-                margin = word_margin * obj.width
-                if x1 < obj.x0-margin:
-                    yield LTAnon(' ')
-            yield obj
-            x1 = obj.x1
-        yield LTAnon('\n')
+    def __init__(self, word_margin):
+        LTTextLine.__init__(self, word_margin)
+        self._x1 = +INF
+        return
+
+    def add(self, obj):
+        if isinstance(obj, LTChar) and self.word_margin:
+            margin = self.word_margin * obj.width
+            if self._x1 < obj.x0-margin:
+                LTContainer.add(self, LTAnon(' '))
+        self._x1 = obj.x1
+        LTTextLine.add(self, obj)
        return

    def find_neighbors(self, plane, ratio):
@ -332,21 +331,18 @@ class LTTextLineHorizontal(LTTextLine):
    
 class LTTextLineVertical(LTTextLine):

-    def get_objs(self):
-        if self.laparams is None:
-            for obj in self._objs:
-                yield obj
-            return
-        word_margin = self.laparams.word_margin
-        y0 = -INF
-        for obj in csort(self._objs, key=lambda obj: -obj.y1):
-            if isinstance(obj, LTChar) and word_margin:
-                margin = word_margin * obj.height
-                if obj.y1+margin < y0:
-                    yield LTAnon(' ')
-            yield obj
-            y0 = obj.y0
-        yield LTAnon('\n')
+    def __init__(self, word_margin):
+        LTTextLine.__init__(self, word_margin)
+        self._y0 = -INF
+        return
+
+    def add(self, obj):
+        if isinstance(obj, LTChar) and self.word_margin:
+            margin = self.word_margin * obj.height
+            if obj.y1+margin < self._y0:
+                LTContainer.add(self, LTAnon(' '))
+        self._y0 = obj.y0
+        LTTextLine.add(self, obj)
        return
        
    def find_neighbors(self, plane, ratio):
@ -360,110 +356,84 @@ class LTTextLineVertical(LTTextLine):
 ##  A set of text objects that are grouped within
 ##  a certain rectangular area.
 ##
-class LTTextBox(LTContainer):
+class LTTextBox(LTExpandableContainer):

-    def __init__(self, objs):
-        LTContainer.__init__(self, objs=objs)
+    def __init__(self):
+        LTExpandableContainer.__init__(self)
        self.index = None
        return

    def __repr__(self):
        return ('<%s(%s) %s %r...>' %
                (self.__class__.__name__, self.index,
-                 bbox2str(self.bbox), self.get_text()[:20]))
+                 bbox2str(self.bbox), self.text[:20]))

-    def get_text(self):
-        return ''.join( obj.get_text() for obj in self.get_objs() if isinstance(obj, LTTextLine) )
+    def finish(self):
+        self.text = ''.join( obj.text for obj in self if isinstance(obj, LTTextLine) )
+        return LTExpandableContainer.finish(self)

 class LTTextBoxHorizontal(LTTextBox):
    
-    def get_objs(self):
-        return csort(self._objs, key=lambda obj: -obj.y1)
+    def finish(self):
+        self._objs = csort(self._objs, key=lambda obj: -obj.y1)
+        return LTTextBox.finish(self)

 class LTTextBoxVertical(LTTextBox):

-    def get_objs(self):
-        return csort(self._objs, key=lambda obj: -obj.x1)
+    def finish(self):
+        self._objs = csort(self._objs, key=lambda obj: -obj.x1)
+        return LTTextBox.finish(self)


 ##  LTTextGroup
 ##
-class LTTextGroup(LTContainer):
+class LTTextGroup(LTExpandableContainer):

    def __init__(self, objs):
-        LTContainer.__init__(self, objs=objs)
-        LTContainer.fixate(self)
+        LTExpandableContainer.__init__(self)
+        self.extend(objs)
        return

 class LTTextGroupLRTB(LTTextGroup):
    
-    def get_objs(self):
+    def finish(self):
        # reorder the objects from top-left to bottom-right.
-        return csort(self._objs, key=lambda obj: obj.x0+obj.x1-(obj.y0+obj.y1))
+        self._objs = csort(self._objs, key=lambda obj: obj.x0+obj.x1-(obj.y0+obj.y1))
+        return LTTextGroup.finish(self)

 class LTTextGroupTBRL(LTTextGroup):
    
-    def get_objs(self):
+    def finish(self):
        # reorder the objects from top-right to bottom-left.
-        return csort(self._objs, key=lambda obj: -(obj.x0+obj.x1)-(obj.y0+obj.y1))
+        self._objs = csort(self._objs, key=lambda obj: -(obj.x0+obj.x1)-(obj.y0+obj.y1))
+        return LTTextGroup.finish(self)


-##  Plane
+##  LTLayoutContainer
 ##
-##  A data structure for objects placed on a plane.
-##  Can efficiently find objects in a certain rectangular area.
-##  It maintains two parallel lists of objects, each of
-##  which is sorted by its x or y coordinate.
-##
-class Plane(object):
+class LTLayoutContainer(LTContainer):

-    def __init__(self, objs):
-        self.xobjs = []
-        self.yobjs = []
-        self.idxs = dict( (obj,i) for (i,obj) in enumerate(objs) )
-        for obj in objs:
-            self.place(obj)
-        self.xobjs.sort()
-        self.yobjs.sort()
+    def __init__(self, bbox, laparams=None):
+        LTContainer.__init__(self, bbox)
+        self.laparams = laparams
+        self.layout = None
        return
        
-    # place(obj): place an object in a certain area.
-    def place(self, obj):
-        assert isinstance(obj, LTItem)
-        self.xobjs.append((obj.x0, obj))
-        self.xobjs.append((obj.x1, obj))
-        self.yobjs.append((obj.y0, obj))
-        self.yobjs.append((obj.y1, obj))
-        return
-
-    # find(): finds objects that are in a certain area.
-    def find(self, (x0,y0,x1,y1)):
-        i0 = bsearch(self.xobjs, x0)[0]
-        i1 = bsearch(self.xobjs, x1)[1]
-        xobjs = set( obj for (_,obj) in self.xobjs[i0:i1] )
-        i0 = bsearch(self.yobjs, y0)[0]
-        i1 = bsearch(self.yobjs, y1)[1]
-        yobjs = set( obj for (_,obj) in self.yobjs[i0:i1] )
-        xobjs.intersection_update(yobjs)
-        return sorted(xobjs, key=lambda obj: self.idxs[obj])
-
-
-##  LTAnalyzer
-##
-class LTAnalyzer(LTContainer):
-
-    def analyze(self, laparams=None):
+    def finish(self):
        """Perform the layout analysis."""
-        if laparams is None: return
+        if self.laparams is None: return
        # textobjs is a list of LTChar objects, i.e.
        # it has all the individual characters in the page.
-        (textobjs, otherobjs) = self.get_textobjs(self._objs, laparams)
+        (textobjs, otherobjs) = self.get_textobjs(self._objs)
        if not textobjs: return
-        textlines = list(self.get_textlines(textobjs, laparams))
-        assert sum( len(line._objs) for line in textlines ) == len(textobjs)
-        textboxes = list(self.get_textboxes(textlines, laparams))
-        assert sum( len(box._objs) for box in textboxes ) == len(textlines)
-        top = self.group_textboxes(textboxes, laparams)
+        textlines = list(self.get_textlines(textobjs,
+                                            self.laparams.line_overlap,
+                                            self.laparams.char_margin,
+                                            self.laparams.word_margin))
+        assert len(textobjs) <= sum( len(line._objs) for line in textlines )
+        textboxes = list(self.get_textboxes(textlines, self.laparams.line_margin))
+        assert len(textlines) == sum( len(box._objs) for box in textboxes )
+        top = self.group_textboxes(textboxes)
        def assign_index(obj, i):
            if isinstance(obj, LTTextBox):
                obj.index = i
@ -476,9 +446,9 @@ class LTAnalyzer(LTContainer):
        textboxes.sort(key=lambda box:box.index)
        self._objs = textboxes + otherobjs
        self.layout = top
-        return
+        return self

-    def get_textobjs(self, objs, laparams):
+    def get_textobjs(self, objs):
        """Split all the objects in the page into text-related objects and others."""
        textobjs = []
        otherobjs = []
@ -489,15 +459,15 @@ class LTAnalyzer(LTContainer):
                otherobjs.append(obj)
        return (textobjs, otherobjs)

-    def get_textlines(self, objs, laparams):
+    def get_textlines(self, objs, line_overlap, char_margin, word_margin):
        obj0 = None
        line = None
        for obj1 in objs:
            if obj0 is not None:
                k = 0
                if (obj0.is_compatible(obj1) and obj0.is_voverlap(obj1) and 
-                    min(obj0.height, obj1.height) * laparams.line_overlap < obj0.voverlap(obj1) and
-                    obj0.hdistance(obj1) < min(obj0.width, obj1.width) * laparams.char_margin):
+                    min(obj0.height, obj1.height) * line_overlap < obj0.voverlap(obj1) and
+                    obj0.hdistance(obj1) < max(obj0.width, obj1.width) * char_margin):
                    # obj0 and obj1 is horizontally aligned:
                    #
                    #   +------+ - - -
@ -510,8 +480,8 @@ class LTAnalyzer(LTContainer):
                    #        (char_margin)
                    k |= 1
                if (obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and 
-                    min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1) and
-                    obj0.vdistance(obj1) < min(obj0.height, obj1.height) * laparams.char_margin):
+                    min(obj0.width, obj1.width) * line_overlap < obj0.hoverlap(obj1) and
+                    obj0.vdistance(obj1) < max(obj0.height, obj1.height) * char_margin):
                    # obj0 and obj1 is vertically aligned:
                    #
                    #   +------+
@ -531,59 +501,59 @@ class LTAnalyzer(LTContainer):
                     (k & 2 and isinstance(line, LTTextLineVertical)) ):
                    line.add(obj1)
                elif line is not None:
-                    line.fixate()
-                    yield line
+                    yield line.finish()
                    line = None
                else:
                    if k == 2:
-                        line = LTTextLineVertical(laparams)
+                        line = LTTextLineVertical(word_margin)
                        line.add(obj0)
                        line.add(obj1)
                    elif k == 1:
-                        line = LTTextLineHorizontal(laparams)
+                        line = LTTextLineHorizontal(word_margin)
                        line.add(obj0)
                        line.add(obj1)
                    else:
-                        line = LTTextLineHorizontal(laparams)
+                        line = LTTextLineHorizontal(word_margin)
                        line.add(obj0)
-                        line.fixate()
-                        yield line
+                        yield line.finish()
                        line = None
            obj0 = obj1
        if line is None:
-            line = LTTextLineHorizontal(laparams)
+            line = LTTextLineHorizontal(word_margin)
            line.add(obj0)
-        line.fixate()
-        yield line
+        yield line.finish()
        return

-    def get_textboxes(self, lines, laparams):
+    def get_textboxes(self, lines, line_margin):
        plane = Plane(lines)
-        groups = {}
        for line in lines:
-            neighbors = line.find_neighbors(plane, laparams.line_margin)
+            plane.add(line)
+        plane.finish()
+        boxes = {}
+        for line in lines:
+            neighbors = line.find_neighbors(plane, line_margin)
            assert line in neighbors, line
-            members = neighbors[:]
+            members = []
            for obj1 in neighbors:
-                if obj1 in groups:
-                    members.extend(groups.pop(obj1))
-            members = list(uniq(members))
+                members.append(obj1)
+                if obj1 in boxes:
+                    members.extend(boxes.pop(obj1))
            if isinstance(line, LTTextLineHorizontal):
-                group = LTTextBoxHorizontal(members)
+                box = LTTextBoxHorizontal()
            else:
-                group = LTTextBoxVertical(members)
-            for obj in members:
-                groups[obj] = group
+                box = LTTextBoxVertical()
+            for obj in uniq(members):
+                box.add(obj)
+                boxes[obj] = box
        done = set()
        for line in lines:
-            group = groups[line]
-            if group in done: continue
-            done.add(group)
-            group.fixate()
-            yield group
+            box = boxes[line]
+            if box in done: continue
+            done.add(box)
+            yield box.finish()
        return

-    def group_textboxes(self, textboxes, laparams):
+    def group_textboxes(self, boxes):
        def dist(obj1, obj2):
            """A distance function between two TextBoxes.
            
@ -599,43 +569,44 @@ class LTAnalyzer(LTContainer):
            return ((max(obj1.x1,obj2.x1) - min(obj1.x0,obj2.x0)) * 
                    (max(obj1.y1,obj2.y1) - min(obj1.y0,obj2.y0)) -
                    (obj1.width*obj1.height + obj2.width*obj2.height))
-        textboxes = textboxes[:]
+        boxes = boxes[:]
        # XXX this is slow when there're many textboxes.
-        while 2 <= len(textboxes):
+        while 2 <= len(boxes):
            mindist = INF
            minpair = None
-            textboxes = csort(textboxes, key=lambda obj: obj.width*obj.height)
-            for i in xrange(len(textboxes)):
-                for j in xrange(i+1, len(textboxes)):
-                    (obj1, obj2) = (textboxes[i], textboxes[j])
+            boxes = csort(boxes, key=lambda obj: obj.width*obj.height)
+            for i in xrange(len(boxes)):
+                for j in xrange(i+1, len(boxes)):
+                    (obj1, obj2) = (boxes[i], boxes[j])
                    d = dist(obj1, obj2)
                    if d < mindist:
                        mindist = d
                        minpair = (obj1, obj2)
            assert minpair
            (obj1, obj2) = minpair
-            textboxes.remove(obj1)
-            textboxes.remove(obj2)
-            if isinstance(obj1, LTTextBoxHorizontal):
-                group = LTTextGroupLRTB([obj1, obj2])
-            else:
+            boxes.remove(obj1)
+            boxes.remove(obj2)
+            if (isinstance(obj1, LTTextBoxVertical) or
+                isinstance(obj1, LTTextGroupTBRL)):
                group = LTTextGroupTBRL([obj1, obj2])
-            textboxes.append(group)
-        assert len(textboxes) == 1
-        return textboxes.pop()
+            else:
+                group = LTTextGroupLRTB([obj1, obj2])
+            boxes.append(group.finish())
+        assert len(boxes) == 1
+        return boxes.pop()
    

 ##  LTFigure
 ##
-class LTFigure(LTAnalyzer):
+class LTFigure(LTLayoutContainer):

-    def __init__(self, name, bbox, matrix):
+    def __init__(self, name, bbox, matrix, laparams=None):
        self.name = name
        self.matrix = matrix
        (x,y,w,h) = bbox
        bbox = get_bound( apply_matrix_pt(matrix, (p,q))
                          for (p,q) in ((x,y), (x+w,y), (x,y+h), (x+w,y+h)) )
-        LTAnalyzer.__init__(self, bbox=bbox)
+        LTLayoutContainer.__init__(self, bbox, laparams=laparams)
        return

    def __repr__(self):
@ -643,21 +614,19 @@ class LTFigure(LTAnalyzer):
                (self.__class__.__name__, self.name,
                 bbox2str(self.bbox), matrix2str(self.matrix)))

-    def analyze(self, laparams=None):
-        if laparams is not None and laparams.all_texts:
-            LTAnalyzer.analyze(self, laparams=laparams)
-        return
+    def finish(self):
+        if self.laparams is None or not self.laparams.all_texts: return
+        return LTLayoutContainer.finish(self)


 ##  LTPage
 ##
-class LTPage(LTAnalyzer):
+class LTPage(LTLayoutContainer):

-    def __init__(self, pageid, bbox, rotate=0):
-        LTAnalyzer.__init__(self, bbox=bbox)
+    def __init__(self, pageid, bbox, rotate=0, laparams=None):
+        LTLayoutContainer.__init__(self, bbox, laparams=laparams)
        self.pageid = pageid
        self.rotate = rotate
-        self.layout = None
        return

    def __repr__(self):
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@ -182,6 +182,54 @@ class ObjIdRange(object):
        return self.nobjs


+##  Plane
+##
+##  A data structure for objects placed on a plane.
+##  Can efficiently find objects in a certain rectangular area.
+##  It maintains two parallel lists of objects, each of
+##  which is sorted by its x or y coordinate.
+##
+class Plane(object):
+
+    def __init__(self, objs):
+        self._idxs = {}
+        self._xobjs = []
+        self._yobjs = []
+        return
+
+    def __repr__(self):
+        return ('<Plane objs=%r>' % list(self))
+
+    def __iter__(self):
+        return self._idxs.iterkeys()
+
+    # add(obj): registers an object under its x0, x1, y0 and y1 coordinates.
+    def add(self, obj):
+        self._idxs[obj] = len(self._idxs)
+        self._xobjs.append((obj.x0, obj))
+        self._xobjs.append((obj.x1, obj))
+        self._yobjs.append((obj.y0, obj))
+        self._yobjs.append((obj.y1, obj))
+        return
+
+    # finish(): sorts both coordinate lists, which find() searches with bsearch.
+    def finish(self):
+        self._xobjs.sort()
+        self._yobjs.sort()
+        return
+
+    # find(): finds objects that are in a certain area.
+    def find(self, (x0,y0,x1,y1)):
+        i0 = bsearch(self._xobjs, x0)[0]
+        i1 = bsearch(self._xobjs, x1)[1]
+        xobjs = set( obj for (_,obj) in self._xobjs[i0:i1] )
+        i0 = bsearch(self._yobjs, y0)[0]
+        i1 = bsearch(self._yobjs, y1)[1]
+        yobjs = set( obj for (_,obj) in self._yobjs[i0:i1] )
+        xobjs.intersection_update(yobjs)
+        return sorted(xobjs, key=lambda obj: self._idxs[obj])
+
+
 # create_bmp
 def create_bmp(data, bits, width, height):
    info = pack('<IiiHHIIIIII', 40, width, height, 1, bits, 0, len(data), 0, 0, 0, 0)