diff --git a/README.html b/README.html
index 035c865..2f0cd0b 100644
--- a/README.html
+++ b/README.html
@@ -17,7 +17,7 @@ Python PDF parser and analyzer
 
 <div align=right class=lastmod>
 <!-- hhmts start -->
-Last Modified: Wed Mar 25 20:59:18 JST 2009
+Last Modified: Sun Mar 29 19:09:46 JST 2009
 <!-- hhmts end -->
 </div>
 
@@ -202,6 +202,7 @@ This makes the word spacing correctly handled.
 <dd> Specifies the output format. The following formats are currently supported.
 <ul>
 <li> <code>html</code> : HTML format. (Default)
+<li> <code>text</code> : TEXT format.
 <li> <code>sgml</code> : SGML format.
 <li> <code>tag</code> : "Tagged PDF" format. A tagged PDF has its own contents annotated with
 HTML-like tags. pdf2txt tries to extract its content streams rather than inferring its text locations.
diff --git a/pdflib/cluster.py b/pdflib/cluster.py
index 2b49704..4ea3e9d 100644
--- a/pdflib/cluster.py
+++ b/pdflib/cluster.py
@@ -59,9 +59,9 @@ class Plane(object):
     return objs
 
 
-##  Clusters
+##  ClusterSet
 ##
-class Clusters(object):
+class ClusterSet(object):
 
   def __init__(self):
     self.clusters = {}
@@ -86,11 +86,12 @@ class Clusters(object):
 
 
 def cluster_pageobjs(objs, ratio):
+  idx = dict( (obj,i) for (i,obj) in enumerate(objs) )
   plane = Plane()
   for obj in objs:
     plane.add(obj.bbox, obj)
   plane.finish()
-  clusters = Clusters()
+  cset = ClusterSet()
   for obj in objs:
     (bx0,by0,bx1,by1) = obj.bbox
     margin = abs(obj.fontsize * ratio)
@@ -100,17 +101,26 @@ def cluster_pageobjs(objs, ratio):
     y1 = max(by0,by1)
     found = plane.find((x0-margin, y0-margin, x1+margin, y1+margin))
     if len(found) == 1:
-      clusters.add(found.pop())
+      cset.add(found.pop())
     else:
-      clusters.merge(found)
+      cset.merge(found)
+  clusters = sorted(cset.finish(), key=lambda objs: idx[objs[0]])
   r = []
-  for objs in clusters.finish():
+  for objs in clusters:
+    objs = sorted(objs, key=lambda obj: idx[obj])
+    h = v = 0
     (bx0,by0,bx1,by1) = objs[0].bbox
+    (lx0,ly0,_,_) = objs[0].bbox
     for obj in objs[1:]:
       (x0,y0,x1,y1) = obj.bbox
+      if len(obj.text) == 1 and abs(lx0-x0) < abs(ly0-y0):
+        v += 1
+      else:
+        h += 1
+      (lx0,ly0) = (x0,y0)
       bx0 = min(bx0, x0)
       bx1 = max(bx1, x1)
       by0 = min(by0, y0)
       by1 = max(by1, y1)
-    r.append(((bx0,by0,bx1,by1), objs))
+    r.append(((bx0,by0,bx1,by1), h < v, objs))
   return r
diff --git a/pdflib/pdf2txt.py b/pdflib/pdf2txt.py
index dd8cb81..ad72ef1 100755
--- a/pdflib/pdf2txt.py
+++ b/pdflib/pdf2txt.py
@@ -2,7 +2,7 @@
 import sys
 from pdfparser import PDFDocument, PDFParser, PDFPasswordIncorrect
 from pdfinterp import PDFResourceManager, PDFPageInterpreter
-from pdfdevice import PDFDevice, FigureItem, TextItem, PDFPageAggregator
+from pdfdevice import PDFDevice, PageItem, FigureItem, TextItem, PDFPageAggregator
 from pdffont import PDFUnicodeNotDefined
 from cmap import CMapDB
 
@@ -15,6 +15,15 @@ def encprops(props, codec):
   if not props: return ''
   return ''.join( ' %s="%s"' % (enc(k,codec), enc(str(v),codec)) for (k,v) in sorted(props.iteritems()) )
 
+def get_textobjs(item, r=None):  # flatten item's tree into a list of TextItems
+  if r is None: r = []  # new list per top-level call; recursion shares it
+  if isinstance(item, TextItem):
+    r.append(item)
+  elif isinstance(item, PageItem):  # container: descend into item.objs
+    for child in item.objs:
+      get_textobjs(child, r)
+  return r  # TextItems in traversal order; other item types are skipped
+
 
 ##  PDFConverter
 class PDFConverter(PDFPageAggregator):
@@ -73,7 +82,8 @@ class HTMLConverter(PDFConverter):
     page = PDFConverter.end_page(self, page)
     def f(item):
       if isinstance(item, FigureItem):
-        pass
+        for child in item.objs:
+          f(child)
       elif isinstance(item, TextItem):
         if item.direction == 2:
           wmode = 'tb-rl'
@@ -95,8 +105,8 @@ class HTMLConverter(PDFConverter):
     for child in page.objs:
       f(child)
     if self.cluster_margin:
-      textobjs = [ item for item in page.objs if isinstance(item, TextItem) ]
-      for ((x0,y0,x1,y1),objs) in cluster_pageobjs(textobjs, self.cluster_margin):
+      clusters = cluster_pageobjs(get_textobjs(page), self.cluster_margin)
+      for ((x0,y0,x1,y1),_,objs) in clusters:
         self.outfp.write('<span style="position:absolute; border: 1px solid red; '
                          'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' % 
                        (x0*self.scale, (self.yoffset-y1)*self.scale, (x1-x0)*self.scale, (y1-y0)*self.scale))
@@ -114,7 +124,7 @@ class HTMLConverter(PDFConverter):
 ##
 class TextConverter(PDFConverter):
 
-  def __init__(self, rsrc, outfp, codec='utf-8', pagenum=True, cluster_margin=0.2, splitwords=False, hyphenation=True):
+  def __init__(self, rsrc, outfp, codec='utf-8', pagenum=True, cluster_margin=0.5, splitwords=False, hyphenation=True):
     PDFConverter.__init__(self, rsrc, outfp, codec=codec, splitwords=True)
     self.pagenum = pagenum
     self.cluster_margin = cluster_margin
@@ -125,15 +135,18 @@ class TextConverter(PDFConverter):
     from cluster import cluster_pageobjs
     page = PDFConverter.end_page(self, page)
     if self.cluster_margin:
-      textobjs = [ item for item in page.objs if isinstance(item, TextItem) ]
-      idx = dict( (obj,i) for (i,obj) in enumerate(textobjs) )
+      textobjs = get_textobjs(page)
       clusters = cluster_pageobjs(textobjs, self.cluster_margin)
-      clusters.sort(key=lambda (_,objs): idx[objs[0]])
-      for (_,objs) in clusters:
-        for item in sorted(objs, key=lambda obj:idx[obj]):
-          text = item.text
-          self.outfp.write(text.encode(self.codec, 'replace'))
-        self.outfp.write('\n')
+      for (_,vertical,objs) in clusters:
+        for (i,item) in enumerate(objs):
+          (x0,y0,x1,y1) = item.bbox
+          if (i and
+              ((not vertical and (y1 < ly0 or ly1 < y0)) or
+               (vertical and (x1 < lx0 or lx1 < x0)))):
+            self.outfp.write('\n')
+          (lx0,ly0,lx1,ly1) = (x0,y0,x1,y1)
+          self.outfp.write(item.text.encode(self.codec, 'replace'))
+        self.outfp.write('\n\n')
     else:
       for item in page.objs:
         if isinstance(item, TextItem):
@@ -243,7 +256,7 @@ def main(argv):
   codec = 'ascii'
   pagenos = set()
   maxpages = 0
-  outtype = 'text'
+  outtype = 'html'
   password = ''
   splitwords = False
   outfp = sys.stdout