diff --git a/TODO b/TODO
new file mode 100644
index 0000000..7658243
--- /dev/null
+++ b/TODO
@@ -0,0 +1,9 @@
+TODO:
+  - Code Documentation.
+  - Error handling for invalid type.
+
+  - Outlines.
+  - Named Objects. (pages)
+  - Writers.
+  - Linearized PDF.
+  - Encryption?
diff --git a/arcfour.py b/arcfour.py
new file mode 100755
index 0000000..c9c13a8
--- /dev/null
+++ b/arcfour.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+#
+#  Arcfour implementation
+#  * public domain *
+#
+
+class Arcfour:
+  '''RC4 (Arcfour) stream cipher; keystream state persists across process() calls.'''
+  def __init__(self, key):
+    klen = len(key)
+    if klen == 0: raise ValueError('Arcfour key must not be empty')
+    (s, j) = (range(256), 0)
+    for i in xrange(256):
+      j = (j + s[i] + ord(key[i % klen])) % 256
+      (s[i], s[j]) = (s[j], s[i])
+    self.s = s
+    (self.i, self.j) = (0, 0)
+    return
+
+  def process(self, data):
+    '''Return data XORed with the next len(data) keystream bytes.'''
+    (i, j) = (self.i, self.j)
+    s = self.s
+    r = []  # collect output bytes and join once; repeated str += is quadratic
+    for c in data:
+      i = (i+1) % 256
+      j = (j+s[i]) % 256
+      (s[i], s[j]) = (s[j], s[i])
+      r.append(chr(ord(c) ^ s[(s[i]+s[j]) % 256]))
+    (self.i, self.j) = (i, j)
+    return ''.join(r)
+
+if __name__ == '__main__':
+  def doit(key, data):
+    cipher = Arcfour(key)
+    return ''.join( '%02X' % ord(c) for c in cipher.process(data) )
+  assert doit("Key", "Plaintext") == 'BBF316E8D940AF0AD3'
+  assert doit("Wiki", "pedia") == '1021BF0420'
+  assert doit("Secret", "Attack at dawn") == '45A01F645FC35B383552544B9BF5'
+  print 'test succeeded'
diff --git a/extent.py b/extent.py
index 67e005d..74d08b7 100755
--- a/extent.py
+++ b/extent.py
@@ -20,6 +20,9 @@ class Rect:
       self.y1 = y0+h
     return
 
+  def __repr__(self):
+    return '<Rect: (%d,%d)-(%d,%d)>' % (self.x0, self.y0, self.x1, self.y1)
+
   def overlap(self, rect):
     return not (rect.x1 <= self.x0 or self.x1 <= rect.x0 or
                 rect.y1 <= self.y0 or self.y1 <= rect.y0)
@@ -31,7 +34,7 @@ class ExtSet:
   
   def __init__(self, gridsize):
     self.gridsize = gridsize
-    self.grid = []
+    self.grid = {}
     return
   
   def cells(self, x0, x1):
@@ -45,13 +48,19 @@ class ExtSet:
   
   def add(self, x0, x1, obj):
     for i in self.cells(x0, x1):
-      self.grid[i].append(obj)
+      # The grid is a sparse dict keyed by cell index, so a cell's
+      # bucket is created lazily the first time an object lands in it.
+      bucket = self.grid.setdefault(i, [])
+      # obj is appended to every cell that self.cells() yields for
+      # the extent; get() later merges the buckets into a set.
+      bucket.append(obj)
     return
   
   def get(self, x0, x1):
     objs = set()
     for i in self.cells(x0, x1):
-      objs.update(self.grid[i])
+      if i in self.grid:
+        objs.update(self.grid[i])
     return objs
 
 def test_extset():
@@ -78,12 +87,13 @@ class ExtGrid:
     self.vext = ExtSet(gridsize)
     return
   
-  def add(self, rect):
-    self.hext.add(rect.x0, rect.x1, rect)
-    self.vext.add(rect.y0, rect.y1, rect)
+  def add(self, rect, obj):
+    self.hext.add(rect.x0, rect.x1, obj)
+    self.vext.add(rect.y0, rect.y1, obj)
     return
   
-  def get(self, rect):
-    rects = self.hext.get(rect.x0, rect.x1)
-    rects.update_intersect(self.vext.get(rect.y0, rect.y1))
-    return rects
+  def get(self, rect, getrect):
+    objs = self.hext.get(rect.x0, rect.x1)
+    objs.intersection_update(self.vext.get(rect.y0, rect.y1))
+    objs = [ obj for obj in objs if rect.overlap(getrect(obj)) ]
+    return objs
diff --git a/pdf2txt.py b/pdf2txt.py
index 9180067..a6bd115 100755
--- a/pdf2txt.py
+++ b/pdf2txt.py
@@ -7,86 +7,183 @@ from pdfinterp import PDFDevice, PDFResourceManager, \
      PDFPageInterpreter, PDFUnicodeNotDefined, \
      mult_matrix, apply_matrix
 from cmap import CMapDB
+from extent import Rect, ExtSet, ExtGrid
+
+
+##  PageItem
+##
+class PageItem:
+  
+  GRID_SIZE = 20
+  
+  def __init__(self, id, (x0,y0,x1,y1), rotate=0):
+    self.id = id
+    self.bbox = Rect(x0, y0, x1-x0, y1-y0)
+    self.rotate = rotate
+    self.grid = ExtGrid(self.GRID_SIZE)
+    self.objs = []
+    return
+  
+  def __repr__(self):
+    bbox = self.bbox
+    return ('<page id=%r bbox="%d,%d,%d,%d" rotate="%d">' %
+            (self.id, bbox.x0,bbox.y0,bbox.x1,bbox.y1, self.rotate))
+  
+  def add(self, obj):
+    self.objs.append(obj)
+    self.grid.add(obj.bbox, obj)
+    return
+  
+  def dump(self, outfp, codec):
+    outfp.write(repr(self)+'\n')
+    for obj in self.objs:
+      obj.dump(outfp, codec)
+    outfp.write('</page>\n')
+    return
+
+  def fuse(self):
+    for obj1 in self.objs:
+      f = (lambda obj: obj.bbox)
+      for rect in obj1.search_range():
+        neighbors = [ obj2 for obj2 in self.grid.get(rect, f) if obj2 is not obj1 ]
+        #print obj1.bbox, obj1.text.encode('euc-jp','ignore'), rect, [ obj.bbox for obj in neighbors ]
+    return
+
+
+##  FigureItem
+##
+class FigureItem(PageItem):
+  
+  def __repr__(self):
+    bbox = self.bbox
+    return ('<figure id=%r bbox="%d,%d,%d,%d">' %
+            (self.id, bbox.x0,bbox.y0,bbox.x1,bbox.y1))
+  
+  def dump(self, outfp, codec):
+    outfp.write(repr(self)+'\n')
+    for obj in self.objs:
+      obj.dump(outfp, codec)
+    outfp.write('</figure>\n')
+    return
+
+  def search_range(self):
+    return []
+
+
+##  TextItem
+##
+class TextItem:
+  
+  def __init__(self, matrix, font, size, width, text):
+    '''text is a list of (displacement, char) pairs; direction is 1 (horizontal) or 2 (vertical).'''
+    self.matrix = matrix
+    self.font = font
+    (a,b,c,d,tx,ty) = self.matrix
+    (self.width, self.size) = apply_matrix((a,b,c,d,0,0), (width,size))
+    self.width = abs(self.width)
+    self.origin = (tx,ty)
+    if not self.font.is_vertical():
+      self.direction = 1
+      (_,ascent) = apply_matrix((a,b,c,d,0,0), (0,font.ascent*size*0.001))
+      (_,descent) = apply_matrix((a,b,c,d,0,0), (0,font.descent*size*0.001))
+      self.bbox = Rect(tx, ty+descent, self.width, self.size)
+    else:
+      self.direction = 2
+      mindisp = min([ disp for (disp,_c) in text ] or [0]) # text may be empty; min() of nothing raises
+      (mindisp,_) = apply_matrix((a,b,c,d,0,0), (mindisp*size*0.001,0))
+      self.bbox = Rect(tx-mindisp, ty+self.width, self.size, self.width)
+    self.text = ''.join( c for (_,c) in text )
+    return
+  
+  def __repr__(self):
+    return ('<text matrix=%r font=%r size=%r width=%r text=%r>' %
+            (self.matrix, self.font, self.size, self.width, self.text))
+  
+  def dump(self, outfp, codec):
+    (a,b,c,d,tx,ty) = self.matrix
+    outfp.write('<text x="%.3f" y="%.3f" font=%r size="%.3f" width="%.3f">' %
+                (tx, ty, self.font.fontname, self.size, self.width))
+    outfp.write(self.text.encode(codec, 'xmlcharrefreplace'))
+    outfp.write('</text>\n')
+    return
+
+  def search_range(self):
+    if self.direction == 1:
+      return [ Rect(self.bbox.x1, self.bbox.y0, self.size, self.size) ]
+    else:
+      return [ Rect(self.bbox.x0, self.bbox.y0-self.size, self.size, self.size) ]
 
 
 ##  TextConverter
 ##
 class TextConverter(PDFDevice):
 
-  def __init__(self, outfp, rsrc, codec, debug=0):
+  def __init__(self, rsrc, debug=0):
     PDFDevice.__init__(self, rsrc, debug=debug)
-    self.outfp = outfp
-    self.codec = codec
-    return
-
-  def close(self):
-    self.outfp.write('\n')
+    self.pages = []
+    self.stack = []
     return
 
   def begin_page(self, page):
-    (x0,y0,x1,y1) = page.mediabox
-    self.outfp.write('<page id="%d" mediabox="%d,%d,%d,%d" rotate="%d">' %
-                     (page.pageid, x0,y0,x1,y1, page.rotate))
+    self.context = PageItem(str(page.pageid), page.mediabox, page.rotate)
     return
   def end_page(self, _):
-    self.outfp.write('</page>\n')
+    assert not self.stack
+    self.pages.append(self.context)
     return
 
   def begin_figure(self, name, bbox):
-    (x0,y0,x1,y1) = bbox
-    self.outfp.write('<figure name="%s" bbox="%d,%d,%d,%d">\n' %
-                     (name, x0,y0,x1,y1))
+    self.stack.append(self.context)
+    self.context = FigureItem(name, bbox)
     return
   def end_figure(self, _):
-    self.outfp.write('</figure>\n')
+    fig = self.context
+    self.context = self.stack.pop()
+    self.context.add(fig)
     return
 
   def handle_undefined_char(self, cidcoding, cid):
     if self.debug:
       print >>stderr, 'undefined: %r, %r' % (cidcoding, cid)
     #return unichr(cid)
-    #return unichr(cid+32)
-    return
+    return None
 
   def render_string(self, textstate, textmatrix, size, seq):
     font = textstate.font
     spwidth = int(-font.char_width(32) * 0.6) # space width
-    buf = ''
+    text = []
     for x in seq:
       if isinstance(x, int) or isinstance(x, float):
         if not font.is_vertical() and x <= spwidth:
-          buf += ' '
+          text.append((0, ' '))
       else:
         chars = font.decode(x)
         for cid in chars:
           try:
             char = font.to_unicode(cid)
-            buf += char
+            text.append((font.char_disp(cid), char))
           except PDFUnicodeNotDefined, e:
             (cidcoding, cid) = e.args
             s = self.handle_undefined_char(cidcoding, cid)
             if s:
-              buf += s
-    (a,b,c,d,tx,ty) = mult_matrix(textmatrix, self.ctm)
-    if font.is_vertical():
-      size = -size
-      tag = 'vtext'
-    else:
-      tag = 'htext'
-    if (b != 0 or c != 0 or a <= 0 or d <= 0):
-      tag += ' skewed'
-    s = buf.encode(self.codec, 'xmlcharrefreplace')
-    (w,fs) = apply_matrix((a,b,c,d,0,0), (size,textstate.fontsize))
-    def f(x): return '%.03f' % x
-    self.outfp.write('<%s font="%s" size="%s" x="%s" y="%s" w="%s">%s</%s>\n' %
-                     (tag, font.fontname, f(fs), f(tx), f(ty), f(w), s, tag))
+              text.append((0, s)) # keep (disp, char) pairs; TextItem unpacks every entry as a 2-tuple
+    item = TextItem(mult_matrix(textmatrix, self.ctm),
+                    font, textstate.fontsize, size, text)
+    self.context.add(item)
+    return
+
+  def dump(self, outfp, codec):
+    outfp.write('<document>\n')
+    for page in self.pages:
+      #page.fuse()
+      page.dump(outfp, codec)
+    outfp.write('</document>\n')
     return
 
 
 # pdf2txt
 def pdf2txt(outfp, rsrc, fname, pages, codec, debug=0):
-  device = TextConverter(outfp, rsrc, codec, debug=debug)
-  outfp.write('<document>\n')
+  device = TextConverter(rsrc, debug=debug)
   doc = PDFDocument(debug=debug)
   fp = file(fname)
   parser = PDFParser(doc, fp, debug=debug)
@@ -95,7 +192,7 @@ def pdf2txt(outfp, rsrc, fname, pages, codec, debug=0):
     if pages and (i not in pages): continue
     interpreter.process_page(page)
   fp.close()
-  outfp.write('</document>\n')
+  device.dump(outfp, codec)
   device.close()
   return
 
diff --git a/pdfparser.py b/pdfparser.py
index e0c9df0..308db76 100755
--- a/pdfparser.py
+++ b/pdfparser.py
@@ -4,30 +4,30 @@
 #  ver 0.1, Dec 24 2004-
 #  ver 0.2, Dec 24 2007
 
-# TODO:
-#   - Code Documentation.
-#   - Error handling for invalid type.
-
-#   - Outlines.
-#   - Named Objects. (pages)
-#   - Writers.
-#   - Linearized PDF.
-#   - Encryption?
-
 import sys
+import md5, struct
 stderr = sys.stderr
 from utils import choplist, nunpack
+from arcfour import Arcfour
 from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF, \
      PSLiteral, PSKeyword, PSLiteralTable, PSKeywordTable, \
      literal_name, keyword_name, \
      PSStackParser, STRICT
 
 
+def decrypt_rc4(key, objid, genno, data):
+  key += struct.pack('<L',objid)[:3]+struct.pack('<L',genno)[:2]
+  hash = md5.md5(key)
+  key = hash.digest()[:min(len(key),16)]
+  return Arcfour(key).process(data)
+
+
 ##  PDF Exceptions
 ##
 class PDFException(PSException): pass
 class PDFSyntaxError(PDFException): pass
-class PDFEncrypted(PDFException): pass
+class PDFEncryptionError(PDFException): pass
+class PDFPasswordIncorrect(PDFEncryptionError): pass
 class PDFTypeError(PDFException): pass
 class PDFValueError(PDFException): pass
 
@@ -38,6 +38,7 @@ LITERAL_XREF = PSLiteralTable.intern('XRef')
 LITERAL_PAGE = PSLiteralTable.intern('Page')
 LITERAL_PAGES = PSLiteralTable.intern('Pages')
 LITERAL_CATALOG = PSLiteralTable.intern('Catalog')
+LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
 LITERAL_FLATE_DECODE = PSLiteralTable.intern('FlateDecode')
 KEYWORD_R = PSKeywordTable.intern('R')
 KEYWORD_OBJ = PSKeywordTable.intern('obj')
@@ -45,6 +46,7 @@ KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj')
 KEYWORD_STREAM = PSKeywordTable.intern('stream')
 KEYWORD_XREF = PSKeywordTable.intern('xref')
 KEYWORD_STARTXREF = PSKeywordTable.intern('startxref')
+PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
 
 
 ##  PDFObjRef
@@ -77,7 +79,7 @@ def resolve1(x):
     x = x.resolve()
   return x
 
-def resolveall(x):
+def resolve_all(x):
   '''
   Recursively resolve X and all the internals.
   Make sure there is no indirect reference within the nested object.
@@ -86,10 +88,23 @@ def resolveall(x):
   while isinstance(x, PDFObjRef):
     x = x.resolve()
   if isinstance(x, list):
-    x = [ resolveall(v) for v in x ]
+    x = [ resolve_all(v) for v in x ]
   elif isinstance(x, dict):
     for (k,v) in x.iteritems():
-      x[k] = resolveall(v)
+      x[k] = resolve_all(v)
+  return x
+
+def decipher_all(decipher, objid, genno, x):
+  '''
+  Recursively decipher X, returning a new object; X itself is not modified.
+  '''
+  if isinstance(x, str):
+    return decipher(objid, genno, x)
+  if isinstance(x, list):
+    x = [ decipher_all(decipher, objid, genno, v) for v in x ]
+  elif isinstance(x, dict):
+    x = dict( (k, decipher_all(decipher, objid, genno, v))
+              for (k,v) in x.iteritems() )
   return x
 
 # Type cheking
@@ -159,6 +174,13 @@ class PDFStream:
     self.rawdata = rawdata
     self.decipher = decipher
     self.data = None
+    self.objid = None
+    self.genno = None
+    return
+
+  def set_objid(self, objid, genno):
+    self.objid = objid
+    self.genno = genno
     return
   
   def __repr__(self):
@@ -168,7 +190,7 @@ class PDFStream:
     assert self.data == None and self.rawdata != None
     data = self.rawdata
     if self.decipher:
-      data = self.decipher(data)
+      data = self.decipher(self.objid, self.genno, data)
     if 'Filter' not in self.dic:
       self.data = data
       self.rawdata = None
@@ -201,6 +223,8 @@ class PDFStream:
               buf += ent1
               ent0 = ent1
             data = buf
+      if f == LITERAL_CRYPT:
+        raise PDFEncryptionError
       else:
         if STRICT:
           raise PDFValueError('Invalid filter spec: %r' % f)
@@ -338,10 +362,11 @@ class PDFDocument:
     self.xrefs = []
     self.objs = {}
     self.parsed_objs = {}
-    self.decipher = None
     self.root = None
     self.catalog = None
     self.parser = None
+    self.encryption = None
+    self.decipher = None
     return
 
   def set_parser(self, parser):
@@ -351,20 +376,74 @@ class PDFDocument:
     for xref in self.xrefs:
       trailer = xref.trailer
       if 'Encrypt' in trailer:
-        raise PDFEncrypted
-        param = dict_value(trailer['Encrypt'])
-        self.decipher = DECRYPTOR(param)
-        self.parser.strfilter = self.decipher
+        self.encryption = (list_value(trailer['ID']),
+                           dict_value(trailer['Encrypt']))
       if 'Root' in trailer:
         self.set_root(dict_value(trailer['Root']))
         break
     else:
       raise PDFValueError('no /Root object!')
+    if self.encryption:
+      self.prepare_cipher()
+    return
+
+  def prepare_cipher(self, password=''):
+    '''Authenticate the user password against the /Encrypt dictionary
+    (Standard security handler, RC4, revisions 2 and 3) and install self.decipher.
+    Raises PDFEncryptionError/NotImplementedError if unsupported, PDFPasswordIncorrect on mismatch.'''
+    (docid, param) = self.encryption
+    if literal_name(param['Filter']) != 'Standard':
+      raise PDFEncryptionError('unknown filter: param=%r' % param)
+    V = int_value(param.get('V', 0))
+    if not (V == 1 or V == 2):
+      raise PDFEncryptionError('unknown algorithm: param=%r' % param)
+    length = int_value(param.get('Length', 40)) # Key length (bits)
+    O = str_value(param['O'])
+    R = int_value(param['R']) # Revision
+    if 5 <= R:
+      raise PDFEncryptionError('unknown revision: %r' % R)
+    U = str_value(param['U'])
+    P = int_value(param['P']) # permission flags (mask 4: print, 8: modify, 16: extract)
+    # Algorithm 3.2
+    password = (password+PASSWORD_PADDING)[:32] # 1
+    hash = md5.md5(password) # 2
+    hash.update(O) # 3
+    hash.update(struct.pack('<L', P & 0xffffffff)) # 4 (P is signed; mask to its 32-bit pattern)
+    hash.update(docid[0]) # 5
+    if 4 <= R:
+      raise NotImplementedError # 6
+    if 3 <= R:
+      # 8
+      for _ in xrange(50):
+        hash = md5.md5(hash.digest()[:length/8])
+    key = hash.digest()[:length/8]
+    if R == 2:
+      # Algorithm 3.4 (encrypts the padding string, not the padded password)
+      u1 = Arcfour(key).process(PASSWORD_PADDING)
+    elif R == 3:
+      # Algorithm 3.5
+      hash = md5.md5(PASSWORD_PADDING) # 2
+      hash.update(docid[0]) # 3
+      x = Arcfour(key).process(hash.digest()[:16]) # 4
+      for i in xrange(1,19+1):
+        k = ''.join( chr(ord(c) ^ i) for c in key )
+        x = Arcfour(k).process(x)
+      u1 = x+x # 32bytes total
+    else:
+      raise PDFEncryptionError('unknown revision: %r' % R)
+    if R == 2:
+      is_authenticated = (u1 == U)
+    else:
+      is_authenticated = (u1[:16] == U[:16])
+    if not is_authenticated:
+      raise PDFPasswordIncorrect
+    self.decipher = (lambda objid,genno,data: decrypt_rc4(key, objid, genno, data))
     return
 
   def getobj(self, objid):
     #assert self.xrefs
     if objid in self.objs:
+      genno = 0
       obj = self.objs[objid]
     else:
       for xref in self.xrefs:
@@ -400,18 +479,26 @@ class PDFDocument:
           except PSEOF:
             pass
           self.parsed_objs[stream] = objs
+        genno = 0
         obj = objs[stream.dic['N']*2+index]
+        if isinstance(obj, PDFStream):
+          obj.set_objid(objid, 0)
       else:
         self.parser.seek(index)
         (_,objid1) = self.parser.nextobject() # objid
-        (_,genno1) = self.parser.nextobject() # genno
+        (_,genno) = self.parser.nextobject() # genno
+        assert objid1 == objid
         (_,kwd) = self.parser.nextobject()
         if kwd != KEYWORD_OBJ:
           raise PDFSyntaxError('invalid obj spec: offset=%r' % index)
         (_,obj) = self.parser.nextobject()
+        if isinstance(obj, PDFStream):
+          obj.set_objid(objid, genno)
       if 2 <= self.debug:
         print >>stderr, 'register: objid=%r: %r' % (objid, obj)
+      if self.decipher: # decipher once, before caching; a cached object must not be deciphered again
+        obj = decipher_all(self.decipher, objid, genno, obj)
       self.objs[objid] = obj
     return obj
   
   def get_pages(self, debug=0):
diff --git a/samples/dmca.pdf b/samples/dmca.pdf
new file mode 100644
index 0000000..90d1522
Binary files /dev/null and b/samples/dmca.pdf differ
diff --git a/samples/f1040nr.pdf b/samples/f1040nr.pdf
new file mode 100644
index 0000000..2c0a6d0
Binary files /dev/null and b/samples/f1040nr.pdf differ
diff --git a/samples/i1040nr.pdf b/samples/i1040nr.pdf
new file mode 100644
index 0000000..7f9621e
Binary files /dev/null and b/samples/i1040nr.pdf differ
diff --git a/samples/kampo.pdf b/samples/kampo.pdf
new file mode 100644
index 0000000..b41689b
Binary files /dev/null and b/samples/kampo.pdf differ
diff --git a/samples/nlp2004slides.pdf b/samples/nlp2004slides.pdf
new file mode 100644
index 0000000..ba29cd0
Binary files /dev/null and b/samples/nlp2004slides.pdf differ