diff --git a/Makefile b/Makefile
index 625ea39..5b53e22 100644
--- a/Makefile
+++ b/Makefile
@@ -30,15 +30,9 @@ commit: clean
 check:
 	cd $(PACKAGE) && make check
 
-sdist: clean
-	$(PYTHON) setup.py sdist
-
 register: clean
 	$(PYTHON) setup.py sdist upload register
 
-VERSION=`$(PYTHON) $(PACKAGE)/__init__.py`
-DISTFILE=$(PACKAGE)-$(VERSION).tar.gz
 WEBDIR=$$HOME/Site/unixuser.org/python/$(PACKAGE)
-publish: sdist
-	$(CP) dist/$(DISTFILE) $(WEBDIR)
-	$(CP) docs/*.html $(WEBDIR)/index.html
+publish:
+	$(CP) docs/*.html $(WEBDIR)
diff --git a/docs/miningpdf.html b/docs/miningpdf.html
new file mode 100644
index 0000000..7f2372f
--- /dev/null
+++ b/docs/miningpdf.html
@@ -0,0 +1,121 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<title>Mining PDF files</title>
+<style type="text/css"><!--
+blockquote { background: #eeeeee; }
+--></style>
+</head><body>
+
+<h1>Mining PDF files</h1>
+<p>
+
+<p>
+<a href="http://www.unixuser.org/~euske/python/pdfminer/index.html">Homepage</a>
+
+<div align=right class=lastmod>
+<!-- hhmts start -->
+Last Modified: Sat Nov 14 21:09:01 JST 2009
+<!-- hhmts end -->
+</div>
+
+<h2>What is PDF?</h2>
+<p>
+<h3>What PDF is ...</h3>
+<ul>
+<li> A weird mixture of texts and binaries. (Yikes!)
+<li> Generated sequentially, but needs random access to read.
+</ul>
+
+<h3>What PDF is not ...</h3>
+<ul>
+<li> Editable document format (like Word or HTML).
+<li> Nice from an accessibility point of view.
+</ul>
+
+<h2>Structure of PDF</h2>
+<p>
+From a data structure's point of view, PDF is one of the biggest
+messes in computing history.  Originally, Adobe had a document format called
+PostScript (which is also more like a "graphics" format than a
+text format). It has a nice graphic representation and is able to
+express commercial quality typesetting. However, it has to be for
+a specific printer and its file size tends to get bloated because
+almost everything is represented as text. PDF is Adobe's attempt
+to create a less printer dependent format with a reduced data size
+(that's why it was named "portable" document format). To some
+degree, PDF can be seen as a "compressed" version of PostScript
+with seekable index tables.  Since its drawing model and concepts
+(coordinates, color spaces, etc.) remain pretty much the same
+as its predecessor's, Adobe decided to reuse the original PostScript
+notation partially in PDF. However, this eclectic position ended
+up with a disastrous situation.
+
+<h3>Format Disaster</h3>
+<p>
+When designing a data format, there are two different strategies:
+using text or using binary. They both have obvious merits and
+demerits.  The biggest merit of a textual representation is
+that it is human readable and can be modified with any text
+editor. One demerit of a textual representation is its bloated size,
+especially if you want to put something like pictures and
+multimedia data like audio or video. Another demerit of textual
+representation is that you need a program to serialize/deserialize
+(parse) the data, which can be very complex and buggy. On the
+other hand, binary representation normally doesn't require a
+complex parser and takes much less space than text. However,
+it is not readable by humans.  Now, Adobe decided to take the
+good parts from both worlds by making PDF a partially text and
+partially binary format, and as a result, PDF inherits the
+drawbacks of both worlds without having much of their merits, i.e.
+PDF is a human *unreadable* document format that still requires a
+complex and error-prone parser and has a bloated file size.
+<p>
+Adobe has probably been aware of this problem from early on, and
+they have tried to fix it over the years. So they gradually dropped text
+representations and inclined more toward binaries.  For example,
+in PDF specification 1.5, they introduced a new notation called
+"object stream" (which is different from a "stream object" that
+was already there in the specification).
+
+However, by this time there were already tons of PDFs that had been
+produced under the original standard, which every PDF
+viewer is still required to support.
+
+<h2>Problem of Text Extraction from PDF Documents</h2>
+<p>
+Many people tend to think that a PDF document is somewhat similar
+to a Word or HTML document, which is not true. In fact, the primary
+focus of PDF is printing and showing on a computer display, so 
+it is extremely versatile for showing the details of the "looks"
+of text typography, pictures and graphics. All the text in a PDF document is
+just a bunch of string objects floating at various locations on a
+blank slate. There is no text flow control and no contextual clue
+about its content, except for a few special "tagged" PDF documents with
+extra annotations that denote headlines or page boundaries, which
+require specialized tools to create.
+<p>
+(OpenOffice, for example, has the ability to create tagged PDF
+documents.  But the degree of annotation varies depending
+on its implementation, and in many cases it is not possible to
+obtain the full layout information by only using tags.)
+<p>
+Apart from tagged documents, PDF doesn't care about the order of text
+strings rendered in a page.  You can completely jumble up every
+string in a PDF and still make it look like a
+perfect document on the surface.  Even worse, PDF allows a word to
+be split in the middle and drawn as multiple unrelated strings in
+order to represent precise text positioning.  For example, a
+certain word processing software creates a PDF that splits a word
+"You" into two separate strings "Y" and "ou" because of the subtle
+kerning between the letters.
+<p>
+So there's a huge problem associated with extracting text properly
+from PDF files. It requires almost the same kind of analysis
+as optical character recognition (OCR).
+
+
+<hr noshade>
+<address>Yusuke Shinyama</address>
+</body>
diff --git a/pdfminer/cmap.py b/pdfminer/cmap.py
index 695ad51..f80284c 100644
--- a/pdfminer/cmap.py
+++ b/pdfminer/cmap.py
@@ -15,7 +15,6 @@ import sys
 import re
 import os
 import os.path
-from sys import stderr
 from struct import pack, unpack
 from psparser import PSStackParser
 from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF
@@ -24,8 +23,7 @@ from psparser import literal_name, keyword_name
 from fontmetrics import FONT_METRICS
 from latin_enc import ENCODING
 from glyphlist import charname2unicode
-from utils import choplist
-from utils import nunpack
+from utils import choplist, nunpack
 try:
     import cdb
 except ImportError:
@@ -38,16 +36,19 @@ class CMapError(Exception): pass
 ##  find_cmap_path
 ##
 def find_cmap_path():
-    try:
-        return os.environ['CMAP_PATH']
-    except KeyError:
-        pass
-    basedir = os.path.dirname(__file__)
-    return os.path.join(basedir, 'CMap')
+    """Returns the first existing CMap directory ($CMAP_PATH, then the bundled one); raises IOError if none."""
+    for path in (os.environ.get('CMAP_PATH', ''),  # unset -> '', which is never a directory
+                 os.path.join(os.path.dirname(__file__), 'CMap')):
+        if os.path.isdir(path):
+            return path
+    raise IOError
 
 
+##  name2unicode
+##
 STRIP_NAME = re.compile(r'[0-9]+')
 def name2unicode(name):
+    """Converts Adobe glyph names to Unicode numbers."""
     if name in charname2unicode:
         return charname2unicode[name]
     m = STRIP_NAME.search(name)
@@ -97,7 +98,7 @@ class CMap(object):
 
     def decode(self, bytes):
         if self.debug:
-            print >>stderr, 'decode: %r, %r' % (self, bytes)
+            print >>sys.stderr, 'decode: %r, %r' % (self, bytes)
         x = ''
         for c in bytes:
             if x:
@@ -179,7 +180,7 @@ class CDBCMap(CMap):
 
     def decode(self, bytes):
         if self.debug:
-            print >>stderr, 'decode: %r, %r' % (self, bytes)
+            print >>sys.stderr, 'decode: %r, %r' % (self, bytes)
         x = ''
         for c in bytes:
             if x:
@@ -227,11 +228,11 @@ class CMapDB(object):
             cdbname = os.path.join(self.cdbdirname, cmapname+'.cmap.cdb')
             if os.path.exists(cdbname):
                 if 1 <= self.debug:
-                    print >>stderr, 'Opening: CDBCMap %r...' % cdbname
+                    print >>sys.stderr, 'Opening: CDBCMap %r...' % cdbname
                 cmap = CDBCMap(cdbname)
             elif os.path.exists(fname):
                 if 1 <= self.debug:
-                    print >>stderr, 'Reading: CMap %r...' % fname
+                    print >>sys.stderr, 'Reading: CMap %r...' % fname
                 cmap = CMap()
                 fp = file(fname, 'rb')
                 CMapParser(cmap, fp).run()
@@ -423,10 +424,11 @@ class EncodingDB(object):
 
 ##  CMap -> CMapCDB conversion
 ##
-def dumpcdb(cmap, cdbfile, verbose=1):
+def dump_cdb(cmap, cdbfile, verbose=1):
+    """Writes a CMap object into a cdb file."""
     m = cdb.cdbmake(cdbfile, cdbfile+'.tmp')
     if verbose:
-        print >>stderr, 'Writing: %r...' % cdbfile
+        print >>sys.stderr, 'Writing: %r...' % cdbfile
     for (k,v) in cmap.getall_attrs():
         m.add('/'+k, repr(v))
     for (code,cid) in cmap.getall_code2cid():
@@ -437,44 +439,55 @@ def dumpcdb(cmap, cdbfile, verbose=1):
     return
 
 def convert_cmap(cmapdir, outputdir, force=False):
+    """Convert all CMap source files in a directory into cdb files."""
     CMapDB.initialize(cmapdir)
     for fname in os.listdir(cmapdir):
         if '.' in fname: continue
         cmapname = os.path.basename(fname)
         cdbname = os.path.join(outputdir, cmapname+'.cmap.cdb')
         if not force and os.path.exists(cdbname):
-            print >>stderr, 'Skipping: %r' % cmapname
+            print >>sys.stderr, 'Skipping: %r' % cmapname
             continue
-        print >>stderr, 'Reading: %r...' % cmapname
+        print >>sys.stderr, 'Reading: %r...' % cmapname
         cmap = CMapDB.get_cmap(cmapname)
-        dumpcdb(cmap, cdbname)
+        dump_cdb(cmap, cdbname)
     return
 
 def main(argv):
+    """Converts CMap files into cdb files.
+
+    usage: python -m pdfminer.cmap [-f] [cmap_dir [output_dir]]
+    """
+    
     import getopt
     def usage():
-        print 'usage: %s [-D outputdir] [-f] cmap_dir' % argv[0]
+        print 'usage: %s [-f] [cmap_dir [output_dir]]' % argv[0]
         return 100
     try:
-        (opts, args) = getopt.getopt(argv[1:], 'C:D:f')
+        (opts, args) = getopt.getopt(argv[1:], 'f')
     except getopt.GetoptError:
         return usage()
     if args:
         cmapdir = args.pop(0)
     else:
-        cmapdir = find_cmap_path()
-    outputdir = cmapdir
+        try:
+            cmapdir = find_cmap_path()
+        except IOError:
+            print >>sys.stderr, 'cannot find CMap directory'
+            return 1
+    if args:
+        outputdir = args.pop(0)
+    else:
+        outputdir = cmapdir
     force = False
     for (k, v) in opts:
         if k == '-f': force = True
-        elif k == '-C': cmapdir = v
-        elif k == '-D': outputdir = v
     if not os.path.isdir(cmapdir):
-        print >>stderr, 'directory does not exist: %r' % cmapdir
-        return 111
+        print >>sys.stderr, 'directory does not exist: %r' % cmapdir
+        return 1
     if not os.path.isdir(outputdir):
-        print >>stderr, 'directory does not exist: %r' % outputdir
-        return 111
+        print >>sys.stderr, 'directory does not exist: %r' % outputdir
+        return 1
     return convert_cmap(cmapdir, outputdir, force=force)
 
 if __name__ == '__main__': sys.exit(main(sys.argv))