diff --git a/README.html b/README.html
index 6501741..b56092b 100644
--- a/README.html
+++ b/README.html
@@ -14,10 +14,11 @@ Python PDF parser and analyzer
Homepage
+Recent Changes
-Last Modified: Mon Mar 30 00:30:36 JST 2009
+Last Modified: Thu Apr 2 08:21:56 JST 2009
@@ -45,6 +46,7 @@ which could be useful for analyzing the document.
PDF to HTML conversion (with a sample converter web app).
Outline (TOC) extraction.
Tagged contents extraction.
+ Infer text running by using a clustering technique.
diff --git a/pdflib/pdf2txt.py b/pdflib/pdf2txt.py
index 243c7e4..6fefb9f 100755
--- a/pdflib/pdf2txt.py
+++ b/pdflib/pdf2txt.py
@@ -124,7 +124,7 @@ class HTMLConverter(PDFConverter):
##
class TextConverter(PDFConverter):
- def __init__(self, rsrc, outfp, codec='utf-8', pagenum=True, cluster_margin=None, splitwords=False):
+ def __init__(self, rsrc, outfp, codec='utf-8', pagenum=False, cluster_margin=None, splitwords=False):
PDFConverter.__init__(self, rsrc, outfp, codec=codec, splitwords=True)
self.pagenum = pagenum
if cluster_margin == None:
diff --git a/pdflib/pdfinterp.py b/pdflib/pdfinterp.py
index 9681c38..cd147ef 100644
--- a/pdflib/pdfinterp.py
+++ b/pdflib/pdfinterp.py
@@ -153,9 +153,9 @@ class PDFContentParser(PSStackParser):
c = self.buf[self.charpos]
data += c
self.charpos += 1
- if i >= len(target) and c.isspace():
+ if len(target) <= i and c.isspace():
i += 1
- elif c == target[i]:
+ elif i < len(target) and c == target[i]:
i += 1
else:
i = 0