git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@82 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2009-04-02 14:22:19 +00:00
parent 8b1e38295d
commit 548cdf9443
3 changed files with 6 additions and 4 deletions

View File

@@ -14,10 +14,11 @@ Python PDF parser and analyzer
<p>
<a href="http://www.unixuser.org/~euske/python/pdfminer/index.html">Homepage</a>
<a href="#changes">Recent Changes</a>
<div align=right class=lastmod>
<!-- hhmts start -->
Last Modified: Mon Mar 30 00:30:36 JST 2009
Last Modified: Thu Apr 2 08:21:56 JST 2009
<!-- hhmts end -->
</div>
@@ -45,6 +46,7 @@ which could be useful for analyzing the document.
<li> PDF to HTML conversion (with a sample converter web app).
<li> Outline (TOC) extraction.
<li> Tagged contents extraction.
<li> Infer text running by using a clustering technique.
</ul>
<a name="source"></a>

View File

@@ -124,7 +124,7 @@ class HTMLConverter(PDFConverter):
##
class TextConverter(PDFConverter):
def __init__(self, rsrc, outfp, codec='utf-8', pagenum=True, cluster_margin=None, splitwords=False):
def __init__(self, rsrc, outfp, codec='utf-8', pagenum=False, cluster_margin=None, splitwords=False):
PDFConverter.__init__(self, rsrc, outfp, codec=codec, splitwords=True)
self.pagenum = pagenum
if cluster_margin == None:

View File

@@ -153,9 +153,9 @@ class PDFContentParser(PSStackParser):
c = self.buf[self.charpos]
data += c
self.charpos += 1
if i >= len(target) and c.isspace():
if len(target) <= i and c.isspace():
i += 1
elif c == target[i]:
elif i < len(target) and c == target[i]:
i += 1
else:
i = 0