bugfix
git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@82 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
8b1e38295d
commit
548cdf9443
|
@ -14,10 +14,11 @@ Python PDF parser and analyzer
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
<a href="http://www.unixuser.org/~euske/python/pdfminer/index.html">Homepage</a>
|
<a href="http://www.unixuser.org/~euske/python/pdfminer/index.html">Homepage</a>
|
||||||
|
<a href="#changes">Recent Changes</a>
|
||||||
|
|
||||||
<div align=right class=lastmod>
|
<div align=right class=lastmod>
|
||||||
<!-- hhmts start -->
|
<!-- hhmts start -->
|
||||||
Last Modified: Mon Mar 30 00:30:36 JST 2009
|
Last Modified: Thu Apr 2 08:21:56 JST 2009
|
||||||
<!-- hhmts end -->
|
<!-- hhmts end -->
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@ -45,6 +46,7 @@ which could be useful for analyzing the document.
|
||||||
<li> PDF to HTML conversion (with a sample converter web app).
|
<li> PDF to HTML conversion (with a sample converter web app).
|
||||||
<li> Outline (TOC) extraction.
|
<li> Outline (TOC) extraction.
|
||||||
<li> Tagged contents extraction.
|
<li> Tagged contents extraction.
|
||||||
|
<li> Infer text running by using clustering technique.
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<a name="source"></a>
|
<a name="source"></a>
|
||||||
|
|
|
@ -124,7 +124,7 @@ class HTMLConverter(PDFConverter):
|
||||||
##
|
##
|
||||||
class TextConverter(PDFConverter):
|
class TextConverter(PDFConverter):
|
||||||
|
|
||||||
def __init__(self, rsrc, outfp, codec='utf-8', pagenum=True, cluster_margin=None, splitwords=False):
|
def __init__(self, rsrc, outfp, codec='utf-8', pagenum=False, cluster_margin=None, splitwords=False):
|
||||||
PDFConverter.__init__(self, rsrc, outfp, codec=codec, splitwords=True)
|
PDFConverter.__init__(self, rsrc, outfp, codec=codec, splitwords=True)
|
||||||
self.pagenum = pagenum
|
self.pagenum = pagenum
|
||||||
if cluster_margin == None:
|
if cluster_margin == None:
|
||||||
|
|
|
@ -153,9 +153,9 @@ class PDFContentParser(PSStackParser):
|
||||||
c = self.buf[self.charpos]
|
c = self.buf[self.charpos]
|
||||||
data += c
|
data += c
|
||||||
self.charpos += 1
|
self.charpos += 1
|
||||||
if i >= len(target) and c.isspace():
|
if len(target) <= i and c.isspace():
|
||||||
i += 1
|
i += 1
|
||||||
elif c == target[i]:
|
elif i < len(target) and c == target[i]:
|
||||||
i += 1
|
i += 1
|
||||||
else:
|
else:
|
||||||
i = 0
|
i = 0
|
||||||
|
|
Loading…
Reference in New Issue