release 20100424
git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@210 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
97848409e5
commit
a16eba30b7
|
@ -19,7 +19,7 @@ Python PDF parser and analyzer
|
||||||
|
|
||||||
<div align=right class=lastmod>
|
<div align=right class=lastmod>
|
||||||
<!-- hhmts start -->
|
<!-- hhmts start -->
|
||||||
Last Modified: Sat Apr 24 02:48:00 UTC 2010
|
Last Modified: Sat Apr 24 04:30:10 UTC 2010
|
||||||
<!-- hhmts end -->
|
<!-- hhmts end -->
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@ -41,8 +41,7 @@ Last Modified: Sat Apr 24 02:48:00 UTC 2010
|
||||||
<hr noshade>
|
<hr noshade>
|
||||||
<h2>What's It?</h2>
|
<h2>What's It?</h2>
|
||||||
<p>
|
<p>
|
||||||
PDFMiner is a suite of programs that help
|
PDFMiner is a tool for extracting information from PDF documents.
|
||||||
extracting some information from PDF documents.
|
|
||||||
Unlike other PDF-related tools, it focuses entirely on getting
|
Unlike other PDF-related tools, it focuses entirely on getting
|
||||||
and analyzing text data. PDFMiner allows to obtain
|
and analyzing text data. PDFMiner allows to obtain
|
||||||
the exact location of texts in a page, as well as
|
the exact location of texts in a page, as well as
|
||||||
|
@ -270,6 +269,10 @@ are M = 1.0, L = 0.3, and W = 0.2, respectively.
|
||||||
<dt> <code>-n</code>
|
<dt> <code>-n</code>
|
||||||
<dd> Suppress layout analysis.
|
<dd> Suppress layout analysis.
|
||||||
<p>
|
<p>
|
||||||
|
<dt> <code>-A</code>
|
||||||
|
<dd> Forces to perform layout analysis for all the text strings,
|
||||||
|
including texts contained in figures.
|
||||||
|
<p>
|
||||||
<dt> <code>-s <em>scale</em></code>
|
<dt> <code>-s <em>scale</em></code>
|
||||||
<dd> Specifies the output scale. Can be used in HTML format only.
|
<dd> Specifies the output scale. Can be used in HTML format only.
|
||||||
<p>
|
<p>
|
||||||
|
@ -374,6 +377,7 @@ no stream header is displayed for the ease of saving it to a file.
|
||||||
<hr noshade>
|
<hr noshade>
|
||||||
<h2>Changes</h2>
|
<h2>Changes</h2>
|
||||||
<ul>
|
<ul>
|
||||||
|
<li> 2010/04/24: Bugfixes and tiny improvements on TOC extraction. Thanks to Jose Maria.
|
||||||
<li> 2010/03/26: Bugfixes. Thanks to Brian Berry and Lubos Pintes.
|
<li> 2010/03/26: Bugfixes. Thanks to Brian Berry and Lubos Pintes.
|
||||||
<li> 2010/03/22: Improved layout analysis. Added regression tests.
|
<li> 2010/03/22: Improved layout analysis. Added regression tests.
|
||||||
<li> 2010/03/12: A couple of bugfixes. Thanks to Sean Manefield.
|
<li> 2010/03/12: A couple of bugfixes. Thanks to Sean Manefield.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
__version__ = '20100327'
|
__version__ = '20100424'
|
||||||
|
|
||||||
if __name__ == '__main__': print __version__
|
if __name__ == '__main__': print __version__
|
||||||
|
|
13
setup.py
13
setup.py
|
@ -6,15 +6,14 @@ setup(
|
||||||
name='pdfminer',
|
name='pdfminer',
|
||||||
version=__version__,
|
version=__version__,
|
||||||
description='PDF parser and analyzer',
|
description='PDF parser and analyzer',
|
||||||
long_description='''PDFMiner is a suite of programs that help
|
long_description='''PDFMiner is a tool for extracting information from PDF documents.
|
||||||
extracting and analyzing text data from PDF documents.
|
Unlike other PDF-related tools, it focuses entirely on getting
|
||||||
Unlike other PDF-related tools, it allows to obtain
|
and analyzing text data. PDFMiner allows to obtain
|
||||||
the exact location of texts in a page, as well as
|
the exact location of texts in a page, as well as
|
||||||
other extra information such as font information or ruled lines.
|
other information such as fonts or lines.
|
||||||
It can also infer its text flow and reconstruct the original layout.
|
It includes a PDF converter that can transform PDF files
|
||||||
PDFMiner includes a PDF converter that can transform PDF files
|
|
||||||
into other text formats (such as HTML). It has an extensible
|
into other text formats (such as HTML). It has an extensible
|
||||||
PDF parser library that can be used for other purposes instead of text analysis.''',
|
PDF parser that can be used for other purposes instead of text analysis.''',
|
||||||
license='MIT/X',
|
license='MIT/X',
|
||||||
author='Yusuke Shinyama',
|
author='Yusuke Shinyama',
|
||||||
author_email='yusuke at cs dot nyu dot edu',
|
author_email='yusuke at cs dot nyu dot edu',
|
||||||
|
|
Loading…
Reference in New Issue