diff --git a/docs/index.html b/docs/index.html index 74c0653..645f703 100644 --- a/docs/index.html +++ b/docs/index.html @@ -19,7 +19,7 @@ Python PDF parser and analyzer
-Last Modified: Fri Mar 26 11:14:17 UTC 2010 +Last Modified: Sun Mar 28 07:21:28 UTC 2010
@@ -63,6 +63,9 @@ PDF parser that can be used for other purposes instead of text analysis.
  • Tagged contents extraction.
  • Reconstruct the original layout by grouping text chunks. +

    +In terms of performance, PDFMiner is about 20 times slower than +software written in C/C++, such as XPdf.

    diff --git a/setup.py b/setup.py index 2229b2b..6f4850d 100644 --- a/setup.py +++ b/setup.py @@ -7,11 +7,12 @@ setup( version=__version__, description='PDF parser and analyzer', long_description='''PDFMiner is a suite of programs that help -extracting and analyzing text data of PDF documents. +extracting and analyzing text data from PDF documents. Unlike other PDF-related tools, it allows to obtain the exact location of texts in a page, as well as other extra information such as font information or ruled lines. -It includes a PDF converter that can transform PDF files +It can also infer its text flow and reconstruct the original layout. +PDFMiner includes a PDF converter that can transform PDF files into other text formats (such as HTML). It has an extensible PDF parser that can be used for other purposes instead of text analysis.''', license='MIT/X', @@ -33,5 +34,6 @@ PDF parser that can be used for other purposes instead of text analysis.''', 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', 'License :: OSI Approved :: MIT License', + 'Topic :: Text Processing', ], )