From 434720f76763d5f2609c243456a1d864a40982c6 Mon Sep 17 00:00:00 2001 From: "yusuke.shinyama.dummy" Date: Sun, 4 Apr 2010 12:18:57 +0000 Subject: [PATCH] git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@199 1aa58f4a-7d42-0410-adbc-911cccaed67c --- docs/index.html | 5 ++++- setup.py | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/index.html b/docs/index.html index 74c0653..645f703 100644 --- a/docs/index.html +++ b/docs/index.html @@ -19,7 +19,7 @@ Python PDF parser and analyzer
-Last Modified: Fri Mar 26 11:14:17 UTC 2010 +Last Modified: Sun Mar 28 07:21:28 UTC 2010
@@ -63,6 +63,9 @@ PDF parser that can be used for other purposes instead of text analysis.
  • Tagged contents extraction.
  • Reconstruct the original layout by grouping text chunks. +

    +On the performance, PDFMiner is about 20 times slower than +other C/C++-based software such as XPdf.

    diff --git a/setup.py b/setup.py index 2229b2b..6f4850d 100644 --- a/setup.py +++ b/setup.py @@ -7,11 +7,12 @@ setup( version=__version__, description='PDF parser and analyzer', long_description='''PDFMiner is a suite of programs that help -extracting and analyzing text data of PDF documents. +extracting and analyzing text data from PDF documents. Unlike other PDF-related tools, it allows to obtain the exact location of texts in a page, as well as other extra information such as font information or ruled lines. -It includes a PDF converter that can transform PDF files +It can also infer its text flow and reconstruct the original layout. +PDFMiner includes a PDF converter that can transform PDF files into other text formats (such as HTML). It has an extensible PDF parser that can be used for other purposes instead of text analysis.''', license='MIT/X', @@ -33,5 +34,6 @@ PDF parser that can be used for other purposes instead of text analysis.''', 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', 'License :: OSI Approved :: MIT License', + 'Topic :: Text Processing', ], )