diff --git a/README.html b/README.html
index ed974f6..9255959 100644
--- a/README.html
+++ b/README.html
@@ -18,7 +18,7 @@ Python PDF parser and analyzer
Features:
@@ -140,7 +140,7 @@ PDFMiner comes with two handy tools:$ python -m pdfminer.cmap
pdf2txt.py
extracts text contents from a PDF file.
-It extracts all the texts that are to be rendered programatically,
+It extracts all the texts that are to be rendered programmatically,
It cannot recognize texts drawn as images that would require optical character recognition.
It also extracts the corresponding locations, font names, font sizes, writing
direction (horizontal or vertical) for each text portion.
@@ -202,7 +202,7 @@ In the figure below, two text chunks whose distance is closer than
the char_margin (shown as M) is considered
continuous and get grouped into one. Also, two lines whose distance is closer than
the line_margin (L) is grouped
-as a text box, which is a recutangular area that contains a "cluster" of texts.
+as a text box, which is a rectangular area that contains a "cluster" of texts.
Furthermore, it may be required to insert blank characters (spaces) as necessary
if the distance between two words is greater than the word_margin
(W), as a blank between words might not be
diff --git a/setup.py b/setup.py
index 8ab539f..d51caa7 100644
--- a/setup.py
+++ b/setup.py
@@ -2,12 +2,30 @@
from distutils.core import setup
from pdfminer import __version__
-setup(name='pdfminer',
- version=__version__,
- description='PDF parser and analyzer',
- license='MIT/X',
- author='Yusuke Shinyama',
- url='http://www.unixuser.org/~euske/python/pdfminer/index.html',
- packages=['pdfminer'],
- scripts=['tools/pdf2txt.py', 'tools/dumppdf.py'],
- )
+setup(
+ name='pdfminer',
+ version=__version__,
+ description='PDF parser and analyzer',
+ long_description='''PDFMiner is a suite of programs that help
+extract and analyze text data from PDF documents.
+Unlike other PDF-related tools, it lets you obtain
+the exact location of texts in a page, as well as
+other extra information such as font information or ruled lines.
+It includes a PDF converter that can transform PDF files
+into other text formats (such as HTML). It has an extensible
+PDF parser that can be used for purposes other than text analysis.''',
+ keywords='pdf parser, pdf converter, text mining',
+ license='MIT/X',
+ author='Yusuke Shinyama',
+ author_email='yusuke at cs dot nyu dot edu',
+ url='http://www.unixuser.org/~euske/python/pdfminer/index.html',
+ packages=['pdfminer'],
+ scripts=['tools/pdf2txt.py', 'tools/dumppdf.py'],
+ classifiers=[
+ 'Development Status :: 4 - Beta',
+ 'Environment :: Console',
+ 'Intended Audience :: Developers',
+ 'Intended Audience :: Science/Research',
+ 'License :: OSI Approved :: MIT License',
+ ],
+ )