2022-02-02 21:24:32 +00:00
|
|
|
import sys
|
|
|
|
from pathlib import Path
|
|
|
|
|
Many changes to make pdf2txt.py work better in Py3: some in that script, others in the module!
Sorry, changes should have been more atomic.
*In pdf2txt.py:*
* Re-wrote main function to use argparse instead of optparse.
* Manually tested in Py2/Py3 to get partial consistency.
* Errors abound, including in Tags mode, but most modes weren't working at all in Py3 anyway.
* Py2 mode *probably* unchanged, cannot find any bugs yet...
* Kept old main function for posterity, for now.
*In utils:*
* Added a few compatibility functions (some string hacks required chardet, a new dependency):
- make_compat_bytes(in_str)-> (py3->bytes | py2->str)
- make_compat_str(in_str)-> (str)
- compatible_encode_method(bytesorstring, encoding, erraction)-> (str)
*In pdfdevice:*
* To handle different output filetypes in Py3, injected lots of calls to new utils methods,
as well as some six.PYX checks and logic. These changes are largely responsible for
enhanced Py2/Py3 consistency.
*In converter:*
* To handle output filetypes in Py2, injected a few checks and fixes, particularly around the
py2 `str.encode` method and its *assumed* usual equivalents in Py3.
2015-05-17 20:08:57 +00:00
|
|
|
from setuptools import setup
|
2020-01-04 17:15:15 +00:00
|
|
|
from os import path
|
2009-05-16 06:57:26 +00:00
|
|
|
|
2022-02-02 21:24:32 +00:00
|
|
|
# Add the directory containing this file to the import search path. The
# `pdfminer` import that follows is placed after this line on purpose,
# which is why it carries an E402 (import-not-at-top) suppression.
sys.path.append(str(Path(__file__).parent))
|
2022-06-26 16:25:28 +00:00
|
|
|
import pdfminer as package # noqa: E402
|
2020-01-04 17:15:15 +00:00
|
|
|
|
2022-02-11 21:46:51 +00:00
|
|
|
with open(path.join(path.abspath(path.dirname(__file__)), "README.md")) as f:
|
2020-01-04 17:15:15 +00:00
|
|
|
readme = f.read()
|
|
|
|
|
2009-07-11 15:38:13 +00:00
|
|
|
setup(
|
2022-02-11 21:46:51 +00:00
|
|
|
name="pdfminer.six",
|
2017-08-18 06:13:15 +00:00
|
|
|
version=package.__version__,
|
2022-02-11 21:46:51 +00:00
|
|
|
packages=["pdfminer"],
|
|
|
|
package_data={"pdfminer": ["cmap/*.pickle.gz", "py.typed"]},
|
2019-10-20 09:41:31 +00:00
|
|
|
install_requires=[
|
2022-05-07 18:35:18 +00:00
|
|
|
"charset-normalizer >= 2.0.0",
|
|
|
|
"cryptography >= 36.0.0",
|
2022-06-26 15:47:28 +00:00
|
|
|
# Requirement with an environment marker: the part after ";" restricts this
# dependency to interpreters where python_version < "3.8".
'typing_extensions; python_version < "3.8"',
|
2019-10-20 09:41:31 +00:00
|
|
|
],
|
2019-11-07 20:12:34 +00:00
|
|
|
extras_require={
|
2022-02-11 21:46:51 +00:00
|
|
|
"dev": ["pytest", "nox", "black", "mypy == 0.931"],
|
2019-11-07 20:12:34 +00:00
|
|
|
"docs": ["sphinx", "sphinx-argparse"],
|
2022-02-22 19:20:17 +00:00
|
|
|
"image": ["Pillow"],
|
2019-11-07 20:12:34 +00:00
|
|
|
},
|
2022-02-11 21:46:51 +00:00
|
|
|
description="PDF parser and analyzer",
|
2020-01-04 17:15:15 +00:00
|
|
|
long_description=readme,
|
2022-02-11 21:46:51 +00:00
|
|
|
long_description_content_type="text/markdown",
|
|
|
|
license="MIT/X",
|
|
|
|
author="Yusuke Shinyama + Philippe Guglielmetti",
|
|
|
|
author_email="pdfminer@goulu.net",
|
|
|
|
url="https://github.com/pdfminer/pdfminer.six",
|
2009-10-24 04:41:59 +00:00
|
|
|
scripts=[
|
2022-02-11 21:46:51 +00:00
|
|
|
"tools/pdf2txt.py",
|
|
|
|
"tools/dumppdf.py",
|
2017-08-18 06:13:15 +00:00
|
|
|
],
|
|
|
|
keywords=[
|
2022-02-11 21:46:51 +00:00
|
|
|
"pdf parser",
|
|
|
|
"pdf converter",
|
|
|
|
"layout analysis",
|
|
|
|
"text mining",
|
2009-07-21 14:23:23 +00:00
|
|
|
],
|
2022-02-11 21:46:51 +00:00
|
|
|
python_requires=">=3.6",
|
2009-10-24 04:41:59 +00:00
|
|
|
classifiers=[
|
2022-02-11 21:46:51 +00:00
|
|
|
"Programming Language :: Python",
|
|
|
|
"Programming Language :: Python :: 3.6",
|
|
|
|
"Programming Language :: Python :: 3.7",
|
|
|
|
"Programming Language :: Python :: 3.8",
|
|
|
|
"Programming Language :: Python :: 3.9",
|
|
|
|
"Programming Language :: Python :: 3 :: Only",
|
|
|
|
"Development Status :: 5 - Production/Stable",
|
|
|
|
"Environment :: Console",
|
|
|
|
"Intended Audience :: Developers",
|
|
|
|
"Intended Audience :: Science/Research",
|
|
|
|
"License :: OSI Approved :: MIT License",
|
|
|
|
"Topic :: Text Processing",
|
2009-10-24 04:41:59 +00:00
|
|
|
],
|
2017-08-18 06:13:15 +00:00
|
|
|
)
|