diff --git a/CHANGELOG.md b/CHANGELOG.md index 82c3d45..97b5236 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Exporting images without any specific encoding ([#737](https://github.com/pdfminer/pdfminer.six/pull/737)) +### Changed + +- Using charset-normalizer instead of chardet for less restrictive license ([#744](https://github.com/pdfminer/pdfminer.six/pull/744)) + ## [20220319] ### Added diff --git a/mypy.ini b/mypy.ini index ee71111..8c943f0 100644 --- a/mypy.ini +++ b/mypy.ini @@ -23,8 +23,11 @@ ignore_missing_imports = True [mypy-pytest.*] ignore_missing_imports = True -[mypy-setuptools] +[mypy-setuptools.*] ignore_missing_imports = True -[mypy-nox] +[mypy-nox.*] +ignore_missing_imports = True + +[mypy-charset_normalizer.*] ignore_missing_imports = True \ No newline at end of file diff --git a/pdfminer/utils.py b/pdfminer/utils.py index 3f6cca5..599d57b 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -28,7 +28,7 @@ from typing import ( if TYPE_CHECKING: from .layout import LTComponent -import chardet # For str encoding detection +import charset_normalizer # For str encoding detection # from sys import maxint as INF doesn't work anymore under Python3, but PDF # still uses 32 bits ints @@ -75,7 +75,7 @@ def make_compat_bytes(in_str: str) -> bytes: def make_compat_str(o: object) -> str: """Converts everything to string, if bytes guessing the encoding.""" if isinstance(o, bytes): - enc = chardet.detect(o) + enc = charset_normalizer.detect(o) try: return o.decode(enc["encoding"]) except UnicodeDecodeError: diff --git a/setup.py b/setup.py index b9bb29d..e5a0067 100644 --- a/setup.py +++ b/setup.py @@ -17,8 +17,8 @@ setup( packages=["pdfminer"], package_data={"pdfminer": ["cmap/*.pickle.gz", "py.typed"]}, install_requires=[ - 'chardet ; python_version > "3.0"', - "cryptography", + "charset-normalizer~=2.0.0", + "cryptography~=36.0.0", ], extras_require={ "dev": ["pytest", "nox", "black", "mypy == 0.931"],