Use charset-normalizer instead of chardet (#744)
* Use charset-normalizer instead of chardet
* Ignore charset_normalizer type stub
* Add CHANGELOG.md

pull/749/head
parent
617e4c8388
commit
1bf3c42b59
|
@@ -19,6 +19,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||||
|
|
||||||
- Exporting images without any specific encoding ([#737](https://github.com/pdfminer/pdfminer.six/pull/737))
|
- Exporting images without any specific encoding ([#737](https://github.com/pdfminer/pdfminer.six/pull/737))
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- Using charset-normalizer instead of chardet for less restrictive license ([#744](https://github.com/pdfminer/pdfminer.six/pull/744))
|
||||||
|
|
||||||
## [20220319]
|
## [20220319]
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
7
mypy.ini
7
mypy.ini
|
@@ -23,8 +23,11 @@ ignore_missing_imports = True
|
||||||
[mypy-pytest.*]
|
[mypy-pytest.*]
|
||||||
ignore_missing_imports = True
|
ignore_missing_imports = True
|
||||||
|
|
||||||
[mypy-setuptools]
|
[mypy-setuptools.*]
|
||||||
ignore_missing_imports = True
|
ignore_missing_imports = True
|
||||||
|
|
||||||
[mypy-nox]
|
[mypy-nox.*]
|
||||||
|
ignore_missing_imports = True
|
||||||
|
|
||||||
|
[mypy-charset_normalizer.*]
|
||||||
ignore_missing_imports = True
|
ignore_missing_imports = True
|
|
@@ -28,7 +28,7 @@ from typing import (
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from .layout import LTComponent
|
from .layout import LTComponent
|
||||||
|
|
||||||
import chardet # For str encoding detection
|
import charset_normalizer # For str encoding detection
|
||||||
|
|
||||||
# from sys import maxint as INF doesn't work anymore under Python3, but PDF
|
# from sys import maxint as INF doesn't work anymore under Python3, but PDF
|
||||||
# still uses 32 bits ints
|
# still uses 32 bits ints
|
||||||
|
@@ -75,7 +75,7 @@ def make_compat_bytes(in_str: str) -> bytes:
|
||||||
def make_compat_str(o: object) -> str:
|
def make_compat_str(o: object) -> str:
|
||||||
"""Converts everything to string, if bytes guessing the encoding."""
|
"""Converts everything to string, if bytes guessing the encoding."""
|
||||||
if isinstance(o, bytes):
|
if isinstance(o, bytes):
|
||||||
enc = chardet.detect(o)
|
enc = charset_normalizer.detect(o)
|
||||||
try:
|
try:
|
||||||
return o.decode(enc["encoding"])
|
return o.decode(enc["encoding"])
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
|
|
4
setup.py
4
setup.py
|
@@ -17,8 +17,8 @@ setup(
|
||||||
packages=["pdfminer"],
|
packages=["pdfminer"],
|
||||||
package_data={"pdfminer": ["cmap/*.pickle.gz", "py.typed"]},
|
package_data={"pdfminer": ["cmap/*.pickle.gz", "py.typed"]},
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'chardet ; python_version > "3.0"',
|
"charset-normalizer~=2.0.0",
|
||||||
"cryptography",
|
"cryptography~=36.0.0",
|
||||||
],
|
],
|
||||||
extras_require={
|
extras_require={
|
||||||
"dev": ["pytest", "nox", "black", "mypy == 0.931"],
|
"dev": ["pytest", "nox", "black", "mypy == 0.931"],
|
||||||
|
|
Loading…
Reference in New Issue