Raise more specific error if Pillow cannot be imported (#714)
* Raise specific warning if Pillow cannot be imported * Improve error message * Update docs * Update CHANGELOG.md * Update pdfminer/image.py Co-authored-by: Jake Stockwin <jstockwin@gmail.com> Co-authored-by: Jake Stockwin <jstockwin@gmail.com>pull/730/head
parent
b9a8920cdf
commit
121235e24b
|
@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||||
- Export type annotations from pypi package per PEP561 ([#679](https://github.com/pdfminer/pdfminer.six/pull/679))
|
- Export type annotations from pypi package per PEP561 ([#679](https://github.com/pdfminer/pdfminer.six/pull/679))
|
||||||
- Support for identity cmap's ([#626](https://github.com/pdfminer/pdfminer.six/pull/626))
|
- Support for identity cmap's ([#626](https://github.com/pdfminer/pdfminer.six/pull/626))
|
||||||
- Add support for PDF page labels ([#680](https://github.com/pdfminer/pdfminer.six/pull/680))
|
- Add support for PDF page labels ([#680](https://github.com/pdfminer/pdfminer.six/pull/680))
|
||||||
|
- Installation of Pillow as an optional extra dependency ([#714](https://github.com/pdfminer/pdfminer.six/pull/714))
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
- Handle decompression error due to CRC checksum error ([#637](https://github.com/pdfminer/pdfminer.six/pull/637))
|
- Handle decompression error due to CRC checksum error ([#637](https://github.com/pdfminer/pdfminer.six/pull/637))
|
||||||
|
|
|
@ -43,6 +43,10 @@ How to use
|
||||||
|
|
||||||
`pip install pdfminer.six`
|
`pip install pdfminer.six`
|
||||||
|
|
||||||
|
* (Optionally) install extra dependencies for extracting images.
|
||||||
|
|
||||||
|
`pip install 'pdfminer.six[image]'`
|
||||||
|
|
||||||
* Use command-line interface to extract text from pdf:
|
* Use command-line interface to extract text from pdf:
|
||||||
|
|
||||||
`python pdf2txt.py samples/simple1.pdf`
|
`python pdf2txt.py samples/simple1.pdf`
|
||||||
|
|
|
@ -66,6 +66,13 @@ Before using it, you must install it using Python 3.6 or newer.
|
||||||
$ pip install pdfminer.six
|
$ pip install pdfminer.six
|
||||||
|
|
||||||
|
|
||||||
|
Optionally install extra dependencies that are needed to extract jpg images.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
$ pip install 'pdfminer.six[image]'
|
||||||
|
|
||||||
|
|
||||||
Contributing
|
Contributing
|
||||||
============
|
============
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,12 @@ from .pdfcolor import LITERAL_DEVICE_GRAY
|
||||||
from .pdfcolor import LITERAL_DEVICE_RGB
|
from .pdfcolor import LITERAL_DEVICE_RGB
|
||||||
from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE, LITERALS_JPX_DECODE
|
from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE, LITERALS_JPX_DECODE
|
||||||
|
|
||||||
|
PIL_ERROR_MESSAGE = (
|
||||||
|
"Could not import Pillow. This dependency of pdfminer.six is not "
|
||||||
|
"installed by default. You need it to to save jpg images to a file. Install it "
|
||||||
|
"with `pip install 'pdfminer.six[image]'`"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def align32(x: int) -> int:
|
def align32(x: int) -> int:
|
||||||
return ((x + 3) // 4) * 4
|
return ((x + 3) // 4) * 4
|
||||||
|
@ -93,8 +99,10 @@ class ImageWriter:
|
||||||
raw_data = image.stream.get_rawdata()
|
raw_data = image.stream.get_rawdata()
|
||||||
assert raw_data is not None
|
assert raw_data is not None
|
||||||
if LITERAL_DEVICE_CMYK in image.colorspace:
|
if LITERAL_DEVICE_CMYK in image.colorspace:
|
||||||
from PIL import Image # type: ignore[import]
|
try:
|
||||||
from PIL import ImageChops
|
from PIL import Image, ImageChops # type: ignore[import]
|
||||||
|
except ImportError:
|
||||||
|
raise ImportError(PIL_ERROR_MESSAGE)
|
||||||
|
|
||||||
ifp = BytesIO(raw_data)
|
ifp = BytesIO(raw_data)
|
||||||
i = Image.open(ifp)
|
i = Image.open(ifp)
|
||||||
|
@ -104,12 +112,15 @@ class ImageWriter:
|
||||||
else:
|
else:
|
||||||
fp.write(raw_data)
|
fp.write(raw_data)
|
||||||
elif ext == ".jp2":
|
elif ext == ".jp2":
|
||||||
|
try:
|
||||||
|
from PIL import Image
|
||||||
|
except ImportError:
|
||||||
|
raise ImportError(PIL_ERROR_MESSAGE)
|
||||||
|
|
||||||
# if we just write the raw data, most image programs
|
# if we just write the raw data, most image programs
|
||||||
# that I have tried cannot open the file. However,
|
# that I have tried cannot open the file. However,
|
||||||
# open and saving with PIL produces a file that
|
# open and saving with PIL produces a file that
|
||||||
# seems to be easily opened by other programs
|
# seems to be easily opened by other programs
|
||||||
from PIL import Image
|
|
||||||
|
|
||||||
raw_data = image.stream.get_rawdata()
|
raw_data = image.stream.get_rawdata()
|
||||||
assert raw_data is not None
|
assert raw_data is not None
|
||||||
ifp = BytesIO(raw_data)
|
ifp = BytesIO(raw_data)
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -23,6 +23,7 @@ setup(
|
||||||
extras_require={
|
extras_require={
|
||||||
"dev": ["pytest", "nox", "black", "mypy == 0.931"],
|
"dev": ["pytest", "nox", "black", "mypy == 0.931"],
|
||||||
"docs": ["sphinx", "sphinx-argparse"],
|
"docs": ["sphinx", "sphinx-argparse"],
|
||||||
|
"image": ["Pillow"],
|
||||||
},
|
},
|
||||||
description="PDF parser and analyzer",
|
description="PDF parser and analyzer",
|
||||||
long_description=readme,
|
long_description=readme,
|
||||||
|
|
Loading…
Reference in New Issue