Raise more specific error if Pillow cannot be imported (#714)

* Raise specific warning if Pillow cannot be imported

* Improve error message

* Update docs

* Update CHANGELOG.md

* Update pdfminer/image.py

Co-authored-by: Jake Stockwin <jstockwin@gmail.com>

Co-authored-by: Jake Stockwin <jstockwin@gmail.com>
pull/730/head
Pieter Marsman 2022-02-22 20:20:17 +01:00 committed by GitHub
parent b9a8920cdf
commit 121235e24b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 28 additions and 4 deletions

View File

@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Export type annotations from pypi package per PEP561 ([#679](https://github.com/pdfminer/pdfminer.six/pull/679)) - Export type annotations from pypi package per PEP561 ([#679](https://github.com/pdfminer/pdfminer.six/pull/679))
- Support for identity cmaps ([#626](https://github.com/pdfminer/pdfminer.six/pull/626)) - Support for identity cmaps ([#626](https://github.com/pdfminer/pdfminer.six/pull/626))
- Add support for PDF page labels ([#680](https://github.com/pdfminer/pdfminer.six/pull/680)) - Add support for PDF page labels ([#680](https://github.com/pdfminer/pdfminer.six/pull/680))
- Installation of Pillow as an optional extra dependency ([#714](https://github.com/pdfminer/pdfminer.six/pull/714))
### Fixed ### Fixed
- Handle decompression error due to CRC checksum error ([#637](https://github.com/pdfminer/pdfminer.six/pull/637)) - Handle decompression error due to CRC checksum error ([#637](https://github.com/pdfminer/pdfminer.six/pull/637))

View File

@ -43,6 +43,10 @@ How to use
`pip install pdfminer.six` `pip install pdfminer.six`
* (Optionally) install extra dependencies for extracting images.
`pip install 'pdfminer.six[image]'`
* Use command-line interface to extract text from pdf: * Use command-line interface to extract text from pdf:
`python pdf2txt.py samples/simple1.pdf` `python pdf2txt.py samples/simple1.pdf`

View File

@ -66,6 +66,13 @@ Before using it, you must install it using Python 3.6 or newer.
$ pip install pdfminer.six $ pip install pdfminer.six
Optionally install extra dependencies that are needed to extract jpg images.
::
$ pip install 'pdfminer.six[image]'
Contributing Contributing
============ ============

View File

@ -11,6 +11,12 @@ from .pdfcolor import LITERAL_DEVICE_GRAY
from .pdfcolor import LITERAL_DEVICE_RGB from .pdfcolor import LITERAL_DEVICE_RGB
from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE, LITERALS_JPX_DECODE from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE, LITERALS_JPX_DECODE
# Message for the ImportError raised when the optional Pillow dependency
# cannot be imported while saving an image; it tells the user how to install
# the extra that provides it.
PIL_ERROR_MESSAGE = (
    "Could not import Pillow. This dependency of pdfminer.six is not "
    "installed by default. You need it to save jpg images to a file. Install it "
    "with `pip install 'pdfminer.six[image]'`"
)
def align32(x: int) -> int: def align32(x: int) -> int:
return ((x + 3) // 4) * 4 return ((x + 3) // 4) * 4
@ -93,8 +99,10 @@ class ImageWriter:
raw_data = image.stream.get_rawdata() raw_data = image.stream.get_rawdata()
assert raw_data is not None assert raw_data is not None
if LITERAL_DEVICE_CMYK in image.colorspace: if LITERAL_DEVICE_CMYK in image.colorspace:
from PIL import Image # type: ignore[import] try:
from PIL import ImageChops from PIL import Image, ImageChops # type: ignore[import]
except ImportError:
raise ImportError(PIL_ERROR_MESSAGE)
ifp = BytesIO(raw_data) ifp = BytesIO(raw_data)
i = Image.open(ifp) i = Image.open(ifp)
@ -104,12 +112,15 @@ class ImageWriter:
else: else:
fp.write(raw_data) fp.write(raw_data)
elif ext == ".jp2": elif ext == ".jp2":
try:
from PIL import Image
except ImportError:
raise ImportError(PIL_ERROR_MESSAGE)
# if we just write the raw data, most image programs # if we just write the raw data, most image programs
# that I have tried cannot open the file. However, # that I have tried cannot open the file. However,
# open and saving with PIL produces a file that # open and saving with PIL produces a file that
# seems to be easily opened by other programs # seems to be easily opened by other programs
from PIL import Image
raw_data = image.stream.get_rawdata() raw_data = image.stream.get_rawdata()
assert raw_data is not None assert raw_data is not None
ifp = BytesIO(raw_data) ifp = BytesIO(raw_data)

View File

@ -23,6 +23,7 @@ setup(
extras_require={ extras_require={
"dev": ["pytest", "nox", "black", "mypy == 0.931"], "dev": ["pytest", "nox", "black", "mypy == 0.931"],
"docs": ["sphinx", "sphinx-argparse"], "docs": ["sphinx", "sphinx-argparse"],
"image": ["Pillow"],
}, },
description="PDF parser and analyzer", description="PDF parser and analyzer",
long_description=readme, long_description=readme,