Add support for JPEG2000 image encoding
parent
b82229245a
commit
708dd20465
|
@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
- Handle decompression error due to CRC checksum error ([#637](https://github.com/pdfminer/pdfminer.six/pull/637))
|
- Handle decompression error due to CRC checksum error ([#637](https://github.com/pdfminer/pdfminer.six/pull/637))
|
||||||
|
- Add handling of JPXDecode filter to enable extraction of images for some pdfs ([#645](https://github.com/pdfminer/pdfminer.six/pull/645))
|
||||||
|
|
||||||
## [20211012]
|
## [20211012]
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,8 @@ from .layout import LTImage
|
||||||
from .pdfcolor import LITERAL_DEVICE_CMYK
|
from .pdfcolor import LITERAL_DEVICE_CMYK
|
||||||
from .pdfcolor import LITERAL_DEVICE_GRAY
|
from .pdfcolor import LITERAL_DEVICE_GRAY
|
||||||
from .pdfcolor import LITERAL_DEVICE_RGB
|
from .pdfcolor import LITERAL_DEVICE_RGB
|
||||||
from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE
|
from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE, \
|
||||||
|
LITERALS_JPX_DECODE
|
||||||
|
|
||||||
|
|
||||||
def align32(x: int) -> int:
|
def align32(x: int) -> int:
|
||||||
|
@ -99,6 +100,16 @@ class ImageWriter:
|
||||||
i.save(fp, 'JPEG')
|
i.save(fp, 'JPEG')
|
||||||
else:
|
else:
|
||||||
fp.write(raw_data)
|
fp.write(raw_data)
|
||||||
|
elif ext == '.jp2':
|
||||||
|
# if we just write the raw data, most image programs
|
||||||
|
# that I have tried cannot open the file. However,
|
||||||
|
# opening and saving with PIL produces a file that
|
||||||
|
# seems to be easily opened by other programs
|
||||||
|
from PIL import Image
|
||||||
|
raw_data = image.stream.get_rawdata()
|
||||||
|
ifp = BytesIO(raw_data)
|
||||||
|
i = Image.open(ifp)
|
||||||
|
i.save(fp, 'JPEG2000')
|
||||||
elif is_jbig2:
|
elif is_jbig2:
|
||||||
input_stream = BytesIO()
|
input_stream = BytesIO()
|
||||||
input_stream.write(image.stream.get_data())
|
input_stream.write(image.stream.get_data())
|
||||||
|
@ -156,6 +167,8 @@ class ImageWriter:
|
||||||
filters = image.stream.get_filters()
|
filters = image.stream.get_filters()
|
||||||
if len(filters) == 1 and filters[0][0] in LITERALS_DCT_DECODE:
|
if len(filters) == 1 and filters[0][0] in LITERALS_DCT_DECODE:
|
||||||
ext = '.jpg'
|
ext = '.jpg'
|
||||||
|
elif len(filters) == 1 and filters[0][0] in LITERALS_JPX_DECODE:
|
||||||
|
ext = '.jp2'
|
||||||
elif is_jbig2:
|
elif is_jbig2:
|
||||||
ext = '.jb2'
|
ext = '.jb2'
|
||||||
elif (image.bits == 1 or
|
elif (image.bits == 1 or
|
||||||
|
|
|
@ -34,6 +34,7 @@ LITERALS_RUNLENGTH_DECODE = (LIT('RunLengthDecode'), LIT('RL'))
|
||||||
LITERALS_CCITTFAX_DECODE = (LIT('CCITTFaxDecode'), LIT('CCF'))
|
LITERALS_CCITTFAX_DECODE = (LIT('CCITTFaxDecode'), LIT('CCF'))
|
||||||
LITERALS_DCT_DECODE = (LIT('DCTDecode'), LIT('DCT'))
|
LITERALS_DCT_DECODE = (LIT('DCTDecode'), LIT('DCT'))
|
||||||
LITERALS_JBIG2_DECODE = (LIT('JBIG2Decode'),)
|
LITERALS_JBIG2_DECODE = (LIT('JBIG2Decode'),)
|
||||||
|
LITERALS_JPX_DECODE = (LIT('JPXDecode'),)
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info >= (3, 8):
|
if sys.version_info >= (3, 8):
|
||||||
|
@ -358,6 +359,8 @@ class PDFStream(PDFObject):
|
||||||
pass
|
pass
|
||||||
elif f in LITERALS_JBIG2_DECODE:
|
elif f in LITERALS_JBIG2_DECODE:
|
||||||
pass
|
pass
|
||||||
|
elif f in LITERALS_JPX_DECODE:
|
||||||
|
pass
|
||||||
elif f == LITERAL_CRYPT:
|
elif f == LITERAL_CRYPT:
|
||||||
# not yet..
|
# not yet..
|
||||||
raise PDFNotImplementedError('/Crypt filter is unsupported')
|
raise PDFNotImplementedError('/Crypt filter is unsupported')
|
||||||
|
|
Loading…
Reference in New Issue