Add support for JPEG2000 image encoding

2022-01-23 21:17:47 +01:00 · 2022-01-23 21:17:47 +01:00 · 708dd20465
parent b82229245a
commit 708dd20465
3 changed files with 18 additions and 1 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Fixed
 - Handle decompression error due to CRC checksum error ([#637](https://github.com/pdfminer/pdfminer.six/pull/637))
 - Add handling of JPXDecode filter to enable extraction of images for some pdfs ([#645](https://github.com/pdfminer/pdfminer.six/pull/645))
 ## [20211012]
--- a/pdfminer/image.py
+++ b/pdfminer/image.py
@ -9,7 +9,8 @@ from .layout import LTImage
 from .pdfcolor import LITERAL_DEVICE_CMYK
 from .pdfcolor import LITERAL_DEVICE_GRAY
 from .pdfcolor import LITERAL_DEVICE_RGB
-from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE
+from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE, \
    LITERALS_JPX_DECODE
 def align32(x: int) -> int:
@ -99,6 +100,16 @@ class ImageWriter:
                i.save(fp, 'JPEG')
            else:
                fp.write(raw_data)
        elif ext == '.jp2':
            # if we just write the raw data, most image programs
            # that I have tried cannot open the file. However,
            # opening and saving with PIL produces a file that
            # seems to be easily opened by other programs
            from PIL import Image
            raw_data = image.stream.get_rawdata()
            ifp = BytesIO(raw_data)
            i = Image.open(ifp)
            i.save(fp, 'JPEG2000')
        elif is_jbig2:
            input_stream = BytesIO()
            input_stream.write(image.stream.get_data())
@ -156,6 +167,8 @@ class ImageWriter:
        filters = image.stream.get_filters()
        if len(filters) == 1 and filters[0][0] in LITERALS_DCT_DECODE:
            ext = '.jpg'
        elif len(filters) == 1 and filters[0][0] in LITERALS_JPX_DECODE:
            ext = '.jp2'
        elif is_jbig2:
            ext = '.jb2'
        elif (image.bits == 1 or
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@ -34,6 +34,7 @@ LITERALS_RUNLENGTH_DECODE = (LIT('RunLengthDecode'), LIT('RL'))
 LITERALS_CCITTFAX_DECODE = (LIT('CCITTFaxDecode'), LIT('CCF'))
 LITERALS_DCT_DECODE = (LIT('DCTDecode'), LIT('DCT'))
 LITERALS_JBIG2_DECODE = (LIT('JBIG2Decode'),)
 LITERALS_JPX_DECODE = (LIT('JPXDecode'),)
 if sys.version_info >= (3, 8):
@ -358,6 +359,8 @@ class PDFStream(PDFObject):
                pass
            elif f in LITERALS_JBIG2_DECODE:
                pass
            elif f in LITERALS_JPX_DECODE:
                pass
            elif f == LITERAL_CRYPT:
                # not yet..
                raise PDFNotImplementedError('/Crypt filter is unsupported')