From 6413eb7de4a0d9e96d0605d4c0d8f1680a8ad0ca Mon Sep 17 00:00:00 2001 From: Jim Morrison Date: Tue, 24 Jan 2012 16:18:36 -0800 Subject: [PATCH] Deal with CMYK images by converting them to RGB. PIL does not invert CMYK images as of PIL 1.1.7, so the invert happens in ImageWriter. --- pdfminer/image.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pdfminer/image.py b/pdfminer/image.py index 9faf0c0..c5f85c7 100644 --- a/pdfminer/image.py +++ b/pdfminer/image.py @@ -1,9 +1,13 @@ #!/usr/bin/env python2 +import cStringIO +import logging import sys import struct import os, os.path +from PIL import Image +from PIL import ImageChops from pdftypes import LITERALS_DCT_DECODE -from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB +from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB, LITERAL_DEVICE_CMYK def align32(x): return ((x+3)/4)*4 @@ -77,7 +81,15 @@ class ImageWriter(object): path = os.path.join(self.outdir, name) fp = file(path, 'wb') if ext == '.jpg': - fp.write(stream.get_rawdata()) + raw_data = stream.get_rawdata() + if LITERAL_DEVICE_CMYK in image.colorspace: + ifp = cStringIO.StringIO(raw_data) + i = Image.open(ifp) + i = ImageChops.invert(i) + i = i.convert('RGB') + i.save(fp, 'JPEG') + else: + fp.write(raw_data) elif image.bits == 1: bmp = BMPWriter(fp, 1, width, height) data = stream.get_data()