Merge branch 'master' of github.com:euske/pdfminer

pull/1/head
Yusuke Shinyama 2013-04-09 18:28:24 +09:00
commit d932bf675e
3 changed files with 20 additions and 8 deletions

View File

@@ -1,9 +1,13 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
import cStringIO
import logging
import sys import sys
import struct import struct
import os, os.path import os, os.path
from PIL import Image
from PIL import ImageChops
from pdftypes import LITERALS_DCT_DECODE from pdftypes import LITERALS_DCT_DECODE
from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB, LITERAL_DEVICE_CMYK
def align32(x): def align32(x):
return ((x+3)/4)*4 return ((x+3)/4)*4
@@ -77,7 +81,15 @@ class ImageWriter(object):
path = os.path.join(self.outdir, name) path = os.path.join(self.outdir, name)
fp = file(path, 'wb') fp = file(path, 'wb')
if ext == '.jpg': if ext == '.jpg':
fp.write(stream.get_rawdata()) raw_data = stream.get_rawdata()
if LITERAL_DEVICE_CMYK in image.colorspace:
ifp = cStringIO.StringIO(raw_data)
i = Image.open(ifp)
i = ImageChops.invert(i)
i = i.convert('RGB')
i.save(fp, 'JPEG')
else:
fp.write(raw_data)
elif image.bits == 1: elif image.bits == 1:
bmp = BMPWriter(fp, 1, width, height) bmp = BMPWriter(fp, 1, width, height)
data = stream.get_data() data = stream.get_data()

View File

@@ -609,8 +609,8 @@ class LTLayoutContainer(LTContainer):
group = LTTextGroupLRTB([obj1,obj2]) group = LTTextGroupLRTB([obj1,obj2])
plane.remove(obj1) plane.remove(obj1)
plane.remove(obj2) plane.remove(obj2)
dists = [ (c,d,o1,o2) for (c,d,o1,o2) in dists # this line is optimized -- don't change without profiling
if o1 in plane and o2 in plane ] dists = [ n for n in dists if n[2] in plane._objs and n[3] in plane._objs ]
for other in plane: for other in plane:
dists.append((0, dist(group,other), group, other)) dists.append((0, dist(group,other), group, other))
dists.sort() dists.sort()