## PNG Predictor
##
def apply_png_predictor(pred, colors, columns, bitspercomponent, data):
    """Reverse PNG row filtering on a decoded stream and return the raw bytes.

    ``data`` is a sequence of rows; each row is one filter-type byte followed
    by ``colors * columns * bitspercomponent // 8`` filtered bytes.  The
    filter-type byte of every row selects the algorithm for that row.

    Parameters:
      pred: the /Predictor value from the decode parameters.  It is accepted
        for interface compatibility but not consulted: the per-row filter
        byte decides how each row is decoded.
      colors: number of colour components per sample.
      columns: number of samples per row.
      bitspercomponent: bits per colour component; only 8 is supported.
      data: filtered data as a (byte) string, as used throughout this
        Python 2 module.

    Returns:
      The unfiltered rows concatenated into one string.

    Raises:
      ValueError: if ``bitspercomponent`` is not 8 (the argument is the
        offending value), or if a row uses a filter type other than
        None (0), Sub (1) or Up (2) (the argument is the filter byte).
    """
    if bitspercomponent != 8:
        # unsupported
        raise ValueError(bitspercomponent)
    # Floor division keeps both sizes integral on Python 2 and Python 3.
    # bpp is the distance in bytes to the same component of the pixel on
    # the left, which is what the Sub filter refers to.
    bpp = colors * bitspercomponent // 8
    nbytes = colors * columns * bitspercomponent // 8
    rows = []
    # The row "above" the first row is defined to be all zeros, and it must
    # be a full row wide (nbytes, not columns) or zip() would cut rows short.
    prior = [0] * nbytes
    i = 0
    while i < len(data):
        filtertype = data[i]
        i += 1
        raw = [ord(c) for c in data[i:i+nbytes]]
        i += nbytes
        if filtertype == '\x00':
            # PNG none
            cur = raw
        elif filtertype == '\x01':
            # PNG sub: add the decoded byte one pixel (bpp bytes) to the left.
            cur = []
            for (j, x) in enumerate(raw):
                left = cur[j-bpp] if j >= bpp else 0
                cur.append((x + left) & 255)
        elif filtertype == '\x02':
            # PNG up: add the decoded byte directly above.
            cur = [(x + up) & 255 for (x, up) in zip(raw, prior)]
        else:
            # unsupported
            raise ValueError(filtertype)
        rows.append(''.join(map(chr, cur)))
        # Later rows are predicted from the *decoded* row, not the raw one.
        prior = cur
    return ''.join(rows)