Accept both short and long spellings of stream dictionary keys.

* pdftypes.py: add PDFStream.get_any(names, default=None), which returns the
  value of the first key in `names` that is present in the stream attributes,
  or `default` when none is.  The stream-decoding code now uses it to look up
  'F'/'Filter' and 'DP'/'DecodeParms'/'FDecodeParms'.  The predictor step is
  applied only when both 'Predictor' and 'Columns' are present; a missing
  'Columns' no longer raises PDFValueError.
* converter.py: render_image reads the image attributes through get_any
  ('IM'/'ImageMask', 'BPC'/'BitsPerComponent', 'CS'/'ColorSpace',
  'F'/'Filter', 'W'/'Width', 'H'/'Height').
* pdffont.py: PDFFont additionally stores 'Flags' and 'ItalicAngle' read from
  the font descriptor.

diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index ad721fb..41e08b6 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -52,8 +52,14 @@ class PDFPageAggregator(PDFTextDevice):
 
     def render_image(self, name, stream):
         assert isinstance(self.cur_item, LTFigure)
-        item = LTImage(name, stream.get('Filter'),
-                       (stream['Width'], stream['Height']),
+        ismask = stream.get_any(('IM', 'ImageMask'))
+        bits = stream.get_any(('BPC', 'BitsPerComponent'), 1)
+        csp = stream.get_any(('CS', 'ColorSpace'))
+        if not isinstance(csp, list):
+            csp = [csp]
+        item = LTImage(name, stream.get_any(('F', 'Filter')),
+                       (stream.get_any(('W', 'Width')),
+                        stream.get_any(('H', 'Height'))),
                        (self.cur_item.x0, self.cur_item.y0,
                         self.cur_item.x1, self.cur_item.y1),
                        stream.get_rawdata())
diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py
index dfb1b8e..a62aa17 100644
--- a/pdfminer/pdffont.py
+++ b/pdfminer/pdffont.py
@@ -338,8 +338,10 @@ class PDFFont(object):
         self.fontname = descriptor.get('FontName', 'unknown')
         if isinstance(self.fontname, PSLiteral):
             self.fontname = literal_name(self.fontname)
+        self.flags = int_value(descriptor.get('Flags', 0))
         self.ascent = num_value(descriptor.get('Ascent', 0))
         self.descent = num_value(descriptor.get('Descent', 0))
+        self.italic_angle = num_value(descriptor.get('ItalicAngle', 0))
         self.default_width = default_width or descriptor.get('MissingWidth', 0)
         self.leading = num_value(descriptor.get('Leading', 0))
         self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0)))
diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py
index 2d62282..78f7b5d 100644
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@@ -173,6 +173,11 @@ class PDFStream(PDFObject):
         return self.attrs[name]
     def get(self, name, default=None):
         return self.attrs.get(name, default)
+    def get_any(self, names, default=None):
+        for name in names:
+            if name in self.attrs:
+                return self.attrs[name]
+        return default
 
     def decomp(self,data):
         buf = data
@@ -193,9 +198,8 @@ class PDFStream(PDFObject):
         if self.decipher:
             # Handle encryption
             data = self.decipher(self.objid, self.genno, data)
-        try:
-            filters = self['Filter']
-        except KeyError:
+        filters = self.get_any(('F', 'Filter'))
+        if not filters:
             self.rawdata = self.data = data
             return
         if not isinstance(filters, list):
@@ -218,18 +222,13 @@ class PDFStream(PDFObject):
             else:
                 raise PDFNotImplementedError('Unsupported filter: %r' % f)
             # apply predictors
-            try:
-                params = self['DP']
-            except KeyError:
-                params = self.get('DecodeParms', {})
-            if 'Predictor' in params:
+            params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
+            if 'Predictor' in params and 'Columns' in params:
                 pred = int_value(params['Predictor'])
+                columns = int_value(params['Columns'])
                 if pred:
                     if pred != 12:
                         raise PDFNotImplementedError('Unsupported predictor: %r' % pred)
-                    if 'Columns' not in params:
-                        raise PDFValueError('Columns undefined for predictor=12')
-                    columns = int_value(params['Columns'])
                     buf = ''
                     ent0 = '\x00' * columns
                     for i in xrange(0, len(data), columns+1):