diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 4754926..c170f1a 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -219,7 +219,7 @@ class PDFXRefStream(PDFBaseXRef): if not isinstance(stream, PDFStream) or stream['Type'] is not LITERAL_XREF: raise PDFNoValidXRef('Invalid PDF stream spec.') size = stream['Size'] - index_array = stream.get('Index', (0, size)) + index_array = stream.get('Index', (1, size)) if len(index_array) % 2 != 0: raise PDFSyntaxError('Invalid index number') self.ranges.extend(choplist(2, index_array)) diff --git a/tools/dumppdf.py b/tools/dumppdf.py index 1b1acf7..7826240 100755 --- a/tools/dumppdf.py +++ b/tools/dumppdf.py @@ -114,8 +114,7 @@ def dumpoutline(outfp, fname, objids, pagenos, password='', dumpall=False, codec=None, extractdir=None): fp = file(fname, 'rb') parser = PDFParser(fp) - doc = PDFDocument(parser) - doc.initialize(password) + doc = PDFDocument(parser, password) pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(PDFPage.create_pages(doc)) ) def resolve_dest(dest): @@ -185,9 +184,7 @@ def extractembedded(outfp, fname, objids, pagenos, password='', fp = file(fname, 'rb') parser = PDFParser(fp) - doc = PDFDocument(parser) - doc.initialize(password) - + doc = PDFDocument(parser, password) for xref in doc.xrefs: for objid in xref.get_objids(): obj = doc.getobj(objid) @@ -200,8 +197,7 @@ def dumppdf(outfp, fname, objids, pagenos, password='', dumpall=False, codec=None, extractdir=None): fp = file(fname, 'rb') parser = PDFParser(fp) - doc = PDFDocument(parser) - doc.initialize(password) + doc = PDFDocument(parser, password) if objids: for objid in objids: obj = doc.getobj(objid)