From 938419c4764293db6539abd1ed7204102dfb1234 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20Sz=C3=A1sz?= Date: Thu, 20 Jul 2017 19:46:11 +0100 Subject: [PATCH] Align dumppdf tool to modified data structures. (#73) * Align dumppdf tool to modified data structures. TOC page numbers should also work now, counting from 1. * Update version number. --- pdfminer/__init__.py | 2 +- tools/dumppdf.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pdfminer/__init__.py b/pdfminer/__init__.py index 15770c2..2d41e1a 100644 --- a/pdfminer/__init__.py +++ b/pdfminer/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -__version__ = '20170510' +__version__ = '20170630' if __name__ == '__main__': print (__version__) diff --git a/tools/dumppdf.py b/tools/dumppdf.py index 037c6e0..110f196 100755 --- a/tools/dumppdf.py +++ b/tools/dumppdf.py @@ -122,7 +122,7 @@ def dumpoutline(outfp, fname, objids, pagenos, password='', parser = PDFParser(fp) doc = PDFDocument(parser, password) pages = dict( (page.pageid, pageno) for (pageno,page) - in enumerate(PDFPage.create_pages(doc)) ) + in enumerate(PDFPage.create_pages(doc), 1) ) def resolve_dest(dest): if isinstance(dest, str): dest = resolve1(doc.get_dest(dest)) @@ -130,6 +130,8 @@ def dumpoutline(outfp, fname, objids, pagenos, password='', dest = resolve1(doc.get_dest(dest.name)) if isinstance(dest, dict): dest = dest['D'] + if isinstance(dest, PDFObjRef): + dest = dest.resolve() return dest try: outlines = doc.get_outlines() @@ -140,10 +142,10 @@ def dumpoutline(outfp, fname, objids, pagenos, password='', dest = resolve_dest(dest) pageno = pages[dest[0].objid] elif a: - action = a.resolve() + action = a if isinstance(action, dict): subtype = action.get('S') - if subtype and repr(subtype) == '/GoTo' and action.get('D'): + if subtype and repr(subtype) == '/\'GoTo\'' and action.get('D'): dest = resolve_dest(action['D']) pageno = pages[dest[0].objid] s = e(title).encode('utf-8', 'xmlcharrefreplace')