diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index 1b686d6..afef6b3 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -30,6 +30,7 @@ from utils import decode_text, ObjIdRange class PDFSyntaxError(PDFException): pass class PDFNoValidXRef(PDFSyntaxError): pass class PDFNoOutlines(PDFException): pass +class PDFDestinationNotFound(PDFException): pass class PDFEncryptionError(PDFException): pass class PDFPasswordIncorrect(PDFEncryptionError): pass @@ -517,7 +518,7 @@ class PDFDocument(object): def get_outlines(self): if 'Outlines' not in self.catalog: - raise PDFNoOutlines('No /Outlines defined!') + raise PDFNoOutlines def search(entry, level): entry = dict_value(entry) if 'Title' in entry: @@ -558,6 +559,20 @@ class PDFDocument(object): raise KeyError((cat,key)) return lookup(d0) + def get_dest(self, name): + try: + # PDF-1.2 or later + obj = self.lookup_name('Dests', name) + except KeyError: + # PDF-1.1 or prior + if 'Dests' not in self.catalog: + raise PDFDestinationNotFound(name) + d0 = dict_value(self.catalog['Dests']) + if name not in d0: + raise PDFDestinationNotFound(name) + obj = d0[name] + return obj + ## PDFParser ## diff --git a/tools/dumppdf.py b/tools/dumppdf.py index 9e48974..247ef56 100755 --- a/tools/dumppdf.py +++ b/tools/dumppdf.py @@ -113,24 +113,28 @@ def dumpoutline(outfp, fname, objids, pagenos, password='', doc.set_parser(parser) doc.initialize(password) pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) ) + def resolve_dest(dest): + if isinstance(dest, str): + dest = resolve1(doc.get_dest(dest)) + elif isinstance(dest, PSLiteral): + dest = resolve1(doc.get_dest(dest.name)) + if isinstance(dest, dict): + dest = dest['D'] + return dest try: outlines = doc.get_outlines() outfp.write('\n') for (level,title,dest,a,se) in outlines: pageno = None if dest: - dest = resolve1(doc.lookup_name('Dests', dest)) - if isinstance(dest, dict): - dest = dest['D'] - pageno = pages[dest[0].objid] + dest = resolve_dest(dest) + pageno = pages[dest[0].objid] elif a: action = a.resolve() if isinstance(action, dict): subtype = action.get('S') if subtype and repr(subtype) == '/GoTo' and action.get('D'): - dest = action['D'] - if isinstance(dest, str): - dest = resolve1(doc.lookup_name('Dests', dest)) + dest = resolve_dest(action['D']) pageno = pages[dest[0].objid] s = e(title).encode('utf-8', 'xmlcharrefreplace') outfp.write('\n' % (level, s))