outline bug fixed

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@249 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-10-17 05:14:52 +00:00
parent 0b962443ed
commit afe33312c6
2 changed files with 27 additions and 8 deletions

View File

@ -30,6 +30,7 @@ from utils import decode_text, ObjIdRange
class PDFSyntaxError(PDFException): pass class PDFSyntaxError(PDFException): pass
class PDFNoValidXRef(PDFSyntaxError): pass class PDFNoValidXRef(PDFSyntaxError): pass
class PDFNoOutlines(PDFException): pass class PDFNoOutlines(PDFException): pass
class PDFDestinationNotFound(PDFException):
    """Raised by get_dest() when no destination is found for a name."""
    pass
class PDFEncryptionError(PDFException): pass class PDFEncryptionError(PDFException): pass
class PDFPasswordIncorrect(PDFEncryptionError): pass class PDFPasswordIncorrect(PDFEncryptionError): pass
@ -517,7 +518,7 @@ class PDFDocument(object):
def get_outlines(self): def get_outlines(self):
if 'Outlines' not in self.catalog: if 'Outlines' not in self.catalog:
raise PDFNoOutlines('No /Outlines defined!') raise PDFNoOutlines
def search(entry, level): def search(entry, level):
entry = dict_value(entry) entry = dict_value(entry)
if 'Title' in entry: if 'Title' in entry:
@ -558,6 +559,20 @@ class PDFDocument(object):
raise KeyError((cat,key)) raise KeyError((cat,key))
return lookup(d0) return lookup(d0)
def get_dest(self, name):
    """Return the object stored for the named destination *name*.

    The lookup goes through lookup_name('Dests', name) first.  If that
    raises KeyError, the /Dests dictionary of the document catalog is
    consulted instead.

    Raises PDFDestinationNotFound(name) when the catalog has no /Dests
    entry or that dictionary does not contain *name*.
    """
    try:
        # PDF-1.2 or later
        return self.lookup_name('Dests', name)
    except KeyError:
        # PDF-1.1 or prior
        if 'Dests' in self.catalog:
            dests = dict_value(self.catalog['Dests'])
            if name in dests:
                return dests[name]
        raise PDFDestinationNotFound(name)
## PDFParser ## PDFParser
## ##

View File

@ -113,24 +113,28 @@ def dumpoutline(outfp, fname, objids, pagenos, password='',
doc.set_parser(parser) doc.set_parser(parser)
doc.initialize(password) doc.initialize(password)
pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) ) pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) )
def resolve_dest(dest):
    """Reduce an outline destination to the value callers index into.

    A str is treated as a destination name and looked up through
    doc.get_dest(); a PSLiteral is looked up by its .name.  Any other
    value is taken as it is.  If the value at that point is a dict,
    its 'D' entry is used instead.

    The 'D' entry is passed through resolve1() as well, so that an
    indirect reference stored there is dereferenced before the caller
    evaluates dest[0].objid.

    Exceptions raised by doc.get_dest() are not caught here.
    """
    if isinstance(dest, str):
        dest = resolve1(doc.get_dest(dest))
    elif isinstance(dest, PSLiteral):
        dest = resolve1(doc.get_dest(dest.name))
    if isinstance(dest, dict):
        # NOTE(review): assumes resolve1() hands back non-reference
        # objects unchanged -- confirm against its definition.
        dest = resolve1(dest['D'])
    return dest
try: try:
outlines = doc.get_outlines() outlines = doc.get_outlines()
outfp.write('<outlines>\n') outfp.write('<outlines>\n')
for (level,title,dest,a,se) in outlines: for (level,title,dest,a,se) in outlines:
pageno = None pageno = None
if dest: if dest:
dest = resolve1(doc.lookup_name('Dests', dest)) dest = resolve_dest(dest)
if isinstance(dest, dict): pageno = pages[dest[0].objid]
dest = dest['D']
pageno = pages[dest[0].objid]
elif a: elif a:
action = a.resolve() action = a.resolve()
if isinstance(action, dict): if isinstance(action, dict):
subtype = action.get('S') subtype = action.get('S')
if subtype and repr(subtype) == '/GoTo' and action.get('D'): if subtype and repr(subtype) == '/GoTo' and action.get('D'):
dest = action['D'] dest = resolve_dest(action['D'])
if isinstance(dest, str):
dest = resolve1(doc.lookup_name('Dests', dest))
pageno = pages[dest[0].objid] pageno = pages[dest[0].objid]
s = e(title).encode('utf-8', 'xmlcharrefreplace') s = e(title).encode('utf-8', 'xmlcharrefreplace')
outfp.write('<outline level="%r" title="%s">\n' % (level, s)) outfp.write('<outline level="%r" title="%s">\n' % (level, s))