Fall back to scanning all xref objects for pages when /Pages does not exist.

pull/1/head
Yusuke Shinyama 2013-10-09 22:08:16 +09:00
parent 06425bba00
commit 87143cb36f
2 changed files with 19 additions and 17 deletions

View File

@ -18,7 +18,7 @@ from pdffont import PDFFontError
from pdffont import PDFType1Font, PDFTrueTypeFont, PDFType3Font
from pdffont import PDFCIDFont
from pdfparser import PDFDocument, PDFParser
from pdfparser import PDFPasswordIncorrect
from pdfparser import PDFPasswordIncorrect, PDFObjectNotFound
from pdfcolor import PDFColorSpace
from pdfcolor import PREDEFINED_COLORSPACE
from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB
@ -335,7 +335,10 @@ class PDFPageInterpreter(object):
objid = None
if isinstance(spec, PDFObjRef):
objid = spec.objid
spec = dict_value(spec)
try:
spec = dict_value(spec)
except PDFObjectNotFound:
spec = {}
self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec)
elif k == 'ColorSpace':
for (csid,spec) in dict_value(v).iteritems():
@ -629,10 +632,8 @@ class PDFPageInterpreter(object):
try:
self.textstate.font = self.fontmap[literal_name(fontid)]
except KeyError:
raise
if STRICT:
raise PDFInterpreterError('Undefined Font id: %r' % fontid)
return
self.textstate.fontsize = fontsize
return
# setrendering

View File

@ -505,12 +505,6 @@ class PDFDocument(object):
obj = decipher_all(self.decipher, objid, genno, obj)
return obj
def get_objects(self):
for xref in self.xrefs:
for objid in xref.get_objids():
yield self.getobj(objid)
return
INHERITABLE_ATTRS = set(['Resources', 'MediaBox', 'CropBox', 'Rotate'])
def get_pages(self):
if not self.xrefs:
@ -535,14 +529,21 @@ class PDFDocument(object):
if 1 <= self.debug:
print >>sys.stderr, 'Page: %r' % tree
yield (objid, tree)
if 'Pages' in self.catalog:
for (pageid,tree) in search(self.catalog['Pages'], self.catalog):
yield PDFPage(self, pageid, tree)
else:
try:
if 'Pages' in self.catalog:
for (objid,tree) in search(self.catalog['Pages'], self.catalog):
yield PDFPage(self, objid, tree)
return
except PDFObjectNotFound:
# fallback when /Pages is missing.
for obj in self.get_objects():
if isinstance(obj, dict) and obj.get('Type') is LITERAL_PAGES:
yield PDFPage(self, pageid, obj)
for xref in self.xrefs:
for objid in xref.get_objids():
try:
obj = self.getobj(objid)
if isinstance(obj, dict) and obj.get('Type') is LITERAL_PAGE:
yield PDFPage(self, objid, obj)
except PDFObjectNotFound:
pass
return
def get_outlines(self):