Expand ObjStm in XRefFallback.

pull/1/head
Yusuke Shinyama 2013-10-10 19:40:43 +09:00
parent e4bc4e43b1
commit 2df67d85ae
1 changed file with 47 additions and 20 deletions

View File

@@ -149,7 +149,30 @@ class PDFXRefFallback(PDFXRef):
m = self.PDFOBJ_CUE.match(line) m = self.PDFOBJ_CUE.match(line)
if not m: continue if not m: continue
(objid, genno) = m.groups() (objid, genno) = m.groups()
self.offsets[int(objid)] = (None, pos, int(genno)) objid = int(objid)
genno = int(genno)
self.offsets[objid] = (None, pos, genno)
# expand ObjStm.
(_,obj) = parser.nextobject()
if isinstance(obj, PDFStream) and obj.get('Type') is LITERAL_OBJSTM:
stream = stream_value(obj)
try:
n = stream['N']
except KeyError:
if STRICT:
raise PDFSyntaxError('N is not defined: %r' % stream)
n = 0
parser1 = PDFStreamParser(stream.get_data())
objs = []
try:
while 1:
(_,obj) = parser1.nextobject()
objs.append(obj)
except PSEOF:
pass
for index in xrange(n):
objid1 = objs[index*2]
self.offsets[objid1] = (objid, index, 0)
return return
@@ -411,6 +434,20 @@ class PDFDocument(object):
return Arcfour(key).process(data) return Arcfour(key).process(data)
def _getobj_objstm(self, stream, index, objid): def _getobj_objstm(self, stream, index, objid):
if stream.objid in self._parsed_objs:
(objs,n) = self._parsed_objs[stream.objid]
else:
(objs,n) = self._get_objects(stream)
if self.caching:
self._parsed_objs[stream.objid] = (objs,n)
i = n*2+index
try:
obj = objs[i]
except IndexError:
raise PDFSyntaxError('index too big: %r' % index)
return obj
def _get_objects(self, stream):
if stream.get('Type') is not LITERAL_OBJSTM: if stream.get('Type') is not LITERAL_OBJSTM:
if STRICT: if STRICT:
raise PDFSyntaxError('Not a stream object: %r' % stream) raise PDFSyntaxError('Not a stream object: %r' % stream)
@@ -420,9 +457,6 @@ class PDFDocument(object):
if STRICT: if STRICT:
raise PDFSyntaxError('N is not defined: %r' % stream) raise PDFSyntaxError('N is not defined: %r' % stream)
n = 0 n = 0
if stream.objid in self._parsed_objs:
objs = self._parsed_objs[stream.objid]
else:
parser = PDFStreamParser(stream.get_data()) parser = PDFStreamParser(stream.get_data())
parser.set_document(self) parser.set_document(self)
objs = [] objs = []
@@ -432,14 +466,7 @@ class PDFDocument(object):
objs.append(obj) objs.append(obj)
except PSEOF: except PSEOF:
pass pass
if self.caching: return (objs, n)
self._parsed_objs[stream.objid] = objs
i = n*2+index
try:
obj = objs[i]
except IndexError:
raise PDFSyntaxError('index too big: %r' % index)
return obj
KEYWORD_OBJ = KWD('obj') KEYWORD_OBJ = KWD('obj')
def _getobj_parse(self, pos, objid): def _getobj_parse(self, pos, objid):