Expand ObjStm in XRefFallback.
parent
e4bc4e43b1
commit
2df67d85ae
|
@ -149,7 +149,30 @@ class PDFXRefFallback(PDFXRef):
|
||||||
m = self.PDFOBJ_CUE.match(line)
|
m = self.PDFOBJ_CUE.match(line)
|
||||||
if not m: continue
|
if not m: continue
|
||||||
(objid, genno) = m.groups()
|
(objid, genno) = m.groups()
|
||||||
self.offsets[int(objid)] = (None, pos, int(genno))
|
objid = int(objid)
|
||||||
|
genno = int(genno)
|
||||||
|
self.offsets[objid] = (None, pos, genno)
|
||||||
|
# expand ObjStm.
|
||||||
|
(_,obj) = parser.nextobject()
|
||||||
|
if isinstance(obj, PDFStream) and obj.get('Type') is LITERAL_OBJSTM:
|
||||||
|
stream = stream_value(obj)
|
||||||
|
try:
|
||||||
|
n = stream['N']
|
||||||
|
except KeyError:
|
||||||
|
if STRICT:
|
||||||
|
raise PDFSyntaxError('N is not defined: %r' % stream)
|
||||||
|
n = 0
|
||||||
|
parser1 = PDFStreamParser(stream.get_data())
|
||||||
|
objs = []
|
||||||
|
try:
|
||||||
|
while 1:
|
||||||
|
(_,obj) = parser1.nextobject()
|
||||||
|
objs.append(obj)
|
||||||
|
except PSEOF:
|
||||||
|
pass
|
||||||
|
for index in xrange(n):
|
||||||
|
objid1 = objs[index*2]
|
||||||
|
self.offsets[objid1] = (objid, index, 0)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@ -411,6 +434,20 @@ class PDFDocument(object):
|
||||||
return Arcfour(key).process(data)
|
return Arcfour(key).process(data)
|
||||||
|
|
||||||
def _getobj_objstm(self, stream, index, objid):
|
def _getobj_objstm(self, stream, index, objid):
|
||||||
|
if stream.objid in self._parsed_objs:
|
||||||
|
(objs,n) = self._parsed_objs[stream.objid]
|
||||||
|
else:
|
||||||
|
(objs,n) = self._get_objects(stream)
|
||||||
|
if self.caching:
|
||||||
|
self._parsed_objs[stream.objid] = (objs,n)
|
||||||
|
i = n*2+index
|
||||||
|
try:
|
||||||
|
obj = objs[i]
|
||||||
|
except IndexError:
|
||||||
|
raise PDFSyntaxError('index too big: %r' % index)
|
||||||
|
return obj
|
||||||
|
|
||||||
|
def _get_objects(self, stream):
|
||||||
if stream.get('Type') is not LITERAL_OBJSTM:
|
if stream.get('Type') is not LITERAL_OBJSTM:
|
||||||
if STRICT:
|
if STRICT:
|
||||||
raise PDFSyntaxError('Not a stream object: %r' % stream)
|
raise PDFSyntaxError('Not a stream object: %r' % stream)
|
||||||
|
@ -420,9 +457,6 @@ class PDFDocument(object):
|
||||||
if STRICT:
|
if STRICT:
|
||||||
raise PDFSyntaxError('N is not defined: %r' % stream)
|
raise PDFSyntaxError('N is not defined: %r' % stream)
|
||||||
n = 0
|
n = 0
|
||||||
if stream.objid in self._parsed_objs:
|
|
||||||
objs = self._parsed_objs[stream.objid]
|
|
||||||
else:
|
|
||||||
parser = PDFStreamParser(stream.get_data())
|
parser = PDFStreamParser(stream.get_data())
|
||||||
parser.set_document(self)
|
parser.set_document(self)
|
||||||
objs = []
|
objs = []
|
||||||
|
@ -432,14 +466,7 @@ class PDFDocument(object):
|
||||||
objs.append(obj)
|
objs.append(obj)
|
||||||
except PSEOF:
|
except PSEOF:
|
||||||
pass
|
pass
|
||||||
if self.caching:
|
return (objs, n)
|
||||||
self._parsed_objs[stream.objid] = objs
|
|
||||||
i = n*2+index
|
|
||||||
try:
|
|
||||||
obj = objs[i]
|
|
||||||
except IndexError:
|
|
||||||
raise PDFSyntaxError('index too big: %r' % index)
|
|
||||||
return obj
|
|
||||||
|
|
||||||
KEYWORD_OBJ = KWD('obj')
|
KEYWORD_OBJ = KWD('obj')
|
||||||
def _getobj_parse(self, pos, objid):
|
def _getobj_parse(self, pos, objid):
|
||||||
|
|
Loading…
Reference in New Issue