Speed up handling of PDFs with large images

pull/133/head
Tim Bell 2018-03-29 14:21:31 +11:00
parent eddf861fbd
commit fab1c9462c
1 changed file with 4 additions and 3 deletions

View File

@@ -102,7 +102,7 @@ class PDFParser(PSStackParser):
return return
pos += len(line) pos += len(line)
self.fp.seek(pos) self.fp.seek(pos)
data = self.fp.read(objlen) data_list = [self.fp.read(objlen)]
self.seek(pos+objlen) self.seek(pos+objlen)
while 1: while 1:
try: try:
@@ -115,11 +115,12 @@ class PDFParser(PSStackParser):
i = line.index(b'endstream') i = line.index(b'endstream')
objlen += i objlen += i
if self.fallback: if self.fallback:
data += line[:i] data_list.append(line[:i])
break break
objlen += len(line) objlen += len(line)
if self.fallback: if self.fallback:
data += line data_list.append(line)
data = b''.join(data_list)
self.seek(pos+objlen) self.seek(pos+objlen)
# XXX limit objlen not to exceed object boundary # XXX limit objlen not to exceed object boundary
log.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos, objlen, dic, data[:10]) log.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos, objlen, dic, data[:10])