Fixes for LZW error (see https://bitbucket.org/hsoft/pdfminer3k/commits/ae9a4ca0691a/)
parent
c7709045e9
commit
44653071c3
|
@ -6,6 +6,10 @@ except ImportError:
|
|||
from StringIO import StringIO
|
||||
|
||||
|
||||
class CorruptDataError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
## LZWDecoder
|
||||
##
|
||||
class LZWDecoder(object):
|
||||
|
@ -46,12 +50,12 @@ class LZWDecoder(object):
|
|||
return v
|
||||
|
||||
def feed(self, code):
|
||||
x = ''
|
||||
x = b''
|
||||
if code == 256:
|
||||
self.table = [ chr(c) for c in xrange(256) ] # 0-255
|
||||
self.table = [bytes([i]) for i in range(256)] # 0-255
|
||||
self.table.append(None) # 256
|
||||
self.table.append(None) # 257
|
||||
self.prevbuf = ''
|
||||
self.prevbuf = b''
|
||||
self.nbits = 9
|
||||
elif code == 257:
|
||||
pass
|
||||
|
@ -60,10 +64,12 @@ class LZWDecoder(object):
|
|||
else:
|
||||
if code < len(self.table):
|
||||
x = self.table[code]
|
||||
self.table.append(self.prevbuf+x[0])
|
||||
else:
|
||||
self.table.append(self.prevbuf+self.prevbuf[0])
|
||||
self.table.append(self.prevbuf+x[:1])
|
||||
elif code == len(self.table):
|
||||
self.table.append(self.prevbuf+self.prevbuf[:1])
|
||||
x = self.table[code]
|
||||
else:
|
||||
raise CorruptDataError()
|
||||
l = len(self.table)
|
||||
if l == 511:
|
||||
self.nbits = 10
|
||||
|
@ -81,6 +87,11 @@ class LZWDecoder(object):
|
|||
except EOFError:
|
||||
break
|
||||
x = self.feed(code)
|
||||
try:
|
||||
x = self.feed(code)
|
||||
except CorruptDataError:
|
||||
# just ignore corrupt data and stop yielding there
|
||||
break
|
||||
yield x
|
||||
if self.debug:
|
||||
print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' %
|
||||
|
|
|
@ -217,7 +217,10 @@ class PDFContentParser(PSStackParser):
|
|||
self.istream += 1
|
||||
else:
|
||||
raise PSEOF('Unexpected EOF, file truncated?')
|
||||
self.fp = StringIO(strm.get_data())
|
||||
data = strm.get_data()
|
||||
if isinstance(data, bytes):
|
||||
data = data.decode('latin-1')
|
||||
self.fp = io.StringIO(data)
|
||||
return
|
||||
|
||||
def seek(self, pos):
|
||||
|
|
Loading…
Reference in New Issue