Jordan Reiter 2013-03-27 13:05:29 -04:00
parent c7709045e9
commit 44653071c3
2 changed files with 21 additions and 7 deletions

View File

@@ -6,6 +6,10 @@ except ImportError:
from StringIO import StringIO
class CorruptDataError(Exception):
pass
## LZWDecoder
##
class LZWDecoder(object):
@@ -46,12 +50,12 @@ class LZWDecoder(object):
return v
def feed(self, code):
x = ''
x = b''
if code == 256:
self.table = [ chr(c) for c in xrange(256) ] # 0-255
self.table = [bytes([i]) for i in range(256)] # 0-255
self.table.append(None) # 256
self.table.append(None) # 257
self.prevbuf = ''
self.prevbuf = b''
self.nbits = 9
elif code == 257:
pass
@@ -60,10 +64,12 @@ class LZWDecoder(object):
else:
if code < len(self.table):
x = self.table[code]
self.table.append(self.prevbuf+x[0])
else:
self.table.append(self.prevbuf+self.prevbuf[0])
self.table.append(self.prevbuf+x[:1])
elif code == len(self.table):
self.table.append(self.prevbuf+self.prevbuf[:1])
x = self.table[code]
else:
raise CorruptDataError()
l = len(self.table)
if l == 511:
self.nbits = 10
@@ -81,6 +87,11 @@ class LZWDecoder(object):
except EOFError:
break
x = self.feed(code)
try:
x = self.feed(code)
except CorruptDataError:
# just ignore corrupt data and stop yielding there
break
yield x
if self.debug:
print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' %

View File

@@ -217,7 +217,10 @@ class PDFContentParser(PSStackParser):
self.istream += 1
else:
raise PSEOF('Unexpected EOF, file truncated?')
self.fp = StringIO(strm.get_data())
data = strm.get_data()
if isinstance(data, bytes):
data = data.decode('latin-1')
self.fp = io.StringIO(data)
return
def seek(self, pos):