Merge branch 'master' of https://github.com/JordanReiter/pdfminer into JordanReiter-master
commit
bfd9e93c12
|
@ -6,6 +6,10 @@ except ImportError:
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
|
|
||||||
|
|
||||||
|
class CorruptDataError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
## LZWDecoder
|
## LZWDecoder
|
||||||
##
|
##
|
||||||
class LZWDecoder(object):
|
class LZWDecoder(object):
|
||||||
|
@ -46,12 +50,12 @@ class LZWDecoder(object):
|
||||||
return v
|
return v
|
||||||
|
|
||||||
def feed(self, code):
|
def feed(self, code):
|
||||||
x = ''
|
x = b''
|
||||||
if code == 256:
|
if code == 256:
|
||||||
self.table = [ chr(c) for c in xrange(256) ] # 0-255
|
self.table = [bytes([i]) for i in range(256)] # 0-255
|
||||||
self.table.append(None) # 256
|
self.table.append(None) # 256
|
||||||
self.table.append(None) # 257
|
self.table.append(None) # 257
|
||||||
self.prevbuf = ''
|
self.prevbuf = b''
|
||||||
self.nbits = 9
|
self.nbits = 9
|
||||||
elif code == 257:
|
elif code == 257:
|
||||||
pass
|
pass
|
||||||
|
@ -60,10 +64,12 @@ class LZWDecoder(object):
|
||||||
else:
|
else:
|
||||||
if code < len(self.table):
|
if code < len(self.table):
|
||||||
x = self.table[code]
|
x = self.table[code]
|
||||||
self.table.append(self.prevbuf+x[0])
|
self.table.append(self.prevbuf+x[:1])
|
||||||
else:
|
elif code == len(self.table):
|
||||||
self.table.append(self.prevbuf+self.prevbuf[0])
|
self.table.append(self.prevbuf+self.prevbuf[:1])
|
||||||
x = self.table[code]
|
x = self.table[code]
|
||||||
|
else:
|
||||||
|
raise CorruptDataError()
|
||||||
l = len(self.table)
|
l = len(self.table)
|
||||||
if l == 511:
|
if l == 511:
|
||||||
self.nbits = 10
|
self.nbits = 10
|
||||||
|
@ -81,6 +87,11 @@ class LZWDecoder(object):
|
||||||
except EOFError:
|
except EOFError:
|
||||||
break
|
break
|
||||||
x = self.feed(code)
|
x = self.feed(code)
|
||||||
|
try:
|
||||||
|
x = self.feed(code)
|
||||||
|
except CorruptDataError:
|
||||||
|
# just ignore corrupt data and stop yielding there
|
||||||
|
break
|
||||||
yield x
|
yield x
|
||||||
if self.debug:
|
if self.debug:
|
||||||
print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' %
|
print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' %
|
||||||
|
|
|
@ -219,7 +219,10 @@ class PDFContentParser(PSStackParser):
|
||||||
self.istream += 1
|
self.istream += 1
|
||||||
else:
|
else:
|
||||||
raise PSEOF('Unexpected EOF, file truncated?')
|
raise PSEOF('Unexpected EOF, file truncated?')
|
||||||
self.fp = StringIO(strm.get_data())
|
data = strm.get_data()
|
||||||
|
if isinstance(data, bytes):
|
||||||
|
data = data.decode('latin-1')
|
||||||
|
self.fp = StringIO(data)
|
||||||
return
|
return
|
||||||
|
|
||||||
def seek(self, pos):
|
def seek(self, pos):
|
||||||
|
|
Loading…
Reference in New Issue