Merge branch 'master' of https://github.com/JordanReiter/pdfminer into JordanReiter-master

pull/1/head
Yusuke Shinyama 2013-10-19 07:46:45 +09:00
commit bfd9e93c12
2 changed files with 21 additions and 7 deletions

View File

@@ -6,6 +6,10 @@ except ImportError:
from StringIO import StringIO from StringIO import StringIO
class CorruptDataError(Exception):
pass
## LZWDecoder ## LZWDecoder
## ##
class LZWDecoder(object): class LZWDecoder(object):
@@ -46,12 +50,12 @@ class LZWDecoder(object):
return v return v
def feed(self, code): def feed(self, code):
x = '' x = b''
if code == 256: if code == 256:
self.table = [ chr(c) for c in xrange(256) ] # 0-255 self.table = [bytes([i]) for i in range(256)] # 0-255
self.table.append(None) # 256 self.table.append(None) # 256
self.table.append(None) # 257 self.table.append(None) # 257
self.prevbuf = '' self.prevbuf = b''
self.nbits = 9 self.nbits = 9
elif code == 257: elif code == 257:
pass pass
@@ -60,10 +64,12 @@ class LZWDecoder(object):
else: else:
if code < len(self.table): if code < len(self.table):
x = self.table[code] x = self.table[code]
self.table.append(self.prevbuf+x[0]) self.table.append(self.prevbuf+x[:1])
else: elif code == len(self.table):
self.table.append(self.prevbuf+self.prevbuf[0]) self.table.append(self.prevbuf+self.prevbuf[:1])
x = self.table[code] x = self.table[code]
else:
raise CorruptDataError()
l = len(self.table) l = len(self.table)
if l == 511: if l == 511:
self.nbits = 10 self.nbits = 10
@@ -81,6 +87,11 @@ class LZWDecoder(object):
except EOFError: except EOFError:
break break
x = self.feed(code) x = self.feed(code)
try:
x = self.feed(code)
except CorruptDataError:
# just ignore corrupt data and stop yielding there
break
yield x yield x
if self.debug: if self.debug:
print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' % print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' %

View File

@@ -219,7 +219,10 @@ class PDFContentParser(PSStackParser):
self.istream += 1 self.istream += 1
else: else:
raise PSEOF('Unexpected EOF, file truncated?') raise PSEOF('Unexpected EOF, file truncated?')
self.fp = StringIO(strm.get_data()) data = strm.get_data()
if isinstance(data, bytes):
data = data.decode('latin-1')
self.fp = StringIO(data)
return return
def seek(self, pos): def seek(self, pos):