Fix a unicode conversion bug.

See https://github.com/euske/pdfminer/issues/75
pull/55/head
speedplane 2014-11-11 23:34:33 -05:00
parent b0e035c24f
commit ecc4d05675
1 changed file with 9 additions and 1 deletion

View File

@@ -343,7 +343,15 @@ class PSBaseParser(object):
self.hex = b'' self.hex = b''
self._parse1 = self._parse_literal_hex self._parse1 = self._parse_literal_hex
return j+1 return j+1
self._add_token(LIT(unicode(self._curtoken)))
try:
# Try to interpret the token as a utf-8 string
utoken = self._curtoken.decode('utf-8')
except UnicodeDecodeError:
# We failed, there is possibly a corrupt PDF here.
if STRICT: raise
utoken = ""
self._add_token(LIT(utoken))
self._parse1 = self._parse_main self._parse1 = self._parse_main
return j return j