issue #56 reproduced, solution attempt unsuccessful

pull/58/head
Philippe Guglielmetti 2017-04-19 14:19:14 +02:00
parent cd92883925
commit 82af7f0aac
3 changed files with 19 additions and 2 deletions

View File

@ -644,10 +644,24 @@ class PDFDocument(object):
# Parse the indirect object expected at byte offset `pos`: seek the parser
# there, read the "<objid> <genno> obj" header tokens, validate them against
# the requested `objid`, then parse the object that follows.
# Raises PDFSyntaxError on an object-id mismatch or when the keyword read is
# not `obj`.
# NOTE(review): this listing appears to come from a diff view whose +/-
# markers and indentation were stripped; the end of the method is not
# visible here.
def _getobj_parse(self, pos, objid):
self._parser.seek(pos)
(_, objid1) = self._parser.nexttoken() # objid
# NOTE(review): if this early check is still active, objid1 always equals
# objid below and the recovery branch can never run. Presumably these two
# lines are the ones the commit removes -- confirm against the real file.
if objid1 != objid:
raise PDFSyntaxError('objid mismatch: %r=%r' % (objid1, objid))
(_, genno) = self._parser.nexttoken() # genno
(_, kwd) = self._parser.nexttoken()
# #### hack around malformed pdf files
# Copied from
# https://github.com/jaepil/pdfminer3k/blob/master/pdfminer/pdfparser.py#L399
# to address https://github.com/pdfminer/pdfminer.six/issues/56
# (previously: assert objid1 == objid, (objid, objid1))
if objid1 != objid:
# Collect every token read until the parser returns the `obj` keyword
# object itself (identity comparison against self.KEYWORD_OBJ).
# NOTE(review): no explicit stop condition is visible if that keyword never
# appears; termination then depends on how nexttoken() behaves at end of
# input -- confirm.
x = []
while kwd is not self.KEYWORD_OBJ:
(_,kwd) = self._parser.nexttoken()
x.append(kwd)
# NOTE(review): `if x:` only guarantees one element, so x[-2] raises
# IndexError when exactly one token was collected. Also, x[-1] is always the
# token that ended the loop (the `obj` keyword), so genno receives that
# keyword and objid1 receives the token just before it.
if x:
objid1 = x[-2]
genno = x[-1]
# #### end hack around malformed pdf files
# Final validation after the optional recovery above.
if objid1 != objid:
raise PDFSyntaxError('objid mismatch: %r=%r' % (objid1, objid))
if kwd != KWD(b'obj'):
raise PDFSyntaxError('Invalid object spec: offset=%r' % pos)
(_, obj) = self._parser.nextobject()

BIN
samples/contrib/2b.pdf Normal file

Binary file not shown.

View File

@ -47,5 +47,8 @@ class TestDumpPDF():
def test_7(self):
run('../samples/contrib/','stamp-no')
"""
# Calls run() on the contributed sample '2b' with the options '-A -t xml'.
# Presumably the reproducer for issue #56 named in the commit title -- confirm.
# run() is defined outside this view.
def test_8(self):
run('../samples/contrib/','2b','-A -t xml')
# When executed as a script, hand this module to nose's runner.
if __name__ == '__main__':
nose.runmodule()