issue #56 reproduced, solution attempt unsuccessful
parent
cd92883925
commit
82af7f0aac
|
@ -644,10 +644,24 @@ class PDFDocument(object):
|
||||||
def _getobj_parse(self, pos, objid):
|
def _getobj_parse(self, pos, objid):
|
||||||
self._parser.seek(pos)
|
self._parser.seek(pos)
|
||||||
(_, objid1) = self._parser.nexttoken() # objid
|
(_, objid1) = self._parser.nexttoken() # objid
|
||||||
if objid1 != objid:
|
|
||||||
raise PDFSyntaxError('objid mismatch: %r=%r' % (objid1, objid))
|
|
||||||
(_, genno) = self._parser.nexttoken() # genno
|
(_, genno) = self._parser.nexttoken() # genno
|
||||||
(_, kwd) = self._parser.nexttoken()
|
(_, kwd) = self._parser.nexttoken()
|
||||||
|
# #### hack around malformed pdf files
|
||||||
|
# copied from https://github.com/jaepil/pdfminer3k/blob/master/pdfminer/pdfparser.py#L399
|
||||||
|
#to solve https://github.com/pdfminer/pdfminer.six/issues/56
|
||||||
|
#assert objid1 == objid, (objid, objid1)
|
||||||
|
if objid1 != objid:
|
||||||
|
x = []
|
||||||
|
while kwd is not self.KEYWORD_OBJ:
|
||||||
|
(_,kwd) = self._parser.nexttoken()
|
||||||
|
x.append(kwd)
|
||||||
|
if x:
|
||||||
|
objid1 = x[-2]
|
||||||
|
genno = x[-1]
|
||||||
|
# #### end hack around malformed pdf files
|
||||||
|
if objid1 != objid:
|
||||||
|
raise PDFSyntaxError('objid mismatch: %r=%r' % (objid1, objid))
|
||||||
|
|
||||||
if kwd != KWD(b'obj'):
|
if kwd != KWD(b'obj'):
|
||||||
raise PDFSyntaxError('Invalid object spec: offset=%r' % pos)
|
raise PDFSyntaxError('Invalid object spec: offset=%r' % pos)
|
||||||
(_, obj) = self._parser.nextobject()
|
(_, obj) = self._parser.nextobject()
|
||||||
|
|
Binary file not shown.
|
@ -47,5 +47,8 @@ class TestDumpPDF():
|
||||||
def test_7(self):
|
def test_7(self):
|
||||||
run('../samples/contrib/','stamp-no')
|
run('../samples/contrib/','stamp-no')
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def test_8(self):
|
||||||
|
run('../samples/contrib/','2b','-A -t xml')
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
nose.runmodule()
|
nose.runmodule()
|
||||||
|
|
Loading…
Reference in New Issue