diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9082416..d91d035 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Added
 - Support for extracting JBIG2 encoded images ([#311](https://github.com/pdfminer/pdfminer.six/pull/311) and [#46](https://github.com/pdfminer/pdfminer.six/pull/46))
 
+### Fixed
+- Unhandled AssertionError when dumping pdf containing reference to object id 0 ([#318](https://github.com/pdfminer/pdfminer.six/pull/318))
+
 ## [20191020] - 2019-10-20
 
 ### Deprecated
diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py
index 5fb9cce..1c4b276 100644
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@@ -671,7 +671,11 @@ class PDFDocument(object):
 
     # can raise PDFObjectNotFound
     def getobj(self, objid):
-        assert objid != 0
+        """Get object from PDF
+
+        :raises PDFException: if PDFDocument is not initialized
+        :raises PDFObjectNotFound: if objid does not exist in PDF
+        """
         if not self.xrefs:
             raise PDFException('PDFDocument is not initialized')
         log.debug('getobj: objid=%r', objid)
diff --git a/tests/test_pdfdocument.py b/tests/test_pdfdocument.py
new file mode 100644
index 0000000..67a5e45
--- /dev/null
+++ b/tests/test_pdfdocument.py
@@ -0,0 +1,24 @@
+import os
+
+from nose.tools import raises
+
+from pdfminer.pdfdocument import PDFDocument
+from pdfminer.pdfparser import PDFParser
+from pdfminer.pdftypes import PDFObjectNotFound
+
+# Locate the sample PDF relative to this file rather than the current
+# working directory, so the test passes no matter where it is run from.
+SAMPLE_PATH = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    '..', 'samples', 'simple1.pdf')
+
+
+class TestPdfDocument(object):
+
+    @raises(PDFObjectNotFound)
+    def test_get_zero_objid_raises_pdfobjectnotfound(self):
+        # Object id 0 must be reported as missing, not trip an assertion.
+        with open(SAMPLE_PATH, 'rb') as in_file:
+            parser = PDFParser(in_file)
+            doc = PDFDocument(parser)
+            doc.getobj(0)