From 543976f195d2fb042d065474141ffac05c58ef10 Mon Sep 17 00:00:00 2001 From: "Tony(Baojia) Tong" Date: Thu, 26 Aug 2021 14:55:02 -0400 Subject: [PATCH] Fix issue of ValueError and KeyError raised in PDFdocument and PDFparser (#574) * check obj type * update changelog * Update CHANGELOG.md * fix the bug * fix condition * update changelog * update changelog again * update changelog * update Co-authored-by: Pieter Marsman Co-authored-by: Tony Tong --- CHANGELOG.md | 1 + pdfminer/pdfdocument.py | 2 +- pdfminer/pdfparser.py | 16 ++++++++-------- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5107fbf..89b0fe7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Support for Paeth PNG filter compression (predictor value = 4) ([#537](https://github.com/pdfminer/pdfminer.six/pull/537)) ### Fixed +- Fix issue of ValueError and KeyError raised in PDFdocument and PDFparser ([#573](https://github.com/pdfminer/pdfminer.six/pull/574)) - Fix issue of TypeError: cannot unpack non-iterable PDFObjRef object, when unpacking the value of 'DW2' ([#529](https://github.com/pdfminer/pdfminer.six/pull/529)) - `PermissionError` when creating temporary filepaths on windows when running tests ([#469](https://github.com/pdfminer/pdfminer.six/issues/469)) - Detecting trailer correctly when surrounded with needless whitespace ([#535](https://github.com/pdfminer/pdfminer.six/pull/535)) diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index e67eae7..2da93c8 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -230,7 +230,7 @@ class PDFXRefStream(PDFBaseXRef): (_, kwd) = parser.nexttoken() (_, stream) = parser.nextobject() if not isinstance(stream, PDFStream) \ - or stream['Type'] is not LITERAL_XREF: + or stream.get('Type') is not LITERAL_XREF: raise PDFNoValidXRef('Invalid PDF stream spec.') size = stream['Size'] index_array = 
stream.get('Index', (0, size)) diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index ee64c2e..b604b9d 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -68,14 +68,14 @@ class PDFParser(PSStackParser): elif token is self.KEYWORD_R: # reference to indirect object - try: - ((_, objid), (_, genno)) = self.pop(2) - (objid, genno) = (int(objid), int(genno)) - obj = PDFObjRef(self.doc, objid, genno) - self.push((pos, obj)) - except PSSyntaxError: - pass - + if len(self.curstack) >= 2: + try: + ((_, objid), (_, genno)) = self.pop(2) + (objid, genno) = (int(objid), int(genno)) + obj = PDFObjRef(self.doc, objid, genno) + self.push((pos, obj)) + except PSSyntaxError: + pass elif token is self.KEYWORD_STREAM: # stream object ((_, dic),) = self.pop(1)