diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py
index e2ac6cf..f1b6c7f 100644
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@@ -151,6 +151,7 @@ class PDFStreamParser(PDFParser):
         self.add_results(*self.popall())
         return
 
+    KEYWORD_OBJ = KWD('obj')
     def do_keyword(self, pos, token):
         if token is self.KEYWORD_R:
             # reference to indirect object
@@ -162,6 +163,14 @@ class PDFStreamParser(PDFParser):
             except PSSyntaxError:
                 pass
             return
+        elif token in (self.KEYWORD_OBJ, self.KEYWORD_ENDOBJ):
+            if STRICT:
+                # See PDF Spec 3.4.6: Only the object values are stored in the
+                # stream; the obj and endobj keywords are not used.
+                name = 'obj' if token is self.KEYWORD_OBJ else 'endobj'
+                raise PDFSyntaxError("Keyword %s found in stream" % name)
+            # Not strict: drop the stray keyword instead of pushing it.
+            return
         # others
         self.push((pos, token))
         return