diff --git a/CHANGELOG.md b/CHANGELOG.md index c611b8c..af33efa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,7 +33,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Fixed - Rename PDFTextExtractionNotAllowedError to PDFTextExtractionNotAllowed to revert breaking change ([#461](https://github.com/pdfminer/pdfminer.six/pull/461)) - Always try to get CMap, not only for identity encodings ([#438](https://github.com/pdfminer/pdfminer.six/pull/438)) -- Recognizing 'trailer' keyword with spaces as prefix or suffix ([#513](https://github.com/pdfminer/pdfminer.six/pull/513)) ## [20200720] diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 28e0fd7..108d348 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -93,15 +93,16 @@ class PDFXRef(PDFBaseXRef): while True: try: (pos, line) = parser.nextline() - line = line.strip() - if not line: + if not line.strip(): continue except PSEOF: raise PDFNoValidXRef('Unexpected EOF - file corrupted?') + if not line: + raise PDFNoValidXRef('Premature eof: %r' % parser) if line.startswith(b'trailer'): parser.seek(pos) break - f = line.split(b' ') + f = line.strip().split(b' ') if len(f) != 2: error_msg = 'Trailer not found: {!r}: line={!r}'\ .format(parser, line) @@ -117,7 +118,7 @@ class PDFXRef(PDFBaseXRef): (_, line) = parser.nextline() except PSEOF: raise PDFNoValidXRef('Unexpected EOF - file corrupted?') - f = line.split(b' ') + f = line.strip().split(b' ') if len(f) != 3: error_msg = 'Invalid XRef format: {!r}, line={!r}'\ .format(parser, line)