diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2f7ca62..3386e86 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Fixed
 - Rename PDFTextExtractionNotAllowedError to PDFTextExtractionNotAllowed to revert breaking change ([#461](https://github.com/pdfminer/pdfminer.six/pull/461))
 - Always try to get CMap, not only for identity encodings ([#438](https://github.com/pdfminer/pdfminer.six/pull/438))
+- Recognizing 'trailer' keyword with spaces as prefix or suffix ([#513](https://github.com/pdfminer/pdfminer.six/pull/513))
 
 ## [20200720]
 
diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py
index 108d348..28e0fd7 100644
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@@ -93,16 +93,15 @@ class PDFXRef(PDFBaseXRef):
         while True:
             try:
                 (pos, line) = parser.nextline()
-                if not line.strip():
+                line = line.strip()
+                if not line:
                     continue
             except PSEOF:
                 raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
-            if not line:
-                raise PDFNoValidXRef('Premature eof: %r' % parser)
             if line.startswith(b'trailer'):
                 parser.seek(pos)
                 break
-            f = line.strip().split(b' ')
+            f = line.split(b' ')
             if len(f) != 2:
                 error_msg = 'Trailer not found: {!r}: line={!r}'\
                     .format(parser, line)
@@ -118,7 +117,8 @@ class PDFXRef(PDFBaseXRef):
                     (_, line) = parser.nextline()
+                    line = line.strip()
                 except PSEOF:
                     raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
-                f = line.strip().split(b' ')
+                f = line.split(b' ')
                 if len(f) != 3:
                     error_msg = 'Invalid XRef format: {!r}, line={!r}'\
                         .format(parser, line)