From ec223d1f1d2483069de0e4b80e7fcfb9d740c6ed Mon Sep 17 00:00:00 2001 From: Jake Stockwin Date: Sat, 24 Oct 2020 17:55:07 +0100 Subject: [PATCH] Fix for when 'trailer' is indented (#513) * Fix for when 'trailer' is indented Closes #214 * Address CR comments - strip line after parsing * Update CHANGELOG.md Co-authored-by: Pieter Marsman --- CHANGELOG.md | 1 + pdfminer/pdfdocument.py | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f7ca62..3386e86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Fixed - Rename PDFTextExtractionNotAllowedError to PDFTextExtractionNotAllowed to revert breaking change ([#461](https://github.com/pdfminer/pdfminer.six/pull/461)) - Always try to get CMap, not only for identity encodings ([#438](https://github.com/pdfminer/pdfminer.six/pull/438)) +- Recognizing 'trailer' keyword with spaces as prefix or suffix ([#513](https://github.com/pdfminer/pdfminer.six/pull/513)) ## [20200720] diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 108d348..28e0fd7 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -93,16 +93,15 @@ class PDFXRef(PDFBaseXRef): while True: try: (pos, line) = parser.nextline() - if not line.strip(): + line = line.strip() + if not line: continue except PSEOF: raise PDFNoValidXRef('Unexpected EOF - file corrupted?') - if not line: - raise PDFNoValidXRef('Premature eof: %r' % parser) if line.startswith(b'trailer'): parser.seek(pos) break - f = line.strip().split(b' ') + f = line.split(b' ') if len(f) != 2: error_msg = 'Trailer not found: {!r}: line={!r}'\ .format(parser, line) @@ -118,7 +117,7 @@ class PDFXRef(PDFBaseXRef): (_, line) = parser.nextline() except PSEOF: raise PDFNoValidXRef('Unexpected EOF - file corrupted?') - f = line.strip().split(b' ') + f = line.split(b' ') if len(f) != 3: error_msg = 'Invalid XRef format: {!r}, line={!r}'\ .format(parser, line)