diff --git a/.gitignore b/.gitignore index 3bf3591..a39febb 100644 --- a/.gitignore +++ b/.gitignore @@ -17,5 +17,9 @@ tests/*.xml tests/*.txt .idea/ .tox/ + +# python venv management tools Pipfile -Pipfile.lock \ No newline at end of file +Pipfile.lock +pyproject.toml +poetry.lock \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index a385a89..17b8c6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Fixed - Fix issue of TypeError: cannot unpack non-iterable PDFObjRef object, when unpacking the value of 'DW2' ([#529](https://github.com/pdfminer/pdfminer.six/pull/529)) - `PermissionError` when creating temporary filepaths on windows when running tests ([#469](https://github.com/pdfminer/pdfminer.six/issues/469)) +- Detecting trailer correctly when surrounded with needless whitespace ([#535](https://github.com/pdfminer/pdfminer.six/pull/535)) - Fix `.paint_path` logic for handling single line segments and extracting point-on-curve positions of Bézier path commands ([#530](https://github.com/pdfminer/pdfminer.six/pull/530)) ## Removed diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 108d348..e67eae7 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -93,16 +93,15 @@ class PDFXRef(PDFBaseXRef): while True: try: (pos, line) = parser.nextline() - if not line.strip(): + line = line.strip() + if not line: continue except PSEOF: raise PDFNoValidXRef('Unexpected EOF - file corrupted?') - if not line: - raise PDFNoValidXRef('Premature eof: %r' % parser) if line.startswith(b'trailer'): parser.seek(pos) break - f = line.strip().split(b' ') + f = line.split(b' ') if len(f) != 2: error_msg = 'Trailer not found: {!r}: line={!r}'\ .format(parser, line) @@ -116,9 +115,10 @@ class PDFXRef(PDFBaseXRef): for objid in range(start, start+nobjs): try: (_, line) = parser.nextline() + line = line.strip() except PSEOF: raise
PDFNoValidXRef('Unexpected EOF - file corrupted?') - f = line.strip().split(b' ') + f = line.split(b' ') if len(f) != 3: error_msg = 'Invalid XRef format: {!r}, line={!r}'\ .format(parser, line)