diff --git a/CHANGELOG.md b/CHANGELOG.md index 66ca4af..7d9f1e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Add handling of JPXDecode filter to enable extraction of images for some pdfs ([#645](https://github.com/pdfminer/pdfminer.six/pull/645)) - Fix extraction of jbig2 files, which was producing invalid files ([#652](https://github.com/pdfminer/pdfminer.six/pull/653)) - Crash in `pdf2txt.py --boxes-flow=disabled` ([#682](https://github.com/pdfminer/pdfminer.six/pull/682)) +- Only use xref fallback if `PDFNoValidXRef` is raised and `fallback` is True ([#684](https://github.com/pdfminer/pdfminer.six/pull/684)) + +### Changed +- Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673)) ## [20211012] @@ -41,7 +45,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522)) - Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525)) - Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523)) -- Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673)) - Dependency on typing-extensions introduced by [#661](https://github.com/pdfminer/pdfminer.six/pull/661) ([#677](https://github.com/pdfminer/pdfminer.six/pull/677)) ## [20201018] diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index c96b974..f0102ef 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -11,7 +11,7 @@ from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes from . import settings from .arcfour import Arcfour from .pdfparser import PDFSyntaxError, PDFParser, PDFStreamParser -from .pdftypes import DecipherCallable, PDFException, PDFTypeError, PDFStream,\ +from .pdftypes import DecipherCallable, PDFException, PDFTypeError, PDFStream, \ PDFObjectNotFound, decipher_all, int_value, str_value, list_value, \ uint_value, dict_value, stream_value from .psparser import PSEOF, literal_name, LIT, KWD @@ -706,12 +706,12 @@ class PDFDocument: pos = self.find_xref(parser) self.read_xref_from(parser, pos, self.xrefs) except PDFNoValidXRef: - pass # fallback = True - if fallback: - parser.fallback = True - newxref = PDFXRefFallback() - newxref.load(parser) - self.xrefs.append(newxref) + if fallback: + parser.fallback = True + newxref = PDFXRefFallback() + newxref.load(parser) + self.xrefs.append(newxref) + for xref in self.xrefs: trailer = xref.get_trailer() if not trailer: