From b4054ff4cfb52ae7a2ca44e6f2d9575869222a22 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 10 Sep 2020 15:09:07 -0400 Subject: [PATCH] Pass caching parameter to PDFResourceManager in `high_level` functions (#475) * Updated high_level.py This commit enables caching to be turned on and off rather than being always on regardless of user input. * Reverted params back to fix errors * Updated CHANGELOG.md to reflect quick fix * Update CHANGELOG.md Co-authored-by: Pieter Marsman --- CHANGELOG.md | 4 +++- pdfminer/high_level.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 499cd08..dc0df88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [Unreleased] -### Removed +### Fixed +- Pass caching parameter to PDFResourceManager in `high_level` functions ([#475](https://github.com/pdfminer/pdfminer.six/pull/475)) +### Removed - Remove unused rijndael encryption implementation ([#465](https://github.com/pdfminer/pdfminer.six/pull/465)) ## [20200726] diff --git a/pdfminer/high_level.py b/pdfminer/high_level.py index 644c6ca..96911ec 100644 --- a/pdfminer/high_level.py +++ b/pdfminer/high_level.py @@ -106,7 +106,7 @@ def extract_text(pdf_file, password='', page_numbers=None, maxpages=0, laparams = LAParams() with open_filename(pdf_file, "rb") as fp, StringIO() as output_string: - rsrcmgr = PDFResourceManager() + rsrcmgr = PDFResourceManager(caching=caching) device = TextConverter(rsrcmgr, output_string, codec=codec, laparams=laparams) interpreter = PDFPageInterpreter(rsrcmgr, device) @@ -141,7 +141,7 @@ def extract_pages(pdf_file, password='', page_numbers=None, maxpages=0, laparams = LAParams() with open_filename(pdf_file, "rb") as fp: - resource_manager = PDFResourceManager() + resource_manager = PDFResourceManager(caching=caching) device = PDFPageAggregator(resource_manager, laparams=laparams) interpreter = 
PDFPageInterpreter(resource_manager, device) for page in PDFPage.get_pages(fp, page_numbers, maxpages=maxpages,