Pass caching parameter to PDFResourceManager in `high_level` functions (#475)

* Updated high_level.py

This commit enables caching to be turned on and off, rather than being always on regardless of the user input.

* Reverted params back to fix errors

* Updated CHANGELOG.md to reflect quick fix

* Update CHANGELOG.md

Co-authored-by: Pieter Marsman <pietermarsman@gmail.com>
pull/500/head
David Nicholson 2020-09-10 15:09:07 -04:00 committed by GitHub
parent a83f853de7
commit b4054ff4cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 5 additions and 3 deletions

View File

@@ -5,8 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [Unreleased] ## [Unreleased]
### Removed ### Fixed
- Pass caching parameter to PDFResourceManager in `high_level` functions ([#475](https://github.com/pdfminer/pdfminer.six/pull/475))
### Removed
- Remove unused rijndael encryption implementation ([#465](https://github.com/pdfminer/pdfminer.six/pull/465)) - Remove unused rijndael encryption implementation ([#465](https://github.com/pdfminer/pdfminer.six/pull/465))
## [20200726] ## [20200726]

View File

@@ -106,7 +106,7 @@ def extract_text(pdf_file, password='', page_numbers=None, maxpages=0,
laparams = LAParams() laparams = LAParams()
with open_filename(pdf_file, "rb") as fp, StringIO() as output_string: with open_filename(pdf_file, "rb") as fp, StringIO() as output_string:
rsrcmgr = PDFResourceManager() rsrcmgr = PDFResourceManager(caching=caching)
device = TextConverter(rsrcmgr, output_string, codec=codec, device = TextConverter(rsrcmgr, output_string, codec=codec,
laparams=laparams) laparams=laparams)
interpreter = PDFPageInterpreter(rsrcmgr, device) interpreter = PDFPageInterpreter(rsrcmgr, device)
@@ -141,7 +141,7 @@ def extract_pages(pdf_file, password='', page_numbers=None, maxpages=0,
laparams = LAParams() laparams = LAParams()
with open_filename(pdf_file, "rb") as fp: with open_filename(pdf_file, "rb") as fp:
resource_manager = PDFResourceManager() resource_manager = PDFResourceManager(caching=caching)
device = PDFPageAggregator(resource_manager, laparams=laparams) device = PDFPageAggregator(resource_manager, laparams=laparams)
interpreter = PDFPageInterpreter(resource_manager, device) interpreter = PDFPageInterpreter(resource_manager, device)
for page in PDFPage.get_pages(fp, page_numbers, maxpages=maxpages, for page in PDFPage.get_pages(fp, page_numbers, maxpages=maxpages,