From f03657e5c48152d4a86a68c680131d5203c8e804 Mon Sep 17 00:00:00 2001 From: estshorter <1430311+estshorter@users.noreply.github.com> Date: Fri, 18 Sep 2020 04:29:00 +0900 Subject: [PATCH] Allow a pathlib.PurePath object as an input to open_filename (#492) * open_filename accepts a pathlib.PurePath object * Add test for open_filename with pathlib * Fix a wrong function name * Cast a pathlib object to string for py3.4/3.5 * Add link to the PR * Raise an exception when open_filename gets an unsupported type * Add tests for open_filename * Update CHANGELOG.md * Documentation Co-authored-by: Pieter Marsman --- CHANGELOG.md | 3 +++ pdfminer/utils.py | 11 +++++++++-- tests/test_utils.py | 27 +++++++++++++++++++++++++-- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc0df88..b2386d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [Unreleased] +### Added +- Support for `pathlib.PurePath` in `open_filename` ([#491](https://github.com/pdfminer/pdfminer.six/issues/491)) + ### Fixed - Pass caching parameter to PDFResourceManager in `high_level` functions ([#475](https://github.com/pdfminer/pdfminer.six/pull/475)) diff --git a/pdfminer/utils.py b/pdfminer/utils.py index 8531f79..0c67ff1 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -1,6 +1,8 @@ """ Miscellaneous Routines. """ +import io +import pathlib import struct from html import escape @@ -13,16 +15,21 @@ INF = (1 << 31) - 1 class open_filename(object): """ - Context manager that allows opening a filename and closes it on exit, + Context manager that allows opening a filename + (str or pathlib.PurePath type is supported) and closes it on exit, (just like `open`), but does nothing for file-like objects. 
""" def __init__(self, filename, *args, **kwargs): + if isinstance(filename, pathlib.PurePath): + filename = str(filename) if isinstance(filename, str): self.file_handler = open(filename, *args, **kwargs) self.closing = True - else: + elif isinstance(filename, io.IOBase): self.file_handler = filename self.closing = False + else: + raise TypeError('Unsupported input type: %s' % type(filename)) def __enter__(self): return self.file_handler diff --git a/tests/test_utils.py b/tests/test_utils.py index b0c7bac..dca99a6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,30 @@ -from nose.tools import assert_equal +from nose.tools import assert_equal, assert_raises +import pathlib +from helpers import absolute_sample_path from pdfminer.layout import LTComponent -from pdfminer.utils import Plane, shorten_str +from pdfminer.utils import open_filename, Plane, shorten_str + + +class TestOpenFilename: + def test_string_input(self): + filename = absolute_sample_path("simple1.pdf") + opened = open_filename(filename) + assert_equal(opened.closing, True) + + def test_pathlib_input(self): + filename = pathlib.Path(absolute_sample_path("simple1.pdf")) + opened = open_filename(filename) + assert_equal(opened.closing, True) + + def test_file_input(self): + filename = absolute_sample_path("simple1.pdf") + with open(filename, "rb") as in_file: + opened = open_filename(in_file) + assert_equal(opened.file_handler, in_file) + + def test_unsupported_input(self): + assert_raises(TypeError, open_filename, 0) class TestPlane: