diff --git a/CHANGELOG.md b/CHANGELOG.md
index dc0df88..b2386d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
 ## [Unreleased]
 
+### Added
+- Support for `pathlib.PurePath` in `open_filename` ([#491](https://github.com/pdfminer/pdfminer.six/issues/491))
+
 ### Fixed
 - Pass caching parameter to PDFResourceManager in `high_level` functions ([#475](https://github.com/pdfminer/pdfminer.six/pull/475))
 
diff --git a/pdfminer/utils.py b/pdfminer/utils.py
index 8531f79..0c67ff1 100644
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@@ -1,6 +1,8 @@
 """
 Miscellaneous Routines.
 """
+import io
+import pathlib
 import struct
 
 from html import escape
@@ -13,16 +15,21 @@ INF = (1 << 31) - 1
 
 class open_filename(object):
     """
-    Context manager that allows opening a filename and closes it on exit,
+    Context manager that allows opening a filename
+    (str or pathlib.PurePath type is supported) and closes it on exit,
     (just like `open`), but does nothing for file-like objects.
     """
     def __init__(self, filename, *args, **kwargs):
+        if isinstance(filename, pathlib.PurePath):
+            filename = str(filename)
         if isinstance(filename, str):
             self.file_handler = open(filename, *args, **kwargs)
             self.closing = True
-        else:
+        elif isinstance(filename, io.IOBase):
             self.file_handler = filename
             self.closing = False
+        else:
+            raise TypeError('Unsupported input type: %s' % type(filename))
 
     def __enter__(self):
         return self.file_handler
diff --git a/tests/test_utils.py b/tests/test_utils.py
index b0c7bac..dca99a6 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,7 +1,30 @@
-from nose.tools import assert_equal
+from nose.tools import assert_equal, assert_raises
+import pathlib
 
+from helpers import absolute_sample_path
 from pdfminer.layout import LTComponent
-from pdfminer.utils import Plane, shorten_str
+from pdfminer.utils import open_filename, Plane, shorten_str
+
+
+class TestOpenFilename:
+    def test_string_input(self):
+        filename = absolute_sample_path("simple1.pdf")
+        opened = open_filename(filename)
+        assert_equal(opened.closing, True)
+
+    def test_pathlib_input(self):
+        filename = pathlib.Path(absolute_sample_path("simple1.pdf"))
+        opened = open_filename(filename)
+        assert_equal(opened.closing, True)
+
+    def test_file_input(self):
+        filename = absolute_sample_path("simple1.pdf")
+        with open(filename, "rb") as in_file:
+            opened = open_filename(in_file)
+            assert_equal(opened.file_handler, in_file)
+
+    def test_unsupported_input(self):
+        assert_raises(TypeError, open_filename, 0)
 
 
 class TestPlane: