"""Tests for helpers imported from ``pdfminer.utils``."""

import pathlib

import pytest
from helpers import absolute_sample_path
from pdfminer.layout import LTComponent
from pdfminer.utils import (
    open_filename,
    Plane,
    shorten_str,
    format_int_roman,
    format_int_alpha,
)


class TestOpenFilename:
    """Checks how ``open_filename`` treats each kind of input it is given."""

    def test_string_input(self):
        """A ``str`` path yields an object whose ``closing`` flag is set."""
        filename = absolute_sample_path("simple1.pdf")
        opened = open_filename(filename)
        # Use the object as a context manager so the handle it owns is
        # released once the assertion has run.
        with opened:
            assert opened.closing

    def test_pathlib_input(self):
        """A ``pathlib.Path`` yields an object whose ``closing`` flag is set."""
        filename = pathlib.Path(absolute_sample_path("simple1.pdf"))
        opened = open_filename(filename)
        # Same cleanup as for string input: do not leave the handle open.
        with opened:
            assert opened.closing

    def test_file_input(self):
        """An already-open file is exposed unchanged as ``file_handler``."""
        filename = absolute_sample_path("simple1.pdf")
        with open(filename, "rb") as in_file:
            opened = open_filename(in_file)
            assert opened.file_handler == in_file

    def test_unsupported_input(self):
        """An input that is neither a path nor a file raises ``TypeError``."""
        with pytest.raises(TypeError):
            open_filename(0)


class TestPlane:
    """Checks ``Plane.find`` against a plane that holds a single object."""

    def test_find_nothing_in_empty_bbox(self):
        """Querying (50, 50, 100, 100) misses the object at (0, 0, 50, 50)."""
        plane, _ = self.given_plane_with_one_object()
        result = list(plane.find((50, 50, 100, 100)))
        assert result == []

    def test_find_nothing_after_removing(self):
        """A removed object is no longer returned by ``find``."""
        plane, obj = self.given_plane_with_one_object()
        plane.remove(obj)
        result = list(plane.find((0, 0, 100, 100)))
        assert result == []

    def test_find_object_in_whole_plane(self):
        """Querying the whole plane returns the single added object."""
        plane, obj = self.given_plane_with_one_object()
        result = list(plane.find((0, 0, 100, 100)))
        assert result == [obj]

    def test_find_if_object_is_smaller_than_gridsize(self):
        """An object of size 1 is found when the grid size is 100."""
        plane, obj = self.given_plane_with_one_object(object_size=1, gridsize=100)
        result = list(plane.find((0, 0, 100, 100)))
        assert result == [obj]

    def test_find_object_if_much_larger_than_gridsize(self):
        """An object of size 100 is found exactly once with grid size 10."""
        plane, obj = self.given_plane_with_one_object(object_size=100, gridsize=10)
        result = list(plane.find((0, 0, 100, 100)))
        assert result == [obj]

    @staticmethod
    def given_plane_with_one_object(object_size=50, gridsize=50):
        """Build a (0, 0, 100, 100) plane containing one square object.

        Args:
            object_size: Side length of the object, anchored at the origin.
            gridsize: Grid size passed to ``Plane``.

        Returns:
            A ``(plane, obj)`` tuple where ``obj`` has already been added.
        """
        bounding_box = (0, 0, 100, 100)
        plane = Plane(bounding_box, gridsize)
        obj = LTComponent((0, 0, object_size, object_size))
        plane.add(obj)
        return plane, obj


class TestFunctions:
    """Checks the string and number formatting helpers."""

    def test_shorten_str(self):
        """A string longer than the limit is shortened around ``" ... "``."""
        s = shorten_str("Hello there World", 15)
        assert s == "Hello ... World"

    def test_shorten_short_str_is_same(self):
        """A string shorter than the limit is returned unchanged."""
        s = "Hello World"
        assert shorten_str(s, 50) == s

    def test_shorten_to_really_short(self):
        """With a limit of 5 the result is just the first five characters."""
        assert shorten_str("Hello World", 5) == "Hello"

    def test_format_int_alpha(self):
        """Integers map to lowercase letter labels: a..z, aa..zz, aaa, ..."""
        assert format_int_alpha(1) == "a"
        assert format_int_alpha(2) == "b"
        assert format_int_alpha(26) == "z"
        assert format_int_alpha(27) == "aa"
        assert format_int_alpha(28) == "ab"
        assert format_int_alpha(26 * 2) == "az"
        assert format_int_alpha(26 * 2 + 1) == "ba"
        assert format_int_alpha(26 * 27) == "zz"
        assert format_int_alpha(26 * 27 + 1) == "aaa"

    def test_format_int_roman(self):
        """Integers map to lowercase Roman numerals."""
        assert format_int_roman(1) == "i"
        assert format_int_roman(2) == "ii"
        assert format_int_roman(3) == "iii"
        assert format_int_roman(4) == "iv"
        assert format_int_roman(5) == "v"
        assert format_int_roman(6) == "vi"
        assert format_int_roman(7) == "vii"
        assert format_int_roman(8) == "viii"
        assert format_int_roman(9) == "ix"
        assert format_int_roman(10) == "x"
        assert format_int_roman(11) == "xi"
        assert format_int_roman(20) == "xx"
        assert format_int_roman(40) == "xl"
        assert format_int_roman(45) == "xlv"
        assert format_int_roman(50) == "l"
        assert format_int_roman(90) == "xc"
        assert format_int_roman(91) == "xci"
        assert format_int_roman(100) == "c"