test/tests/test_utils.py

116 lines
3.8 KiB
Python
Raw Normal View History

2023-08-07 12:10:10 +00:00
import pathlib
import pytest
from helpers import absolute_sample_path
from pdfminer.layout import LTComponent
from pdfminer.utils import (
open_filename,
Plane,
shorten_str,
format_int_roman,
format_int_alpha,
)
class TestOpenFilename:
    """Tests for how ``open_filename`` treats each supported kind of input."""

    def test_string_input(self):
        """A ``str`` path yields a wrapper whose ``closing`` flag is set."""
        filename = absolute_sample_path("simple1.pdf")
        opened = open_filename(filename)
        # Leave through the wrapper's context-manager exit so that a handle
        # it owns is released instead of lingering until garbage collection.
        with opened:
            assert opened.closing

    def test_pathlib_input(self):
        """A ``pathlib.Path`` yields a wrapper whose ``closing`` flag is set."""
        filename = pathlib.Path(absolute_sample_path("simple1.pdf"))
        opened = open_filename(filename)
        # Same cleanup as for string input: do not leak the handle.
        with opened:
            assert opened.closing

    def test_file_input(self):
        """An already-open file object is exposed unchanged as ``file_handler``."""
        filename = absolute_sample_path("simple1.pdf")
        # The surrounding ``with`` owns the handle here, so no extra cleanup
        # of the wrapper is required.
        with open(filename, "rb") as in_file:
            opened = open_filename(in_file)
            assert opened.file_handler == in_file

    def test_unsupported_input(self):
        """An input such as an ``int`` is rejected with ``TypeError``."""
        with pytest.raises(TypeError):
            open_filename(0)
class TestPlane:
    """Tests for ``Plane.find`` on a plane that holds a single object."""

    @staticmethod
    def given_plane_with_one_object(object_size=50, gridsize=50):
        """Create a (0, 0, 100, 100) plane containing one component.

        The component's bounding box is
        ``(0, 0, object_size, object_size)``. Returns a ``(plane, component)``
        pair.
        """
        component = LTComponent((0, 0, object_size, object_size))
        plane = Plane((0, 0, 100, 100), gridsize)
        plane.add(component)
        return plane, component

    def test_find_nothing_in_empty_bbox(self):
        """Searching the (50, 50, 100, 100) box yields no objects."""
        plane = self.given_plane_with_one_object()[0]
        found = list(plane.find((50, 50, 100, 100)))
        assert found == []

    def test_find_nothing_after_removing(self):
        """Once the object is removed, a whole-plane search is empty."""
        plane, component = self.given_plane_with_one_object()
        plane.remove(component)
        found = list(plane.find((0, 0, 100, 100)))
        assert found == []

    def test_find_object_in_whole_plane(self):
        """A whole-plane search returns exactly the added object."""
        plane, component = self.given_plane_with_one_object()
        found = list(plane.find((0, 0, 100, 100)))
        assert found == [component]

    def test_find_if_object_is_smaller_than_gridsize(self):
        """The object is found when its size (1) is below the gridsize (100)."""
        plane, component = self.given_plane_with_one_object(
            object_size=1, gridsize=100
        )
        found = list(plane.find((0, 0, 100, 100)))
        assert found == [component]

    def test_find_object_if_much_larger_than_gridsize(self):
        """The object is found once when its size (100) exceeds the gridsize (10)."""
        plane, component = self.given_plane_with_one_object(
            object_size=100, gridsize=10
        )
        found = list(plane.find((0, 0, 100, 100)))
        assert found == [component]
class TestFunctions:
    """Tests for ``shorten_str``, ``format_int_alpha`` and ``format_int_roman``."""

    def test_shorten_str(self):
        """A 17-character string cut to 15 keeps both ends around ``...``."""
        shortened = shorten_str("Hello there World", 15)
        assert shortened == "Hello ... World"

    def test_shorten_short_str_is_same(self):
        """A string shorter than the limit comes back unchanged."""
        original = "Hello World"
        shortened = shorten_str(original, 50)
        assert shortened == original

    def test_shorten_to_really_short(self):
        """With a limit of 5 only the first five characters remain."""
        shortened = shorten_str("Hello World", 5)
        assert shortened == "Hello"

    def test_format_int_alpha(self):
        """Values map to lowercase letters, rolling over from ``z`` to ``aa``."""
        # Checked in this order; the first mismatch fails the test.
        expectations = (
            (1, "a"),
            (2, "b"),
            (26, "z"),
            (27, "aa"),
            (28, "ab"),
            (26 * 2, "az"),
            (26 * 2 + 1, "ba"),
            (26 * 27, "zz"),
            (26 * 27 + 1, "aaa"),
        )
        for value, expected in expectations:
            assert format_int_alpha(value) == expected

    def test_format_int_roman(self):
        """Values from 1 to 100 map to lowercase roman numerals."""
        # Checked in this order; the first mismatch fails the test.
        expectations = (
            (1, "i"),
            (2, "ii"),
            (3, "iii"),
            (4, "iv"),
            (5, "v"),
            (6, "vi"),
            (7, "vii"),
            (8, "viii"),
            (9, "ix"),
            (10, "x"),
            (11, "xi"),
            (20, "xx"),
            (40, "xl"),
            (45, "xlv"),
            (50, "l"),
            (90, "xc"),
            (91, "xci"),
            (100, "c"),
        )
        for value, expected in expectations:
            assert format_int_roman(value) == expected