2020-09-17 19:29:00 +00:00
|
|
|
import pathlib
|
2019-10-17 10:04:25 +00:00
|
|
|
|
2022-02-02 21:24:32 +00:00
|
|
|
import pytest
|
|
|
|
|
2020-09-17 19:29:00 +00:00
|
|
|
from helpers import absolute_sample_path
|
2019-10-17 10:04:25 +00:00
|
|
|
from pdfminer.layout import LTComponent
|
2022-02-02 21:24:32 +00:00
|
|
|
from pdfminer.utils import open_filename, Plane, shorten_str, \
|
|
|
|
format_int_roman, format_int_alpha
|
2020-09-17 19:29:00 +00:00
|
|
|
|
|
|
|
|
|
|
|
class TestOpenFilename:
    """Checks how ``open_filename`` reacts to each kind of input."""

    def test_string_input(self):
        """A ``str`` path yields a wrapper whose ``closing`` flag is truthy."""
        filename = absolute_sample_path("simple1.pdf")
        opened = open_filename(filename)
        try:
            assert opened.closing
        finally:
            # The test never hands the wrapper to anything that would
            # release it, so close its file handle explicitly.
            opened.file_handler.close()

    def test_pathlib_input(self):
        """A ``pathlib.Path`` yields a wrapper whose ``closing`` is truthy."""
        filename = pathlib.Path(absolute_sample_path("simple1.pdf"))
        opened = open_filename(filename)
        try:
            assert opened.closing
        finally:
            # Same reasoning as for string input: avoid leaking the handle.
            opened.file_handler.close()

    def test_file_input(self):
        """An already-open file object is exposed as ``file_handler``."""
        filename = absolute_sample_path("simple1.pdf")
        # The ``with`` block owns ``in_file`` and closes it on exit.
        with open(filename, "rb") as in_file:
            opened = open_filename(in_file)
            assert opened.file_handler == in_file

    def test_unsupported_input(self):
        """An integer argument is rejected with ``TypeError``."""
        with pytest.raises(TypeError):
            open_filename(0)
|
2019-10-17 10:04:25 +00:00
|
|
|
|
|
|
|
|
2020-01-04 15:47:07 +00:00
|
|
|
class TestPlane:
    """Exercises ``Plane.find`` and ``Plane.remove`` with a single object."""

    def test_find_nothing_in_empty_bbox(self):
        """Searching the (50, 50, 100, 100) box returns no objects."""
        grid, _ = self.given_plane_with_one_object()
        hits = list(grid.find((50, 50, 100, 100)))
        assert hits == []

    def test_find_nothing_after_removing(self):
        """Once the object is removed, a full-plane search is empty."""
        grid, component = self.given_plane_with_one_object()
        grid.remove(component)
        assert list(grid.find((0, 0, 100, 100))) == []

    def test_find_object_in_whole_plane(self):
        """A full-plane search returns exactly the added object."""
        grid, component = self.given_plane_with_one_object()
        assert list(grid.find((0, 0, 100, 100))) == [component]

    def test_find_if_object_is_smaller_than_gridsize(self):
        """An object of size 1 is found when the grid size is 100."""
        grid, component = self.given_plane_with_one_object(
            object_size=1, gridsize=100
        )
        hits = list(grid.find((0, 0, 100, 100)))
        assert hits == [component]

    def test_find_object_if_much_larger_than_gridsize(self):
        """An object of size 100 is found when the grid size is 10."""
        grid, component = self.given_plane_with_one_object(
            object_size=100, gridsize=10
        )
        hits = list(grid.find((0, 0, 100, 100)))
        assert hits == [component]

    @staticmethod
    def given_plane_with_one_object(object_size=50, gridsize=50):
        """Build a plane over (0, 0, 100, 100) holding one component.

        The component's bounding box is
        ``(0, 0, object_size, object_size)`` and ``gridsize`` is passed
        straight to ``Plane``.

        Returns:
            A ``(plane, obj)`` tuple with the plane and the added object.
        """
        component = LTComponent((0, 0, object_size, object_size))
        grid = Plane((0, 0, 100, 100), gridsize)
        grid.add(component)
        return grid, component
|
2019-11-10 11:18:49 +00:00
|
|
|
|
|
|
|
|
|
|
|
class TestFunctions:
    """Tests for ``shorten_str``, ``format_int_alpha``, ``format_int_roman``."""

    def test_shorten_str(self):
        """A 17-character string shortened to 15 keeps both ends."""
        s = shorten_str('Hello there World', 15)
        assert s == 'Hello ... World'

    def test_shorten_short_str_is_same(self):
        """A string already shorter than the limit comes back unchanged."""
        s = 'Hello World'
        assert shorten_str(s, 50) == s

    def test_shorten_to_really_short(self):
        """With a limit of 5 only the first five characters remain."""
        assert shorten_str('Hello World', 5) == 'Hello'

    def test_format_int_alpha(self):
        """Integers map to lowercase letter labels: a..z, aa..zz, aaa."""
        expectations = [
            (1, 'a'),
            (2, 'b'),
            (26, 'z'),
            (27, 'aa'),
            (28, 'ab'),
            (26 * 2, 'az'),
            (26 * 2 + 1, 'ba'),
            (26 * 27, 'zz'),
            (26 * 27 + 1, 'aaa'),
        ]
        # Cases run in the original order; the first mismatch fails the test.
        for number, expected in expectations:
            assert format_int_alpha(number) == expected

    def test_format_int_roman(self):
        """Integers map to lowercase roman numerals for the listed values."""
        expectations = [
            (1, 'i'),
            (2, 'ii'),
            (3, 'iii'),
            (4, 'iv'),
            (5, 'v'),
            (6, 'vi'),
            (7, 'vii'),
            (8, 'viii'),
            (9, 'ix'),
            (10, 'x'),
            (11, 'xi'),
            (20, 'xx'),
            (40, 'xl'),
            (45, 'xlv'),
            (50, 'l'),
            (90, 'xc'),
            (91, 'xci'),
            (100, 'c'),
        ]
        # Cases run in the original order; the first mismatch fails the test.
        for number, expected in expectations:
            assert format_int_roman(number) == expected
|