pdfminer.six/tests/test_highlevel_extracttext.py

63 lines
1.7 KiB
Python

import unittest
from helpers import absolute_sample_path
from pdfminer.high_level import extract_text
def run_with_string(sample_path):
absolute_path = absolute_sample_path(sample_path)
s = extract_text(absolute_path)
return s
def run_with_file(sample_path):
absolute_path = absolute_sample_path(sample_path)
with open(absolute_path, "rb") as in_file:
s = extract_text(in_file)
return s
test_strings = {
"simple1.pdf": "Hello \n\nWorld\n\nHello \n\nWorld\n\n"
"H e l l o \n\nW o r l d\n\n"
"H e l l o \n\nW o r l d\n\n\f",
"simple2.pdf": "\f",
"simple3.pdf": "Hello\n\nHello\n\nWorld\n\nWorld\n\n\f",
}
class TestExtractText(unittest.TestCase):
def test_simple1_with_string(self):
test_file = "simple1.pdf"
s = run_with_string(test_file)
self.assertEqual(s, test_strings[test_file])
def test_simple2_with_string(self):
test_file = "simple2.pdf"
s = run_with_string(test_file)
self.assertEqual(s, test_strings[test_file])
def test_simple3_with_string(self):
test_file = "simple3.pdf"
s = run_with_string(test_file)
self.assertEqual(s, test_strings[test_file])
def test_simple1_with_file(self):
test_file = "simple1.pdf"
s = run_with_file(test_file)
self.assertEqual(s, test_strings[test_file])
def test_simple2_with_file(self):
test_file = "simple2.pdf"
s = run_with_file(test_file)
self.assertEqual(s, test_strings[test_file])
def test_simple3_with_file(self):
test_file = "simple3.pdf"
s = run_with_file(test_file)
self.assertEqual(s, test_strings[test_file])
if __name__ == "__main__":
unittest.main()