"""Tests for :class:`pdfminer.pdfdocument.PDFDocument`."""

import itertools

import pytest

from helpers import absolute_sample_path
from pdfminer.pdfdocument import PDFDocument, PDFNoPageLabels
from pdfminer.pdfparser import PDFParser
from pdfminer.pdftypes import PDFObjectNotFound, dict_value, int_value


class TestPdfDocument(object):
    """Checks on object lookup, document info and page labels."""

    def test_get_zero_objid_raises_pdfobjectnotfound(self):
        """Requesting object id 0 must raise ``PDFObjectNotFound``."""
        sample = absolute_sample_path("simple1.pdf")
        with open(sample, "rb") as stream:
            document = PDFDocument(PDFParser(stream))
            with pytest.raises(PDFObjectNotFound):
                document.getobj(0)

    def test_encrypted_no_id(self):
        """An encrypted sample without a trailer /ID still exposes its info."""
        # Regression test for
        # https://github.com/pdfminer/pdfminer.six/issues/594
        # A document can be encrypted while its trailer carries no /ID key.
        expected_info = [{"Producer": b"European Patent Office"}]
        sample = absolute_sample_path("encryption/encrypted_doc_no_id.pdf")
        with open(sample, "rb") as stream:
            document = PDFDocument(PDFParser(stream))
            assert document.info == expected_info

    def test_page_labels(self):
        """The first ``Count`` page labels match the expected sequence."""
        expected_labels = ["iii", "iv", "1", "2", "1"]
        sample = absolute_sample_path("contrib/pagelabels.pdf")
        with open(sample, "rb") as stream:
            document = PDFDocument(PDFParser(stream))
            # The page count is read from the catalog's /Pages dictionary.
            page_tree = dict_value(document.catalog["Pages"])
            page_count = int_value(page_tree["Count"])
            # Only take as many labels as the document has pages.
            labels = list(
                itertools.islice(document.get_page_labels(), page_count)
            )
            assert labels == expected_labels

    def test_no_page_labels(self):
        """``get_page_labels`` raises ``PDFNoPageLabels`` for simple1.pdf."""
        sample = absolute_sample_path("simple1.pdf")
        with open(sample, "rb") as stream:
            document = PDFDocument(PDFParser(stream))
            with pytest.raises(PDFNoPageLabels):
                document.get_page_labels()