2022-02-01 09:08:05 +00:00
|
|
|
import itertools
|
|
|
|
|
2022-02-02 21:24:32 +00:00
|
|
|
import pytest
|
2019-10-25 20:49:58 +00:00
|
|
|
|
2019-10-26 16:42:33 +00:00
|
|
|
from helpers import absolute_sample_path
|
2022-02-01 09:08:05 +00:00
|
|
|
from pdfminer.pdfdocument import PDFDocument, PDFNoPageLabels
|
2019-10-25 20:49:58 +00:00
|
|
|
from pdfminer.pdfparser import PDFParser
|
2022-02-01 09:08:05 +00:00
|
|
|
from pdfminer.pdftypes import PDFObjectNotFound, dict_value, int_value
|
2019-10-25 20:49:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
class TestPdfDocument(object):
    """Tests that exercise ``PDFDocument`` against the sample PDF files."""

    def test_get_zero_objid_raises_pdfobjectnotfound(self):
        """Asking the document for object id 0 raises ``PDFObjectNotFound``."""
        sample = absolute_sample_path("simple1.pdf")
        with open(sample, "rb") as stream:
            document = PDFDocument(PDFParser(stream))
            with pytest.raises(PDFObjectNotFound):
                document.getobj(0)

    def test_encrypted_no_id(self):
        """Check ``info`` of an encrypted document without an /ID key.

        Some documents may be encrypted but not have an /ID key in
        their trailer. Tests
        https://github.com/pdfminer/pdfminer.six/issues/594
        """
        expected_info = [{"Producer": b"European Patent Office"}]
        sample = absolute_sample_path("encryption/encrypted_doc_no_id.pdf")
        with open(sample, "rb") as stream:
            document = PDFDocument(PDFParser(stream))
            assert document.info == expected_info

    def test_page_labels(self):
        """The first ``Count`` page labels match the expected sequence."""
        expected_labels = ["iii", "iv", "1", "2", "1"]
        sample = absolute_sample_path("contrib/pagelabels.pdf")
        with open(sample, "rb") as stream:
            document = PDFDocument(PDFParser(stream))
            # The page count is read from the catalog's /Pages dictionary and
            # used to bound how many labels are taken from the iterator.
            pages_dict = dict_value(document.catalog["Pages"])
            page_count = int_value(pages_dict["Count"])
            labels = itertools.islice(document.get_page_labels(), page_count)
            assert list(labels) == expected_labels

    def test_no_page_labels(self):
        """``get_page_labels`` raises ``PDFNoPageLabels`` for simple1.pdf."""
        sample = absolute_sample_path("simple1.pdf")
        with open(sample, "rb") as stream:
            document = PDFDocument(PDFParser(stream))
            with pytest.raises(PDFNoPageLabels):
                document.get_page_labels()
|