pdfminer.six/tests/test_pdfdocument.py

import itertools

from nose.tools import assert_equal, raises

from helpers import absolute_sample_path
from pdfminer.pdfdocument import PDFDocument, PDFNoPageLabels
from pdfminer.pdfparser import PDFParser
from pdfminer.pdftypes import PDFObjectNotFound, dict_value, int_value


class TestPdfDocument(object):

    @raises(PDFObjectNotFound)
    def test_get_zero_objid_raises_pdfobjectnotfound(self):
        with open(absolute_sample_path('simple1.pdf'), 'rb') as in_file:
            parser = PDFParser(in_file)
            doc = PDFDocument(parser)
            doc.getobj(0)

    def test_encrypted_no_id(self):
        # Some documents may be encrypted but not have an /ID key in
        # their trailer. Tests
        # https://github.com/pdfminer/pdfminer.six/issues/594
        path = absolute_sample_path('encryption/encrypted_doc_no_id.pdf')
        with open(path, 'rb') as fp:
            parser = PDFParser(fp)
            doc = PDFDocument(parser)
            assert_equal(doc.info,
                         [{'Producer': b'European Patent Office'}])

    def test_page_labels(self):
        path = absolute_sample_path('contrib/pagelabels.pdf')
        with open(path, 'rb') as fp:
            parser = PDFParser(fp)
            doc = PDFDocument(parser)
            total_pages = int_value(dict_value(doc.catalog['Pages'])['Count'])
            assert_equal(
                list(itertools.islice(doc.get_page_labels(), total_pages)),
                ['iii', 'iv', '1', '2', '1'])

    @raises(PDFNoPageLabels)
    def test_no_page_labels(self):
        path = absolute_sample_path('simple1.pdf')
        with open(path, 'rb') as fp:
            parser = PDFParser(fp)
            doc = PDFDocument(parser)
            doc.get_page_labels()