48 lines
1.7 KiB
Python
48 lines
1.7 KiB
Python
import itertools
|
|
|
|
from nose.tools import assert_equal, raises
|
|
|
|
from helpers import absolute_sample_path
|
|
from pdfminer.pdfdocument import PDFDocument, PDFNoPageLabels
|
|
from pdfminer.pdfparser import PDFParser
|
|
from pdfminer.pdftypes import PDFObjectNotFound, dict_value, int_value
|
|
|
|
|
|
class TestPdfDocument(object):
    """Tests for PDFDocument, run against the sample PDF files."""

    @raises(PDFObjectNotFound)
    def test_get_zero_objid_raises_pdfobjectnotfound(self):
        # Passes only when PDFObjectNotFound escapes the body (see the
        # @raises decorator); object id 0 is the lookup being exercised.
        sample_path = absolute_sample_path('simple1.pdf')
        with open(sample_path, 'rb') as stream:
            pdf_parser = PDFParser(stream)
            document = PDFDocument(pdf_parser)
            document.getobj(0)

    def test_encrypted_no_id(self):
        # Regression test for
        # https://github.com/pdfminer/pdfminer.six/issues/594
        # An encrypted document is not guaranteed to carry an /ID key in
        # its trailer; this sample is such a file.
        expected_info = [{'Producer': b'European Patent Office'}]
        sample_path = absolute_sample_path(
            'encryption/encrypted_doc_no_id.pdf')
        with open(sample_path, 'rb') as stream:
            pdf_parser = PDFParser(stream)
            document = PDFDocument(pdf_parser)
            assert_equal(document.info, expected_info)

    def test_page_labels(self):
        # The sample's labels are compared against a fixed list; only as
        # many labels as the catalog's page count are drawn.
        expected_labels = ['iii', 'iv', '1', '2', '1']
        sample_path = absolute_sample_path('contrib/pagelabels.pdf')
        with open(sample_path, 'rb') as stream:
            pdf_parser = PDFParser(stream)
            document = PDFDocument(pdf_parser)
            pages_node = dict_value(document.catalog['Pages'])
            page_count = int_value(pages_node['Count'])
            leading_labels = itertools.islice(
                document.get_page_labels(), page_count)
            assert_equal(list(leading_labels), expected_labels)

    @raises(PDFNoPageLabels)
    def test_no_page_labels(self):
        # Passes only when PDFNoPageLabels escapes the body (see the
        # @raises decorator) while asking simple1.pdf for page labels.
        sample_path = absolute_sample_path('simple1.pdf')
        with open(sample_path, 'rb') as stream:
            pdf_parser = PDFParser(stream)
            document = PDFDocument(pdf_parser)
            document.get_page_labels()
|