pdfminer.six/pdfminer/data_structures.py

54 lines
1.6 KiB
Python
Raw Normal View History

import functools
from typing import Any, Dict, Iterable, List, Optional, Tuple
from pdfminer import settings
from pdfminer.pdfparser import PDFSyntaxError
from pdfminer.pdftypes import list_value, int_value, dict_value
from pdfminer.utils import choplist
class NumberTree:
    """A PDF number tree.

    See Section 3.8.6 of the PDF Reference.

    A node is a dictionary that may carry a 'Nums' list (alternating
    key, value entries), a 'Kids' list (child nodes) and a 'Limits' list.
    """

    def __init__(self, obj: Any):
        """Read the 'Nums', 'Kids' and 'Limits' entries of a tree node.

        :param obj: the node; it is resolved with ``dict_value`` and each
            entry that is present is resolved with ``list_value``.
        """
        self._obj = dict_value(obj)
        self.nums: Optional[Iterable[Any]] = None
        self.kids: Optional[Iterable[Any]] = None
        self.limits: Optional[Iterable[Any]] = None
        # Per-instance memo for the ``values`` property. Keeping it on the
        # instance (rather than in a module-level lru_cache keyed on self)
        # lets the tree be garbage collected together with its cache.
        self._values: Optional[List[Tuple[int, Any]]] = None

        if 'Nums' in self._obj:
            self.nums = list_value(self._obj['Nums'])
        if 'Kids' in self._obj:
            self.kids = list_value(self._obj['Kids'])
        if 'Limits' in self._obj:
            self.limits = list_value(self._obj['Limits'])

    def _parse(self) -> List[Tuple[int, Any]]:
        """Collect the (key, value) pairs of this node and all its kids.

        Pairs are returned in the order they are encountered: this node's
        own 'Nums' entries first, then each kid's pairs in 'Kids' order.
        No ordering check or sorting happens here.

        :return: a new list of ``(int key, value)`` tuples.
        """
        items: List[Tuple[int, Any]] = []

        if self.nums:  # Leaf node
            # 'Nums' is a flat [key1, value1, key2, value2, ...] list.
            items.extend(
                (int_value(k), v) for k, v in choplist(2, self.nums)
            )

        if self.kids:  # Root or intermediate node
            for child_ref in self.kids:
                items.extend(NumberTree(child_ref)._parse())

        return items

    @property
    def values(self) -> List[Tuple[int, Any]]:
        """All (key, value) pairs of the tree, ordered by key.

        The result is computed on first access and then remembered on the
        instance, so repeated accesses return the same list object.

        When ``settings.STRICT`` is set, keys that are not already in
        non-decreasing order are an error; otherwise the pairs are sorted
        by key.

        :return: list of ``(int key, value)`` tuples.
        :raises PDFSyntaxError: in strict mode, if the keys are out of
            order. A failed check is not remembered, so the next access
            parses again.
        """
        if self._values is not None:
            return self._values

        values = self._parse()

        if settings.STRICT:
            if not all(a[0] <= b[0] for a, b in zip(values, values[1:])):
                raise PDFSyntaxError('Number tree elements are out of order')
        else:
            values.sort(key=lambda t: t[0])

        self._values = values
        return values