2022-02-02 21:24:32 +00:00
|
|
|
from typing import Any, Iterable, List, Optional, Tuple
|
2022-02-01 09:08:05 +00:00
|
|
|
|
|
|
|
from pdfminer import settings
|
|
|
|
from pdfminer.pdfparser import PDFSyntaxError
|
|
|
|
from pdfminer.pdftypes import list_value, int_value, dict_value
|
|
|
|
from pdfminer.utils import choplist
|
|
|
|
|
|
|
|
|
|
|
|
class NumberTree:
    """A PDF number tree.

    A node is built from a dictionary-like PDF object. Its optional
    "Nums", "Kids" and "Limits" entries are exposed as the ``nums``,
    ``kids`` and ``limits`` attributes (``None`` when the entry is absent).

    See Section 3.8.6 of the PDF Reference.
    """

    def __init__(self, obj: Any):
        """Wrap *obj* (passed through ``dict_value``) as a tree node."""
        node = dict_value(obj)
        self._obj = node

        # Each entry is optional; a missing key leaves the attribute as None.
        self.nums: Optional[Iterable[Any]] = (
            list_value(node["Nums"]) if "Nums" in node else None
        )
        self.kids: Optional[Iterable[Any]] = (
            list_value(node["Kids"]) if "Kids" in node else None
        )
        # "Limits" is stored but not consulted by the methods of this class.
        self.limits: Optional[Iterable[Any]] = (
            list_value(node["Limits"]) if "Limits" in node else None
        )

    def _parse(self) -> List[Tuple[int, Any]]:
        """Collect the (number, value) pairs of this node and its subtree.

        Pairs found directly on this node come first, followed by the pairs
        of every kid in the order the kids are listed. No sorting is done.
        """
        pairs: List[Tuple[int, Any]] = []

        if self.nums:  # Leaf node
            # "Nums" is a flat sequence: key, value, key, value, ...
            pairs.extend(
                (int_value(number), entry)
                for number, entry in choplist(2, self.nums)
            )

        if self.kids:  # Root or intermediate node
            for kid in self.kids:
                pairs.extend(NumberTree(kid)._parse())

        return pairs

    values: List[Tuple[int, Any]]  # workaround decorators unsupported by mypy

    @property  # type: ignore[no-redef,misc]
    def values(self) -> List[Tuple[int, Any]]:
        """Return every (number, value) pair of the tree, ordered by number.

        When ``settings.STRICT`` is set, the pairs must already be in
        non-decreasing order and ``PDFSyntaxError`` is raised otherwise.
        When it is not set, the pairs are sorted by number instead.
        """
        entries = self._parse()

        if not settings.STRICT:
            # Lenient mode: repair the ordering rather than rejecting it.
            entries.sort(key=lambda pair: pair[0])
            return entries

        # Strict mode: compare each pair with its successor.
        for earlier, later in zip(entries, entries[1:]):
            if earlier[0] > later[0]:
                raise PDFSyntaxError("Number tree elements are out of order")

        return entries
|