Fixes #64 -- be less strict when inspecting a tree type (#76)

In a PDFStream it's possible that the /Type element is not
present while /type (lowercase) is. According to the spec, these are
different elements, but in the case at hand they had the same meaning.

If PDFMiner is not running in STRICT mode and /Type is not present,
/type is used as a fallback to determine the tree type.
pull/80/head
Sergei Maertens 2017-07-20 20:46:35 +02:00 committed by Goulu
parent 938419c476
commit 3e364354da
1 changed files with 8 additions and 2 deletions

View File

@@ -1,5 +1,6 @@
import logging
from . import settings
from .psparser import LIT
from .pdftypes import PDFObjectNotFound
from .pdftypes import resolve1
@@ -88,12 +89,17 @@ class PDFPage(object):
for (k, v) in six.iteritems(parent):
if k in klass.INHERITABLE_ATTRS and k not in tree:
tree[k] = v
if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree:
tree_type = tree.get('Type')
if tree_type is None and not settings.STRICT: # See #64
tree_type = tree.get('type')
if tree_type is LITERAL_PAGES and 'Kids' in tree:
log.info('Pages: Kids=%r', tree['Kids'])
for c in list_value(tree['Kids']):
for x in search(c, tree):
yield x
elif tree.get('Type') is LITERAL_PAGE:
elif tree_type is LITERAL_PAGE:
log.info('Page: %r', tree)
yield (objid, tree)
pages = False