Merge pull request #16 from stevenhair/settings-management
Improved settings management
pull/17/head
commit
4f762cb897
|
@ -19,7 +19,7 @@ from .psparser import PSEOF
|
|||
from .psparser import literal_name
|
||||
from .psparser import LIT
|
||||
from .psparser import KWD
|
||||
from .settings import STRICT
|
||||
from . import settings
|
||||
from .pdftypes import PDFException
|
||||
from .pdftypes import PDFTypeError
|
||||
from .pdftypes import PDFStream
|
||||
|
@ -196,7 +196,7 @@ class PDFXRefFallback(PDFXRef):
|
|||
try:
|
||||
n = stream['N']
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFSyntaxError('N is not defined: %r' % stream)
|
||||
n = 0
|
||||
parser1 = PDFStreamParser(stream.get_data())
|
||||
|
@ -582,7 +582,7 @@ class PDFDocument(object):
|
|||
else:
|
||||
raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
|
||||
if self.catalog.get('Type') is not LITERAL_CATALOG:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFSyntaxError('Catalog not found!')
|
||||
return
|
||||
|
||||
|
@ -620,12 +620,12 @@ class PDFDocument(object):
|
|||
|
||||
def _get_objects(self, stream):
|
||||
if stream.get('Type') is not LITERAL_OBJSTM:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFSyntaxError('Not a stream object: %r' % stream)
|
||||
try:
|
||||
n = stream['N']
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFSyntaxError('N is not defined: %r' % stream)
|
||||
n = 0
|
||||
parser = PDFStreamParser(stream.get_data())
|
||||
|
|
|
@ -12,7 +12,7 @@ from .psparser import PSStackParser
|
|||
from .psparser import PSEOF
|
||||
from .psparser import LIT
|
||||
from .psparser import KWD
|
||||
from .settings import STRICT
|
||||
from . import settings
|
||||
from .psparser import PSLiteral
|
||||
from .psparser import literal_name
|
||||
from .pdftypes import PDFException
|
||||
|
@ -574,7 +574,7 @@ class PDFType1Font(PDFSimpleFont):
|
|||
try:
|
||||
self.basefont = literal_name(spec['BaseFont'])
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFFontError('BaseFont is missing')
|
||||
self.basefont = 'unknown'
|
||||
try:
|
||||
|
@ -632,11 +632,11 @@ class PDFType3Font(PDFSimpleFont):
|
|||
# PDFCIDFont
|
||||
class PDFCIDFont(PDFFont):
|
||||
|
||||
def __init__(self, rsrcmgr, spec, STRICT=False):
|
||||
def __init__(self, rsrcmgr, spec, strict=settings.STRICT):
|
||||
try:
|
||||
self.basefont = literal_name(spec['BaseFont'])
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
if strict:
|
||||
raise PDFFontError('BaseFont is missing')
|
||||
self.basefont = 'unknown'
|
||||
self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
|
||||
|
@ -645,19 +645,19 @@ class PDFCIDFont(PDFFont):
|
|||
try:
|
||||
name = literal_name(spec['Encoding'])
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
if strict:
|
||||
raise PDFFontError('Encoding is unspecified')
|
||||
name = 'unknown'
|
||||
try:
|
||||
self.cmap = CMapDB.get_cmap(name)
|
||||
except CMapDB.CMapNotFound as e:
|
||||
if STRICT:
|
||||
if strict:
|
||||
raise PDFFontError(e)
|
||||
self.cmap = CMap()
|
||||
try:
|
||||
descriptor = dict_value(spec['FontDescriptor'])
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
if strict:
|
||||
raise PDFFontError('FontDescriptor is missing')
|
||||
descriptor = {}
|
||||
ttf = None
|
||||
|
|
|
@ -12,7 +12,7 @@ from .psparser import keyword_name
|
|||
from .psparser import PSStackParser
|
||||
from .psparser import LIT
|
||||
from .psparser import KWD
|
||||
from .settings import STRICT
|
||||
from . import settings
|
||||
from .pdftypes import PDFException
|
||||
from .pdftypes import PDFStream
|
||||
from .pdftypes import PDFObjRef
|
||||
|
@ -167,14 +167,14 @@ class PDFResourceManager(object):
|
|||
font = self._cached_fonts[objid]
|
||||
else:
|
||||
logging.info('get_font: create: objid=%r, spec=%r', objid, spec)
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
if spec['Type'] is not LITERAL_FONT:
|
||||
raise PDFFontError('Type is not /Font')
|
||||
# Create a Font object.
|
||||
if 'Subtype' in spec:
|
||||
subtype = literal_name(spec['Subtype'])
|
||||
else:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFFontError('Font Subtype is not specified.')
|
||||
subtype = 'Type1'
|
||||
if subtype in ('Type1', 'MMType1'):
|
||||
|
@ -199,7 +199,7 @@ class PDFResourceManager(object):
|
|||
subspec[k] = resolve1(spec[k])
|
||||
font = self.get_font(None, subspec)
|
||||
else:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFFontError('Invalid Font spec: %r' % spec)
|
||||
font = PDFType1Font(self, spec) # this is so wrong!
|
||||
if objid and self.caching:
|
||||
|
@ -299,7 +299,7 @@ class PDFContentParser(PSStackParser):
|
|||
self.push((pos, obj))
|
||||
self.push((pos, self.KEYWORD_EI))
|
||||
except PSTypeError:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise
|
||||
else:
|
||||
self.push((pos, token))
|
||||
|
@ -559,7 +559,7 @@ class PDFPageInterpreter(object):
|
|||
try:
|
||||
self.scs = self.csmap[literal_name(name)]
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
|
||||
return
|
||||
|
||||
|
@ -568,7 +568,7 @@ class PDFPageInterpreter(object):
|
|||
try:
|
||||
self.ncs = self.csmap[literal_name(name)]
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
|
||||
return
|
||||
|
||||
|
@ -607,7 +607,7 @@ class PDFPageInterpreter(object):
|
|||
if self.scs:
|
||||
n = self.scs.ncomponents
|
||||
else:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFInterpreterError('No colorspace specified!')
|
||||
n = 1
|
||||
self.pop(n)
|
||||
|
@ -617,7 +617,7 @@ class PDFPageInterpreter(object):
|
|||
if self.ncs:
|
||||
n = self.ncs.ncomponents
|
||||
else:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFInterpreterError('No colorspace specified!')
|
||||
n = 1
|
||||
self.pop(n)
|
||||
|
@ -698,7 +698,7 @@ class PDFPageInterpreter(object):
|
|||
try:
|
||||
self.textstate.font = self.fontmap[literal_name(fontid)]
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFInterpreterError('Undefined Font id: %r' % fontid)
|
||||
self.textstate.font = self.rsrcmgr.get_font(None, {})
|
||||
self.textstate.fontsize = fontsize
|
||||
|
@ -748,7 +748,7 @@ class PDFPageInterpreter(object):
|
|||
def do_TJ(self, seq):
|
||||
#print >>sys.stderr, 'TJ(%r): %r' % (seq, self.textstate)
|
||||
if self.textstate.font is None:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFInterpreterError('No font specified!')
|
||||
return
|
||||
self.device.render_string(self.textstate, seq)
|
||||
|
@ -793,7 +793,7 @@ class PDFPageInterpreter(object):
|
|||
try:
|
||||
xobj = stream_value(self.xobjmap[xobjid])
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
|
||||
return
|
||||
logging.info('Processing xobj: %r', xobj)
|
||||
|
@ -872,7 +872,7 @@ class PDFPageInterpreter(object):
|
|||
logging.debug('exec: %s', name)
|
||||
func()
|
||||
else:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFInterpreterError('Unknown operator: %r' % name)
|
||||
else:
|
||||
self.push(obj)
|
||||
|
|
|
@ -5,7 +5,7 @@ from .psparser import PSStackParser
|
|||
from .psparser import PSSyntaxError
|
||||
from .psparser import PSEOF
|
||||
from .psparser import KWD
|
||||
from .settings import STRICT
|
||||
from . import settings
|
||||
from .pdftypes import PDFException
|
||||
from .pdftypes import PDFStream
|
||||
from .pdftypes import PDFObjRef
|
||||
|
@ -89,13 +89,13 @@ class PDFParser(PSStackParser):
|
|||
try:
|
||||
objlen = int_value(dic['Length'])
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFSyntaxError('/Length is undefined: %r' % dic)
|
||||
self.seek(pos)
|
||||
try:
|
||||
(_, line) = self.nextline() # 'stream'
|
||||
except PSEOF:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFSyntaxError('Unexpected EOF')
|
||||
return
|
||||
pos += len(line)
|
||||
|
@ -106,7 +106,7 @@ class PDFParser(PSStackParser):
|
|||
try:
|
||||
(linepos, line) = self.nextline()
|
||||
except PSEOF:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFSyntaxError('Unexpected EOF')
|
||||
break
|
||||
if b'endstream' in line:
|
||||
|
@ -164,7 +164,7 @@ class PDFStreamParser(PDFParser):
|
|||
pass
|
||||
return
|
||||
elif token in (self.KEYWORD_OBJ, self.KEYWORD_ENDOBJ):
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
# See PDF Spec 3.4.6: Only the object values are stored in the
|
||||
# stream; the obj and endobj keywords are not used.
|
||||
raise PDFSyntaxError('Keyword endobj found in stream')
|
||||
|
|
|
@ -8,7 +8,7 @@ from .ccitt import ccittfaxdecode
|
|||
from .psparser import PSException
|
||||
from .psparser import PSObject
|
||||
from .psparser import LIT
|
||||
from .settings import STRICT
|
||||
from . import settings
|
||||
from .utils import apply_png_predictor
|
||||
from .utils import isnumber
|
||||
|
||||
|
@ -53,7 +53,7 @@ class PDFObjRef(PDFObject):
|
|||
|
||||
def __init__(self, doc, objid, _):
|
||||
if objid == 0:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFValueError('PDF object id cannot be 0.')
|
||||
self.doc = doc
|
||||
self.objid = objid
|
||||
|
@ -115,7 +115,7 @@ def decipher_all(decipher, objid, genno, x):
|
|||
def int_value(x):
|
||||
x = resolve1(x)
|
||||
if not isinstance(x, int):
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFTypeError('Integer required: %r' % x)
|
||||
return 0
|
||||
return x
|
||||
|
@ -124,7 +124,7 @@ def int_value(x):
|
|||
def float_value(x):
|
||||
x = resolve1(x)
|
||||
if not isinstance(x, float):
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFTypeError('Float required: %r' % x)
|
||||
return 0.0
|
||||
return x
|
||||
|
@ -133,7 +133,7 @@ def float_value(x):
|
|||
def num_value(x):
|
||||
x = resolve1(x)
|
||||
if not isnumber(x):
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFTypeError('Int or Float required: %r' % x)
|
||||
return 0
|
||||
return x
|
||||
|
@ -142,7 +142,7 @@ def num_value(x):
|
|||
def str_value(x):
|
||||
x = resolve1(x)
|
||||
if not isinstance(x, six.binary_type):
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFTypeError('String required: %r' % x)
|
||||
return ''
|
||||
return x
|
||||
|
@ -151,7 +151,7 @@ def str_value(x):
|
|||
def list_value(x):
|
||||
x = resolve1(x)
|
||||
if not isinstance(x, (list, tuple)):
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFTypeError('List required: %r' % x)
|
||||
return []
|
||||
return x
|
||||
|
@ -160,7 +160,7 @@ def list_value(x):
|
|||
def dict_value(x):
|
||||
x = resolve1(x)
|
||||
if not isinstance(x, dict):
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
import logging
|
||||
logging.error('PDFTypeError : Dict required: %r', x)
|
||||
raise PDFTypeError('Dict required: %r' % x)
|
||||
|
@ -171,7 +171,7 @@ def dict_value(x):
|
|||
def stream_value(x):
|
||||
x = resolve1(x)
|
||||
if not isinstance(x, PDFStream):
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFTypeError('PDFStream required: %r' % x)
|
||||
return PDFStream({}, '')
|
||||
return x
|
||||
|
@ -247,7 +247,7 @@ class PDFStream(PDFObject):
|
|||
try:
|
||||
data = zlib.decompress(data)
|
||||
except zlib.error as e:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PDFException('Invalid zlib bytes: %r, %r' % (e, data))
|
||||
data = b''
|
||||
elif f in LITERALS_LZW_DECODE:
|
||||
|
|
|
@ -6,7 +6,7 @@ import logging
|
|||
|
||||
import six # Python 2+3 compatibility
|
||||
|
||||
from .settings import STRICT
|
||||
from . import settings
|
||||
|
||||
def bytesindex(s,i,j=None):
|
||||
"""implements s[i], s[i:], s[i:j] for Python2 and Python3"""
|
||||
|
@ -134,7 +134,7 @@ KEYWORD_DICT_END = KWD(b'>>')
|
|||
|
||||
def literal_name(x):
|
||||
if not isinstance(x, PSLiteral):
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PSTypeError('Literal required: %r' % x)
|
||||
else:
|
||||
name=x
|
||||
|
@ -149,7 +149,7 @@ def literal_name(x):
|
|||
|
||||
def keyword_name(x):
|
||||
if not isinstance(x, PSKeyword):
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise PSTypeError('Keyword required: %r' % x)
|
||||
else:
|
||||
name=x
|
||||
|
@ -592,7 +592,7 @@ class PSStackParser(PSBaseParser):
|
|||
try:
|
||||
self.push(self.end_type('a'))
|
||||
except PSTypeError:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise
|
||||
elif token == KEYWORD_DICT_BEGIN:
|
||||
# begin dictionary
|
||||
|
@ -607,7 +607,7 @@ class PSStackParser(PSBaseParser):
|
|||
d = dict((literal_name(k), v) for (k, v) in choplist(2, objs) if v is not None)
|
||||
self.push((pos, d))
|
||||
except PSTypeError:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise
|
||||
elif token == KEYWORD_PROC_BEGIN:
|
||||
# begin proc
|
||||
|
@ -617,7 +617,7 @@ class PSStackParser(PSBaseParser):
|
|||
try:
|
||||
self.push(self.end_type('p'))
|
||||
except PSTypeError:
|
||||
if STRICT:
|
||||
if settings.STRICT:
|
||||
raise
|
||||
elif isinstance(token,PSKeyword):
|
||||
logging.debug('do_keyword: pos=%r, token=%r, stack=%r', pos, token, self.curstack)
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
+STRICT = True
+
 try:
     from django.conf import django_settings
-except (ImportError, NameError) as e:
+    STRICT = getattr(django_settings, 'PDF_MINER_IS_STRICT', STRICT)
+except Exception:
     # in case it's not a django project
-    django_settings = None
-
-# Get defaults from django settings
-STRICT = getattr(django_settings, 'PDF_MINER_IS_STRICT', True)
+    pass
|
||||
|
|
Loading…
Reference in New Issue