Merge pull request #16 from stevenhair/settings-management

Improved settings management
pull/17/head
Goulu 2016-01-18 11:21:26 +01:00
commit 4f762cb897
7 changed files with 51 additions and 51 deletions

View File

@@ -19,7 +19,7 @@ from .psparser import PSEOF
from .psparser import literal_name
from .psparser import LIT
from .psparser import KWD
from .settings import STRICT
from . import settings
from .pdftypes import PDFException
from .pdftypes import PDFTypeError
from .pdftypes import PDFStream
@@ -196,7 +196,7 @@ class PDFXRefFallback(PDFXRef):
try:
n = stream['N']
except KeyError:
if STRICT:
if settings.STRICT:
raise PDFSyntaxError('N is not defined: %r' % stream)
n = 0
parser1 = PDFStreamParser(stream.get_data())
@@ -582,7 +582,7 @@ class PDFDocument(object):
else:
raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
if self.catalog.get('Type') is not LITERAL_CATALOG:
if STRICT:
if settings.STRICT:
raise PDFSyntaxError('Catalog not found!')
return
@@ -620,12 +620,12 @@ class PDFDocument(object):
def _get_objects(self, stream):
if stream.get('Type') is not LITERAL_OBJSTM:
if STRICT:
if settings.STRICT:
raise PDFSyntaxError('Not a stream object: %r' % stream)
try:
n = stream['N']
except KeyError:
if STRICT:
if settings.STRICT:
raise PDFSyntaxError('N is not defined: %r' % stream)
n = 0
parser = PDFStreamParser(stream.get_data())

View File

@@ -12,7 +12,7 @@ from .psparser import PSStackParser
from .psparser import PSEOF
from .psparser import LIT
from .psparser import KWD
from .settings import STRICT
from . import settings
from .psparser import PSLiteral
from .psparser import literal_name
from .pdftypes import PDFException
@@ -574,7 +574,7 @@ class PDFType1Font(PDFSimpleFont):
try:
self.basefont = literal_name(spec['BaseFont'])
except KeyError:
if STRICT:
if settings.STRICT:
raise PDFFontError('BaseFont is missing')
self.basefont = 'unknown'
try:
@@ -632,11 +632,11 @@ class PDFType3Font(PDFSimpleFont):
# PDFCIDFont
class PDFCIDFont(PDFFont):
def __init__(self, rsrcmgr, spec, STRICT=False):
def __init__(self, rsrcmgr, spec, strict=settings.STRICT):
try:
self.basefont = literal_name(spec['BaseFont'])
except KeyError:
if STRICT:
if strict:
raise PDFFontError('BaseFont is missing')
self.basefont = 'unknown'
self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
@@ -645,19 +645,19 @@ class PDFCIDFont(PDFFont):
try:
name = literal_name(spec['Encoding'])
except KeyError:
if STRICT:
if strict:
raise PDFFontError('Encoding is unspecified')
name = 'unknown'
try:
self.cmap = CMapDB.get_cmap(name)
except CMapDB.CMapNotFound as e:
if STRICT:
if strict:
raise PDFFontError(e)
self.cmap = CMap()
try:
descriptor = dict_value(spec['FontDescriptor'])
except KeyError:
if STRICT:
if strict:
raise PDFFontError('FontDescriptor is missing')
descriptor = {}
ttf = None

View File

@@ -12,7 +12,7 @@ from .psparser import keyword_name
from .psparser import PSStackParser
from .psparser import LIT
from .psparser import KWD
from .settings import STRICT
from . import settings
from .pdftypes import PDFException
from .pdftypes import PDFStream
from .pdftypes import PDFObjRef
@@ -167,14 +167,14 @@ class PDFResourceManager(object):
font = self._cached_fonts[objid]
else:
logging.info('get_font: create: objid=%r, spec=%r', objid, spec)
if STRICT:
if settings.STRICT:
if spec['Type'] is not LITERAL_FONT:
raise PDFFontError('Type is not /Font')
# Create a Font object.
if 'Subtype' in spec:
subtype = literal_name(spec['Subtype'])
else:
if STRICT:
if settings.STRICT:
raise PDFFontError('Font Subtype is not specified.')
subtype = 'Type1'
if subtype in ('Type1', 'MMType1'):
@@ -199,7 +199,7 @@ class PDFResourceManager(object):
subspec[k] = resolve1(spec[k])
font = self.get_font(None, subspec)
else:
if STRICT:
if settings.STRICT:
raise PDFFontError('Invalid Font spec: %r' % spec)
font = PDFType1Font(self, spec) # this is so wrong!
if objid and self.caching:
@@ -299,7 +299,7 @@ class PDFContentParser(PSStackParser):
self.push((pos, obj))
self.push((pos, self.KEYWORD_EI))
except PSTypeError:
if STRICT:
if settings.STRICT:
raise
else:
self.push((pos, token))
@@ -559,7 +559,7 @@ class PDFPageInterpreter(object):
try:
self.scs = self.csmap[literal_name(name)]
except KeyError:
if STRICT:
if settings.STRICT:
raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
return
@@ -568,7 +568,7 @@ class PDFPageInterpreter(object):
try:
self.ncs = self.csmap[literal_name(name)]
except KeyError:
if STRICT:
if settings.STRICT:
raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
return
@@ -607,7 +607,7 @@ class PDFPageInterpreter(object):
if self.scs:
n = self.scs.ncomponents
else:
if STRICT:
if settings.STRICT:
raise PDFInterpreterError('No colorspace specified!')
n = 1
self.pop(n)
@@ -617,7 +617,7 @@ class PDFPageInterpreter(object):
if self.ncs:
n = self.ncs.ncomponents
else:
if STRICT:
if settings.STRICT:
raise PDFInterpreterError('No colorspace specified!')
n = 1
self.pop(n)
@@ -698,7 +698,7 @@ class PDFPageInterpreter(object):
try:
self.textstate.font = self.fontmap[literal_name(fontid)]
except KeyError:
if STRICT:
if settings.STRICT:
raise PDFInterpreterError('Undefined Font id: %r' % fontid)
self.textstate.font = self.rsrcmgr.get_font(None, {})
self.textstate.fontsize = fontsize
@@ -748,7 +748,7 @@ class PDFPageInterpreter(object):
def do_TJ(self, seq):
#print >>sys.stderr, 'TJ(%r): %r' % (seq, self.textstate)
if self.textstate.font is None:
if STRICT:
if settings.STRICT:
raise PDFInterpreterError('No font specified!')
return
self.device.render_string(self.textstate, seq)
@@ -793,7 +793,7 @@ class PDFPageInterpreter(object):
try:
xobj = stream_value(self.xobjmap[xobjid])
except KeyError:
if STRICT:
if settings.STRICT:
raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
return
logging.info('Processing xobj: %r', xobj)
@@ -872,7 +872,7 @@ class PDFPageInterpreter(object):
logging.debug('exec: %s', name)
func()
else:
if STRICT:
if settings.STRICT:
raise PDFInterpreterError('Unknown operator: %r' % name)
else:
self.push(obj)

View File

@@ -5,7 +5,7 @@ from .psparser import PSStackParser
from .psparser import PSSyntaxError
from .psparser import PSEOF
from .psparser import KWD
from .settings import STRICT
from . import settings
from .pdftypes import PDFException
from .pdftypes import PDFStream
from .pdftypes import PDFObjRef
@@ -89,13 +89,13 @@ class PDFParser(PSStackParser):
try:
objlen = int_value(dic['Length'])
except KeyError:
if STRICT:
if settings.STRICT:
raise PDFSyntaxError('/Length is undefined: %r' % dic)
self.seek(pos)
try:
(_, line) = self.nextline() # 'stream'
except PSEOF:
if STRICT:
if settings.STRICT:
raise PDFSyntaxError('Unexpected EOF')
return
pos += len(line)
@@ -106,7 +106,7 @@ class PDFParser(PSStackParser):
try:
(linepos, line) = self.nextline()
except PSEOF:
if STRICT:
if settings.STRICT:
raise PDFSyntaxError('Unexpected EOF')
break
if b'endstream' in line:
@@ -164,7 +164,7 @@ class PDFStreamParser(PDFParser):
pass
return
elif token in (self.KEYWORD_OBJ, self.KEYWORD_ENDOBJ):
if STRICT:
if settings.STRICT:
# See PDF Spec 3.4.6: Only the object values are stored in the
# stream; the obj and endobj keywords are not used.
raise PDFSyntaxError('Keyword endobj found in stream')

View File

@@ -8,7 +8,7 @@ from .ccitt import ccittfaxdecode
from .psparser import PSException
from .psparser import PSObject
from .psparser import LIT
from .settings import STRICT
from . import settings
from .utils import apply_png_predictor
from .utils import isnumber
@@ -53,7 +53,7 @@ class PDFObjRef(PDFObject):
def __init__(self, doc, objid, _):
if objid == 0:
if STRICT:
if settings.STRICT:
raise PDFValueError('PDF object id cannot be 0.')
self.doc = doc
self.objid = objid
@@ -115,7 +115,7 @@ def decipher_all(decipher, objid, genno, x):
def int_value(x):
x = resolve1(x)
if not isinstance(x, int):
if STRICT:
if settings.STRICT:
raise PDFTypeError('Integer required: %r' % x)
return 0
return x
@@ -124,7 +124,7 @@ def int_value(x):
def float_value(x):
x = resolve1(x)
if not isinstance(x, float):
if STRICT:
if settings.STRICT:
raise PDFTypeError('Float required: %r' % x)
return 0.0
return x
@@ -133,7 +133,7 @@ def float_value(x):
def num_value(x):
x = resolve1(x)
if not isnumber(x):
if STRICT:
if settings.STRICT:
raise PDFTypeError('Int or Float required: %r' % x)
return 0
return x
@@ -142,7 +142,7 @@ def num_value(x):
def str_value(x):
x = resolve1(x)
if not isinstance(x, six.binary_type):
if STRICT:
if settings.STRICT:
raise PDFTypeError('String required: %r' % x)
return ''
return x
@@ -151,7 +151,7 @@ def str_value(x):
def list_value(x):
x = resolve1(x)
if not isinstance(x, (list, tuple)):
if STRICT:
if settings.STRICT:
raise PDFTypeError('List required: %r' % x)
return []
return x
@@ -160,7 +160,7 @@ def list_value(x):
def dict_value(x):
x = resolve1(x)
if not isinstance(x, dict):
if STRICT:
if settings.STRICT:
import logging
logging.error('PDFTypeError : Dict required: %r', x)
raise PDFTypeError('Dict required: %r' % x)
@@ -171,7 +171,7 @@ def dict_value(x):
def stream_value(x):
x = resolve1(x)
if not isinstance(x, PDFStream):
if STRICT:
if settings.STRICT:
raise PDFTypeError('PDFStream required: %r' % x)
return PDFStream({}, '')
return x
@@ -247,7 +247,7 @@ class PDFStream(PDFObject):
try:
data = zlib.decompress(data)
except zlib.error as e:
if STRICT:
if settings.STRICT:
raise PDFException('Invalid zlib bytes: %r, %r' % (e, data))
data = b''
elif f in LITERALS_LZW_DECODE:

View File

@@ -6,7 +6,7 @@ import logging
import six # Python 2+3 compatibility
from .settings import STRICT
from . import settings
def bytesindex(s,i,j=None):
"""implements s[i], s[i:], s[i:j] for Python2 and Python3"""
@@ -134,7 +134,7 @@ KEYWORD_DICT_END = KWD(b'>>')
def literal_name(x):
if not isinstance(x, PSLiteral):
if STRICT:
if settings.STRICT:
raise PSTypeError('Literal required: %r' % x)
else:
name=x
@@ -149,7 +149,7 @@ def literal_name(x):
def keyword_name(x):
if not isinstance(x, PSKeyword):
if STRICT:
if settings.STRICT:
raise PSTypeError('Keyword required: %r' % x)
else:
name=x
@@ -592,7 +592,7 @@ class PSStackParser(PSBaseParser):
try:
self.push(self.end_type('a'))
except PSTypeError:
if STRICT:
if settings.STRICT:
raise
elif token == KEYWORD_DICT_BEGIN:
# begin dictionary
@@ -607,7 +607,7 @@ class PSStackParser(PSBaseParser):
d = dict((literal_name(k), v) for (k, v) in choplist(2, objs) if v is not None)
self.push((pos, d))
except PSTypeError:
if STRICT:
if settings.STRICT:
raise
elif token == KEYWORD_PROC_BEGIN:
# begin proc
@@ -617,7 +617,7 @@ class PSStackParser(PSBaseParser):
try:
self.push(self.end_type('p'))
except PSTypeError:
if STRICT:
if settings.STRICT:
raise
elif isinstance(token,PSKeyword):
logging.debug('do_keyword: pos=%r, token=%r, stack=%r', pos, token, self.curstack)

View File

@@ -1,8 +1,8 @@
STRICT = True
try:
from django.conf import django_settings
except (ImportError, NameError) as e:
STRICT = getattr(django_settings, 'PDF_MINER_IS_STRICT', STRICT)
except Exception:
# in case it's not a django project
django_settings = None
# Get defaults from django settings
STRICT = getattr(django_settings, 'PDF_MINER_IS_STRICT', True)
pass