diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 964610f..fd62c82 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -19,7 +19,7 @@ from .psparser import PSEOF from .psparser import literal_name from .psparser import LIT from .psparser import KWD -from .settings import STRICT +from . import settings from .pdftypes import PDFException from .pdftypes import PDFTypeError from .pdftypes import PDFStream @@ -196,7 +196,7 @@ class PDFXRefFallback(PDFXRef): try: n = stream['N'] except KeyError: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('N is not defined: %r' % stream) n = 0 parser1 = PDFStreamParser(stream.get_data()) @@ -582,7 +582,7 @@ class PDFDocument(object): else: raise PDFSyntaxError('No /Root object! - Is this really a PDF?') if self.catalog.get('Type') is not LITERAL_CATALOG: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('Catalog not found!') return @@ -620,12 +620,12 @@ class PDFDocument(object): def _get_objects(self, stream): if stream.get('Type') is not LITERAL_OBJSTM: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('Not a stream object: %r' % stream) try: n = stream['N'] except KeyError: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('N is not defined: %r' % stream) n = 0 parser = PDFStreamParser(stream.get_data()) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 8196a33..c6f18a5 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -12,7 +12,7 @@ from .psparser import PSStackParser from .psparser import PSEOF from .psparser import LIT from .psparser import KWD -from .settings import STRICT +from . import settings from .psparser import PSLiteral from .psparser import literal_name from .pdftypes import PDFException @@ -574,7 +574,7 @@ class PDFType1Font(PDFSimpleFont): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: - if STRICT: + if settings.STRICT: raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' try: @@ -632,11 +632,11 @@ class PDFType3Font(PDFSimpleFont): # PDFCIDFont class PDFCIDFont(PDFFont): - def __init__(self, rsrcmgr, spec, STRICT=False): + def __init__(self, rsrcmgr, spec, strict=settings.STRICT): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: - if STRICT: + if strict: raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {})) @@ -645,19 +645,19 @@ class PDFCIDFont(PDFFont): try: name = literal_name(spec['Encoding']) except KeyError: - if STRICT: + if strict: raise PDFFontError('Encoding is unspecified') name = 'unknown' try: self.cmap = CMapDB.get_cmap(name) except CMapDB.CMapNotFound as e: - if STRICT: + if strict: raise PDFFontError(e) self.cmap = CMap() try: descriptor = dict_value(spec['FontDescriptor']) except KeyError: - if STRICT: + if strict: raise PDFFontError('FontDescriptor is missing') descriptor = {} ttf = None diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 80d57ea..ff1d072 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -12,7 +12,7 @@ from .psparser import keyword_name from .psparser import PSStackParser from .psparser import LIT from .psparser import KWD -from .settings import STRICT +from . import settings from .pdftypes import PDFException from .pdftypes import PDFStream from .pdftypes import PDFObjRef @@ -167,14 +167,14 @@ class PDFResourceManager(object): font = self._cached_fonts[objid] else: logging.info('get_font: create: objid=%r, spec=%r', objid, spec) - if STRICT: + if settings.STRICT: if spec['Type'] is not LITERAL_FONT: raise PDFFontError('Type is not /Font') # Create a Font object. if 'Subtype' in spec: subtype = literal_name(spec['Subtype']) else: - if STRICT: + if settings.STRICT: raise PDFFontError('Font Subtype is not specified.') subtype = 'Type1' if subtype in ('Type1', 'MMType1'): @@ -199,7 +199,7 @@ class PDFResourceManager(object): subspec[k] = resolve1(spec[k]) font = self.get_font(None, subspec) else: - if STRICT: + if settings.STRICT: raise PDFFontError('Invalid Font spec: %r' % spec) font = PDFType1Font(self, spec) # this is so wrong! if objid and self.caching: @@ -299,7 +299,7 @@ class PDFContentParser(PSStackParser): self.push((pos, obj)) self.push((pos, self.KEYWORD_EI)) except PSTypeError: - if STRICT: + if settings.STRICT: raise else: self.push((pos, token)) @@ -559,7 +559,7 @@ class PDFPageInterpreter(object): try: self.scs = self.csmap[literal_name(name)] except KeyError: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('Undefined ColorSpace: %r' % name) return @@ -568,7 +568,7 @@ class PDFPageInterpreter(object): try: self.ncs = self.csmap[literal_name(name)] except KeyError: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('Undefined ColorSpace: %r' % name) return @@ -607,7 +607,7 @@ class PDFPageInterpreter(object): if self.scs: n = self.scs.ncomponents else: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('No colorspace specified!') n = 1 self.pop(n) @@ -617,7 +617,7 @@ class PDFPageInterpreter(object): if self.ncs: n = self.ncs.ncomponents else: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('No colorspace specified!') n = 1 self.pop(n) @@ -698,7 +698,7 @@ class PDFPageInterpreter(object): try: self.textstate.font = self.fontmap[literal_name(fontid)] except KeyError: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('Undefined Font id: %r' % fontid) self.textstate.font = self.rsrcmgr.get_font(None, {}) self.textstate.fontsize = fontsize @@ -748,7 +748,7 @@ class PDFPageInterpreter(object): def do_TJ(self, seq): #print >>sys.stderr, 'TJ(%r): %r' % (seq, self.textstate) if self.textstate.font is None: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('No font specified!') return self.device.render_string(self.textstate, seq) @@ -793,7 +793,7 @@ class PDFPageInterpreter(object): try: xobj = stream_value(self.xobjmap[xobjid]) except KeyError: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) return logging.info('Processing xobj: %r', xobj) @@ -872,7 +872,7 @@ class PDFPageInterpreter(object): logging.debug('exec: %s', name) func() else: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('Unknown operator: %r' % name) else: self.push(obj) diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index 8e1934e..e5202d6 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -5,7 +5,7 @@ from .psparser import PSStackParser from .psparser import PSSyntaxError from .psparser import PSEOF from .psparser import KWD -from .settings import STRICT +from . import settings from .pdftypes import PDFException from .pdftypes import PDFStream from .pdftypes import PDFObjRef @@ -89,13 +89,13 @@ class PDFParser(PSStackParser): try: objlen = int_value(dic['Length']) except KeyError: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('/Length is undefined: %r' % dic) self.seek(pos) try: (_, line) = self.nextline() # 'stream' except PSEOF: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('Unexpected EOF') return pos += len(line) @@ -106,7 +106,7 @@ class PDFParser(PSStackParser): try: (linepos, line) = self.nextline() except PSEOF: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('Unexpected EOF') break if b'endstream' in line: @@ -164,7 +164,7 @@ class PDFStreamParser(PDFParser): pass return elif token in (self.KEYWORD_OBJ, self.KEYWORD_ENDOBJ): - if STRICT: + if settings.STRICT: # See PDF Spec 3.4.6: Only the object values are stored in the # stream; the obj and endobj keywords are not used. raise PDFSyntaxError('Keyword endobj found in stream') diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index 834675e..a8fc009 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -8,7 +8,7 @@ from .ccitt import ccittfaxdecode from .psparser import PSException from .psparser import PSObject from .psparser import LIT -from .settings import STRICT +from . import settings from .utils import apply_png_predictor from .utils import isnumber @@ -53,7 +53,7 @@ class PDFObjRef(PDFObject): def __init__(self, doc, objid, _): if objid == 0: - if STRICT: + if settings.STRICT: raise PDFValueError('PDF object id cannot be 0.') self.doc = doc self.objid = objid @@ -115,7 +115,7 @@ def decipher_all(decipher, objid, genno, x): def int_value(x): x = resolve1(x) if not isinstance(x, int): - if STRICT: + if settings.STRICT: raise PDFTypeError('Integer required: %r' % x) return 0 return x @@ -124,7 +124,7 @@ def int_value(x): def float_value(x): x = resolve1(x) if not isinstance(x, float): - if STRICT: + if settings.STRICT: raise PDFTypeError('Float required: %r' % x) return 0.0 return x @@ -133,7 +133,7 @@ def float_value(x): def num_value(x): x = resolve1(x) if not isnumber(x): - if STRICT: + if settings.STRICT: raise PDFTypeError('Int or Float required: %r' % x) return 0 return x @@ -142,7 +142,7 @@ def num_value(x): def str_value(x): x = resolve1(x) if not isinstance(x, six.binary_type): - if STRICT: + if settings.STRICT: raise PDFTypeError('String required: %r' % x) return '' return x @@ -151,7 +151,7 @@ def str_value(x): def list_value(x): x = resolve1(x) if not isinstance(x, (list, tuple)): - if STRICT: + if settings.STRICT: raise PDFTypeError('List required: %r' % x) return [] return x @@ -160,7 +160,7 @@ def list_value(x): def dict_value(x): x = resolve1(x) if not isinstance(x, dict): - if STRICT: + if settings.STRICT: import logging logging.error('PDFTypeError : Dict required: %r', x) raise PDFTypeError('Dict required: %r' % x) @@ -171,7 +171,7 @@ def dict_value(x): def stream_value(x): x = resolve1(x) if not isinstance(x, PDFStream): - if STRICT: + if settings.STRICT: raise PDFTypeError('PDFStream required: %r' % x) return PDFStream({}, '') return x @@ -247,7 +247,7 @@ class PDFStream(PDFObject): try: data = zlib.decompress(data) except zlib.error as e: - if STRICT: + if settings.STRICT: raise PDFException('Invalid zlib bytes: %r, %r' % (e, data)) data = b'' elif f in LITERALS_LZW_DECODE: diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index dff3e04..1b17695 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -6,7 +6,7 @@ import logging import six # Python 2+3 compatibility -from .settings import STRICT +from . import settings def bytesindex(s,i,j=None): """implements s[i], s[i:], s[i:j] for Python2 and Python3""" @@ -134,7 +134,7 @@ KEYWORD_DICT_END = KWD(b'>>') def literal_name(x): if not isinstance(x, PSLiteral): - if STRICT: + if settings.STRICT: raise PSTypeError('Literal required: %r' % x) else: name=x @@ -149,7 +149,7 @@ def literal_name(x): def keyword_name(x): if not isinstance(x, PSKeyword): - if STRICT: + if settings.STRICT: raise PSTypeError('Keyword required: %r' % x) else: name=x @@ -592,7 +592,7 @@ class PSStackParser(PSBaseParser): try: self.push(self.end_type('a')) except PSTypeError: - if STRICT: + if settings.STRICT: raise elif token == KEYWORD_DICT_BEGIN: # begin dictionary @@ -607,7 +607,7 @@ class PSStackParser(PSBaseParser): d = dict((literal_name(k), v) for (k, v) in choplist(2, objs) if v is not None) self.push((pos, d)) except PSTypeError: - if STRICT: + if settings.STRICT: raise elif token == KEYWORD_PROC_BEGIN: # begin proc @@ -617,7 +617,7 @@ class PSStackParser(PSBaseParser): try: self.push(self.end_type('p')) except PSTypeError: - if STRICT: + if settings.STRICT: raise elif isinstance(token,PSKeyword): logging.debug('do_keyword: pos=%r, token=%r, stack=%r', pos, token, self.curstack) diff --git a/pdfminer/settings.py b/pdfminer/settings.py index 350b2ce..0956960 100644 --- a/pdfminer/settings.py +++ b/pdfminer/settings.py @@ -1,8 +1,8 @@ +STRICT = True + try: from django.conf import django_settings -except (ImportError, NameError) as e: + STRICT = getattr(django_settings, 'PDF_MINER_IS_STRICT', STRICT) +except Exception: # in case it's not a django project - django_settings = None - -# Get defaults from django settings -STRICT = getattr(django_settings, 'PDF_MINER_IS_STRICT', True) + pass