pdfminer/settings.py for STRICT and added ENFORCE_CHECK_EXTRACTABLE
parent
a46ea52e20
commit
b686dd0139
|
@ -19,7 +19,7 @@ from .psparser import PSEOF
|
|||
from .psparser import literal_name
|
||||
from .psparser import LIT
|
||||
from .psparser import KWD
|
||||
from .psparser import STRICT
|
||||
from .settings import STRICT
|
||||
from .pdftypes import PDFException
|
||||
from .pdftypes import PDFTypeError
|
||||
from .pdftypes import PDFStream
|
||||
|
|
|
@ -12,7 +12,7 @@ from .psparser import PSStackParser
|
|||
from .psparser import PSEOF
|
||||
from .psparser import LIT
|
||||
from .psparser import KWD
|
||||
from .psparser import STRICT
|
||||
from .settings import STRICT
|
||||
from .psparser import PSLiteral
|
||||
from .psparser import literal_name
|
||||
from .pdftypes import PDFException
|
||||
|
|
|
@ -12,7 +12,7 @@ from .psparser import keyword_name
|
|||
from .psparser import PSStackParser
|
||||
from .psparser import LIT
|
||||
from .psparser import KWD
|
||||
from .psparser import STRICT
|
||||
from .settings import STRICT
|
||||
from .pdftypes import PDFException
|
||||
from .pdftypes import PDFStream
|
||||
from .pdftypes import PDFObjRef
|
||||
|
|
|
@ -9,6 +9,7 @@ from .pdftypes import dict_value
|
|||
from .pdfparser import PDFParser
|
||||
from .pdfdocument import PDFDocument
|
||||
from .pdfdocument import PDFTextExtractionNotAllowed
|
||||
from .settings import ENFORCE_CHECK_EXTRACTABLE
|
||||
|
||||
import six # Python 2+3 compatibility
|
||||
|
||||
|
@ -120,6 +121,7 @@ class PDFPage(object):
|
|||
# Create a PDF document object that stores the document structure.
|
||||
doc = PDFDocument(parser, password=password, caching=caching)
|
||||
# Check if the document allows text extraction. If not, abort.
|
||||
if ENFORCE_CHECK_EXTRACTABLE:
|
||||
if check_extractable and not doc.is_extractable:
|
||||
raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp)
|
||||
# Process each page contained in the document.
|
||||
|
|
|
@ -5,7 +5,7 @@ from .psparser import PSStackParser
|
|||
from .psparser import PSSyntaxError
|
||||
from .psparser import PSEOF
|
||||
from .psparser import KWD
|
||||
from .psparser import STRICT
|
||||
from .settings import STRICT
|
||||
from .pdftypes import PDFException
|
||||
from .pdftypes import PDFStream
|
||||
from .pdftypes import PDFObjRef
|
||||
|
|
|
@ -8,7 +8,7 @@ from .ccitt import ccittfaxdecode
|
|||
from .psparser import PSException
|
||||
from .psparser import PSObject
|
||||
from .psparser import LIT
|
||||
from .psparser import STRICT
|
||||
from .settings import STRICT
|
||||
from .utils import apply_png_predictor
|
||||
from .utils import isnumber
|
||||
|
||||
|
|
|
@ -5,12 +5,8 @@ import re
|
|||
import logging
|
||||
|
||||
import six # Python 2+3 compatibility
|
||||
try:
|
||||
from django.conf import settings
|
||||
except ImportError:
|
||||
# in case it's not a django project
|
||||
settings = None
|
||||
|
||||
from .settings import STRICT
|
||||
|
||||
def bytesindex(s,i,j=None):
|
||||
"""implements s[i], s[i:], s[i:j] for Python2 and Python3"""
|
||||
|
@ -21,7 +17,6 @@ def bytesindex(s,i,j=None):
|
|||
|
||||
from .utils import choplist
|
||||
|
||||
STRICT = getattr(settings, 'PDF_MINER_IS_STRICT', True)
|
||||
|
||||
## PS Exceptions
|
||||
##
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
# Package-wide default flags for pdfminer.
#
# STRICT may be overridden from a Django project through the
# PDF_MINER_IS_STRICT entry of the Django settings; without Django it
# defaults to True.  Both flags are plain module attributes.
try:
    # django.conf exports the settings object under the name ``settings``;
    # there is no ``django_settings`` attribute, so importing that name would
    # always raise ImportError and the Django override would never apply.
    from django.conf import settings as django_settings
except ImportError:
    # in case it's not a django project
    django_settings = None

# Get defaults from django settings
# NOTE(review): if Django is installed but its settings are not configured,
# this attribute access may raise instead of falling back -- confirm whether
# that case needs to be handled here.
STRICT = getattr(django_settings, 'PDF_MINER_IS_STRICT', True)

# Module-level default; no Django override is read for this flag.
ENFORCE_CHECK_EXTRACTABLE = True
|
|
@ -5,6 +5,9 @@ Converts PDF text content (though not images containing text) to plain text, htm
|
|||
import sys
|
||||
import logging
|
||||
import six
|
||||
import pdfminer.settings
|
||||
pdfminer.settings.STRICT = False
|
||||
pdfminer.settings.ENFORCE_CHECK_EXTRACTABLE = False
|
||||
import pdfminer.high_level
|
||||
import pdfminer.layout
|
||||
|
||||
|
|
Loading…
Reference in New Issue