diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index f8ac79d..964610f 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -19,7 +19,7 @@ from .psparser import PSEOF from .psparser import literal_name from .psparser import LIT from .psparser import KWD -from .psparser import STRICT +from .settings import STRICT from .pdftypes import PDFException from .pdftypes import PDFTypeError from .pdftypes import PDFStream diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index b2a9df8..8196a33 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -12,7 +12,7 @@ from .psparser import PSStackParser from .psparser import PSEOF from .psparser import LIT from .psparser import KWD -from .psparser import STRICT +from .settings import STRICT from .psparser import PSLiteral from .psparser import literal_name from .pdftypes import PDFException diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 65bf8b4..80d57ea 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -12,7 +12,7 @@ from .psparser import keyword_name from .psparser import PSStackParser from .psparser import LIT from .psparser import KWD -from .psparser import STRICT +from .settings import STRICT from .pdftypes import PDFException from .pdftypes import PDFStream from .pdftypes import PDFObjRef diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index 7407ade..8e1934e 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -5,7 +5,7 @@ from .psparser import PSStackParser from .psparser import PSSyntaxError from .psparser import PSEOF from .psparser import KWD -from .psparser import STRICT +from .settings import STRICT from .pdftypes import PDFException from .pdftypes import PDFStream from .pdftypes import PDFObjRef diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index 64d84bb..834675e 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -8,7 +8,7 @@ from .ccitt import ccittfaxdecode from .psparser import PSException 
from .psparser import PSObject from .psparser import LIT -from .psparser import STRICT +from .settings import STRICT from .utils import apply_png_predictor from .utils import isnumber diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index b1fc6ac..dff3e04 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -5,12 +5,8 @@ import re import logging import six # Python 2+3 compatibility -try: - from django.conf import settings -except ImportError: - # in case it's not a django project - settings = None +from .settings import STRICT def bytesindex(s,i,j=None): """implements s[i], s[i:], s[i:j] for Python2 and Python3""" @@ -21,7 +17,6 @@ def bytesindex(s,i,j=None): from .utils import choplist -STRICT = getattr(settings, 'PDF_MINER_IS_STRICT', True) ## PS Exceptions ## diff --git a/pdfminer/settings.py b/pdfminer/settings.py new file mode 100644 index 0000000..350b2ce --- /dev/null +++ b/pdfminer/settings.py @@ -0,0 +1,12 @@ +try: + from django.conf import settings as django_settings +except ImportError: + # in case it's not a django project + django_settings = None + +# Get defaults from django settings, falling back to strict mode +try: + STRICT = getattr(django_settings, 'PDF_MINER_IS_STRICT', True) +except Exception: + # django is installed but its settings are not configured (ImproperlyConfigured) + STRICT = True diff --git a/setup.py b/setup.py index 5676ca3..5fe5838 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ setup( version=__version__, packages=['pdfminer',], package_data={'pdfminer': ['cmap/*.pickle.gz']}, - install_requires=['six', 'chardet'] if sys.version_info.major>2 else ['six'], + install_requires=['six', 'chardet'] if sys.version_info >= (3, 0) else ['six'], description='PDF parser and analyzer', long_description='''fork of PDFMiner using six for Python 2+3 compatibility diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py index f449928..d9a3ebb 100755 --- a/tools/pdf2txt.py +++ b/tools/pdf2txt.py @@ -5,6 +5,8 @@ Converts PDF text content (though not images containing text) to plain text, htm import sys import logging import six +import pdfminer.settings 
+pdfminer.settings.STRICT = False import pdfminer.high_level import pdfminer.layout @@ -24,7 +26,7 @@ def extract_text(files=[], outfile='-', # If any LAParams group arguments were passed, create an LAParams object and # populate with given args. Otherwise, set it to None. - if not no_laparams: + if not no_laparams: laparams = pdfminer.layout.LAParams() for param in ("all_texts", "detect_vertical", "word_margin", "char_margin", "line_margin", "boxes_flow"): paramv = locals().get(param, None) @@ -44,14 +46,14 @@ def extract_text(files=[], outfile='-', (".tag", "tag") ): if outfile.endswith(override): output_type = alttype - + if outfile == "-": outfp = sys.stdout if outfp.encoding is not None: codec = 'utf-8' else: outfp = open(outfile, "wb") - + for fname in files: with open(fname, "rb") as fp: @@ -90,7 +92,7 @@ def main(args=None): A.page_numbers = set([x-1 for x in A.page_numbers]) if A.pagenos: A.page_numbers = set([int(x)-1 for x in A.pagenos.split(",")]) - + imagewriter = None if A.output_dir: imagewriter = ImageWriter(A.output_dir)