From ed1b09c6f27aba97bb4632c3a26586fe59c82792 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Haso=C5=88?= Date: Wed, 6 Nov 2019 21:47:19 +0100 Subject: [PATCH] Fix debug logging for pdf2txt.py and dumppdf.py (#325) Fixes #313 --- pdfminer/high_level.py | 4 ++++ tools/dumppdf.py | 2 ++ tools/pdf2txt.py | 3 +++ tools/pdfdiff.py | 6 +++++- 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pdfminer/high_level.py b/pdfminer/high_level.py index ecc934f..8728319 100644 --- a/pdfminer/high_level.py +++ b/pdfminer/high_level.py @@ -4,6 +4,7 @@ Functions that encapsulate "usual" use-cases for pdfminer, for use making bundled scripts and for using pdfminer as a module for routine tasks. """ +import logging import six import sys @@ -50,6 +51,9 @@ def extract_text_to_fp(inf, outfp, 'upgrade to Python 3. For more information see ' 'https://github.com/pdfminer/pdfminer .six/issues/194') + if debug: + logging.getLogger().setLevel(logging.DEBUG) + if six.PY2 and sys.stdin.encoding: password = password.decode(sys.stdin.encoding) diff --git a/tools/dumppdf.py b/tools/dumppdf.py index 01654d4..0e3a4ee 100755 --- a/tools/dumppdf.py +++ b/tools/dumppdf.py @@ -15,6 +15,8 @@ from pdfminer.pdftypes import PDFStream, PDFObjRef, resolve1, stream_value from pdfminer.psparser import PSKeyword, PSLiteral, LIT from pdfminer.utils import isnumber +logging.basicConfig() + ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]') diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py index f243601..41a7e7e 100755 --- a/tools/pdf2txt.py +++ b/tools/pdf2txt.py @@ -7,12 +7,15 @@ import argparse import logging import six import sys + import pdfminer.settings pdfminer.settings.STRICT = False import pdfminer.high_level import pdfminer.layout from pdfminer.image import ImageWriter +logging.basicConfig() + def extract_text(files=[], outfile='-', no_laparams=False, all_texts=None, detect_vertical=None, # LAParams diff --git a/tools/pdfdiff.py b/tools/pdfdiff.py index bb7b47a..e738b75 100644 --- a/tools/pdfdiff.py +++ b/tools/pdfdiff.py @@ -11,6 +11,8 @@ pdfminer.settings.STRICT = False import pdfminer.high_level import pdfminer.layout +logging.basicConfig() + def compare(file1, file2, **kwargs): if '_py2_no_more_posargs' in kwargs is not None: @@ -89,10 +91,12 @@ def main(args=None): P.add_argument("-O", "--output-dir", default=None, help="Output directory for images") P.add_argument("-C", "--disable-caching", default=False, action="store_true", help="Disable caching") P.add_argument("-S", "--strip-control", default=False, action="store_true", help="Strip control in XML mode") - A = P.parse_args(args=args) + if A.debug: + logging.getLogger().setLevel(logging.DEBUG) + if A.page_numbers: A.page_numbers = set([x-1 for x in A.page_numbers]) if A.pagenos: