diff --git a/CHANGELOG.md b/CHANGELOG.md index f2957d9..66ca4af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522)) - Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525)) - Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523)) +- Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673)) - Dependency on typing-extensions introduced by [#661](https://github.com/pdfminer/pdfminer.six/pull/661) ([#677](https://github.com/pdfminer/pdfminer.six/pull/677)) ## [20201018] diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index cac09f2..c96b974 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -25,6 +25,10 @@ class PDFNoValidXRef(PDFSyntaxError): class PDFNoValidXRefWarning(SyntaxWarning): + """Legacy warning for missing xref. + + Not used anymore because warnings.warn is replaced by logging.Logger.warning. + """ pass @@ -41,10 +45,18 @@ class PDFEncryptionError(PDFException): class PDFEncryptionWarning(UserWarning): + """Legacy warning for failed decryption. + + Not used anymore because warnings.warn is replaced by logging.Logger.warning. + """ pass class PDFTextExtractionNotAllowedWarning(UserWarning): + """Legacy warning for PDF that does not allow extraction. + + Not used anymore because warnings.warn is replaced by logging.Logger.warning. 
+ """ pass diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py index 8380c23..75f77fd 100644 --- a/pdfminer/pdfpage.py +++ b/pdfminer/pdfpage.py @@ -1,7 +1,6 @@ import logging from pdfminer.utils import Rect from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple -import warnings from . import settings from .psparser import LIT from .pdftypes import PDFObjectNotFound @@ -11,7 +10,6 @@ from .pdftypes import list_value from .pdftypes import dict_value from .pdfparser import PDFParser from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed -from .pdfdocument import PDFTextExtractionNotAllowedWarning log = logging.getLogger(__name__) @@ -155,8 +153,9 @@ class PDFPage: warning_msg = 'The PDF %r contains a metadata field '\ 'indicating that it should not allow ' \ 'text extraction. Ignoring this field ' \ - 'and proceeding.' % fp - warnings.warn(warning_msg, PDFTextExtractionNotAllowedWarning) + 'and proceeding. Use the check_extractable ' \ + 'if you want to raise an error in this case' % fp + log.warning(warning_msg) # Process each page contained in the document. 
for (pageno, page) in enumerate(cls.create_pages(doc)): if pagenos and (pageno not in pagenos): diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index e6d94bd..b0496e8 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -1,5 +1,4 @@ import zlib -import warnings import logging import io import sys @@ -21,7 +20,7 @@ if TYPE_CHECKING: from .pdfdocument import PDFDocument -log = logging.getLogger(__name__) +logger = logging.getLogger(__name__) LITERAL_CRYPT = LIT('Crypt') @@ -205,7 +204,7 @@ def dict_value(x: object) -> Dict[Any, Any]: x = resolve1(x) if not isinstance(x, dict): if settings.STRICT: - log.error('PDFTypeError : Dict required: %r', x) + logger.error('PDFTypeError : Dict required: %r', x) raise PDFTypeError('Dict required: %r' % x) return {} return x @@ -237,9 +236,7 @@ def decompress_corrupted(data): except zlib.error: # Let the error propagates if we're not yet in the CRC checksum if i < len(data) - 3: - # Import here to prevent circualr import - from .pdfdocument import PDFEncryptionWarning - warnings.warn("Data-loss while decompressing corrupted data", PDFEncryptionWarning) + logger.warning("Data-loss while decompressing corrupted data") return result_str diff --git a/tests/test_tools_dumppdf.py b/tests/test_tools_dumppdf.py index df1dc25..abe6718 100644 --- a/tests/test_tools_dumppdf.py +++ b/tests/test_tools_dumppdf.py @@ -1,8 +1,8 @@ -import warnings +import unittest +import logging from nose.tools import raises from helpers import absolute_sample_path from tempfilepath import TemporaryFilePath -from pdfminer.pdfdocument import PDFNoValidXRefWarning from tools import dumppdf @@ -18,12 +18,9 @@ def run(filename, options=None): dumppdf.main(s.split(' ')[1:]) -class TestDumpPDF(): +class TestDumpPDF(unittest.TestCase): def test_simple1(self): - """dumppdf.py simple1.pdf raises a warning because it has no xref""" - with warnings.catch_warnings(record=True) as ws: - run('simple1.pdf', '-t -a') - assert any(w.category == 
PDFNoValidXRefWarning for w in ws) + run('simple1.pdf', '-t -a') def test_simple2(self): run('simple2.pdf', '-t -a') @@ -32,10 +29,7 @@ class TestDumpPDF(): run('jo.pdf', '-t -a') def test_simple3(self): - """dumppdf.py simple3.pdf raises a warning because it has no xref""" - with warnings.catch_warnings(record=True) as ws: - run('simple3.pdf', '-t -a') - assert any(w.category == PDFNoValidXRefWarning for w in ws) + run('simple3.pdf', '-t -a') def test_2(self): run('nonfree/dmca.pdf', '-t -a') diff --git a/tools/dumppdf.py b/tools/dumppdf.py index ffdf424..2199b9d 100755 --- a/tools/dumppdf.py +++ b/tools/dumppdf.py @@ -6,12 +6,10 @@ import re import sys from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, \ Union, cast -import warnings from argparse import ArgumentParser import pdfminer -from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback, \ - PDFNoValidXRefWarning +from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback from pdfminer.pdfpage import PDFPage from pdfminer.pdfparser import PDFParser from pdfminer.pdftypes import PDFObjectNotFound, PDFValueError @@ -20,6 +18,7 @@ from pdfminer.psparser import PSKeyword, PSLiteral, LIT from pdfminer.utils import isnumber logging.basicConfig() +logger = logging.getLogger(__name__) ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]') @@ -115,7 +114,7 @@ def dumptrailers( msg = 'This PDF does not have an xref. Use --show-fallback-xref if ' \ 'you want to display the content of a fallback xref that ' \ 'contains all objects.' - warnings.warn(msg, PDFNoValidXRefWarning) + logger.warning(msg) return