Use logger.warning instead of warnings.warn if the warning cannot be prevented by the user (#673)

* Use logging.Logger.warning instead of warnings.warn in most cases, following
 the Python official guidance that warnings.warn is directed at _developers_,
 not users

 * (pdfdocument.py) remove declarations of PDFTextExtractionNotAllowedWarning,
			PDFNoValidXRefWarning

 * (pdfpage.py) Don't import warnings, don't use PDFTextExtractionNotAllowedWarning

 * (tools/dumppdf.py) Don't import warnings, don't use PDFNoValidXRefWarning

 * (tests/test_tools_dumppdf.py) Don't import warnings, check for logging.WARN rather
				  than PDFNoValidXRefWarning

* get name right

* make flake8 happy

* Keep warning classes so that code which explicitly ignores these warnings does not crash

* Update changelog to include pr ref

* Small textual change

* Remove patch

* No need to test whether the warning is actually raised. The tests in test_tools_dumppdf.py only check whether these PDFs are supported.

* Use logger as name for logger

* Add docs to legacy warnings

* Use logging.Logger.warning for failed decompression

* Add reference to docs describing when to use logger and warnings

Co-authored-by: Henry S. Thompson <ht@home.hst.name>
Co-authored-by: Pieter Marsman <pietermarsman@gmail.com>
pull/684/head^2
htInEdin 2022-01-26 19:41:12 +00:00 committed by GitHub
parent c4ac514984
commit dc530f3a6f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 27 additions and 25 deletions

View File

@ -41,6 +41,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522))
- Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525))
- Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523))
- Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673))
- Dependency on typing-extensions introduced by [#661](https://github.com/pdfminer/pdfminer.six/pull/661) ([#677](https://github.com/pdfminer/pdfminer.six/pull/677))
## [20201018]

View File

@ -25,6 +25,10 @@ class PDFNoValidXRef(PDFSyntaxError):
class PDFNoValidXRefWarning(SyntaxWarning):
"""Legacy warning for missing xref.
Not used anymore because warnings.warn is replaced by logger.Logger.warn.
"""
pass
@ -41,10 +45,18 @@ class PDFEncryptionError(PDFException):
class PDFEncryptionWarning(UserWarning):
"""Legacy warning for failed decryption.
Not used anymore because warnings.warn is replaced by logger.Logger.warn.
"""
pass
class PDFTextExtractionNotAllowedWarning(UserWarning):
"""Legacy warning for PDF that does not allow extraction.
Not used anymore because warnings.warn is replaced by logger.Logger.warn.
"""
pass

View File

@ -1,7 +1,6 @@
import logging
from pdfminer.utils import Rect
from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple
import warnings
from . import settings
from .psparser import LIT
from .pdftypes import PDFObjectNotFound
@ -11,7 +10,6 @@ from .pdftypes import list_value
from .pdftypes import dict_value
from .pdfparser import PDFParser
from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
from .pdfdocument import PDFTextExtractionNotAllowedWarning
log = logging.getLogger(__name__)
@ -155,8 +153,9 @@ class PDFPage:
warning_msg = 'The PDF %r contains a metadata field '\
'indicating that it should not allow ' \
'text extraction. Ignoring this field ' \
'and proceeding.' % fp
warnings.warn(warning_msg, PDFTextExtractionNotAllowedWarning)
'and proceeding. Use the check_extractable ' \
'if you want to raise an error in this case' % fp
log.warning(warning_msg)
# Process each page contained in the document.
for (pageno, page) in enumerate(cls.create_pages(doc)):
if pagenos and (pageno not in pagenos):

View File

@ -1,5 +1,4 @@
import zlib
import warnings
import logging
import io
import sys
@ -21,7 +20,7 @@ if TYPE_CHECKING:
from .pdfdocument import PDFDocument
log = logging.getLogger(__name__)
logger = logging.getLogger(__name__)
LITERAL_CRYPT = LIT('Crypt')
@ -205,7 +204,7 @@ def dict_value(x: object) -> Dict[Any, Any]:
x = resolve1(x)
if not isinstance(x, dict):
if settings.STRICT:
log.error('PDFTypeError : Dict required: %r', x)
logger.error('PDFTypeError : Dict required: %r', x)
raise PDFTypeError('Dict required: %r' % x)
return {}
return x
@ -237,9 +236,7 @@ def decompress_corrupted(data):
except zlib.error:
# Let the error propagates if we're not yet in the CRC checksum
if i < len(data) - 3:
# Import here to prevent circualr import
from .pdfdocument import PDFEncryptionWarning
warnings.warn("Data-loss while decompressing corrupted data", PDFEncryptionWarning)
logger.warning("Data-loss while decompressing corrupted data")
return result_str

View File

@ -1,8 +1,8 @@
import warnings
import unittest
import logging
from nose.tools import raises
from helpers import absolute_sample_path
from tempfilepath import TemporaryFilePath
from pdfminer.pdfdocument import PDFNoValidXRefWarning
from tools import dumppdf
@ -18,12 +18,9 @@ def run(filename, options=None):
dumppdf.main(s.split(' ')[1:])
class TestDumpPDF():
class TestDumpPDF(unittest.TestCase):
def test_simple1(self):
"""dumppdf.py simple1.pdf raises a warning because it has no xref"""
with warnings.catch_warnings(record=True) as ws:
run('simple1.pdf', '-t -a')
assert any(w.category == PDFNoValidXRefWarning for w in ws)
run('simple1.pdf', '-t -a')
def test_simple2(self):
run('simple2.pdf', '-t -a')
@ -32,10 +29,7 @@ class TestDumpPDF():
run('jo.pdf', '-t -a')
def test_simple3(self):
"""dumppdf.py simple3.pdf raises a warning because it has no xref"""
with warnings.catch_warnings(record=True) as ws:
run('simple3.pdf', '-t -a')
assert any(w.category == PDFNoValidXRefWarning for w in ws)
run('simple3.pdf', '-t -a')
def test_2(self):
run('nonfree/dmca.pdf', '-t -a')

View File

@ -6,12 +6,10 @@ import re
import sys
from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, \
Union, cast
import warnings
from argparse import ArgumentParser
import pdfminer
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback, \
PDFNoValidXRefWarning
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfparser import PDFParser
from pdfminer.pdftypes import PDFObjectNotFound, PDFValueError
@ -20,6 +18,7 @@ from pdfminer.psparser import PSKeyword, PSLiteral, LIT
from pdfminer.utils import isnumber
logging.basicConfig()
logger = logging.getLogger(__name__)
ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
@ -115,7 +114,7 @@ def dumptrailers(
msg = 'This PDF does not have an xref. Use --show-fallback-xref if ' \
'you want to display the content of a fallback xref that ' \
'contains all objects.'
warnings.warn(msg, PDFNoValidXRefWarning)
logger.warning(msg)
return