Use logger.warning instead of warnings.warn if the warning cannot be prevented by the user (#673)
* Use logging.Logger.warning instead of warnings.warn in most cases, following the official Python guidance that warnings.warn is directed at _developers_, not users * (pdfdocument.py) remove declarations of PDFTextExtractionNotAllowedWarning, PDFNoValidXRefWarning * (pdfpage.py) Don't import warnings, don't use PDFTextExtractionNotAllowedWarning * (tools/dumppdf.py) Don't import warnings, don't use PDFNoValidXRefWarning * (tests/test_tools_dumppdf.py) Don't import warnings, check for logging.WARN rather than PDFNoValidXRefWarning * get name right * make flake8 happy * Keep warning classes so that code does not crash when these warnings are explicitly ignored * Update changelog to include PR ref * Small textual change * Remove patch * No need for testing if the warning is actually raised. The tests in test_tools_dumppdf.py are just test cases checking whether these PDFs are supported. * Use logger as name for logger * Add docs to legacy warnings * Use logging.Logger.warning for failed decompression * Add reference to docs describing when to use logger and warnings Co-authored-by: Henry S. Thompson <ht@home.hst.name> Co-authored-by: Pieter Marsman <pietermarsman@gmail.com> pull/684/head^2
parent
c4ac514984
commit
dc530f3a6f
|
@ -41,6 +41,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|||
- Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522))
|
||||
- Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525))
|
||||
- Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523))
|
||||
- Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673))
|
||||
- Dependency on typing-extensions introduced by [#661](https://github.com/pdfminer/pdfminer.six/pull/661) ([#677](https://github.com/pdfminer/pdfminer.six/pull/677))
|
||||
|
||||
## [20201018]
|
||||
|
|
|
@ -25,6 +25,10 @@ class PDFNoValidXRef(PDFSyntaxError):
|
|||
|
||||
|
||||
class PDFNoValidXRefWarning(SyntaxWarning):
|
||||
"""Legacy warning for missing xref.
|
||||
|
||||
Not used anymore because warnings.warn is replaced by logging.Logger.warning.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
|
@ -41,10 +45,18 @@ class PDFEncryptionError(PDFException):
|
|||
|
||||
|
||||
class PDFEncryptionWarning(UserWarning):
|
||||
"""Legacy warning for failed decryption.
|
||||
|
||||
Not used anymore because warnings.warn is replaced by logging.Logger.warning.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class PDFTextExtractionNotAllowedWarning(UserWarning):
|
||||
"""Legacy warning for PDF that does not allow extraction.
|
||||
|
||||
Not used anymore because warnings.warn is replaced by logging.Logger.warning.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import logging
|
||||
from pdfminer.utils import Rect
|
||||
from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple
|
||||
import warnings
|
||||
from . import settings
|
||||
from .psparser import LIT
|
||||
from .pdftypes import PDFObjectNotFound
|
||||
|
@ -11,7 +10,6 @@ from .pdftypes import list_value
|
|||
from .pdftypes import dict_value
|
||||
from .pdfparser import PDFParser
|
||||
from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
|
||||
from .pdfdocument import PDFTextExtractionNotAllowedWarning
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
@ -155,8 +153,9 @@ class PDFPage:
|
|||
warning_msg = 'The PDF %r contains a metadata field '\
|
||||
'indicating that it should not allow ' \
|
||||
'text extraction. Ignoring this field ' \
|
||||
'and proceeding.' % fp
|
||||
warnings.warn(warning_msg, PDFTextExtractionNotAllowedWarning)
|
||||
'and proceeding. Use the check_extractable ' \
|
||||
'if you want to raise an error in this case' % fp
|
||||
log.warning(warning_msg)
|
||||
# Process each page contained in the document.
|
||||
for (pageno, page) in enumerate(cls.create_pages(doc)):
|
||||
if pagenos and (pageno not in pagenos):
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import zlib
|
||||
import warnings
|
||||
import logging
|
||||
import io
|
||||
import sys
|
||||
|
@ -21,7 +20,7 @@ if TYPE_CHECKING:
|
|||
from .pdfdocument import PDFDocument
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LITERAL_CRYPT = LIT('Crypt')
|
||||
|
||||
|
@ -205,7 +204,7 @@ def dict_value(x: object) -> Dict[Any, Any]:
|
|||
x = resolve1(x)
|
||||
if not isinstance(x, dict):
|
||||
if settings.STRICT:
|
||||
log.error('PDFTypeError : Dict required: %r', x)
|
||||
logger.error('PDFTypeError : Dict required: %r', x)
|
||||
raise PDFTypeError('Dict required: %r' % x)
|
||||
return {}
|
||||
return x
|
||||
|
@ -237,9 +236,7 @@ def decompress_corrupted(data):
|
|||
except zlib.error:
|
||||
# Let the error propagate if we're not yet in the CRC checksum
|
||||
if i < len(data) - 3:
|
||||
# Import here to prevent circular import
|
||||
from .pdfdocument import PDFEncryptionWarning
|
||||
warnings.warn("Data-loss while decompressing corrupted data", PDFEncryptionWarning)
|
||||
logger.warning("Data-loss while decompressing corrupted data")
|
||||
return result_str
|
||||
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import warnings
|
||||
import unittest
|
||||
import logging
|
||||
from nose.tools import raises
|
||||
from helpers import absolute_sample_path
|
||||
from tempfilepath import TemporaryFilePath
|
||||
from pdfminer.pdfdocument import PDFNoValidXRefWarning
|
||||
from tools import dumppdf
|
||||
|
||||
|
||||
|
@ -18,12 +18,9 @@ def run(filename, options=None):
|
|||
dumppdf.main(s.split(' ')[1:])
|
||||
|
||||
|
||||
class TestDumpPDF():
|
||||
class TestDumpPDF(unittest.TestCase):
|
||||
def test_simple1(self):
|
||||
"""dumppdf.py simple1.pdf raises a warning because it has no xref"""
|
||||
with warnings.catch_warnings(record=True) as ws:
|
||||
run('simple1.pdf', '-t -a')
|
||||
assert any(w.category == PDFNoValidXRefWarning for w in ws)
|
||||
|
||||
def test_simple2(self):
|
||||
run('simple2.pdf', '-t -a')
|
||||
|
@ -32,10 +29,7 @@ class TestDumpPDF():
|
|||
run('jo.pdf', '-t -a')
|
||||
|
||||
def test_simple3(self):
|
||||
"""dumppdf.py simple3.pdf raises a warning because it has no xref"""
|
||||
with warnings.catch_warnings(record=True) as ws:
|
||||
run('simple3.pdf', '-t -a')
|
||||
assert any(w.category == PDFNoValidXRefWarning for w in ws)
|
||||
|
||||
def test_2(self):
|
||||
run('nonfree/dmca.pdf', '-t -a')
|
||||
|
|
|
@ -6,12 +6,10 @@ import re
|
|||
import sys
|
||||
from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, \
|
||||
Union, cast
|
||||
import warnings
|
||||
from argparse import ArgumentParser
|
||||
|
||||
import pdfminer
|
||||
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback, \
|
||||
PDFNoValidXRefWarning
|
||||
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback
|
||||
from pdfminer.pdfpage import PDFPage
|
||||
from pdfminer.pdfparser import PDFParser
|
||||
from pdfminer.pdftypes import PDFObjectNotFound, PDFValueError
|
||||
|
@ -20,6 +18,7 @@ from pdfminer.psparser import PSKeyword, PSLiteral, LIT
|
|||
from pdfminer.utils import isnumber
|
||||
|
||||
logging.basicConfig()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
|
||||
|
||||
|
@ -115,7 +114,7 @@ def dumptrailers(
|
|||
msg = 'This PDF does not have an xref. Use --show-fallback-xref if ' \
|
||||
'you want to display the content of a fallback xref that ' \
|
||||
'contains all objects.'
|
||||
warnings.warn(msg, PDFNoValidXRefWarning)
|
||||
logger.warning(msg)
|
||||
return
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue