Use logger.warn instead of warnings.warn if warning cannot be prevented by user (#673)
* Use logging.Logger.warning instead of warnings.warn in most cases, following the official Python guidance that warnings.warn is directed at _developers_, not users * (pdfdocument.py) remove declarations of PDFTextExtractionNotAllowedWarning, PDFNoValidXRefWarning * (pdfpage.py) Don't import warnings, don't use PDFTextExtractionNotAllowedWarning * (tools/dumppdf.py) Don't import warnings, don't use PDFNoValidXRefWarning * (tests/test_tools_dumppdf.py) Don't import warnings, check for logging.WARN rather than PDFNoValidXRefWarning * get name right * make flake8 happy * Keep warning classes such that this does not crash code when these warnings are explicitly ignored * Update changelog to include pr ref * Small textual change * Remove patch * No need for testing if the warning is actually raised. The tests in test_tools_dumppdf.py just check whether these PDFs are supported. * Use logger as name for logger * Add docs to legacy warnings * Use logging.Logger.warning for failed decompression * Add reference to docs describing when to use logger and warnings Co-authored-by: Henry S. Thompson <ht@home.hst.name> Co-authored-by: Pieter Marsman <pietermarsman@gmail.com> pull/684/head^2
parent
c4ac514984
commit
dc530f3a6f
|
@ -41,6 +41,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||||
- Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522))
|
- Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522))
|
||||||
- Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525))
|
- Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525))
|
||||||
- Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523))
|
- Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523))
|
||||||
|
- Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673))
|
||||||
- Dependency on typing-extensions introduced by [#661](https://github.com/pdfminer/pdfminer.six/pull/661) ([#677](https://github.com/pdfminer/pdfminer.six/pull/677))
|
- Dependency on typing-extensions introduced by [#661](https://github.com/pdfminer/pdfminer.six/pull/661) ([#677](https://github.com/pdfminer/pdfminer.six/pull/677))
|
||||||
|
|
||||||
## [20201018]
|
## [20201018]
|
||||||
|
|
|
@ -25,6 +25,10 @@ class PDFNoValidXRef(PDFSyntaxError):
|
||||||
|
|
||||||
|
|
||||||
class PDFNoValidXRefWarning(SyntaxWarning):
|
class PDFNoValidXRefWarning(SyntaxWarning):
|
||||||
|
"""Legacy warning for missing xref.
|
||||||
|
|
||||||
|
Not used anymore because warnings.warn is replaced by logger.Logger.warn.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@ -41,10 +45,18 @@ class PDFEncryptionError(PDFException):
|
||||||
|
|
||||||
|
|
||||||
class PDFEncryptionWarning(UserWarning):
|
class PDFEncryptionWarning(UserWarning):
|
||||||
|
"""Legacy warning for failed decryption.
|
||||||
|
|
||||||
|
Not used anymore because warnings.warn is replaced by logger.Logger.warn.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class PDFTextExtractionNotAllowedWarning(UserWarning):
|
class PDFTextExtractionNotAllowedWarning(UserWarning):
|
||||||
|
"""Legacy warning for PDF that does not allow extraction.
|
||||||
|
|
||||||
|
Not used anymore because warnings.warn is replaced by logger.Logger.warn.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
import logging
|
import logging
|
||||||
from pdfminer.utils import Rect
|
from pdfminer.utils import Rect
|
||||||
from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple
|
from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple
|
||||||
import warnings
|
|
||||||
from . import settings
|
from . import settings
|
||||||
from .psparser import LIT
|
from .psparser import LIT
|
||||||
from .pdftypes import PDFObjectNotFound
|
from .pdftypes import PDFObjectNotFound
|
||||||
|
@ -11,7 +10,6 @@ from .pdftypes import list_value
|
||||||
from .pdftypes import dict_value
|
from .pdftypes import dict_value
|
||||||
from .pdfparser import PDFParser
|
from .pdfparser import PDFParser
|
||||||
from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
|
from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
|
||||||
from .pdfdocument import PDFTextExtractionNotAllowedWarning
|
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
@ -155,8 +153,9 @@ class PDFPage:
|
||||||
warning_msg = 'The PDF %r contains a metadata field '\
|
warning_msg = 'The PDF %r contains a metadata field '\
|
||||||
'indicating that it should not allow ' \
|
'indicating that it should not allow ' \
|
||||||
'text extraction. Ignoring this field ' \
|
'text extraction. Ignoring this field ' \
|
||||||
'and proceeding.' % fp
|
'and proceeding. Use the check_extractable ' \
|
||||||
warnings.warn(warning_msg, PDFTextExtractionNotAllowedWarning)
|
'if you want to raise an error in this case' % fp
|
||||||
|
log.warning(warning_msg)
|
||||||
# Process each page contained in the document.
|
# Process each page contained in the document.
|
||||||
for (pageno, page) in enumerate(cls.create_pages(doc)):
|
for (pageno, page) in enumerate(cls.create_pages(doc)):
|
||||||
if pagenos and (pageno not in pagenos):
|
if pagenos and (pageno not in pagenos):
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import zlib
|
import zlib
|
||||||
import warnings
|
|
||||||
import logging
|
import logging
|
||||||
import io
|
import io
|
||||||
import sys
|
import sys
|
||||||
|
@ -21,7 +20,7 @@ if TYPE_CHECKING:
|
||||||
from .pdfdocument import PDFDocument
|
from .pdfdocument import PDFDocument
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
LITERAL_CRYPT = LIT('Crypt')
|
LITERAL_CRYPT = LIT('Crypt')
|
||||||
|
|
||||||
|
@ -205,7 +204,7 @@ def dict_value(x: object) -> Dict[Any, Any]:
|
||||||
x = resolve1(x)
|
x = resolve1(x)
|
||||||
if not isinstance(x, dict):
|
if not isinstance(x, dict):
|
||||||
if settings.STRICT:
|
if settings.STRICT:
|
||||||
log.error('PDFTypeError : Dict required: %r', x)
|
logger.error('PDFTypeError : Dict required: %r', x)
|
||||||
raise PDFTypeError('Dict required: %r' % x)
|
raise PDFTypeError('Dict required: %r' % x)
|
||||||
return {}
|
return {}
|
||||||
return x
|
return x
|
||||||
|
@ -237,9 +236,7 @@ def decompress_corrupted(data):
|
||||||
except zlib.error:
|
except zlib.error:
|
||||||
# Let the error propagates if we're not yet in the CRC checksum
|
# Let the error propagates if we're not yet in the CRC checksum
|
||||||
if i < len(data) - 3:
|
if i < len(data) - 3:
|
||||||
# Import here to prevent circualr import
|
logger.warning("Data-loss while decompressing corrupted data")
|
||||||
from .pdfdocument import PDFEncryptionWarning
|
|
||||||
warnings.warn("Data-loss while decompressing corrupted data", PDFEncryptionWarning)
|
|
||||||
return result_str
|
return result_str
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import warnings
|
import unittest
|
||||||
|
import logging
|
||||||
from nose.tools import raises
|
from nose.tools import raises
|
||||||
from helpers import absolute_sample_path
|
from helpers import absolute_sample_path
|
||||||
from tempfilepath import TemporaryFilePath
|
from tempfilepath import TemporaryFilePath
|
||||||
from pdfminer.pdfdocument import PDFNoValidXRefWarning
|
|
||||||
from tools import dumppdf
|
from tools import dumppdf
|
||||||
|
|
||||||
|
|
||||||
|
@ -18,12 +18,9 @@ def run(filename, options=None):
|
||||||
dumppdf.main(s.split(' ')[1:])
|
dumppdf.main(s.split(' ')[1:])
|
||||||
|
|
||||||
|
|
||||||
class TestDumpPDF():
|
class TestDumpPDF(unittest.TestCase):
|
||||||
def test_simple1(self):
|
def test_simple1(self):
|
||||||
"""dumppdf.py simple1.pdf raises a warning because it has no xref"""
|
run('simple1.pdf', '-t -a')
|
||||||
with warnings.catch_warnings(record=True) as ws:
|
|
||||||
run('simple1.pdf', '-t -a')
|
|
||||||
assert any(w.category == PDFNoValidXRefWarning for w in ws)
|
|
||||||
|
|
||||||
def test_simple2(self):
|
def test_simple2(self):
|
||||||
run('simple2.pdf', '-t -a')
|
run('simple2.pdf', '-t -a')
|
||||||
|
@ -32,10 +29,7 @@ class TestDumpPDF():
|
||||||
run('jo.pdf', '-t -a')
|
run('jo.pdf', '-t -a')
|
||||||
|
|
||||||
def test_simple3(self):
|
def test_simple3(self):
|
||||||
"""dumppdf.py simple3.pdf raises a warning because it has no xref"""
|
run('simple3.pdf', '-t -a')
|
||||||
with warnings.catch_warnings(record=True) as ws:
|
|
||||||
run('simple3.pdf', '-t -a')
|
|
||||||
assert any(w.category == PDFNoValidXRefWarning for w in ws)
|
|
||||||
|
|
||||||
def test_2(self):
|
def test_2(self):
|
||||||
run('nonfree/dmca.pdf', '-t -a')
|
run('nonfree/dmca.pdf', '-t -a')
|
||||||
|
|
|
@ -6,12 +6,10 @@ import re
|
||||||
import sys
|
import sys
|
||||||
from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, \
|
from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, \
|
||||||
Union, cast
|
Union, cast
|
||||||
import warnings
|
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
import pdfminer
|
import pdfminer
|
||||||
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback, \
|
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback
|
||||||
PDFNoValidXRefWarning
|
|
||||||
from pdfminer.pdfpage import PDFPage
|
from pdfminer.pdfpage import PDFPage
|
||||||
from pdfminer.pdfparser import PDFParser
|
from pdfminer.pdfparser import PDFParser
|
||||||
from pdfminer.pdftypes import PDFObjectNotFound, PDFValueError
|
from pdfminer.pdftypes import PDFObjectNotFound, PDFValueError
|
||||||
|
@ -20,6 +18,7 @@ from pdfminer.psparser import PSKeyword, PSLiteral, LIT
|
||||||
from pdfminer.utils import isnumber
|
from pdfminer.utils import isnumber
|
||||||
|
|
||||||
logging.basicConfig()
|
logging.basicConfig()
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
|
ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
|
||||||
|
|
||||||
|
@ -115,7 +114,7 @@ def dumptrailers(
|
||||||
msg = 'This PDF does not have an xref. Use --show-fallback-xref if ' \
|
msg = 'This PDF does not have an xref. Use --show-fallback-xref if ' \
|
||||||
'you want to display the content of a fallback xref that ' \
|
'you want to display the content of a fallback xref that ' \
|
||||||
'contains all objects.'
|
'contains all objects.'
|
||||||
warnings.warn(msg, PDFNoValidXRefWarning)
|
logger.warning(msg)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue