Remove support for non-standard output streams that are not binary by removing the try-except check that writes a unicode character to the stream (#523)
Closes #191 * Remove support for non-standard output streams that are not binary by removing the try-except check that writes a unicode character to the stream * Add docstring * Fix flake8 pull/484/head^2
parent
fc75972bbd
commit
f8e6ad6ac1
|
@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||||
## Removed
|
## Removed
|
||||||
- Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522))
|
- Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522))
|
||||||
- Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525))
|
- Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525))
|
||||||
|
- Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523))
|
||||||
|
|
||||||
## [20201018]
|
## [20201018]
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import io
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
@ -167,24 +168,22 @@ class PDFConverter(PDFLayoutAnalyzer):
|
||||||
laparams=laparams)
|
laparams=laparams)
|
||||||
self.outfp = outfp
|
self.outfp = outfp
|
||||||
self.codec = codec
|
self.codec = codec
|
||||||
if hasattr(self.outfp, 'mode'):
|
self.outfp_binary = self._is_binary_stream(self.outfp)
|
||||||
if 'b' in self.outfp.mode:
|
|
||||||
self.outfp_binary = True
|
@staticmethod
|
||||||
else:
|
def _is_binary_stream(outfp):
|
||||||
self.outfp_binary = False
|
"""Test if an stream is binary or not"""
|
||||||
else:
|
if 'b' in getattr(outfp, 'mode', ''):
|
||||||
import io
|
return True
|
||||||
if isinstance(self.outfp, io.BytesIO):
|
elif hasattr(outfp, 'mode'):
|
||||||
self.outfp_binary = True
|
# output stream has a mode, but it does not contain 'b'
|
||||||
elif isinstance(self.outfp, io.StringIO):
|
return False
|
||||||
self.outfp_binary = False
|
elif isinstance(outfp, io.BytesIO):
|
||||||
else:
|
return True
|
||||||
try:
|
elif isinstance(outfp, io.StringIO):
|
||||||
self.outfp.write("é")
|
return False
|
||||||
self.outfp_binary = False
|
|
||||||
except TypeError:
|
return True
|
||||||
self.outfp_binary = True
|
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
class TextConverter(PDFConverter):
|
class TextConverter(PDFConverter):
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
from nose.tools import assert_equal
|
import io
|
||||||
|
from tempfile import TemporaryFile
|
||||||
|
|
||||||
from pdfminer.converter import PDFLayoutAnalyzer
|
from nose.tools import assert_equal, assert_false, assert_true
|
||||||
|
|
||||||
|
from pdfminer.converter import PDFLayoutAnalyzer, PDFConverter
|
||||||
from pdfminer.layout import LTContainer, LTRect, LTCurve
|
from pdfminer.layout import LTContainer, LTRect, LTCurve
|
||||||
from pdfminer.pdfinterp import PDFGraphicState
|
from pdfminer.pdfinterp import PDFGraphicState
|
||||||
|
|
||||||
|
@ -99,3 +102,22 @@ class TestPaintPath():
|
||||||
analyzer = PDFLayoutAnalyzer(None)
|
analyzer = PDFLayoutAnalyzer(None)
|
||||||
analyzer.set_ctm([1, 0, 0, 1, 0, 0])
|
analyzer.set_ctm([1, 0, 0, 1, 0, 0])
|
||||||
return analyzer
|
return analyzer
|
||||||
|
|
||||||
|
|
||||||
|
class TestBinaryDetector():
|
||||||
|
def test_stringio(self):
|
||||||
|
assert_false(PDFConverter._is_binary_stream(io.StringIO()))
|
||||||
|
|
||||||
|
def test_bytesio(self):
|
||||||
|
assert_true(PDFConverter._is_binary_stream(io.BytesIO()))
|
||||||
|
|
||||||
|
def test_tmpfile(self):
|
||||||
|
with TemporaryFile(mode='w') as f:
|
||||||
|
assert_false(PDFConverter._is_binary_stream(f))
|
||||||
|
|
||||||
|
def test_binary_tmpfile(self):
|
||||||
|
with TemporaryFile(mode='wb') as f:
|
||||||
|
assert_true(PDFConverter._is_binary_stream(f))
|
||||||
|
|
||||||
|
def test_non_file_like_object_defaults_to_binary(self):
|
||||||
|
assert_true(PDFConverter._is_binary_stream(object()))
|
||||||
|
|
Loading…
Reference in New Issue