Remove support for non-standard output streams that are not binary by removing the try-except check that writes a unicode character to the stream (#523)
Closes #191 * Remove support for non-standard output streams that are not binary by removing the try-except check that writes a unicode character to the stream * Add docstring * Fix flake8 pull/484/head^2
parent
fc75972bbd
commit
f8e6ad6ac1
|
@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|||
## Removed
|
||||
- Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522))
|
||||
- Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525))
|
||||
- Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523))
|
||||
|
||||
## [20201018]
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import io
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
|
@ -167,24 +168,22 @@ class PDFConverter(PDFLayoutAnalyzer):
|
|||
laparams=laparams)
|
||||
self.outfp = outfp
|
||||
self.codec = codec
|
||||
if hasattr(self.outfp, 'mode'):
|
||||
if 'b' in self.outfp.mode:
|
||||
self.outfp_binary = True
|
||||
else:
|
||||
self.outfp_binary = False
|
||||
else:
|
||||
import io
|
||||
if isinstance(self.outfp, io.BytesIO):
|
||||
self.outfp_binary = True
|
||||
elif isinstance(self.outfp, io.StringIO):
|
||||
self.outfp_binary = False
|
||||
else:
|
||||
try:
|
||||
self.outfp.write("é")
|
||||
self.outfp_binary = False
|
||||
except TypeError:
|
||||
self.outfp_binary = True
|
||||
return
|
||||
self.outfp_binary = self._is_binary_stream(self.outfp)
|
||||
|
||||
@staticmethod
def _is_binary_stream(outfp):
    """Test if a stream is binary or not.

    The decision is made without writing anything to the stream:

    * If ``outfp`` has a string ``mode`` attribute, the stream is binary
      exactly when that mode contains ``'b'``.
    * Otherwise, instances of ``io.TextIOBase`` (which includes
      ``io.StringIO``) are treated as text.
    * Everything else, including ``io.BytesIO`` and objects that are not
      file-like at all, defaults to binary.

    :param outfp: the output stream (or arbitrary object) to inspect.
    :return: True if the stream should receive bytes, False if it should
        receive str.
    """
    mode = getattr(outfp, 'mode', None)
    if isinstance(mode, str):
        # A textual mode string is authoritative.
        return 'b' in mode

    # A non-string mode (e.g. an integer constant) cannot be searched for
    # 'b' without raising TypeError, so it is ignored and the type of the
    # stream decides instead.
    if isinstance(outfp, io.TextIOBase):
        return False

    return True
|
||||
|
||||
|
||||
class TextConverter(PDFConverter):
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
from nose.tools import assert_equal
|
||||
import io
|
||||
from tempfile import TemporaryFile
|
||||
|
||||
from pdfminer.converter import PDFLayoutAnalyzer
|
||||
from nose.tools import assert_equal, assert_false, assert_true
|
||||
|
||||
from pdfminer.converter import PDFLayoutAnalyzer, PDFConverter
|
||||
from pdfminer.layout import LTContainer, LTRect, LTCurve
|
||||
from pdfminer.pdfinterp import PDFGraphicState
|
||||
|
||||
|
@ -99,3 +102,22 @@ class TestPaintPath():
|
|||
analyzer = PDFLayoutAnalyzer(None)
|
||||
analyzer.set_ctm([1, 0, 0, 1, 0, 0])
|
||||
return analyzer
|
||||
|
||||
|
||||
class TestBinaryDetector():
    """Checks for PDFConverter._is_binary_stream on several stream kinds."""

    def test_stringio(self):
        """An in-memory text buffer is not binary."""
        stream = io.StringIO()
        assert_false(PDFConverter._is_binary_stream(stream))

    def test_bytesio(self):
        """An in-memory bytes buffer is binary."""
        stream = io.BytesIO()
        assert_true(PDFConverter._is_binary_stream(stream))

    def test_tmpfile(self):
        """A temporary file opened with mode 'w' is not binary."""
        with TemporaryFile(mode='w') as text_file:
            detected = PDFConverter._is_binary_stream(text_file)
            assert_false(detected)

    def test_binary_tmpfile(self):
        """A temporary file opened with mode 'wb' is binary."""
        with TemporaryFile(mode='wb') as binary_file:
            detected = PDFConverter._is_binary_stream(binary_file)
            assert_true(detected)

    def test_non_file_like_object_defaults_to_binary(self):
        """An object that is not file-like is reported as binary."""
        placeholder = object()
        assert_true(PDFConverter._is_binary_stream(placeholder))
|
||||
|
|
Loading…
Reference in New Issue