Remove support for non-standard output streams that are not binary by removing the try-except check that writes a Unicode character to the stream (#523)

Closes #191 

* Remove support for non-standard output streams that are not binary by removing the try-except check that writes a Unicode character to the stream

* Add docstring

* Fix flake8
pull/484/head^2
Pieter Marsman 2020-10-25 14:37:12 +01:00 committed by GitHub
parent fc75972bbd
commit f8e6ad6ac1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 42 additions and 20 deletions

View File

@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## Removed
- Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522))
- Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525))
- Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523))
## [20201018]

View File

@ -1,3 +1,4 @@
import io
import logging
import re
import sys
@ -167,24 +168,22 @@ class PDFConverter(PDFLayoutAnalyzer):
laparams=laparams)
self.outfp = outfp
self.codec = codec
if hasattr(self.outfp, 'mode'):
if 'b' in self.outfp.mode:
self.outfp_binary = True
else:
self.outfp_binary = False
else:
import io
if isinstance(self.outfp, io.BytesIO):
self.outfp_binary = True
elif isinstance(self.outfp, io.StringIO):
self.outfp_binary = False
else:
try:
self.outfp.write("é")
self.outfp_binary = False
except TypeError:
self.outfp_binary = True
return
self.outfp_binary = self._is_binary_stream(self.outfp)
@staticmethod
def _is_binary_stream(outfp):
    """Tell whether an output stream expects bytes rather than text.

    The checks run in this order:

    1. A string ``mode`` attribute decides on its own: the stream is
       binary if and only if the mode contains ``'b'``.
    2. Otherwise, a stream deriving from ``io.TextIOBase`` (which
       includes ``io.StringIO``) is a text stream.
    3. Anything else (``io.BytesIO``, other raw/buffered streams, or
       objects that are not file-like at all) is assumed to be binary.

    :param outfp: the object that output will be written to.
    :return: ``True`` if the stream is considered binary, else ``False``.
    """
    mode = getattr(outfp, 'mode', None)
    # Only trust ``mode`` when it is a string. Some file-like objects
    # expose a non-string mode, and ``'b' in <non-string>`` would raise
    # TypeError instead of answering the question.
    if isinstance(mode, str):
        return 'b' in mode
    # Text streams that carry no usable ``mode`` must not be treated as
    # binary, otherwise bytes would be written to them later on.
    if isinstance(outfp, io.TextIOBase):
        return False
    # Default for everything else, including io.BytesIO.
    return True
class TextConverter(PDFConverter):

View File

@ -1,6 +1,9 @@
from nose.tools import assert_equal
import io
from tempfile import TemporaryFile
from pdfminer.converter import PDFLayoutAnalyzer
from nose.tools import assert_equal, assert_false, assert_true
from pdfminer.converter import PDFLayoutAnalyzer, PDFConverter
from pdfminer.layout import LTContainer, LTRect, LTCurve
from pdfminer.pdfinterp import PDFGraphicState
@ -99,3 +102,22 @@ class TestPaintPath():
analyzer = PDFLayoutAnalyzer(None)
analyzer.set_ctm([1, 0, 0, 1, 0, 0])
return analyzer
class TestBinaryDetector:
    """Checks for ``PDFConverter._is_binary_stream`` on several streams."""

    def test_stringio(self):
        """An in-memory text buffer is reported as not binary."""
        text_buffer = io.StringIO()
        assert_false(PDFConverter._is_binary_stream(text_buffer))

    def test_bytesio(self):
        """An in-memory bytes buffer is reported as binary."""
        bytes_buffer = io.BytesIO()
        assert_true(PDFConverter._is_binary_stream(bytes_buffer))

    def test_tmpfile(self):
        """A temporary file opened with mode 'w' is reported as not binary."""
        with TemporaryFile(mode='w') as text_file:
            detected = PDFConverter._is_binary_stream(text_file)
            assert_false(detected)

    def test_binary_tmpfile(self):
        """A temporary file opened with mode 'wb' is reported as binary."""
        with TemporaryFile(mode='wb') as binary_file:
            detected = PDFConverter._is_binary_stream(binary_file)
            assert_true(detected)

    def test_non_file_like_object_defaults_to_binary(self):
        """An object that is not file-like falls back to binary."""
        not_a_stream = object()
        assert_true(PDFConverter._is_binary_stream(not_a_stream))