diff --git a/docs/source/tutorial/highlevel.rst b/docs/source/tutorial/highlevel.rst index ffca472..fff47cb 100644 --- a/docs/source/tutorial/highlevel.rst +++ b/docs/source/tutorial/highlevel.rst @@ -1,8 +1,3 @@ -.. testsetup:: - - import sys - from pdfminer.high_level import extract_text_to_fp, extract_text - .. _tutorial_highlevel: Extract text from a PDF using Python @@ -15,6 +10,7 @@ The most simple way to extract text from a PDF is to use .. doctest:: + >>> from pdfminer.high_level import extract_text >>> text = extract_text('samples/simple1.pdf') >>> print(repr(text)) 'Hello \n\nWorld\n\nHello \n\nWorld\n\nH e l l o \n\nW o r l d\n\nH e l l o \n\nW o r l d\n\n\x0c' @@ -42,10 +38,8 @@ To read text from a PDF and print it on the command line: .. doctest:: - >>> if sys.version_info > (3, 0): - ... from io import StringIO - ... else: - ... from io import BytesIO as StringIO + >>> from io import StringIO + >>> from pdfminer.high_level import extract_text_to_fp >>> output_string = StringIO() >>> with open('samples/simple1.pdf', 'rb') as fin: ... extract_text_to_fp(fin, output_string) @@ -56,10 +50,8 @@ Or to convert it to html and use layout analysis: .. doctest:: - >>> if sys.version_info > (3, 0): - ... from io import StringIO - ... else: - ... from io import BytesIO as StringIO + >>> from io import StringIO + >>> from pdfminer.high_level import extract_text_to_fp >>> from pdfminer.layout import LAParams >>> output_string = StringIO() >>> with open('samples/simple1.pdf', 'rb') as fin: diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 4b2b62e..de58f30 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -1,7 +1,6 @@ import io import logging import re -import sys from . 
import utils from .layout import LTChar @@ -279,8 +278,6 @@ class HTMLConverter(PDFConverter): def write(self, text): if self.codec: text = text.encode(self.codec) - if sys.version_info < (3, 0): - text = str(text) self.outfp.write(text) return diff --git a/tools/dumppdf.py b/tools/dumppdf.py index 8baddc8..8724c81 100755 --- a/tools/dumppdf.py +++ b/tools/dumppdf.py @@ -66,7 +66,8 @@ def dumpxml(out, obj, codec=None): out.write('\n</props>\n') if codec == 'text': data = obj.get_data() - out.write('<data size="%d">%s</data>\n' % (len(data), escape(data))) + out.write('<data size="%d">%s</data>\n' + % (len(data), escape(data))) out.write('</stream>') return