Use visible imports in highlevel.rst documentation (#609)
* Add missing import for extract_text_to_fp
* Replace testsetup with visible imports in documentation
* Remove obsolete check for Python version; Python 2 is not supported anymore
* (Unrelated to this MR) Remove sys from converter.py
* Optimize imports
* (Unrelated to this MR) Fix line length error

Co-authored-by: Pieter Marsman <pietermarsman@gmail.com>
pull/610/head
parent
1d33c026e4
commit
7f54cefe02
|
@ -1,8 +1,3 @@
|
|||
.. testsetup::
|
||||
|
||||
import sys
|
||||
from pdfminer.high_level import extract_text_to_fp, extract_text
|
||||
|
||||
.. _tutorial_highlevel:
|
||||
|
||||
Extract text from a PDF using Python
|
||||
|
@ -15,6 +10,7 @@ The most simple way to extract text from a PDF is to use
|
|||
|
||||
.. doctest::
|
||||
|
||||
>>> from pdfminer.high_level import extract_text
|
||||
>>> text = extract_text('samples/simple1.pdf')
|
||||
>>> print(repr(text))
|
||||
'Hello \n\nWorld\n\nHello \n\nWorld\n\nH e l l o \n\nW o r l d\n\nH e l l o \n\nW o r l d\n\n\x0c'
|
||||
|
@ -42,10 +38,8 @@ To read text from a PDF and print it on the command line:
|
|||
|
||||
.. doctest::
|
||||
|
||||
>>> if sys.version_info > (3, 0):
|
||||
... from io import StringIO
|
||||
... else:
|
||||
... from io import BytesIO as StringIO
|
||||
>>> from io import StringIO
|
||||
>>> from pdfminer.high_level import extract_text_to_fp
|
||||
>>> output_string = StringIO()
|
||||
>>> with open('samples/simple1.pdf', 'rb') as fin:
|
||||
... extract_text_to_fp(fin, output_string)
|
||||
|
@ -56,10 +50,8 @@ Or to convert it to html and use layout analysis:
|
|||
|
||||
.. doctest::
|
||||
|
||||
>>> if sys.version_info > (3, 0):
|
||||
... from io import StringIO
|
||||
... else:
|
||||
... from io import BytesIO as StringIO
|
||||
>>> from io import StringIO
|
||||
>>> from pdfminer.high_level import extract_text_to_fp
|
||||
>>> from pdfminer.layout import LAParams
|
||||
>>> output_string = StringIO()
|
||||
>>> with open('samples/simple1.pdf', 'rb') as fin:
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import io
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
|
||||
from . import utils
|
||||
from .layout import LTChar
|
||||
|
@ -279,8 +278,6 @@ class HTMLConverter(PDFConverter):
|
|||
def write(self, text):
|
||||
if self.codec:
|
||||
text = text.encode(self.codec)
|
||||
if sys.version_info < (3, 0):
|
||||
text = str(text)
|
||||
self.outfp.write(text)
|
||||
return
|
||||
|
||||
|
|
|
@ -66,7 +66,8 @@ def dumpxml(out, obj, codec=None):
|
|||
out.write('\n</props>\n')
|
||||
if codec == 'text':
|
||||
data = obj.get_data()
|
||||
out.write('<data size="%d">%s</data>\n' % (len(data), escape(data)))
|
||||
out.write('<data size="%d">%s</data>\n'
|
||||
% (len(data), escape(data)))
|
||||
out.write('</stream>')
|
||||
return
|
||||
|
||||
|
|
Loading…
Reference in New Issue