34 lines
1.2 KiB
ReStructuredText
34 lines
1.2 KiB
ReStructuredText
.. _tutorial_composable:
|
|
|
|
Get started using the composable components API
|
|
***********************************************
|
|
|
|
The command-line tools and the high-level API are just shortcuts for commonly
|
|
used combinations of pdfminer.six components. You can use these components to
|
|
tailor pdfminer.six to your own needs.
|
|
|
|
For example, to extract the text from a PDF file and save it in a Python
|
|
variable::
|
|
|
|
from io import StringIO
|
|
|
|
from pdfminer.converter import TextConverter
|
|
from pdfminer.layout import LAParams
|
|
from pdfminer.pdfdocument import PDFDocument
|
|
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
|
from pdfminer.pdfpage import PDFPage
|
|
from pdfminer.pdfparser import PDFParser
|
|
|
|
output_string = StringIO()
|
|
with open('samples/simple1.pdf', 'rb') as in_file:
|
|
parser = PDFParser(in_file)
|
|
doc = PDFDocument(parser)
|
|
rsrcmgr = PDFResourceManager()
|
|
device = TextConverter(rsrcmgr, output_string, laparams=LAParams())
|
|
interpreter = PDFPageInterpreter(rsrcmgr, device)
|
|
for page in PDFPage.create_pages(doc):
|
|
interpreter.process_page(page)
|
|
|
|
print(output_string.getvalue())
|
|
|