diff --git a/docs/usage.html b/docs/usage.html
index b2fb787..b1b5677 100644
--- a/docs/usage.html
+++ b/docs/usage.html
@@ -25,6 +25,10 @@ from other applications.
A typical way to parse a PDF file is the following:
+from pdfminer.pdfparser import PDFParser, PDFDocument
+from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
+from pdfminer.pdfdevice import PDFDevice
+
 # Open a PDF file.
 fp = open('mypdf.pdf', 'rb')
 # Create a PDF parser object associated with the file object.
@@ -34,7 +38,7 @@ doc = PDFDocument()
 # Connect the parser and document objects.
 parser.set_document(doc)
 doc.set_parser(parser)
-# Supply the document password for initialization.
+# Supply the password for initialization.
 # (If no password is set, give an empty string.)
 doc.initialize(password)
 # Check if the document allows text extraction. If not, abort.
@@ -52,12 +56,12 @@ for page in doc.get_pages():
-In PDFMiner, there are several objects involved in parsing a PDF file.
-Figure 1. shows the relationships between these objects.
+In PDFMiner, there are several objects involved in parsing a PDF file,
+as shown in Figure 1.
+from pdfminer.layout import LAParams
+from pdfminer.converter import PDFPageAggregator
+
 # Set parameters for analysis.
 laparams = LAParams()
 # Create a PDF page aggregator object.