trivial grammar errors

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@173 1aa58f4a-7d42-0410-adbc-911cccaed67c
2010-01-10 07:18:05 +00:00 · 2010-01-10 07:18:05 +00:00 · a9d7a00ccd
parent 665f2bd710
commit a9d7a00ccd
2 changed files with 18 additions and 14 deletions
--- a/docs/index.html
+++ b/docs/index.html
@ -19,7 +19,7 @@ Python PDF parser and analyzer

 <div align=right class=lastmod>
 <!-- hhmts start -->
-Last Modified: Mon Jan  4 21:44:43 JST 2010
+Last Modified: Mon Jan  4 23:23:00 JST 2010
 <!-- hhmts end -->
 </div>

@ -197,7 +197,7 @@ By default, it extracts texts from all the pages.
 <dd> Specifies the output format. The following formats are currently supported.
 <ul>
 <li> <code>text</code> : TEXT format. (Default)
-<li> <code>html</code> : HTML format. Not recommended for extraction purpose because the markup is very messy.
+<li> <code>html</code> : HTML format. Not recommended for extraction purposes because the markup is messy.
 <li> <code>xml</code> : XML format. Provides the most information available.
 <li> <code>tag</code> : "Tagged PDF" format. A tagged PDF has its own contents annotated with
 HTML-like tags. pdf2txt tries to extract its content streams rather than inferring its text locations.
@ -269,7 +269,7 @@ By default, it extracts all the pages in a document.
 <h3>dumppdf.py</h3>
 <p>
 <code>dumppdf.py</code> dumps the internal contents of a PDF file
-in pseudo-XML format. This program is primarily for debugging purpose,
+in pseudo-XML format. This program is primarily for debugging purposes,
 but it's also possible to extract some meaningful contents
 (such as images).

--- a/tools/pdf2html.cgi
+++ b/tools/pdf2html.cgi
@ -1,14 +1,14 @@
-#!/usr/bin/python
+#!/usr/bin/python -O
 #
-# pdf2html.cgi - Gateway for converting PDF into HTML.
+# pdf2html.cgi - Gateway script for converting PDF into HTML.
 #
 # Security consideration for public access:
 #
-#   Limit the process size and/or running time.
+#   Limit the process size and/or maximum cpu time.
 #   The process should be chrooted.
 #   A disk quota should be imposed on the user.
 #
-# Setup:
+# How to Set Up:
 #   $ mkdir $CGIDIR
 #   $ mkdir $CGIDIR/var
 #   $ python setup.py install_lib --install-dir=$CGIDIR
@ -16,9 +16,10 @@
 #

 import sys
-# comment out at runtime.
-import cgitb; cgitb.enable()
+# comment this out at runtime.
+#import cgitb; cgitb.enable()
 import os, os.path, re, cgi, time, random, codecs, logging, traceback
+import pdfminer
 from pdfminer.pdfinterp import PDFResourceManager, process_pdf
 from pdfminer.converter import HTMLConverter, TextConverter
 from pdfminer.layout import LAParams
@ -138,26 +139,29 @@ class PDF2HTMLApp(object):
          '<input type="submit" name="c" value="Convert to TEXT">\n',
          '<input type="reset" value="Reset">\n',
          '</form><hr>\n',
-          '<p>Powered by <a href="http://www.unixuser.org/~euske/python/pdfminer/">PDFMiner</a>\n',
+          '<p>Powered by <a href="http://www.unixuser.org/~euske/python/pdfminer/">PDFMiner</a>-%s\n' % pdfminer.__version__,
          '</body></html>\n',
          )
        return

    def run(self, argv):
-        logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s')
        if self.debug:
-            logging.basicConfig(level=logging.DEBUG)
+            logging.basicConfig(level=logging.DEBUG,
+                                format='%(asctime)s %(levelname)s %(message)s')
        else:
            logging.basicConfig(level=logging.ERROR,
+                                format='%(asctime)s %(levelname)s %(message)s',
                                filename=self.logpath, filemode='a')
        if self.path_info == '/':
            self.http_200()
            self.coverpage()
            return
        if self.path_info != self.APPURL:
+            logging.error('invalid path: %r' % self.path_info)
            self.http_404()
            return
        if not os.path.isdir(self.tmpdir):
+            logging.error('no tmpdir')
            self.bummer('error')
            return
        form = cgi.FieldStorage()
@ -180,7 +184,7 @@ class PDF2HTMLApp(object):
                    pagenos.append(int(m.group(0)))
                except ValueError:
                    pass
-        logging.info('process: host=%s, name=%r, pagenos=%r' %
+        logging.info('received: host=%s, name=%r, pagenos=%r' %
                     (self.remote_addr, item.filename, pagenos))
        h = abs(hash((random.random(), self.remote_addr, item.filename)))
        tmppath = os.path.join(self.tmpdir, '%08x%08x.pdf' % (self.cur_time, h))
@ -193,7 +197,7 @@ class PDF2HTMLApp(object):
                        maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html)
            except Exception, e:
                self.put('<p>Sorry, an error has occured: %s' % q(repr(e)))
-                logging.error('error: %r: path=%r: %s' % (e, tmppath, traceback.format_exc()))
+                logging.error('convert: %r: path=%r: %s' % (e, tmppath, traceback.format_exc()))
        finally:
            try:
                os.remove(tmppath)