trivial grammar errors

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@173 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-01-10 07:18:05 +00:00
parent 665f2bd710
commit a9d7a00ccd
2 changed files with 18 additions and 14 deletions

View File

@ -19,7 +19,7 @@ Python PDF parser and analyzer
<div align=right class=lastmod>
<!-- hhmts start -->
Last Modified: Mon Jan 4 21:44:43 JST 2010
Last Modified: Mon Jan 4 23:23:00 JST 2010
<!-- hhmts end -->
</div>
@ -197,7 +197,7 @@ By default, it extracts texts from all the pages.
<dd> Specifies the output format. The following formats are currently supported.
<ul>
<li> <code>text</code> : TEXT format. (Default)
<li> <code>html</code> : HTML format. Not recommended for extraction purpose because the markup is very messy.
<li> <code>html</code> : HTML format. Not recommended for extraction purposes because the markup is messy.
<li> <code>xml</code> : XML format. Provides the most information available.
<li> <code>tag</code> : "Tagged PDF" format. A tagged PDF has its own contents annotated with
HTML-like tags. pdf2txt tries to extract its content streams rather than inferring its text locations.
@ -269,7 +269,7 @@ By default, it extracts all the pages in a document.
<h3>dumppdf.py</h3>
<p>
<code>dumppdf.py</code> dumps the internal contents of a PDF file
in pseudo-XML format. This program is primarily for debugging purpose,
in pseudo-XML format. This program is primarily for debugging purposes,
but it's also possible to extract some meaningful contents
(such as images).

View File

@ -1,14 +1,14 @@
#!/usr/bin/python
#!/usr/bin/python -O
#
# pdf2html.cgi - Gateway for converting PDF into HTML.
# pdf2html.cgi - Gateway script for converting PDF into HTML.
#
# Security consideration for public access:
#
# Limit the process size and/or running time.
# Limit the process size and/or maximum cpu time.
# The process should be chrooted.
# A quota should be imposed on the user.
#
# Setup:
# How to Setup:
# $ mkdir $CGIDIR
# $ mkdir $CGIDIR/var
# $ python setup.py install_lib --install-dir=$CGIDIR
@ -16,9 +16,10 @@
#
import sys
# comment out at runtime.
import cgitb; cgitb.enable()
# comment this out at runtime.
#import cgitb; cgitb.enable()
import os, os.path, re, cgi, time, random, codecs, logging, traceback
import pdfminer
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
from pdfminer.converter import HTMLConverter, TextConverter
from pdfminer.layout import LAParams
@ -138,26 +139,29 @@ class PDF2HTMLApp(object):
'<input type="submit" name="c" value="Convert to TEXT">\n',
'<input type="reset" value="Reset">\n',
'</form><hr>\n',
'<p>Powered by <a href="http://www.unixuser.org/~euske/python/pdfminer/">PDFMiner</a>\n',
'<p>Powered by <a href="http://www.unixuser.org/~euske/python/pdfminer/">PDFMiner</a>-%s\n' % pdfminer.__version__,
'</body></html>\n',
)
return
def run(self, argv):
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s')
if self.debug:
logging.basicConfig(level=logging.DEBUG)
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(levelname)s %(message)s')
else:
logging.basicConfig(level=logging.ERROR,
format='%(asctime)s %(levelname)s %(message)s',
filename=self.logpath, filemode='a')
if self.path_info == '/':
self.http_200()
self.coverpage()
return
if self.path_info != self.APPURL:
logging.error('invalid path: %r' % self.path_info)
self.http_404()
return
if not os.path.isdir(self.tmpdir):
logging.error('no tmpdir')
self.bummer('error')
return
form = cgi.FieldStorage()
@ -180,7 +184,7 @@ class PDF2HTMLApp(object):
pagenos.append(int(m.group(0)))
except ValueError:
pass
logging.info('process: host=%s, name=%r, pagenos=%r' %
logging.info('received: host=%s, name=%r, pagenos=%r' %
(self.remote_addr, item.filename, pagenos))
h = abs(hash((random.random(), self.remote_addr, item.filename)))
tmppath = os.path.join(self.tmpdir, '%08x%08x.pdf' % (self.cur_time, h))
@ -193,7 +197,7 @@ class PDF2HTMLApp(object):
maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html)
except Exception, e:
self.put('<p>Sorry, an error has occured: %s' % q(repr(e)))
logging.error('error: %r: path=%r: %s' % (e, tmppath, traceback.format_exc()))
logging.error('convert: %r: path=%r: %s' % (e, tmppath, traceback.format_exc()))
finally:
try:
os.remove(tmppath)