webapp fixed

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@283 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-12-25 08:41:35 +00:00
parent 5d98a27d9c
commit 866f2bbb75
3 changed files with 144 additions and 30 deletions

View File

@ -5,4 +5,4 @@ RM=rm -f
all:
clean:
-$(RM) *.pyc *.pyo
-$(RM) *.pyc *.pyo *.cgic *.cgio

View File

@ -77,21 +77,23 @@ class WebApp(object):
TITLE = 'pdf2html demo'
MAXFILESIZE = 10000000 # set to zero if unlimited.
MAXPAGES = 10 # set to zero if unlimited.
MAXPAGES = 100 # set to zero if unlimited.
def __init__(self, infp=sys.stdin, outfp=sys.stdout, environ=os.environ,
codec='utf-8', apppath='/'):
self.infp = infp
self.outfp = outfp
self.environ = environ
self.codec = codec
self.apppath = apppath
self.remote_addr = environ.get('REMOTE_ADDR')
self.path_info = environ.get('PATH_INFO')
self.method = environ.get('REQUEST_METHOD', 'GET').upper()
self.server = environ.get('SERVER_SOFTWARE', '')
self.tmpdir = environ.get('TEMP', './var/')
self.remote_addr = self.environ.get('REMOTE_ADDR')
self.path_info = self.environ.get('PATH_INFO')
self.method = self.environ.get('REQUEST_METHOD', 'GET').upper()
self.server = self.environ.get('SERVER_SOFTWARE', '')
self.tmpdir = self.environ.get('TEMP', './var/')
self.content_type = 'text/html; charset=%s' % codec
self.logger = logging.getLogger()
logging.basicConfig(level=10,stream=sys.stderr)
return
def put(self, *args):
@ -102,7 +104,7 @@ class WebApp(object):
self.outfp.write(x.encode(self.codec, 'xmlcharrefreplace'))
return
def http_200(self):
def response_200(self):
if self.server.startswith('cgi-httpd'):
# required for cgi-httpd
self.outfp.write('HTTP/1.0 200 OK\r\n')
@ -110,7 +112,7 @@ class WebApp(object):
self.outfp.write('Connection: close\r\n\r\n')
return
def http_404(self):
def response_404(self):
if self.server.startswith('cgi-httpd'):
# required for cgi-httpd
self.outfp.write('HTTP/1.0 404 Not Found\r\n')
@ -119,7 +121,7 @@ class WebApp(object):
self.outfp.write('<html><body>page does not exist</body></body>\n')
return
def http_301(self, url):
def response_301(self, url):
if self.server.startswith('cgi-httpd'):
# required for cgi-httpd
self.outfp.write('HTTP/1.0 301 Moved\r\n')
@ -146,53 +148,52 @@ class WebApp(object):
return
def setup(self):
self.run = self.response_404
status = 404
if not os.path.isdir(self.tmpdir):
self.logger.error('no tmpdir')
status = 304
elif self.path_info != self.apppath:
status = 404
else:
elif self.path_info == self.apppath:
self.run = self.convert
status = 200
self._status = status
return status
def run(self):
form = cgi.FieldStorage(self.infp)
if self._status != 200:
self.http_404()
return
def convert(self):
self.form = cgi.FieldStorage(fp=self.infp, environ=self.environ)
if (self.method != 'POST' or
'c' not in form or
'f' not in form):
'c' not in self.form or
'f' not in self.form):
self.response_200()
self.coverpage()
return
item = form['f']
item = self.form['f']
if not (item.file and item.filename):
self.response_200()
self.coverpage()
return
cmd = form.getvalue('c')
cmd = self.form.getvalue('c')
html = (cmd == 'Convert to HTML')
pagenos = []
if 'p' in form:
for m in re.finditer(r'\d+', form.getvalue('p')):
if 'p' in self.form:
for m in re.finditer(r'\d+', self.form.getvalue('p')):
try:
pagenos.append(int(m.group(0)))
except ValueError:
pass
self.logger.info('received: host=%s, name=%r, pagenos=%r' %
(self.remote_addr, item.filename, pagenos))
h = abs(hash((random.random(), self.remote_addr, item.filename)))
tmppath = os.path.join(self.tmpdir, '%08x%08x.pdf' % (time.time(), h))
self.logger.info('received: host=%s, name=%r, pagenos=%r, tmppath=%r' %
(self.remote_addr, item.filename, pagenos, tmppath))
try:
if not html:
self.content_type = 'text/plain; charset=%s' % self.codec
self.http_200()
self.response_200()
try:
convert(item.file, sys.stdout, tmppath, pagenos=pagenos, codec=self.codec,
convert(item.file, self.outfp, tmppath, pagenos=pagenos, codec=self.codec,
maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html)
except Exception, e:
self.put('<p>Sorry, an error has occured: %s' % q(repr(e)))
self.logger.error('convert: %r: path=%r: %s' % (e, tmppath, traceback.format_exc()))
self.logger.error('convert: %r: path=%r: %s' % (e, traceback.format_exc()))
finally:
try:
os.remove(tmppath)

113
tools/runapp.py Executable file
View File

@ -0,0 +1,113 @@
#!/usr/bin/env python2
##
## WebApp class runner
##
## usage:
## $ runapp.py pdf2html.cgi
##
import sys
import urllib
from httplib import responses
from BaseHTTPServer import HTTPServer
from SimpleHTTPServer import SimpleHTTPRequestHandler
## WebAppHandler
##
class WebAppHandler(SimpleHTTPRequestHandler):
    """Request handler that passes every request to an application class.

    For each request a CGI-style environment dictionary is built from the
    request line and headers, APP_CLASS is instantiated with the request's
    input/output streams and that environment, and the application's
    setup() and run() methods are called in that order.
    """

    # Application class instantiated once per request. Must be assigned
    # before the server starts handling requests.
    APP_CLASS = None

    def do_POST(self):
        """Handle a POST request by running the application."""
        return self.run_cgi()

    def send_head(self):
        """Run the application instead of serving a file from disk.

        Returns whatever run_cgi() returns (None), so no file object is
        handed back to the caller.
        """
        return self.run_cgi()

    def run_cgi(self):
        """Build the CGI environment for this request and run the application.

        The value returned by the application's setup() is sent as the HTTP
        status line; the application's run() is then called to produce the
        rest of the response on self.wfile.
        """
        # Split off the query string at the last '?'.
        rest = self.path
        i = rest.rfind('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''
        # Everything before the first '/' is the script name, the remainder
        # becomes PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''
        scriptname = '/' + script

        env = {}
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        if self.headers.typeheader is None:
            env['CONTENT_TYPE'] = self.headers.type
        else:
            env['CONTENT_TYPE'] = self.headers.typeheader
        length = self.headers.getheader('content-length')
        if length:
            env['CONTENT_LENGTH'] = length

        # Collect the Accept header values; lines starting with whitespace
        # are continuation lines and are kept whole.
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                accept.append(line.strip())
            else:
                # line[7:] drops the leading "Accept:" field name.
                accept = accept + line[7:].split(',')
        env['HTTP_ACCEPT'] = ','.join(accept)

        ua = self.headers.getheader('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.getheaders('cookie'))
        if co:
            env['HTTP_COOKIE'] = ', '.join(co)
        # Make sure the optional variables are always present.
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE'):
            env.setdefault(k, "")

        app = self.APP_CLASS(infp=self.rfile, outfp=self.wfile, environ=env)
        status = app.setup()
        # Fall back to an empty reason phrase rather than failing with
        # KeyError when the application returns an unlisted status code.
        self.send_response(status, responses.get(status, ''))
        app.run()
        return
# main
def main(argv):
    """Parse the command line, load the application class and serve forever.

    Options:
      -h host   address to bind to (default: '', i.e. all interfaces)
      -p port   TCP port to listen on (default: 8080)
      -n name   name of the class to look up in the loaded file
                (default: 'WebApp')

    The first positional argument is the path of the Python source file
    that defines the application class.

    Returns 100 after printing a usage message when the arguments are
    invalid (unknown option, non-numeric port, or missing path).
    Otherwise the call blocks in serve_forever().
    """
    import getopt

    def usage():
        print('usage: %s [-h host] [-p port] [-n name] module.class' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'h:p:n:')
    except getopt.GetoptError:
        return usage()
    host = ''
    port = 8080
    name = 'WebApp'
    for (k, v) in opts:
        if k == '-h':
            host = v
        elif k == '-p':
            # A non-numeric port is a usage error, not a crash.
            try:
                port = int(v)
            except ValueError:
                return usage()
        elif k == '-n':
            name = v
    if not args:
        return usage()
    path = args.pop(0)
    # Only needed to load the application source, so it is imported after
    # the arguments have been validated.
    import imp
    module = imp.load_source('app', path)
    WebAppHandler.APP_CLASS = getattr(module, name)
    print('Listening %s:%d...' % (host, port))
    httpd = HTTPServer((host, port), WebAppHandler)
    httpd.serve_forever()
    return
# Run main() when executed as a script; its return value becomes the exit status.
if __name__ == '__main__': sys.exit(main(sys.argv))