webapp fixed
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@283 1aa58f4a-7d42-0410-adbc-911cccaed67cpull/1/head
parent
5d98a27d9c
commit
866f2bbb75
|
@ -5,4 +5,4 @@ RM=rm -f
|
|||
all:
|
||||
|
||||
clean:
|
||||
-$(RM) *.pyc *.pyo
|
||||
-$(RM) *.pyc *.pyo *.cgic *.cgio
|
||||
|
|
|
@ -77,21 +77,23 @@ class WebApp(object):
|
|||
|
||||
TITLE = 'pdf2html demo'
|
||||
MAXFILESIZE = 10000000 # set to zero if unlimited.
|
||||
MAXPAGES = 10 # set to zero if unlimited.
|
||||
MAXPAGES = 100 # set to zero if unlimited.
|
||||
|
||||
def __init__(self, infp=sys.stdin, outfp=sys.stdout, environ=os.environ,
|
||||
codec='utf-8', apppath='/'):
|
||||
self.infp = infp
|
||||
self.outfp = outfp
|
||||
self.environ = environ
|
||||
self.codec = codec
|
||||
self.apppath = apppath
|
||||
self.remote_addr = environ.get('REMOTE_ADDR')
|
||||
self.path_info = environ.get('PATH_INFO')
|
||||
self.method = environ.get('REQUEST_METHOD', 'GET').upper()
|
||||
self.server = environ.get('SERVER_SOFTWARE', '')
|
||||
self.tmpdir = environ.get('TEMP', './var/')
|
||||
self.remote_addr = self.environ.get('REMOTE_ADDR')
|
||||
self.path_info = self.environ.get('PATH_INFO')
|
||||
self.method = self.environ.get('REQUEST_METHOD', 'GET').upper()
|
||||
self.server = self.environ.get('SERVER_SOFTWARE', '')
|
||||
self.tmpdir = self.environ.get('TEMP', './var/')
|
||||
self.content_type = 'text/html; charset=%s' % codec
|
||||
self.logger = logging.getLogger()
|
||||
logging.basicConfig(level=10,stream=sys.stderr)
|
||||
return
|
||||
|
||||
def put(self, *args):
|
||||
|
@ -102,7 +104,7 @@ class WebApp(object):
|
|||
self.outfp.write(x.encode(self.codec, 'xmlcharrefreplace'))
|
||||
return
|
||||
|
||||
def http_200(self):
|
||||
def response_200(self):
|
||||
if self.server.startswith('cgi-httpd'):
|
||||
# required for cgi-httpd
|
||||
self.outfp.write('HTTP/1.0 200 OK\r\n')
|
||||
|
@ -110,7 +112,7 @@ class WebApp(object):
|
|||
self.outfp.write('Connection: close\r\n\r\n')
|
||||
return
|
||||
|
||||
def http_404(self):
|
||||
def response_404(self):
|
||||
if self.server.startswith('cgi-httpd'):
|
||||
# required for cgi-httpd
|
||||
self.outfp.write('HTTP/1.0 404 Not Found\r\n')
|
||||
|
@ -119,7 +121,7 @@ class WebApp(object):
|
|||
self.outfp.write('<html><body>page does not exist</body></body>\n')
|
||||
return
|
||||
|
||||
def http_301(self, url):
|
||||
def response_301(self, url):
|
||||
if self.server.startswith('cgi-httpd'):
|
||||
# required for cgi-httpd
|
||||
self.outfp.write('HTTP/1.0 301 Moved\r\n')
|
||||
|
@ -146,53 +148,52 @@ class WebApp(object):
|
|||
return
|
||||
|
||||
def setup(self):
|
||||
self.run = self.response_404
|
||||
status = 404
|
||||
if not os.path.isdir(self.tmpdir):
|
||||
self.logger.error('no tmpdir')
|
||||
status = 304
|
||||
elif self.path_info != self.apppath:
|
||||
status = 404
|
||||
else:
|
||||
elif self.path_info == self.apppath:
|
||||
self.run = self.convert
|
||||
status = 200
|
||||
self._status = status
|
||||
return status
|
||||
|
||||
def run(self):
|
||||
form = cgi.FieldStorage(self.infp)
|
||||
if self._status != 200:
|
||||
self.http_404()
|
||||
return
|
||||
def convert(self):
|
||||
self.form = cgi.FieldStorage(fp=self.infp, environ=self.environ)
|
||||
if (self.method != 'POST' or
|
||||
'c' not in form or
|
||||
'f' not in form):
|
||||
'c' not in self.form or
|
||||
'f' not in self.form):
|
||||
self.response_200()
|
||||
self.coverpage()
|
||||
return
|
||||
item = form['f']
|
||||
item = self.form['f']
|
||||
if not (item.file and item.filename):
|
||||
self.response_200()
|
||||
self.coverpage()
|
||||
return
|
||||
cmd = form.getvalue('c')
|
||||
cmd = self.form.getvalue('c')
|
||||
html = (cmd == 'Convert to HTML')
|
||||
pagenos = []
|
||||
if 'p' in form:
|
||||
for m in re.finditer(r'\d+', form.getvalue('p')):
|
||||
if 'p' in self.form:
|
||||
for m in re.finditer(r'\d+', self.form.getvalue('p')):
|
||||
try:
|
||||
pagenos.append(int(m.group(0)))
|
||||
except ValueError:
|
||||
pass
|
||||
self.logger.info('received: host=%s, name=%r, pagenos=%r' %
|
||||
(self.remote_addr, item.filename, pagenos))
|
||||
h = abs(hash((random.random(), self.remote_addr, item.filename)))
|
||||
tmppath = os.path.join(self.tmpdir, '%08x%08x.pdf' % (time.time(), h))
|
||||
self.logger.info('received: host=%s, name=%r, pagenos=%r, tmppath=%r' %
|
||||
(self.remote_addr, item.filename, pagenos, tmppath))
|
||||
try:
|
||||
if not html:
|
||||
self.content_type = 'text/plain; charset=%s' % self.codec
|
||||
self.http_200()
|
||||
self.response_200()
|
||||
try:
|
||||
convert(item.file, sys.stdout, tmppath, pagenos=pagenos, codec=self.codec,
|
||||
convert(item.file, self.outfp, tmppath, pagenos=pagenos, codec=self.codec,
|
||||
maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html)
|
||||
except Exception, e:
|
||||
self.put('<p>Sorry, an error has occured: %s' % q(repr(e)))
|
||||
self.logger.error('convert: %r: path=%r: %s' % (e, tmppath, traceback.format_exc()))
|
||||
self.logger.error('convert: %r: path=%r: %s' % (e, traceback.format_exc()))
|
||||
finally:
|
||||
try:
|
||||
os.remove(tmppath)
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
#!/usr/bin/env python2
|
||||
##
|
||||
## WebApp class runner
|
||||
##
|
||||
## usage:
|
||||
## $ runapp.py pdf2html.cgi
|
||||
##
|
||||
|
||||
import sys
|
||||
import urllib
|
||||
from httplib import responses
|
||||
from BaseHTTPServer import HTTPServer
|
||||
from SimpleHTTPServer import SimpleHTTPRequestHandler
|
||||
|
||||
## WebAppHandler
|
||||
##
|
||||
class WebAppHandler(SimpleHTTPRequestHandler):
    """Request handler that routes every request to a WebApp-style class.

    Instead of serving files, each request builds a CGI-like environment
    dictionary from the request line and headers, instantiates APP_CLASS
    with the connection's input/output streams, and lets the application
    produce the response.
    """

    # Application class instantiated once per request; main() assigns it
    # before the server starts.
    APP_CLASS = None

    def do_POST(self):
        """Handle a POST request by running the application."""
        return self.run_cgi()

    def send_head(self):
        """Run the application in place of the base class's file lookup.

        SimpleHTTPRequestHandler routes GET and HEAD through send_head();
        run_cgi() returns None, so the base class has no file to copy.
        """
        return self.run_cgi()

    def run_cgi(self):
        """Instantiate the application for this request and run it.

        The application's setup() must return the numeric HTTP status,
        which is sent as the status line before run() writes the rest of
        the response to the client.
        """
        env = self._build_environ()
        app = self.APP_CLASS(infp=self.rfile, outfp=self.wfile, environ=env)
        status = app.setup()
        # responses.get() yields None for a status code missing from the
        # httplib table; send_response() then falls back to its own
        # default reason phrase instead of this handler raising KeyError.
        self.send_response(status, responses.get(status))
        app.run()
        return

    def _build_environ(self):
        """Return the CGI-style environment dict for the current request."""
        # Split off the query string at the last '?'.
        rest = self.path
        i = rest.rfind('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''
        # Split at the first '/'.  For a request path that starts with
        # '/', the index is 0, so the script part is empty and the whole
        # path becomes PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''
        scriptname = '/' + script

        env = {}
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query

        # REMOTE_HOST is only set when address_string() returns something
        # other than the raw client address.
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]

        if self.headers.typeheader is None:
            env['CONTENT_TYPE'] = self.headers.type
        else:
            env['CONTENT_TYPE'] = self.headers.typeheader
        length = self.headers.getheader('content-length')
        if length:
            env['CONTENT_LENGTH'] = length

        # Collect every Accept header, including folded continuation
        # lines (those that start with whitespace).
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                accept.append(line.strip())
            else:
                # line[7:] skips the leading 'Accept:' field name.
                accept = accept + line[7:].split(',')
        env['HTTP_ACCEPT'] = ','.join(accept)

        ua = self.headers.getheader('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.getheaders('cookie'))
        if co:
            env['HTTP_COOKIE'] = ', '.join(co)

        # Make sure the optional variables are always present, so the
        # application can read them without checking for existence.
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE'):
            env.setdefault(k, "")
        return env
|
||||
|
||||
# main
|
||||
def main(argv):
    """Parse the command line, load the application class and serve it.

    Options:
      -h host   address to bind to (default '': all interfaces)
      -p port   TCP port to listen on (default 8080)
      -n name   name of the application class in the loaded file
                (default 'WebApp')

    The first positional argument is the path of the Python source file
    that defines the application class.

    Returns 100 after printing the usage line when the arguments are
    invalid; otherwise serves requests until the process is interrupted.
    """
    import getopt

    def usage():
        # Parenthesised single-argument form prints the same text under
        # the Python 2 print statement.
        print('usage: %s [-h host] [-p port] [-n name] module.class' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'h:p:n:')
    except getopt.GetoptError:
        return usage()
    host = ''
    port = 8080
    name = 'WebApp'
    for (k, v) in opts:
        if k == '-h':
            host = v
        elif k == '-p':
            # A non-numeric port is a usage error, not a crash.
            try:
                port = int(v)
            except ValueError:
                return usage()
        elif k == '-n':
            name = v
    if not args:
        return usage()
    path = args.pop(0)
    # Imported only once the arguments are known to be valid.
    import imp
    module = imp.load_source('app', path)
    WebAppHandler.APP_CLASS = getattr(module, name)
    print('Listening %s:%d...' % (host, port))
    httpd = HTTPServer((host, port), WebAppHandler)
    httpd.serve_forever()
    return


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|
Loading…
Reference in New Issue