Fixed for newer version: pdf2html.cgi

pull/1/head
Yusuke Shinyama 2014-04-02 18:54:50 +09:00
parent 9242356357
commit 17b9b19a26
1 changed file with 12 additions and 6 deletions

View File

@@ -20,7 +20,9 @@ import cgi, logging, traceback, random
# comment out at this at runtime. # comment out at this at runtime.
#import cgitb; cgitb.enable() #import cgitb; cgitb.enable()
import pdfminer import pdfminer
from pdfminer.pdfinterp import PDFResourceManager, process_pdf from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import HTMLConverter, TextConverter from pdfminer.converter import HTMLConverter, TextConverter
from pdfminer.layout import LAParams from pdfminer.layout import LAParams
@@ -62,12 +64,16 @@ def convert(infp, outfp, path, codec='utf-8',
rsrcmgr = PDFResourceManager() rsrcmgr = PDFResourceManager()
laparams = LAParams() laparams = LAParams()
if html: if html:
device = HTMLConverter(rsrcmgr, outfp, codec=codec, laparams=laparams) device = HTMLConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
layoutmode='exact')
else: else:
device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams) device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams)
fp = file(path, 'rb') fp = file(path, 'rb')
process_pdf(rsrcmgr, device, fp, pagenos, maxpages=maxpages) interpreter = PDFPageInterpreter(rsrcmgr, device)
for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages):
interpreter.process_page(page)
fp.close() fp.close()
device.close()
return return
@@ -158,10 +164,10 @@ class WebApp(object):
return status return status
def convert(self): def convert(self):
self.form = cgi.FieldStorage(fp=self.infp, environ=self.environ) form = cgi.FieldStorage(fp=self.infp, environ=self.environ)
if (self.method != 'POST' or if (self.method != 'POST' or
'c' not in self.form or 'c' not in form or
'f' not in self.form): 'f' not in form):
self.response_200() self.response_200()
self.coverpage() self.coverpage()
return return