diff --git a/tools/pdf2html.cgi b/tools/pdf2html.cgi index 828ef36..29e1b01 100755 --- a/tools/pdf2html.cgi +++ b/tools/pdf2html.cgi @@ -12,8 +12,6 @@ # $ mkdir CGIDIR # $ mkdir CGIDIR/var # $ cp -a pdfminer/pdflib CGIDIR -# $ cp -a pdfminer/tools CGIDIR -# $ cp -a pdfminer/CDBCMap CGIDIR # $ PYTHONPATH=CGIDIR pdfminer/tools/pdf2html.cgi # @@ -21,6 +19,7 @@ import sys # comment out at runtime. import cgitb; cgitb.enable() import os, os.path, re, cgi, time, random, codecs, logging, traceback +import pdflib.pdf2txt # quote HTML metacharacters @@ -36,11 +35,11 @@ def url(base, **kw): r.append('%s=%s' % (k, v)) return base+'&'.join(r) -## convert(outfp, infp, path, codec='utf-8', maxpages=10, pagenos=None) +## convert ## class FileSizeExceeded(ValueError): pass -def convert(outfp, infp, path, codec='utf-8', maxpages=10, maxfilesize=5000000, pagenos=None): - from tools.pdf2txt import CMapDB, PDFResourceManager, HTMLConverter, convert +def convert(outfp, infp, path, codec='utf-8', maxpages=10, + maxfilesize=5000000, pagenos=None, html=True): # save the input file. src = file(path, 'wb') nbytes = 0 @@ -55,10 +54,13 @@ def convert(outfp, infp, path, codec='utf-8', maxpages=10, maxfilesize=5000000, infp.close() # perform conversion and # send the results over the network. - CMapDB.initialize('.', './CDBCMap') - rsrc = PDFResourceManager() - device = HTMLConverter(rsrc, outfp, codec=codec) - convert(rsrc, device, path, pagenos, maxpages=maxpages) + pdflib.pdf2txt.CMapDB.initialize('.', './CDBCMap') + rsrc = pdflib.pdf2txt.PDFResourceManager() + if html: + device = pdflib.pdf2txt.HTMLConverter(rsrc, outfp, codec=codec) + else: + device = pdflib.pdf2txt.TextConverter(rsrc, outfp, codec=codec) + pdflib.pdf2txt.convert(rsrc, device, path, pagenos, maxpages=maxpages) return @@ -127,7 +129,9 @@ class PDF2HTMLApp(object): ' Page numbers (comma-separated): \n', '
(Text extraction is limited to maximum %d pages.\n' % self.MAXPAGES, 'Maximum file size for input is %d bytes.)\n' % self.MAXFILESIZE, - '
\n', + '
\n', + '\n', + '\n', '
Powered by PDFMiner\n', '