Auto-detect the output type from the output file's extension when -t is not given

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@115 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2009-06-20 10:00:51 +00:00
parent fc453e2061
commit c7a0894182
2 changed files with 18 additions and 4 deletions

View File

@@ -5,6 +5,7 @@ PACKAGE=pdfminer
SVN=svn
GNUTAR=tar
PYTHON=python
PREFIX=/usr/local
TMPDIR=/tmp
VERSION=`$(PYTHON) $(PACKAGE)/__init__.py`
DISTNAME=$(PACKAGE)-dist-$(VERSION)
@@ -15,7 +16,7 @@ CONV_CMAP=$(PYTHON) pdfminer/cmap.py
all:
install:
$(PYTHON) setup.py install
$(PYTHON) setup.py install --prefix=$(PREFIX)
clean:
-rm -rf build

View File

@@ -26,9 +26,9 @@ def main(argv):
pagenos = set()
maxpages = 0
# output option
outtype = 'html'
outfile = None
outtype = None
codec = 'utf-8'
outfp = sys.stdout
cluster_margin = None
pageno = 1
scale = 1
@@ -41,7 +41,7 @@ def main(argv):
elif k == '-m': maxpages = int(v)
elif k == '-t': outtype = v
elif k == '-c': codec = v
elif k == '-o': outfp = file(v, 'wb')
elif k == '-o': outfile = v
elif k == '-s': scale = float(v)
elif k == '-T': cluster_margin = float(v)
#
@@ -54,6 +54,19 @@ def main(argv):
#
CMapDB.initialize(cmapdir)
rsrc = PDFResourceManager()
if not outtype:
outtype = 'text'
if outfile:
if outfile.endswith('.htm') or outfile.endswith('.html'):
outtype = 'html'
elif outfile.endswith('.sgml'):
outtype = 'sgml'
elif outfile.endswith('.tag'):
outtype = 'tag'
if outfile:
outfp = file(outfile, 'w')
else:
outfp = sys.stdout
if outtype == 'sgml':
device = SGMLConverter(rsrc, outfp, codec=codec, cluster_margin=cluster_margin)
elif outtype == 'html':