From c7a089418229cee6d2cc6b6f8f1e32e89af42aaa Mon Sep 17 00:00:00 2001 From: "yusuke.shinyama.dummy" Date: Sat, 20 Jun 2009 10:00:51 +0000 Subject: [PATCH] auto detect output type git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@115 1aa58f4a-7d42-0410-adbc-911cccaed67c --- Makefile | 3 ++- tools/pdf2txt.py | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 6010a59..6630b23 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ PACKAGE=pdfminer SVN=svn GNUTAR=tar PYTHON=python +PREFIX=/usr/local TMPDIR=/tmp VERSION=`$(PYTHON) $(PACKAGE)/__init__.py` DISTNAME=$(PACKAGE)-dist-$(VERSION) @@ -15,7 +16,7 @@ CONV_CMAP=$(PYTHON) pdfminer/cmap.py all: install: - $(PYTHON) setup.py install + $(PYTHON) setup.py install --prefix=$(PREFIX) clean: -rm -rf build diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py index 39d283d..8a5086e 100755 --- a/tools/pdf2txt.py +++ b/tools/pdf2txt.py @@ -26,9 +26,9 @@ def main(argv): pagenos = set() maxpages = 0 # output option - outtype = 'html' + outfile = None + outtype = None codec = 'utf-8' - outfp = sys.stdout cluster_margin = None pageno = 1 scale = 1 @@ -41,7 +41,7 @@ def main(argv): elif k == '-m': maxpages = int(v) elif k == '-t': outtype = v elif k == '-c': codec = v - elif k == '-o': outfp = file(v, 'wb') + elif k == '-o': outfile = v elif k == '-s': scale = float(v) elif k == '-T': cluster_margin = float(v) # @@ -54,6 +54,19 @@ def main(argv): # CMapDB.initialize(cmapdir) rsrc = PDFResourceManager() + if not outtype: + outtype = 'text' + if outfile: + if outfile.endswith('.htm') or outfile.endswith('.html'): + outtype = 'html' + elif outfile.endswith('.sgml'): + outtype = 'sgml' + elif outfile.endswith('.tag'): + outtype = 'tag' + if outfile: + outfp = file(outfile, 'w') + else: + outfp = sys.stdout if outtype == 'sgml': device = SGMLConverter(rsrc, outfp, codec=codec, cluster_margin=cluster_margin) elif outtype == 'html':