auto detect output type
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@115 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
fc453e2061
commit
c7a0894182
3
Makefile
3
Makefile
|
@ -5,6 +5,7 @@ PACKAGE=pdfminer
|
||||||
SVN=svn
|
SVN=svn
|
||||||
GNUTAR=tar
|
GNUTAR=tar
|
||||||
PYTHON=python
|
PYTHON=python
|
||||||
|
PREFIX=/usr/local
|
||||||
TMPDIR=/tmp
|
TMPDIR=/tmp
|
||||||
VERSION=`$(PYTHON) $(PACKAGE)/__init__.py`
|
VERSION=`$(PYTHON) $(PACKAGE)/__init__.py`
|
||||||
DISTNAME=$(PACKAGE)-dist-$(VERSION)
|
DISTNAME=$(PACKAGE)-dist-$(VERSION)
|
||||||
|
@ -15,7 +16,7 @@ CONV_CMAP=$(PYTHON) pdfminer/cmap.py
|
||||||
all:
|
all:
|
||||||
|
|
||||||
install:
|
install:
|
||||||
$(PYTHON) setup.py install
|
$(PYTHON) setup.py install --prefix=$(PREFIX)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-rm -rf build
|
-rm -rf build
|
||||||
|
|
|
@ -26,9 +26,9 @@ def main(argv):
|
||||||
pagenos = set()
|
pagenos = set()
|
||||||
maxpages = 0
|
maxpages = 0
|
||||||
# output option
|
# output option
|
||||||
outtype = 'html'
|
outfile = None
|
||||||
|
outtype = None
|
||||||
codec = 'utf-8'
|
codec = 'utf-8'
|
||||||
outfp = sys.stdout
|
|
||||||
cluster_margin = None
|
cluster_margin = None
|
||||||
pageno = 1
|
pageno = 1
|
||||||
scale = 1
|
scale = 1
|
||||||
|
@ -41,7 +41,7 @@ def main(argv):
|
||||||
elif k == '-m': maxpages = int(v)
|
elif k == '-m': maxpages = int(v)
|
||||||
elif k == '-t': outtype = v
|
elif k == '-t': outtype = v
|
||||||
elif k == '-c': codec = v
|
elif k == '-c': codec = v
|
||||||
elif k == '-o': outfp = file(v, 'wb')
|
elif k == '-o': outfile = v
|
||||||
elif k == '-s': scale = float(v)
|
elif k == '-s': scale = float(v)
|
||||||
elif k == '-T': cluster_margin = float(v)
|
elif k == '-T': cluster_margin = float(v)
|
||||||
#
|
#
|
||||||
|
@ -54,6 +54,19 @@ def main(argv):
|
||||||
#
|
#
|
||||||
CMapDB.initialize(cmapdir)
|
CMapDB.initialize(cmapdir)
|
||||||
rsrc = PDFResourceManager()
|
rsrc = PDFResourceManager()
|
||||||
|
if not outtype:
|
||||||
|
outtype = 'text'
|
||||||
|
if outfile:
|
||||||
|
if outfile.endswith('.htm') or outfile.endswith('.html'):
|
||||||
|
outtype = 'html'
|
||||||
|
elif outfile.endswith('.sgml'):
|
||||||
|
outtype = 'sgml'
|
||||||
|
elif outfile.endswith('.tag'):
|
||||||
|
outtype = 'tag'
|
||||||
|
if outfile:
|
||||||
|
outfp = file(outfile, 'w')
|
||||||
|
else:
|
||||||
|
outfp = sys.stdout
|
||||||
if outtype == 'sgml':
|
if outtype == 'sgml':
|
||||||
device = SGMLConverter(rsrc, outfp, codec=codec, cluster_margin=cluster_margin)
|
device = SGMLConverter(rsrc, outfp, codec=codec, cluster_margin=cluster_margin)
|
||||||
elif outtype == 'html':
|
elif outtype == 'html':
|
||||||
|
|
Loading…
Reference in New Issue