diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..03641c0 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,42 @@ +TODO +Makefile +README.html +setup.py +pdfminer/Makefile +pdfminer/__init__.py +pdfminer/arcfour.py +pdfminer/ascii85.py +pdfminer/cmap.py +pdfminer/converter.py +pdfminer/fontmetrics.py +pdfminer/glyphlist.py +pdfminer/latin2ascii.py +pdfminer/latin_enc.py +pdfminer/layout.py +pdfminer/lzw.py +pdfminer/pdfcolor.py +pdfminer/pdfdevice.py +pdfminer/pdffont.py +pdfminer/pdfinterp.py +pdfminer/pdfparser.py +pdfminer/pdftypes.py +pdfminer/psparser.py +pdfminer/pycdb.py +pdfminer/rijndael.py +pdfminer/utils.py +tools/Makefile +tools/dumppdf.py +tools/pdf2txt.py +tools/pdf2html.cgi +tools/conv_afm.py +tools/prof.py +samples/Makefile +samples/jo.pdf +samples/simple1.pdf +samples/simple2.pdf +samples/dmca.pdf +samples/f1040nr.pdf +samples/i1040nr.pdf +samples/kampo.pdf +samples/naacl06-shinyama.pdf +samples/nlp2004slides.pdf diff --git a/Makefile b/Makefile index 6630b23..68a1fa7 100644 --- a/Makefile +++ b/Makefile @@ -1,17 +1,16 @@ -# Makefile for pdfminer +## Makefile (for maintenance purposes) +## PACKAGE=pdfminer +PREFIX=/usr/local SVN=svn -GNUTAR=tar PYTHON=python -PREFIX=/usr/local -TMPDIR=/tmp -VERSION=`$(PYTHON) $(PACKAGE)/__init__.py` -DISTNAME=$(PACKAGE)-dist-$(VERSION) -DISTFILE=$(DISTNAME).tar.gz +RM=rm -f +CP=cp -f -CONV_CMAP=$(PYTHON) pdfminer/cmap.py +VERSION=`$(PYTHON) $(PACKAGE)/__init__.py` +DISTFILE=$(PACKAGE)-$(VERSION).tar.gz all: @@ -19,7 +18,8 @@ install: $(PYTHON) setup.py install --prefix=$(PREFIX) clean: - -rm -rf build + -$(PYTHON) setup.py clean + -$(RM) -r build dist -cd $(PACKAGE) && $(MAKE) clean -cd tools && $(MAKE) clean -cd samples && $(MAKE) clean @@ -27,20 +27,16 @@ clean: test: cd samples && $(MAKE) test -# Maintainance: commit: clean $(SVN) commit check: cd $(PACKAGE) && make check -dist: clean - $(SVN) cleanup - $(SVN) export . 
$(TMPDIR)/$(DISTNAME) - $(GNUTAR) c -z -C$(TMPDIR) -f $(TMPDIR)/$(DISTFILE) $(DISTNAME) --dereference --numeric-owner - -rm -rf $(TMPDIR)/$(DISTNAME) +dist/$(DISTFILE): clean + $(PYTHON) setup.py sdist -WEBDIR=$$HOME/Site/unixuser.org/python/pdfminer -publish: dist - cp $(TMPDIR)/$(DISTFILE) $(WEBDIR) - cp README.html $(WEBDIR)/index.html +WEBDIR=$$HOME/Site/unixuser.org/python/$(PACKAGE) +publish: dist/$(DISTFILE) + $(CP) dist/$(DISTFILE) $(WEBDIR) + $(CP) README.html $(WEBDIR)/index.html diff --git a/README.html b/README.html index 08fb7f6..b2076f3 100644 --- a/README.html +++ b/README.html @@ -18,7 +18,7 @@ Python PDF parser and analyzer
Features:
Download:
-
-http://www.unixuser.org/~euske/python/pdfminer/pdfminer-dist-20090711.tar.gz
+
+http://www.unixuser.org/~euske/python/pdfminer/pdfminer-20090721.tar.gz
(1.8Mbytes)
@@ -158,7 +158,7 @@ Examples:
$ pdf2txt.py samples/naacl06-shinyama.pdf -o output.html
(extract text as an HTML file whose filename is output.html)
-$ pdf2txt.py -c euc-jp samples/jo.pdf -o output.html
+$ pdf2txt.py -c euc-jp samples/jo.pdf -D V -o output.html
(extract a Japanese HTML file in vertical writing, CMap is required)
$ pdf2txt.py -P mypassword secret.pdf -o output.txt
diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index 7ab9351..8d1dca0 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -761,7 +761,7 @@ def process_pdf(rsrc, device, fp, pagenos=None, maxpages=0, password=''):
parser = PDFParser(doc, fp)
doc.initialize(password)
if not doc.is_extractable:
- raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fname)
+ raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp.name)
interpreter = PDFPageInterpreter(rsrc, device)
for (pageno,page) in enumerate(doc.get_pages()):
if pagenos and (pageno not in pagenos): continue
diff --git a/setup.py b/setup.py
index c95ac38..dbda6a6 100644
--- a/setup.py
+++ b/setup.py
@@ -14,18 +14,23 @@ other extra information such as font information or ruled lines.
It includes a PDF converter that can transform PDF files
into other text formats (such as HTML). It has an extensible
PDF parser that can be used for other purposes instead of text analysis.''',
- keywords=['pdf parser', 'pdf converter', 'text mining'],
license='MIT/X',
author='Yusuke Shinyama',
author_email='yusuke at cs dot nyu dot edu',
url='http://www.unixuser.org/~euske/python/pdfminer/index.html',
- packages=['pdfminer'],
- scripts=['tools/pdf2txt.py', 'tools/dumppdf.py'],
+ packages=[
+ 'pdfminer'
+ ],
+ scripts=[
+ 'tools/pdf2txt.py',
+ 'tools/dumppdf.py'
+ ],
+ keywords=['pdf parser', 'pdf converter', 'text mining'],
classifiers=[
- 'Development Status :: 4 - Beta',
- 'Environment :: Console',
- 'Intended Audience :: Developers',
- 'Intended Audience :: Science/Research',
- 'License :: OSI Approved :: MIT License',
+ 'Development Status :: 4 - Beta',
+ 'Environment :: Console',
+ 'Intended Audience :: Developers',
+ 'Intended Audience :: Science/Research',
+ 'License :: OSI Approved :: MIT License',
],
)