20090721

git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@121 1aa58f4a-7d42-0410-adbc-911cccaed67c
2009-07-21 14:23:23 +00:00 · 2009-07-21 14:23:23 +00:00 · 9093c340af
parent 8a5bec5065
commit 9093c340af
5 changed files with 76 additions and 33 deletions
--- a/42
+++ b/42
@ -0,0 +1,42 @@
 TODO
 Makefile
 README.html
 setup.py
 pdfminer/Makefile
 pdfminer/__init__.py
 pdfminer/arcfour.py
 pdfminer/ascii85.py
 pdfminer/cmap.py
 pdfminer/converter.py
 pdfminer/fontmetrics.py
 pdfminer/glyphlist.py
 pdfminer/latin2ascii.py
 pdfminer/latin_enc.py
 pdfminer/layout.py
 pdfminer/lzw.py
 pdfminer/pdfcolor.py
 pdfminer/pdfdevice.py
 pdfminer/pdffont.py
 pdfminer/pdfinterp.py
 pdfminer/pdfparser.py
 pdfminer/pdftypes.py
 pdfminer/psparser.py
 pdfminer/pycdb.py
 pdfminer/rijndael.py
 pdfminer/utils.py
 tools/Makefile
 tools/dumppdf.py
 tools/pdf2txt.py
 tools/pdf2html.cgi
 tools/conv_afm.py
 tools/prof.py
 samples/Makefile
 samples/jo.pdf
 samples/simple1.pdf
 samples/simple2.pdf
 samples/dmca.pdf
 samples/f1040nr.pdf
 samples/i1040nr.pdf
 samples/kampo.pdf
 samples/naacl06-shinyama.pdf
 samples/nlp2004slides.pdf
--- a/34
+++ b/34
@ -1,17 +1,16 @@
-# Makefile for pdfminer
+##  Makefile (for maintenance purposes)
 ##
 PACKAGE=pdfminer
 PREFIX=/usr/local
 SVN=svn
 GNUTAR=tar
 PYTHON=python
-PREFIX=/usr/local
+RM=rm -f
-TMPDIR=/tmp
+CP=cp -f
 VERSION=`$(PYTHON) $(PACKAGE)/__init__.py`
 DISTNAME=$(PACKAGE)-dist-$(VERSION)
 DISTFILE=$(DISTNAME).tar.gz
-CONV_CMAP=$(PYTHON) pdfminer/cmap.py
+VERSION=`$(PYTHON) $(PACKAGE)/__init__.py`
 DISTFILE=$(PACKAGE)-$(VERSION).tar.gz
 all:
@ -19,7 +18,8 @@ install:
 	$(PYTHON) setup.py install --prefix=$(PREFIX)
 clean:
-	-rm -rf build
+	-$(PYTHON) setup.py clean
 	-$(RM) -r build dist
 	-cd $(PACKAGE) && $(MAKE) clean
 	-cd tools && $(MAKE) clean
 	-cd samples && $(MAKE) clean
@ -27,20 +27,16 @@ clean:
 test:
 	cd samples && $(MAKE) test
 # Maintenance:
 commit: clean
 	$(SVN) commit
 check:
 	cd $(PACKAGE) && make check
-dist: clean
+dist/$(DISTFILE): clean
-	$(SVN) cleanup
+	$(PYTHON) setup.py sdist
 	$(SVN) export . $(TMPDIR)/$(DISTNAME)
 	$(GNUTAR) c -z -C$(TMPDIR) -f $(TMPDIR)/$(DISTFILE) $(DISTNAME) --dereference --numeric-owner
 	-rm -rf $(TMPDIR)/$(DISTNAME)
-WEBDIR=$$HOME/Site/unixuser.org/python/pdfminer
+WEBDIR=$$HOME/Site/unixuser.org/python/$(PACKAGE)
-publish: dist
+publish: dist/$(DISTFILE)
-	cp $(TMPDIR)/$(DISTFILE) $(WEBDIR)
+	$(CP) dist/$(DISTFILE) $(WEBDIR)
-	cp README.html $(WEBDIR)/index.html
+	$(CP) README.html $(WEBDIR)/index.html
--- a/README.html
+++ b/README.html
@ -18,7 +18,7 @@ Python PDF parser and analyzer
 <div align=right class=lastmod>
 <!-- hhmts start -->
-Last Modified: Tue Jul 21 16:24:26 JST 2009
+Last Modified: Tue Jul 21 23:22:42 JST 2009
 <!-- hhmts end -->
 </div>
@ -37,7 +37,7 @@ PDF parser that can be used for other purposes instead of text analysis.
 <p>
 <strong>Features:</strong>
 <ul>
-<li> Written entirely in Python. (version 2.4 or newer required)
+<li> Written entirely in Python. (for version 2.4 or newer)
 <li> PDF-1.7 specification support. (well, almost)
 <li> Non-ASCII languages and vertical writing scripts support.
 <li> Various font types (Type1, TrueType, Type3, and CID) support.
@ -51,8 +51,8 @@ PDF parser that can be used for other purposes instead of text analysis.
 <a name="source"></a>
 <p>
 <strong>Download:</strong><br>
-<a href="http://www.unixuser.org/~euske/python/pdfminer/pdfminer-dist-20090711.tar.gz">
+<a href="http://www.unixuser.org/~euske/python/pdfminer/pdfminer-20090721.tar.gz">
-http://www.unixuser.org/~euske/python/pdfminer/pdfminer-dist-20090711.tar.gz
+http://www.unixuser.org/~euske/python/pdfminer/pdfminer-20090721.tar.gz
 </a>
 (1.8Mbytes)
@ -158,7 +158,7 @@ Examples:
 $ <strong>pdf2txt.py samples/naacl06-shinyama.pdf -o output.html</strong>
 (extract text as an HTML file whose filename is output.html)
-$ <strong>pdf2txt.py -c euc-jp samples/jo.pdf -o output.html</strong>
+$ <strong>pdf2txt.py -c euc-jp samples/jo.pdf -D V -o output.html</strong>
 (extract a Japanese HTML file in vertical writing, CMap is required)
 $ <strong>pdf2txt.py -P mypassword secret.pdf -o output.txt</strong>
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@ -761,7 +761,7 @@ def process_pdf(rsrc, device, fp, pagenos=None, maxpages=0, password=''):
  parser = PDFParser(doc, fp)
  doc.initialize(password)
  if not doc.is_extractable:
-    raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fname)
+    raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp.name)
  interpreter = PDFPageInterpreter(rsrc, device)
  for (pageno,page) in enumerate(doc.get_pages()):
    if pagenos and (pageno not in pagenos): continue
--- a/setup.py
+++ b/setup.py
@ -14,18 +14,23 @@ other extra information such as font information or ruled lines.
 It includes a PDF converter that can transform PDF files
 into other text formats (such as HTML). It has an extensible
 PDF parser that can be used for other purposes instead of text analysis.''',
  keywords=['pdf parser', 'pdf converter', 'text mining'],
  license='MIT/X',
  author='Yusuke Shinyama',
  author_email='yusuke at cs dot nyu dot edu',
  url='http://www.unixuser.org/~euske/python/pdfminer/index.html',
-  packages=['pdfminer'],
+  packages=[
-  scripts=['tools/pdf2txt.py', 'tools/dumppdf.py'],
+    'pdfminer'
  ],
  scripts=[
    'tools/pdf2txt.py',
    'tools/dumppdf.py'
    ],
  keywords=['pdf parser', 'pdf converter', 'text mining'],
  classifiers=[
-  'Development Status :: 4 - Beta',
+    'Development Status :: 4 - Beta',
-  'Environment :: Console',
+    'Environment :: Console',
-  'Intended Audience :: Developers',
+    'Intended Audience :: Developers',
-  'Intended Audience :: Science/Research',
+    'Intended Audience :: Science/Research',
-  'License :: OSI Approved :: MIT License',
+    'License :: OSI Approved :: MIT License',
  ],
  )