20090721

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@121 1aa58f4a-7d42-0410-adbc-911cccaed67c
2009-07-21 14:23:23 +00:00 · 2009-07-21 14:23:23 +00:00 · 9093c340af
parent 8a5bec5065
commit 9093c340af
5 changed files with 76 additions and 33 deletions
--- a/42
+++ b/42
@ -0,0 +1,42 @@
+TODO
+Makefile
+README.html
+setup.py
+pdfminer/Makefile
+pdfminer/__init__.py
+pdfminer/arcfour.py
+pdfminer/ascii85.py
+pdfminer/cmap.py
+pdfminer/converter.py
+pdfminer/fontmetrics.py
+pdfminer/glyphlist.py
+pdfminer/latin2ascii.py
+pdfminer/latin_enc.py
+pdfminer/layout.py
+pdfminer/lzw.py
+pdfminer/pdfcolor.py
+pdfminer/pdfdevice.py
+pdfminer/pdffont.py
+pdfminer/pdfinterp.py
+pdfminer/pdfparser.py
+pdfminer/pdftypes.py
+pdfminer/psparser.py
+pdfminer/pycdb.py
+pdfminer/rijndael.py
+pdfminer/utils.py
+tools/Makefile
+tools/dumppdf.py
+tools/pdf2txt.py
+tools/pdf2html.cgi
+tools/conv_afm.py
+tools/prof.py
+samples/Makefile
+samples/jo.pdf
+samples/simple1.pdf
+samples/simple2.pdf
+samples/dmca.pdf
+samples/f1040nr.pdf
+samples/i1040nr.pdf
+samples/kampo.pdf
+samples/naacl06-shinyama.pdf
+samples/nlp2004slides.pdf
--- a/34
+++ b/34
@ -1,17 +1,16 @@
-# Makefile for pdfminer
+##  Makefile (for maintenance purposes)
+##

 PACKAGE=pdfminer
+PREFIX=/usr/local

 SVN=svn
-GNUTAR=tar
 PYTHON=python
-PREFIX=/usr/local
-TMPDIR=/tmp
-VERSION=`$(PYTHON) $(PACKAGE)/__init__.py`
-DISTNAME=$(PACKAGE)-dist-$(VERSION)
-DISTFILE=$(DISTNAME).tar.gz
+RM=rm -f
+CP=cp -f

-CONV_CMAP=$(PYTHON) pdfminer/cmap.py
+VERSION=`$(PYTHON) $(PACKAGE)/__init__.py`
+DISTFILE=$(PACKAGE)-$(VERSION).tar.gz

 all:

@ -19,7 +18,8 @@ install:
 	$(PYTHON) setup.py install --prefix=$(PREFIX)

 clean:
-	-rm -rf build
+	-$(PYTHON) setup.py clean
+	-$(RM) -r build dist
 	-cd $(PACKAGE) && $(MAKE) clean
 	-cd tools && $(MAKE) clean
 	-cd samples && $(MAKE) clean
@ -27,20 +27,16 @@ clean:
 test:
 	cd samples && $(MAKE) test

-# Maintainance:
 commit: clean
 	$(SVN) commit

 check:
 	cd $(PACKAGE) && make check

-dist: clean
-	$(SVN) cleanup
-	$(SVN) export . $(TMPDIR)/$(DISTNAME)
-	$(GNUTAR) c -z -C$(TMPDIR) -f $(TMPDIR)/$(DISTFILE) $(DISTNAME) --dereference --numeric-owner
-	-rm -rf $(TMPDIR)/$(DISTNAME)
+dist/$(DISTFILE): clean
+	$(PYTHON) setup.py sdist

-WEBDIR=$$HOME/Site/unixuser.org/python/pdfminer
-publish: dist
-	cp $(TMPDIR)/$(DISTFILE) $(WEBDIR)
-	cp README.html $(WEBDIR)/index.html
+WEBDIR=$$HOME/Site/unixuser.org/python/$(PACKAGE)
+publish: dist/$(DISTFILE)
+	$(CP) dist/$(DISTFILE) $(WEBDIR)
+	$(CP) README.html $(WEBDIR)/index.html
--- a/README.html
+++ b/README.html
@ -18,7 +18,7 @@ Python PDF parser and analyzer

 <div align=right class=lastmod>
 <!-- hhmts start -->
-Last Modified: Tue Jul 21 16:24:26 JST 2009
+Last Modified: Tue Jul 21 23:22:42 JST 2009
 <!-- hhmts end -->
 </div>

@ -37,7 +37,7 @@ PDF parser that can be used for other purposes instead of text analysis.
 <p>
 <strong>Features:</strong>
 <ul>
-<li> Written entirely in Python. (version 2.4 or newer required)
+<li> Written entirely in Python. (for version 2.4 or newer)
 <li> PDF-1.7 specification support. (well, almost)
 <li> Non-ASCII languages and vertical writing scripts support.
 <li> Various font types (Type1, TrueType, Type3, and CID) support.
@ -51,8 +51,8 @@ PDF parser that can be used for other purposes instead of text analysis.
 <a name="source"></a>
 <p>
 <strong>Download:</strong><br>
-<a href="http://www.unixuser.org/~euske/python/pdfminer/pdfminer-dist-20090711.tar.gz">
-http://www.unixuser.org/~euske/python/pdfminer/pdfminer-dist-20090711.tar.gz
+<a href="http://www.unixuser.org/~euske/python/pdfminer/pdfminer-20090721.tar.gz">
+http://www.unixuser.org/~euske/python/pdfminer/pdfminer-20090721.tar.gz
 </a>
 (1.8Mbytes)

@ -158,7 +158,7 @@ Examples:
 $ <strong>pdf2txt.py samples/naacl06-shinyama.pdf -o output.html</strong>
 (extract text as an HTML file whose filename is output.html)

-$ <strong>pdf2txt.py -c euc-jp samples/jo.pdf -o output.html</strong>
+$ <strong>pdf2txt.py -c euc-jp samples/jo.pdf -D V -o output.html</strong>
 (extract a Japanese HTML file in vertical writing, CMap is required)

 $ <strong>pdf2txt.py -P mypassword secret.pdf -o output.txt</strong>
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@ -761,7 +761,7 @@ def process_pdf(rsrc, device, fp, pagenos=None, maxpages=0, password=''):
  parser = PDFParser(doc, fp)
  doc.initialize(password)
  if not doc.is_extractable:
-    raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fname)
+    raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp.name)
  interpreter = PDFPageInterpreter(rsrc, device)
  for (pageno,page) in enumerate(doc.get_pages()):
    if pagenos and (pageno not in pagenos): continue
--- a/setup.py
+++ b/setup.py
@ -14,18 +14,23 @@ other extra information such as font information or ruled lines.
 It includes a PDF converter that can transform PDF files
 into other text formats (such as HTML). It has an extensible
 PDF parser that can be used for other purposes instead of text analysis.''',
-  keywords=['pdf parser', 'pdf converter', 'text mining'],
  license='MIT/X',
  author='Yusuke Shinyama',
  author_email='yusuke at cs dot nyu dot edu',
  url='http://www.unixuser.org/~euske/python/pdfminer/index.html',
-  packages=['pdfminer'],
-  scripts=['tools/pdf2txt.py', 'tools/dumppdf.py'],
+  packages=[
+    'pdfminer'
+  ],
+  scripts=[
+    'tools/pdf2txt.py',
+    'tools/dumppdf.py'
+    ],
+  keywords=['pdf parser', 'pdf converter', 'text mining'],
  classifiers=[
-  'Development Status :: 4 - Beta',
-  'Environment :: Console',
-  'Intended Audience :: Developers',
-  'Intended Audience :: Science/Research',
-  'License :: OSI Approved :: MIT License',
+    'Development Status :: 4 - Beta',
+    'Environment :: Console',
+    'Intended Audience :: Developers',
+    'Intended Audience :: Science/Research',
+    'License :: OSI Approved :: MIT License',
  ],
  )