diff --git a/README.html b/README.html
index 0345d0f..f683421 100644
--- a/README.html
+++ b/README.html
@@ -18,7 +18,7 @@ Python PDF parser and analyzer
-Last Modified: Sun May 17 15:39:06 JST 2009
+Last Modified: Sun May 17 22:57:53 JST 2009
@@ -51,8 +51,8 @@ PDF parser that can be used for other purpoes instead of text analysis.
Download:
-
-http://www.unixuser.org/~euske/python/pdfminer/pdfminer-dist-20090330.tar.gz
+
+http://www.unixuser.org/~euske/python/pdfminer/pdfminer-dist-20090517.tar.gz
(1.8Mbytes)
@@ -126,10 +126,11 @@ For example:
$ cd /usr/lib/python2.5/site-packages
$ tar jxf CMap.tar.bz2
-
Do the follwoing: (this is optional but highly recommended)
+ Do the following. (this is optional, but highly recommended)
-$ python -m pdfminer.cmap /usr/lib/python2.5/site-packages/CMap
+$ python -m pdfminer.cmap
+This may take several minutes.
@@ -260,6 +261,7 @@ no stream header is displayed for the ease of saving it to a file.
Changes
+- 2009/05/17: Bugfixes, massive code restructuring, and simple graphic element support added. setup.py is supported.
- 2009/03/30: Text output mode added.
- 2009/03/25: Encoding problems fixed. Word splitting option added.
- 2009/02/28: Robust handling of corrupted PDFs. Thanks to Troy Bollinger.
diff --git a/pdfminer/__init__.py b/pdfminer/__init__.py
index dc5fe9d..26ae14b 100644
--- a/pdfminer/__init__.py
+++ b/pdfminer/__init__.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python
-__version__ = '20090330'
+__version__ = '20090517'
if __name__ == '__main__': print __version__
diff --git a/pdfminer/cmap.py b/pdfminer/cmap.py
index 4e6e315..444e90a 100644
--- a/pdfminer/cmap.py
+++ b/pdfminer/cmap.py
@@ -2,14 +2,17 @@
import sys, re, os, os.path
stderr = sys.stderr
from struct import pack, unpack
-from utils import choplist, nunpack
-from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF, \
+from pdfminer.utils import choplist, nunpack
+from pdfminer.fontmetrics import FONT_METRICS
+from pdfminer.latin_enc import ENCODING
+from pdfminer.glyphlist import charname2unicode
+from pdfminer.psparser import PSException, PSSyntaxError, PSTypeError, PSEOF, \
PSLiteral, PSKeyword, literal_name, keyword_name, \
PSStackParser
try:
import cdb
except ImportError:
- import pycdb as cdb
+ import pdfminer.pycdb as cdb
class CMapError(Exception): pass
@@ -28,7 +31,6 @@ def find_cmap_path():
STRIP_NAME = re.compile(r'[0-9]+')
def name2unicode(name):
- from glyphlist import charname2unicode
if name in charname2unicode:
return charname2unicode[name]
m = STRIP_NAME.search(name)
@@ -360,19 +362,16 @@ class CMapParser(PSStackParser):
## FontMetricsDB
##
class FontMetricsDB(object):
- from fontmetrics import FONT_METRICS
@classmethod
def get_metrics(klass, fontname):
- return klass.FONT_METRICS[fontname]
+ return FONT_METRICS[fontname]
## EncodingDB
##
class EncodingDB(object):
- from latin_enc import ENCODING
-
std2unicode = {}
mac2unicode = {}
win2unicode = {}
@@ -447,8 +446,10 @@ def main(argv):
(opts, args) = getopt.getopt(argv[1:], 'C:D:f')
except getopt.GetoptError:
return usage()
- if not args: usage()
- cmapdir = args.pop(0)
+ if args:
+ cmapdir = args.pop(0)
+ else:
+ cmapdir = find_cmap_path()
outputdir = cmapdir
force = False
for (k, v) in opts:
@@ -456,9 +457,11 @@ def main(argv):
elif k == '-C': cmapdir = v
elif k == '-D': outputdir = v
if not os.path.isdir(cmapdir):
- raise ValueError('not directory: %r' % cmapdir)
+ print >>stderr, 'directory does not exist: %r' % cmapdir
+ return 111
if not os.path.isdir(outputdir):
- raise ValueError('not directory: %r' % outputdir)
+ print >>stderr, 'directory does not exist: %r' % outputdir
+ return 111
return convert_cmap(cmapdir, outputdir, force=force)
if __name__ == '__main__': sys.exit(main(sys.argv))
diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index 4b1b5f9..b9510cc 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python
import sys
-from pdfdevice import PDFDevice
-from pdffont import PDFUnicodeNotDefined
-from layout import LayoutContainer, LTPage, LTText, LTLine, LTRect, LTFigure, LTTextBox
-from utils import mult_matrix, translate_matrix, apply_matrix_pt, enc
+from pdfminer.pdfdevice import PDFDevice
+from pdfminer.pdffont import PDFUnicodeNotDefined
+from pdfminer.layout import LayoutContainer, LTPage, LTText, LTLine, LTRect, LTFigure, LTTextBox
+from pdfminer.utils import mult_matrix, translate_matrix, apply_matrix_pt, enc
## PDFPageAggregator
diff --git a/pdfminer/layout.py b/pdfminer/layout.py
index aec5d03..680cbc4 100644
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
import sys
-from utils import apply_matrix_norm
+from pdfminer.utils import apply_matrix_norm
INF = sys.maxint
diff --git a/pdfminer/lzw.py b/pdfminer/lzw.py
index 5dfcee1..22f181a 100644
--- a/pdfminer/lzw.py
+++ b/pdfminer/lzw.py
@@ -2,6 +2,7 @@
import sys
stderr = sys.stderr
+
## LZWDecoder
##
class LZWDecoder(object):
diff --git a/pdfminer/pdfcolor.py b/pdfminer/pdfcolor.py
index 27758fb..61338d4 100644
--- a/pdfminer/pdfcolor.py
+++ b/pdfminer/pdfcolor.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
import sys
-from psparser import PSLiteralTable
+from pdfminer.psparser import PSLiteralTable
## PDFColorSpace
diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py
index cc3af4e..5cfa663 100644
--- a/pdfminer/pdffont.py
+++ b/pdfminer/pdffont.py
@@ -5,13 +5,13 @@ try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
-from psparser import PSLiteralTable, PSKeywordTable, PSLiteral, \
+from pdfminer.psparser import PSLiteralTable, PSKeywordTable, PSLiteral, \
literal_name, keyword_name, STRICT
-from pdftypes import PDFException, \
+from pdfminer.pdftypes import PDFException, \
resolve1, int_value, float_value, num_value, \
str_value, list_value, dict_value, stream_value
-from cmap import CMap, CMapDB, CMapParser, FontMetricsDB, EncodingDB
-from utils import apply_matrix_norm, nunpack
+from pdfminer.cmap import CMap, CMapDB, CMapParser, FontMetricsDB, EncodingDB
+from pdfminer.utils import apply_matrix_norm, nunpack
## CFFFont
diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index 6ad9883..8f3c143 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -6,18 +6,18 @@ try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
-from psparser import PSException, PSTypeError, PSEOF, \
+from pdfminer.psparser import PSException, PSTypeError, PSEOF, \
PSLiteralTable, PSKeywordTable, literal_name, keyword_name, \
PSStackParser, PSKeyword, STRICT
-from pdftypes import PDFException, PDFStream, PDFObjRef, \
+from pdfminer.pdftypes import PDFException, PDFStream, PDFObjRef, \
resolve1, int_value, float_value, num_value, \
str_value, list_value, dict_value, stream_value
-from utils import choplist, mult_matrix, translate_matrix, MATRIX_IDENTITY
-from pdffont import PDFFontError, PDFType1Font, PDFTrueTypeFont, PDFType3Font, PDFCIDFont
-from pdfparser import PDFDocument, PDFParser, PDFPasswordIncorrect
-from pdfcolor import PDFColorSpace, PREDEFINED_COLORSPACE, \
+from pdfminer.utils import choplist, mult_matrix, translate_matrix, MATRIX_IDENTITY
+from pdfminer.pdffont import PDFFontError, PDFType1Font, PDFTrueTypeFont, PDFType3Font, PDFCIDFont
+from pdfminer.pdfparser import PDFDocument, PDFParser, PDFPasswordIncorrect
+from pdfminer.pdfcolor import PDFColorSpace, PREDEFINED_COLORSPACE, \
LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB, LITERAL_DEVICE_CMYK
-from cmap import CMapDB
+from pdfminer.cmap import CMapDB
## Exceptions
diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py
index 9020192..ce8b926 100644
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@@ -7,12 +7,11 @@
import sys, re
import md5, struct
stderr = sys.stderr
-from utils import choplist, nunpack, decode_text
-from arcfour import Arcfour
-from psparser import PSStackParser, PSSyntaxError, PSEOF, \
- PSLiteralTable, PSKeywordTable, literal_name, keyword_name, \
- STRICT
-from pdftypes import PDFException, PDFTypeError, PDFNotImplementedError, \
+from pdfminer.utils import choplist, nunpack, decode_text
+from pdfminer.arcfour import Arcfour
+from pdfminer.psparser import PSStackParser, PSSyntaxError, PSEOF, \
+ PSLiteralTable, PSKeywordTable, literal_name, keyword_name, STRICT
+from pdfminer.pdftypes import PDFException, PDFTypeError, PDFNotImplementedError, \
PDFStream, PDFObjRef, resolve1, decipher_all, \
int_value, float_value, num_value, str_value, list_value, dict_value, stream_value
diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py
index 5c2f0c4..b4c5a45 100644
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python
import sys, zlib
-stderr = sys.stderr
-from lzw import LZWDecoder
-from psparser import PSException, PSObject, \
+from pdfminer.lzw import LZWDecoder
+from pdfminer.psparser import PSException, PSObject, \
PSLiteral, PSKeyword, PSLiteralTable, PSKeywordTable, \
literal_name, keyword_name, STRICT
diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py
index e8999bc..cf3e43f 100644
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python
import sys, re
stderr = sys.stderr
-
-from utils import choplist
+from pdfminer.utils import choplist
STRICT = 0
diff --git a/samples/Makefile b/samples/Makefile
index da56fc6..7c28d62 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,8 +1,7 @@
# GNUMakefile for test
PYTHON=python
-CMAPDIR=../CMap
-PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -C$(CMAPDIR)
+PDF2TXT=$(PYTHON) ../tools/pdf2txt.py
HTMLS= \
simple1.html \