diff --git a/docs/index.html b/docs/index.html index c2d4a34..d0ed14f 100644 --- a/docs/index.html +++ b/docs/index.html @@ -19,7 +19,7 @@ Python PDF parser and analyzer
-Download:
-pdfminer-20091129.tar.gz
+Download from PyPI:
+
+http://pypi.python.org/pypi/pdfminer/
-(1.8Mbytes)
Discussion: (for questions and comments, post here)
diff --git a/pdfminer/cmap.py b/pdfminer/cmapdb.py
similarity index 96%
rename from pdfminer/cmap.py
rename to pdfminer/cmapdb.py
index 688f407..225f238 100644
--- a/pdfminer/cmap.py
+++ b/pdfminer/cmapdb.py
@@ -235,7 +235,7 @@ class CMapDB(object):
print >>sys.stderr, 'Reading: CMap %r...' % fname
cmap = CMap()
fp = file(fname, 'rb')
- CMapParser(cmap, fp).run()
+ CMapParser(self, cmap, fp).run()
fp.close()
elif not strict:
cmap = CMap() # just create empty cmap
@@ -249,8 +249,9 @@ class CMapDB(object):
##
class CMapParser(PSStackParser):
- def __init__(self, cmap, fp):
+ def __init__(self, cmapdb, cmap, fp):
PSStackParser.__init__(self, fp)
+ self.cmapdb = cmapdb
self.cmap = cmap
self.in_cmap = False
return
@@ -282,11 +283,12 @@ class CMapParser(PSStackParser):
return
if name == 'usecmap':
- try:
- ((_,cmapname),) = self.pop(1)
- self.cmap.copycmap(CMapDB.get_cmap(literal_name(cmapname)))
- except PSSyntaxError:
- pass
+ if self.cmapdb:
+ try:
+ ((_,cmapname),) = self.pop(1)
+ self.cmap.copycmap(self.cmapdb.get_cmap(literal_name(cmapname)))
+ except PSSyntaxError:
+ pass
return
if name == 'begincodespacerange':
@@ -440,7 +442,7 @@ def dump_cdb(cmap, cdbfile, verbose=1):
def convert_cmap(cmapdir, outputdir, force=False):
"""Convert all CMap source files in a directory into cdb files."""
- CMapDB.initialize(cmapdir)
+ cmapdb = CMapDB(cmapdir)
for fname in os.listdir(cmapdir):
if '.' in fname: continue
cmapname = os.path.basename(fname)
@@ -449,7 +451,7 @@ def convert_cmap(cmapdir, outputdir, force=False):
print >>sys.stderr, 'Skipping: %r' % cmapname
continue
print >>sys.stderr, 'Reading: %r...' % cmapname
- cmap = CMapDB.get_cmap(cmapname)
+ cmap = cmapdb.get_cmap(cmapname)
dump_cdb(cmap, cdbname)
return
diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py
index 31c3d32..3f068bd 100644
--- a/pdfminer/pdffont.py
+++ b/pdfminer/pdffont.py
@@ -4,8 +4,8 @@ try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
-from cmap import CMap, CMapDB, CMapParser
-from cmap import FontMetricsDB, EncodingDB
+from cmapdb import CMap, CMapDB, CMapParser
+from cmapdb import FontMetricsDB, EncodingDB
from struct import pack, unpack
from psparser import LIT, STRICT
from psparser import PSLiteral, literal_name
@@ -387,7 +387,7 @@ class PDFSimpleFont(PDFFont):
if 'ToUnicode' in spec:
strm = stream_value(spec['ToUnicode'])
self.ucs2_cmap = CMap()
- CMapParser(self.ucs2_cmap, StringIO(strm.get_data())).run()
+ CMapParser(None, self.ucs2_cmap, StringIO(strm.get_data())).run()
PDFFont.__init__(self, descriptor, widths)
return
@@ -494,7 +494,7 @@ class PDFCIDFont(PDFFont):
if 'ToUnicode' in spec:
strm = stream_value(spec['ToUnicode'])
self.ucs2_cmap = CMap()
- CMapParser(self.ucs2_cmap, StringIO(strm.get_data())).run()
+ CMapParser(None, self.ucs2_cmap, StringIO(strm.get_data())).run()
elif self.cidcoding == 'Adobe-Identity':
if ttf:
try:
diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index 9a15c26..082cada 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -6,7 +6,7 @@ try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
-from cmap import CMapDB
+from cmapdb import CMapDB
from psparser import PSException, PSTypeError, PSEOF
from psparser import PSKeyword, literal_name, keyword_name
from psparser import PSStackParser
diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py
index 49b91ed..606af4a 100644
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@@ -695,7 +695,7 @@ class PDFParser(PSStackParser):
## PDFObjStrmParser
##
-class PDFObjStrmParser(PDFParser):
+class PDFObjStrmParser(PDFParser):
def __init__(self, doc, data):
PDFParser.__init__(self, doc, StringIO(data))
diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py
index 8ee1cbd..27f4911 100755
--- a/tools/pdf2txt.py
+++ b/tools/pdf2txt.py
@@ -4,7 +4,7 @@ from pdfminer.pdfparser import PDFDocument, PDFParser
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf
from pdfminer.pdfdevice import PDFDevice
from pdfminer.converter import XMLConverter, HTMLConverter, TextConverter, TagExtractor
-from pdfminer.cmap import CMapDB, find_cmap_path
+from pdfminer.cmapdb import CMapDB, find_cmap_path
from pdfminer.layout import LAParams
# main