renamed cmap.py -> cmapdb.py (avoiding future name changes)
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@161 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
4d905b81b7
commit
ed8a5362b9
|
@ -19,7 +19,7 @@ Python PDF parser and analyzer
|
||||||
|
|
||||||
<div align=right class=lastmod>
|
<div align=right class=lastmod>
|
||||||
<!-- hhmts start -->
|
<!-- hhmts start -->
|
||||||
Last Modified: Sun Nov 29 16:16:28 JST 2009
|
Last Modified: Sun Nov 29 16:20:36 JST 2009
|
||||||
<!-- hhmts end -->
|
<!-- hhmts end -->
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@ -65,10 +65,10 @@ PDF parser that can be used for other purposes instead of text analysis.
|
||||||
|
|
||||||
<a name="source"></a>
|
<a name="source"></a>
|
||||||
<p>
|
<p>
|
||||||
<strong>Download:</strong><br>
|
<strong>Download from PyPI:</strong><br>
|
||||||
<a href="http://pypi.python.org/packages/source/p/pdfminer/pdfminer-20091129.tar.gz">pdfminer-20091129.tar.gz</a>
|
<a href="http://pypi.python.org/pypi/pdfminer/">
|
||||||
|
http://pypi.python.org/pypi/pdfminer/
|
||||||
</a>
|
</a>
|
||||||
(1.8Mbytes)
|
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
<strong>Discussion:</strong> (for questions and comments, post here)<br>
|
<strong>Discussion:</strong> (for questions and comments, post here)<br>
|
||||||
|
|
|
@ -235,7 +235,7 @@ class CMapDB(object):
|
||||||
print >>sys.stderr, 'Reading: CMap %r...' % fname
|
print >>sys.stderr, 'Reading: CMap %r...' % fname
|
||||||
cmap = CMap()
|
cmap = CMap()
|
||||||
fp = file(fname, 'rb')
|
fp = file(fname, 'rb')
|
||||||
CMapParser(cmap, fp).run()
|
CMapParser(self, cmap, fp).run()
|
||||||
fp.close()
|
fp.close()
|
||||||
elif not strict:
|
elif not strict:
|
||||||
cmap = CMap() # just create empty cmap
|
cmap = CMap() # just create empty cmap
|
||||||
|
@ -249,8 +249,9 @@ class CMapDB(object):
|
||||||
##
|
##
|
||||||
class CMapParser(PSStackParser):
|
class CMapParser(PSStackParser):
|
||||||
|
|
||||||
def __init__(self, cmap, fp):
|
def __init__(self, cmapdb, cmap, fp):
|
||||||
PSStackParser.__init__(self, fp)
|
PSStackParser.__init__(self, fp)
|
||||||
|
self.cmapdb = cmapdb
|
||||||
self.cmap = cmap
|
self.cmap = cmap
|
||||||
self.in_cmap = False
|
self.in_cmap = False
|
||||||
return
|
return
|
||||||
|
@ -282,11 +283,12 @@ class CMapParser(PSStackParser):
|
||||||
return
|
return
|
||||||
|
|
||||||
if name == 'usecmap':
|
if name == 'usecmap':
|
||||||
try:
|
if self.cmapdb:
|
||||||
((_,cmapname),) = self.pop(1)
|
try:
|
||||||
self.cmap.copycmap(CMapDB.get_cmap(literal_name(cmapname)))
|
((_,cmapname),) = self.pop(1)
|
||||||
except PSSyntaxError:
|
self.cmap.copycmap(self.cmapdb.get_cmap(literal_name(cmapname)))
|
||||||
pass
|
except PSSyntaxError:
|
||||||
|
pass
|
||||||
return
|
return
|
||||||
|
|
||||||
if name == 'begincodespacerange':
|
if name == 'begincodespacerange':
|
||||||
|
@ -440,7 +442,7 @@ def dump_cdb(cmap, cdbfile, verbose=1):
|
||||||
|
|
||||||
def convert_cmap(cmapdir, outputdir, force=False):
|
def convert_cmap(cmapdir, outputdir, force=False):
|
||||||
"""Convert all CMap source files in a directory into cdb files."""
|
"""Convert all CMap source files in a directory into cdb files."""
|
||||||
CMapDB.initialize(cmapdir)
|
cmapdb = CMapDB(cmapdir)
|
||||||
for fname in os.listdir(cmapdir):
|
for fname in os.listdir(cmapdir):
|
||||||
if '.' in fname: continue
|
if '.' in fname: continue
|
||||||
cmapname = os.path.basename(fname)
|
cmapname = os.path.basename(fname)
|
||||||
|
@ -449,7 +451,7 @@ def convert_cmap(cmapdir, outputdir, force=False):
|
||||||
print >>sys.stderr, 'Skipping: %r' % cmapname
|
print >>sys.stderr, 'Skipping: %r' % cmapname
|
||||||
continue
|
continue
|
||||||
print >>sys.stderr, 'Reading: %r...' % cmapname
|
print >>sys.stderr, 'Reading: %r...' % cmapname
|
||||||
cmap = CMapDB.get_cmap(cmapname)
|
cmap = cmapdb.get_cmap(cmapname)
|
||||||
dump_cdb(cmap, cdbname)
|
dump_cdb(cmap, cdbname)
|
||||||
return
|
return
|
||||||
|
|
|
@ -4,8 +4,8 @@ try:
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
from cmap import CMap, CMapDB, CMapParser
|
from cmapdb import CMap, CMapDB, CMapParser
|
||||||
from cmap import FontMetricsDB, EncodingDB
|
from cmapdb import FontMetricsDB, EncodingDB
|
||||||
from struct import pack, unpack
|
from struct import pack, unpack
|
||||||
from psparser import LIT, STRICT
|
from psparser import LIT, STRICT
|
||||||
from psparser import PSLiteral, literal_name
|
from psparser import PSLiteral, literal_name
|
||||||
|
@ -387,7 +387,7 @@ class PDFSimpleFont(PDFFont):
|
||||||
if 'ToUnicode' in spec:
|
if 'ToUnicode' in spec:
|
||||||
strm = stream_value(spec['ToUnicode'])
|
strm = stream_value(spec['ToUnicode'])
|
||||||
self.ucs2_cmap = CMap()
|
self.ucs2_cmap = CMap()
|
||||||
CMapParser(self.ucs2_cmap, StringIO(strm.get_data())).run()
|
CMapParser(None, self.ucs2_cmap, StringIO(strm.get_data())).run()
|
||||||
PDFFont.__init__(self, descriptor, widths)
|
PDFFont.__init__(self, descriptor, widths)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -494,7 +494,7 @@ class PDFCIDFont(PDFFont):
|
||||||
if 'ToUnicode' in spec:
|
if 'ToUnicode' in spec:
|
||||||
strm = stream_value(spec['ToUnicode'])
|
strm = stream_value(spec['ToUnicode'])
|
||||||
self.ucs2_cmap = CMap()
|
self.ucs2_cmap = CMap()
|
||||||
CMapParser(self.ucs2_cmap, StringIO(strm.get_data())).run()
|
CMapParser(None, self.ucs2_cmap, StringIO(strm.get_data())).run()
|
||||||
elif self.cidcoding == 'Adobe-Identity':
|
elif self.cidcoding == 'Adobe-Identity':
|
||||||
if ttf:
|
if ttf:
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -6,7 +6,7 @@ try:
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
from cmap import CMapDB
|
from cmapdb import CMapDB
|
||||||
from psparser import PSException, PSTypeError, PSEOF
|
from psparser import PSException, PSTypeError, PSEOF
|
||||||
from psparser import PSKeyword, literal_name, keyword_name
|
from psparser import PSKeyword, literal_name, keyword_name
|
||||||
from psparser import PSStackParser
|
from psparser import PSStackParser
|
||||||
|
|
|
@ -695,7 +695,7 @@ class PDFParser(PSStackParser):
|
||||||
|
|
||||||
## PDFObjStrmParser
|
## PDFObjStrmParser
|
||||||
##
|
##
|
||||||
class PDFObjStrmParser(PDFParser):
|
class PDFObjStrmParser(PSStackParser):
|
||||||
|
|
||||||
def __init__(self, doc, data):
|
def __init__(self, doc, data):
|
||||||
PDFParser.__init__(self, doc, StringIO(data))
|
PDFParser.__init__(self, doc, StringIO(data))
|
||||||
|
|
|
@ -4,7 +4,7 @@ from pdfminer.pdfparser import PDFDocument, PDFParser
|
||||||
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf
|
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf
|
||||||
from pdfminer.pdfdevice import PDFDevice
|
from pdfminer.pdfdevice import PDFDevice
|
||||||
from pdfminer.converter import XMLConverter, HTMLConverter, TextConverter, TagExtractor
|
from pdfminer.converter import XMLConverter, HTMLConverter, TextConverter, TagExtractor
|
||||||
from pdfminer.cmap import CMapDB, find_cmap_path
|
from pdfminer.cmapdb import CMapDB, find_cmap_path
|
||||||
from pdfminer.layout import LAParams
|
from pdfminer.layout import LAParams
|
||||||
|
|
||||||
# main
|
# main
|
||||||
|
|
Loading…
Reference in New Issue