From ed8a5362b949550f574abd6351dd083a0bbc4e92 Mon Sep 17 00:00:00 2001 From: "yusuke.shinyama.dummy" Date: Sat, 19 Dec 2009 06:52:02 +0000 Subject: [PATCH] renamed cmap.py -> cmapdb.py (avoiding future name changes) git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@161 1aa58f4a-7d42-0410-adbc-911cccaed67c --- docs/index.html | 8 ++++---- pdfminer/{cmap.py => cmapdb.py} | 20 +++++++++++--------- pdfminer/pdffont.py | 8 ++++---- pdfminer/pdfinterp.py | 2 +- pdfminer/pdfparser.py | 2 +- tools/pdf2txt.py | 2 +- 6 files changed, 22 insertions(+), 20 deletions(-) rename pdfminer/{cmap.py => cmapdb.py} (96%) diff --git a/docs/index.html b/docs/index.html index c2d4a34..d0ed14f 100644 --- a/docs/index.html +++ b/docs/index.html @@ -19,7 +19,7 @@ Python PDF parser and analyzer
-Last Modified: Sun Nov 29 16:16:28 JST 2009 +Last Modified: Sun Nov 29 16:20:36 JST 2009
@@ -65,10 +65,10 @@ PDF parser that can be used for other purposes instead of text analysis.

-Download:
-pdfminer-20091129.tar.gz +Download from PyPI:
+ +http://pypi.python.org/pypi/pdfminer/ -(1.8Mbytes)

Discussion: (for questions and comments, post here)
diff --git a/pdfminer/cmap.py b/pdfminer/cmapdb.py similarity index 96% rename from pdfminer/cmap.py rename to pdfminer/cmapdb.py index 688f407..225f238 100644 --- a/pdfminer/cmap.py +++ b/pdfminer/cmapdb.py @@ -235,7 +235,7 @@ class CMapDB(object): print >>sys.stderr, 'Reading: CMap %r...' % fname cmap = CMap() fp = file(fname, 'rb') - CMapParser(cmap, fp).run() + CMapParser(self, cmap, fp).run() fp.close() elif not strict: cmap = CMap() # just create empty cmap @@ -249,8 +249,9 @@ class CMapDB(object): ## class CMapParser(PSStackParser): - def __init__(self, cmap, fp): + def __init__(self, cmapdb, cmap, fp): PSStackParser.__init__(self, fp) + self.cmapdb = cmapdb self.cmap = cmap self.in_cmap = False return @@ -282,11 +283,12 @@ class CMapParser(PSStackParser): return if name == 'usecmap': - try: - ((_,cmapname),) = self.pop(1) - self.cmap.copycmap(CMapDB.get_cmap(literal_name(cmapname))) - except PSSyntaxError: - pass + if self.cmapdb: + try: + ((_,cmapname),) = self.pop(1) + self.cmap.copycmap(self.cmapdb.get_cmap(literal_name(cmapname))) + except PSSyntaxError: + pass return if name == 'begincodespacerange': @@ -440,7 +442,7 @@ def dump_cdb(cmap, cdbfile, verbose=1): def convert_cmap(cmapdir, outputdir, force=False): """Convert all CMap source files in a directory into cdb files.""" - CMapDB.initialize(cmapdir) + cmapdb = CMapDB(cmapdir) for fname in os.listdir(cmapdir): if '.' in fname: continue cmapname = os.path.basename(fname) @@ -449,7 +451,7 @@ def convert_cmap(cmapdir, outputdir, force=False): print >>sys.stderr, 'Skipping: %r' % cmapname continue print >>sys.stderr, 'Reading: %r...' 
% cmapname - cmap = CMapDB.get_cmap(cmapname) + cmap = cmapdb.get_cmap(cmapname) dump_cdb(cmap, cdbname) return diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 31c3d32..3f068bd 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -4,8 +4,8 @@ try: from cStringIO import StringIO except ImportError: from StringIO import StringIO -from cmap import CMap, CMapDB, CMapParser -from cmap import FontMetricsDB, EncodingDB +from cmapdb import CMap, CMapDB, CMapParser +from cmapdb import FontMetricsDB, EncodingDB from struct import pack, unpack from psparser import LIT, STRICT from psparser import PSLiteral, literal_name @@ -387,7 +387,7 @@ class PDFSimpleFont(PDFFont): if 'ToUnicode' in spec: strm = stream_value(spec['ToUnicode']) self.ucs2_cmap = CMap() - CMapParser(self.ucs2_cmap, StringIO(strm.get_data())).run() + CMapParser(None, self.ucs2_cmap, StringIO(strm.get_data())).run() PDFFont.__init__(self, descriptor, widths) return @@ -494,7 +494,7 @@ class PDFCIDFont(PDFFont): if 'ToUnicode' in spec: strm = stream_value(spec['ToUnicode']) self.ucs2_cmap = CMap() - CMapParser(self.ucs2_cmap, StringIO(strm.get_data())).run() + CMapParser(None, self.ucs2_cmap, StringIO(strm.get_data())).run() elif self.cidcoding == 'Adobe-Identity': if ttf: try: diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 9a15c26..082cada 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -6,7 +6,7 @@ try: from cStringIO import StringIO except ImportError: from StringIO import StringIO -from cmap import CMapDB +from cmapdb import CMapDB from psparser import PSException, PSTypeError, PSEOF from psparser import PSKeyword, literal_name, keyword_name from psparser import PSStackParser diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index 49b91ed..606af4a 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -695,7 +695,7 @@ class PDFParser(PSStackParser): ## PDFObjStrmParser ## -class PDFObjStrmParser(PDFParser): +class 
PDFObjStrmParser(PSStackParser): def __init__(self, doc, data): PDFParser.__init__(self, doc, StringIO(data)) diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py index 8ee1cbd..27f4911 100755 --- a/tools/pdf2txt.py +++ b/tools/pdf2txt.py @@ -4,7 +4,7 @@ from pdfminer.pdfparser import PDFDocument, PDFParser from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf from pdfminer.pdfdevice import PDFDevice from pdfminer.converter import XMLConverter, HTMLConverter, TextConverter, TagExtractor -from pdfminer.cmap import CMapDB, find_cmap_path +from pdfminer.cmapdb import CMapDB, find_cmap_path from pdfminer.layout import LAParams # main