From b853990a48765e1a05761f2e31ada90444a1614a Mon Sep 17 00:00:00 2001 From: "yusuke.shinyama.dummy" Date: Mon, 31 Dec 2007 04:10:03 +0000 Subject: [PATCH] added license texts. git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@5 1aa58f4a-7d42-0410-adbc-911cccaed67c --- Makefile | 5 +++++ README | 14 +++++++++++--- README.AFM | 10 ---------- cmap.py | 2 ++ conv_cmap.py | 31 ++++++++++++++++--------------- fontmetrics.py | 24 ++++++++++++++++++++++++ glyphlist.py | 2 +- 7 files changed, 59 insertions(+), 29 deletions(-) delete mode 100644 README.AFM diff --git a/Makefile b/Makefile index 46c6acb..7989b79 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ PACKAGE=pdfminer VERSION=20071231 TAR=tar SVN=svn +PYTHON=python WORKDIR=.. DISTNAME=$(PACKAGE)-dist-$(VERSION) @@ -11,6 +12,10 @@ DISTFILE=$(DISTNAME).tar.gz all: +cdbcmap: CMap + -mkdir CDBCMap + $(PYTHON) conv_cmap.py CMap/* + clean: -rm *.pyc *.pyo *~ diff --git a/README b/README index 8202672..0777bf2 100644 --- a/README +++ b/README @@ -1,7 +1,15 @@ Installation: - 1. $ tar jxf CMap.tar.bz2 - 2. $ mkdir CDBCMap - 3. $ ./conv_cmap CMap/* + 1. Get http://www.unixuser.org/~euske/pub/CMap.tar.bz2 + 2. $ tar jxf CMap.tar.bz2 + 3. $ make cdbcmap + +Dump the contents: + + $ ./dumppdf.py foo.pdf + +Extract the text: + + $ ./pdf2txt.py foo.pdf > foo.xml diff --git a/README.AFM b/README.AFM deleted file mode 100644 index 4c9d4ce..0000000 --- a/README.AFM +++ /dev/null @@ -1,10 +0,0 @@ -Adobe Core 35 AFM Files with 229 Glyph Entries - ReadMe - -This file and the 35 PostScript(R) AFM files it accompanies may be -used, copied, and distributed for any purpose and without charge, -with or without modification, provided that all copyright notices -are retained; that the AFM files are not distributed without this -file; that all modifications to this file or any of the AFM files -are prominently noted in the modified file(s); and that this -paragraph is not modified. 
Adobe Systems has no responsibility or -obligation to support the use of the AFM files. diff --git a/cmap.py b/cmap.py index d08a299..9d5285f 100644 --- a/cmap.py +++ b/cmap.py @@ -197,6 +197,8 @@ class CMapDB: fp = file(fname) CMapParser(cmap, fp).parse() fp.close() + else: + raise KeyError(cmapname) klass.cmapdb[cmapname] = cmap return cmap diff --git a/conv_cmap.py b/conv_cmap.py index e7d67e6..d1e5385 100755 --- a/conv_cmap.py +++ b/conv_cmap.py @@ -1,7 +1,5 @@ #!/usr/bin/env python -import sys -import fileinput -stdout = sys.stdout +import sys, os.path stderr = sys.stderr def dumpcdb(cmap, cdbfile, verbose=1): @@ -22,15 +20,10 @@ def dumpcdb(cmap, cdbfile, verbose=1): m.finish() return -def convert_cmap(args, cmapdir='CMap', cdbcmapdir='CDBCMap', force=False): - from pdfparser import CMapDB - import os.path - if not os.path.isdir(cmapdir): - raise ValueError('not directory: %r' % cmapdir) - if not os.path.isdir(cdbcmapdir): - raise ValueError('not directory: %r' % cdbcmapdir) +def convert_cmap(files, cmapdir, cdbcmapdir, force=False): + from cmap import CMapDB CMapDB.initialize(cmapdir) - for fname in args: + for fname in files: cmapname = os.path.basename(fname) cdbname = os.path.join(cdbcmapdir, cmapname+'.cmap.cdb') if not force and os.path.exists(cdbname): @@ -44,16 +37,24 @@ def convert_cmap(args, cmapdir='CMap', cdbcmapdir='CDBCMap', force=False): def main(argv): import getopt def usage(): - print 'usage: %s [-C cmapdir] file ...' % argv[0] + print 'usage: %s [-c cmapdir] [-C cdbcmapdir] [-f] file ...' 
% argv[0] return 100 try: - (opts, args) = getopt.getopt(argv[1:], 'C:') + (opts, args) = getopt.getopt(argv[1:], 'c:C:f') except getopt.GetoptError: return usage() if not args: usage() cmapdir = 'CMap' + cdbcmapdir = 'CDBCMap' + force = False for (k, v) in opts: - if k == '-C': cmapdir = v - return convert_cmap(args, cmapdir) + if k == '-f': force = True + elif k == '-c': cmapdir = v + elif k == '-C': cdbcmapdir = v + if not os.path.isdir(cmapdir): + raise ValueError('not directory: %r' % cmapdir) + if not os.path.isdir(cdbcmapdir): + raise ValueError('not directory: %r' % cdbcmapdir) + return convert_cmap(args, cmapdir, cdbcmapdir, force=force) if __name__ == '__main__': sys.exit(main(sys.argv)) diff --git a/fontmetrics.py b/fontmetrics.py index 85b30b0..59f122d 100644 --- a/fontmetrics.py +++ b/fontmetrics.py @@ -1,4 +1,28 @@ # -*- python -*- +# +# fontmetrics.py - font metrics for the Adobe core 14 fonts. +# +# The following data were extracted from the AFM files: +# http://www.ctan.org/tex-archive/fonts/adobe/afm/ +# + +### BEGIN Verbatim copy of the license part + +# +# Adobe Core 35 AFM Files with 229 Glyph Entries - ReadMe +# +# This file and the 35 PostScript(R) AFM files it accompanies may be +# used, copied, and distributed for any purpose and without charge, +# with or without modification, provided that all copyright notices +# are retained; that the AFM files are not distributed without this +# file; that all modifications to this file or any of the AFM files +# are prominently noted in the modified file(s); and that this +# paragraph is not modified. Adobe Systems has no responsibility or +# obligation to support the use of the AFM files. 
+# + +### END Verbatim copy of the license part + FONT_METRICS = { 'Courier-Oblique': ({'FontName': 'Courier-Oblique', 'Descent': -194.0, 'FontBBox': (-49.0, -249.0, 749.0, 803.0), 'FontWeight': 'Medium', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': -11.0, 'Ascent': 627.0}, {32: 600, 33: 600, 34: 600, 35: 600, 36: 600, 37: 600, 38: 600, 39: 600, 40: 600, 41: 600, 42: 600, 43: 600, 44: 600, 45: 600, 46: 600, 47: 600, 48: 600, 49: 600, 50: 600, 51: 600, 52: 600, 53: 600, 54: 600, 55: 600, 56: 600, 57: 600, 58: 600, 59: 600, 60: 600, 61: 600, 62: 600, 63: 600, 64: 600, 65: 600, 66: 600, 67: 600, 68: 600, 69: 600, 70: 600, 71: 600, 72: 600, 73: 600, 74: 600, 75: 600, 76: 600, 77: 600, 78: 600, 79: 600, 80: 600, 81: 600, 82: 600, 83: 600, 84: 600, 85: 600, 86: 600, 87: 600, 88: 600, 89: 600, 90: 600, 91: 600, 92: 600, 93: 600, 94: 600, 95: 600, 96: 600, 97: 600, 98: 600, 99: 600, 100: 600, 101: 600, 102: 600, 103: 600, 104: 600, 105: 600, 106: 600, 107: 600, 108: 600, 109: 600, 110: 600, 111: 600, 112: 600, 113: 600, 114: 600, 115: 600, 116: 600, 117: 600, 118: 600, 119: 600, 120: 600, 121: 600, 122: 600, 123: 600, 124: 600, 125: 600, 126: 600, 161: 600, 162: 600, 163: 600, 164: 600, 165: 600, 166: 600, 167: 600, 168: 600, 169: 600, 170: 600, 171: 600, 172: 600, 173: 600, 174: 600, 175: 600, 177: 600, 178: 600, 179: 600, 180: 600, 182: 600, 183: 600, 184: 600, 185: 600, 186: 600, 187: 600, 188: 600, 189: 600, 191: 600, 193: 600, 194: 600, 195: 600, 196: 600, 197: 600, 198: 600, 199: 600, 200: 600, 202: 600, 203: 600, 205: 600, 206: 600, 207: 600, 208: 600, 225: 600, 227: 600, 232: 600, 233: 600, 234: 600, 235: 600, 241: 600, 245: 600, 248: 600, 249: 600, 250: 600, 251: 600}), 'Times-BoldItalic': ({'FontName': 'Times-BoldItalic', 'Descent': -217.0, 'FontBBox': (-200.0, -218.0, 996.0, 921.0), 'FontWeight': 'Bold', 'CapHeight': 669.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 462.0, 'ItalicAngle': -15.0, 'Ascent': 683.0}, 
{32: 250, 33: 389, 34: 555, 35: 500, 36: 500, 37: 833, 38: 778, 39: 333, 40: 333, 41: 333, 42: 500, 43: 570, 44: 250, 45: 333, 46: 250, 47: 278, 48: 500, 49: 500, 50: 500, 51: 500, 52: 500, 53: 500, 54: 500, 55: 500, 56: 500, 57: 500, 58: 333, 59: 333, 60: 570, 61: 570, 62: 570, 63: 500, 64: 832, 65: 667, 66: 667, 67: 667, 68: 722, 69: 667, 70: 667, 71: 722, 72: 778, 73: 389, 74: 500, 75: 667, 76: 611, 77: 889, 78: 722, 79: 722, 80: 611, 81: 722, 82: 667, 83: 556, 84: 611, 85: 722, 86: 667, 87: 889, 88: 667, 89: 611, 90: 611, 91: 333, 92: 278, 93: 333, 94: 570, 95: 500, 96: 333, 97: 500, 98: 500, 99: 444, 100: 500, 101: 444, 102: 333, 103: 500, 104: 556, 105: 278, 106: 278, 107: 500, 108: 278, 109: 778, 110: 556, 111: 500, 112: 500, 113: 500, 114: 389, 115: 389, 116: 278, 117: 556, 118: 444, 119: 667, 120: 500, 121: 444, 122: 389, 123: 348, 124: 220, 125: 348, 126: 570, 161: 389, 162: 500, 163: 500, 164: 167, 165: 500, 166: 500, 167: 500, 168: 500, 169: 278, 170: 500, 171: 500, 172: 333, 173: 333, 174: 556, 175: 556, 177: 500, 178: 500, 179: 500, 180: 250, 182: 500, 183: 350, 184: 333, 185: 500, 186: 500, 187: 500, 188: 1000, 189: 1000, 191: 500, 193: 333, 194: 333, 195: 333, 196: 333, 197: 333, 198: 333, 199: 333, 200: 333, 202: 333, 203: 333, 205: 333, 206: 333, 207: 333, 208: 1000, 225: 944, 227: 266, 232: 611, 233: 722, 234: 944, 235: 300, 241: 722, 245: 278, 248: 278, 249: 500, 250: 722, 251: 500}), diff --git a/glyphlist.py b/glyphlist.py index 6525499..c3ebfcd 100644 --- a/glyphlist.py +++ b/glyphlist.py @@ -2,7 +2,7 @@ # # glyphlist.py - mappings from Adobe glyph name to unicode. # -# The following data is taken from +# The following data was taken from # http://www.adobe.com/devnet/opentype/archives/glyphlist.txt #