added license texts.

git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@5 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2007-12-31 04:10:03 +00:00
parent 6d93b4a7f7
commit b853990a48
7 changed files with 59 additions and 29 deletions

View File

@ -4,6 +4,7 @@ PACKAGE=pdfminer
VERSION=20071231 VERSION=20071231
TAR=tar TAR=tar
SVN=svn SVN=svn
PYTHON=python
WORKDIR=.. WORKDIR=..
DISTNAME=$(PACKAGE)-dist-$(VERSION) DISTNAME=$(PACKAGE)-dist-$(VERSION)
@ -11,6 +12,10 @@ DISTFILE=$(DISTNAME).tar.gz
all: all:
cdbcmap: CMap
-mkdir CDBCMap
$(PYTHON) conv_cmap.py CMap/*
clean: clean:
-rm *.pyc *.pyo *~ -rm *.pyc *.pyo *~

14
README
View File

@ -1,7 +1,15 @@
Installation: Installation:
1. $ tar jxf CMap.tar.bz2 1. Get http://www.unixuser.org/~euske/pub/CMap.tar.bz2
2. $ mkdir CDBCMap 2. $ tar jxf CMap.tar.bz2
3. $ ./conv_cmap CMap/* 3. $ make cdbcmap
Dump the contents:
$ ./dumppdf.py foo.pdf
Extract the text:
$ ./pdf2txt.py foo.pdf > foo.xml

View File

@ -1,10 +0,0 @@
Adobe Core 35 AFM Files with 229 Glyph Entries - ReadMe
This file and the 35 PostScript(R) AFM files it accompanies may be
used, copied, and distributed for any purpose and without charge,
with or without modification, provided that all copyright notices
are retained; that the AFM files are not distributed without this
file; that all modifications to this file or any of the AFM files
are prominently noted in the modified file(s); and that this
paragraph is not modified. Adobe Systems has no responsibility or
obligation to support the use of the AFM files.

View File

@ -197,6 +197,8 @@ class CMapDB:
fp = file(fname) fp = file(fname)
CMapParser(cmap, fp).parse() CMapParser(cmap, fp).parse()
fp.close() fp.close()
else:
raise KeyError(cmapname)
klass.cmapdb[cmapname] = cmap klass.cmapdb[cmapname] = cmap
return cmap return cmap

View File

@ -1,7 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys, os.path
import fileinput
stdout = sys.stdout
stderr = sys.stderr stderr = sys.stderr
def dumpcdb(cmap, cdbfile, verbose=1): def dumpcdb(cmap, cdbfile, verbose=1):
@ -22,15 +20,10 @@ def dumpcdb(cmap, cdbfile, verbose=1):
m.finish() m.finish()
return return
def convert_cmap(args, cmapdir='CMap', cdbcmapdir='CDBCMap', force=False): def convert_cmap(files, cmapdir, cdbcmapdir, force=False):
from pdfparser import CMapDB from cmap import CMapDB
import os.path
if not os.path.isdir(cmapdir):
raise ValueError('not directory: %r' % cmapdir)
if not os.path.isdir(cdbcmapdir):
raise ValueError('not directory: %r' % cdbcmapdir)
CMapDB.initialize(cmapdir) CMapDB.initialize(cmapdir)
for fname in args: for fname in files:
cmapname = os.path.basename(fname) cmapname = os.path.basename(fname)
cdbname = os.path.join(cdbcmapdir, cmapname+'.cmap.cdb') cdbname = os.path.join(cdbcmapdir, cmapname+'.cmap.cdb')
if not force and os.path.exists(cdbname): if not force and os.path.exists(cdbname):
@ -44,16 +37,24 @@ def convert_cmap(args, cmapdir='CMap', cdbcmapdir='CDBCMap', force=False):
def main(argv): def main(argv):
import getopt import getopt
def usage(): def usage():
print 'usage: %s [-C cmapdir] file ...' % argv[0] print 'usage: %s [-c cmapdir] [-C cdbcmapdir] [-f] file ...' % argv[0]
return 100 return 100
try: try:
(opts, args) = getopt.getopt(argv[1:], 'C:') (opts, args) = getopt.getopt(argv[1:], 'c:C:f')
except getopt.GetoptError: except getopt.GetoptError:
return usage() return usage()
if not args: usage() if not args: usage()
cmapdir = 'CMap' cmapdir = 'CMap'
cdbcmapdir = 'CDBCMap'
force = False
for (k, v) in opts: for (k, v) in opts:
if k == '-C': cmapdir = v if k == '-f': force = True
return convert_cmap(args, cmapdir) elif k == '-c': cmapdir = v
elif k == '-C': cdbcmapdir = v
if not os.path.isdir(cmapdir):
raise ValueError('not directory: %r' % cmapdir)
if not os.path.isdir(cdbcmapdir):
raise ValueError('not directory: %r' % cdbcmapdir)
return convert_cmap(args, cmapdir, cdbcmapdir, force=force)
if __name__ == '__main__': sys.exit(main(sys.argv)) if __name__ == '__main__': sys.exit(main(sys.argv))

View File

@ -1,4 +1,28 @@
# -*- python -*- # -*- python -*-
#
# fontmetrics.py - font metrics for the Adobe core 14 fonts.
#
# The following data were extracted from the AFM files:
# http://www.ctan.org/tex-archive/fonts/adobe/afm/
#
### BEGIN Verbatim copy of the license part
#
# Adobe Core 35 AFM Files with 229 Glyph Entries - ReadMe
#
# This file and the 35 PostScript(R) AFM files it accompanies may be
# used, copied, and distributed for any purpose and without charge,
# with or without modification, provided that all copyright notices
# are retained; that the AFM files are not distributed without this
# file; that all modifications to this file or any of the AFM files
# are prominently noted in the modified file(s); and that this
# paragraph is not modified. Adobe Systems has no responsibility or
# obligation to support the use of the AFM files.
#
### END Verbatim copy of the license part
FONT_METRICS = { FONT_METRICS = {
'Courier-Oblique': ({'FontName': 'Courier-Oblique', 'Descent': -194.0, 'FontBBox': (-49.0, -249.0, 749.0, 803.0), 'FontWeight': 'Medium', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': -11.0, 'Ascent': 627.0}, {32: 600, 33: 600, 34: 600, 35: 600, 36: 600, 37: 600, 38: 600, 39: 600, 40: 600, 41: 600, 42: 600, 43: 600, 44: 600, 45: 600, 46: 600, 47: 600, 48: 600, 49: 600, 50: 600, 51: 600, 52: 600, 53: 600, 54: 600, 55: 600, 56: 600, 57: 600, 58: 600, 59: 600, 60: 600, 61: 600, 62: 600, 63: 600, 64: 600, 65: 600, 66: 600, 67: 600, 68: 600, 69: 600, 70: 600, 71: 600, 72: 600, 73: 600, 74: 600, 75: 600, 76: 600, 77: 600, 78: 600, 79: 600, 80: 600, 81: 600, 82: 600, 83: 600, 84: 600, 85: 600, 86: 600, 87: 600, 88: 600, 89: 600, 90: 600, 91: 600, 92: 600, 93: 600, 94: 600, 95: 600, 96: 600, 97: 600, 98: 600, 99: 600, 100: 600, 101: 600, 102: 600, 103: 600, 104: 600, 105: 600, 106: 600, 107: 600, 108: 600, 109: 600, 110: 600, 111: 600, 112: 600, 113: 600, 114: 600, 115: 600, 116: 600, 117: 600, 118: 600, 119: 600, 120: 600, 121: 600, 122: 600, 123: 600, 124: 600, 125: 600, 126: 600, 161: 600, 162: 600, 163: 600, 164: 600, 165: 600, 166: 600, 167: 600, 168: 600, 169: 600, 170: 600, 171: 600, 172: 600, 173: 600, 174: 600, 175: 600, 177: 600, 178: 600, 179: 600, 180: 600, 182: 600, 183: 600, 184: 600, 185: 600, 186: 600, 187: 600, 188: 600, 189: 600, 191: 600, 193: 600, 194: 600, 195: 600, 196: 600, 197: 600, 198: 600, 199: 600, 200: 600, 202: 600, 203: 600, 205: 600, 206: 600, 207: 600, 208: 600, 225: 600, 227: 600, 232: 600, 233: 600, 234: 600, 235: 600, 241: 600, 245: 600, 248: 600, 249: 600, 250: 600, 251: 600}), 'Courier-Oblique': ({'FontName': 'Courier-Oblique', 'Descent': -194.0, 'FontBBox': (-49.0, -249.0, 749.0, 803.0), 'FontWeight': 'Medium', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': -11.0, 'Ascent': 627.0}, {32: 600, 33: 600, 34: 600, 35: 600, 36: 600, 37: 600, 38: 600, 
39: 600, 40: 600, 41: 600, 42: 600, 43: 600, 44: 600, 45: 600, 46: 600, 47: 600, 48: 600, 49: 600, 50: 600, 51: 600, 52: 600, 53: 600, 54: 600, 55: 600, 56: 600, 57: 600, 58: 600, 59: 600, 60: 600, 61: 600, 62: 600, 63: 600, 64: 600, 65: 600, 66: 600, 67: 600, 68: 600, 69: 600, 70: 600, 71: 600, 72: 600, 73: 600, 74: 600, 75: 600, 76: 600, 77: 600, 78: 600, 79: 600, 80: 600, 81: 600, 82: 600, 83: 600, 84: 600, 85: 600, 86: 600, 87: 600, 88: 600, 89: 600, 90: 600, 91: 600, 92: 600, 93: 600, 94: 600, 95: 600, 96: 600, 97: 600, 98: 600, 99: 600, 100: 600, 101: 600, 102: 600, 103: 600, 104: 600, 105: 600, 106: 600, 107: 600, 108: 600, 109: 600, 110: 600, 111: 600, 112: 600, 113: 600, 114: 600, 115: 600, 116: 600, 117: 600, 118: 600, 119: 600, 120: 600, 121: 600, 122: 600, 123: 600, 124: 600, 125: 600, 126: 600, 161: 600, 162: 600, 163: 600, 164: 600, 165: 600, 166: 600, 167: 600, 168: 600, 169: 600, 170: 600, 171: 600, 172: 600, 173: 600, 174: 600, 175: 600, 177: 600, 178: 600, 179: 600, 180: 600, 182: 600, 183: 600, 184: 600, 185: 600, 186: 600, 187: 600, 188: 600, 189: 600, 191: 600, 193: 600, 194: 600, 195: 600, 196: 600, 197: 600, 198: 600, 199: 600, 200: 600, 202: 600, 203: 600, 205: 600, 206: 600, 207: 600, 208: 600, 225: 600, 227: 600, 232: 600, 233: 600, 234: 600, 235: 600, 241: 600, 245: 600, 248: 600, 249: 600, 250: 600, 251: 600}),
'Times-BoldItalic': ({'FontName': 'Times-BoldItalic', 'Descent': -217.0, 'FontBBox': (-200.0, -218.0, 996.0, 921.0), 'FontWeight': 'Bold', 'CapHeight': 669.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 462.0, 'ItalicAngle': -15.0, 'Ascent': 683.0}, {32: 250, 33: 389, 34: 555, 35: 500, 36: 500, 37: 833, 38: 778, 39: 333, 40: 333, 41: 333, 42: 500, 43: 570, 44: 250, 45: 333, 46: 250, 47: 278, 48: 500, 49: 500, 50: 500, 51: 500, 52: 500, 53: 500, 54: 500, 55: 500, 56: 500, 57: 500, 58: 333, 59: 333, 60: 570, 61: 570, 62: 570, 63: 500, 64: 832, 65: 667, 66: 667, 67: 667, 68: 722, 69: 667, 70: 667, 71: 722, 72: 778, 73: 389, 74: 500, 75: 667, 76: 611, 77: 889, 78: 722, 79: 722, 80: 611, 81: 722, 82: 667, 83: 556, 84: 611, 85: 722, 86: 667, 87: 889, 88: 667, 89: 611, 90: 611, 91: 333, 92: 278, 93: 333, 94: 570, 95: 500, 96: 333, 97: 500, 98: 500, 99: 444, 100: 500, 101: 444, 102: 333, 103: 500, 104: 556, 105: 278, 106: 278, 107: 500, 108: 278, 109: 778, 110: 556, 111: 500, 112: 500, 113: 500, 114: 389, 115: 389, 116: 278, 117: 556, 118: 444, 119: 667, 120: 500, 121: 444, 122: 389, 123: 348, 124: 220, 125: 348, 126: 570, 161: 389, 162: 500, 163: 500, 164: 167, 165: 500, 166: 500, 167: 500, 168: 500, 169: 278, 170: 500, 171: 500, 172: 333, 173: 333, 174: 556, 175: 556, 177: 500, 178: 500, 179: 500, 180: 250, 182: 500, 183: 350, 184: 333, 185: 500, 186: 500, 187: 500, 188: 1000, 189: 1000, 191: 500, 193: 333, 194: 333, 195: 333, 196: 333, 197: 333, 198: 333, 199: 333, 200: 333, 202: 333, 203: 333, 205: 333, 206: 333, 207: 333, 208: 1000, 225: 944, 227: 266, 232: 611, 233: 722, 234: 944, 235: 300, 241: 722, 245: 278, 248: 278, 249: 500, 250: 722, 251: 500}), 'Times-BoldItalic': ({'FontName': 'Times-BoldItalic', 'Descent': -217.0, 'FontBBox': (-200.0, -218.0, 996.0, 921.0), 'FontWeight': 'Bold', 'CapHeight': 669.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 462.0, 'ItalicAngle': -15.0, 'Ascent': 683.0}, {32: 250, 33: 389, 34: 555, 35: 500, 36: 500, 37: 833, 38: 778, 39: 
333, 40: 333, 41: 333, 42: 500, 43: 570, 44: 250, 45: 333, 46: 250, 47: 278, 48: 500, 49: 500, 50: 500, 51: 500, 52: 500, 53: 500, 54: 500, 55: 500, 56: 500, 57: 500, 58: 333, 59: 333, 60: 570, 61: 570, 62: 570, 63: 500, 64: 832, 65: 667, 66: 667, 67: 667, 68: 722, 69: 667, 70: 667, 71: 722, 72: 778, 73: 389, 74: 500, 75: 667, 76: 611, 77: 889, 78: 722, 79: 722, 80: 611, 81: 722, 82: 667, 83: 556, 84: 611, 85: 722, 86: 667, 87: 889, 88: 667, 89: 611, 90: 611, 91: 333, 92: 278, 93: 333, 94: 570, 95: 500, 96: 333, 97: 500, 98: 500, 99: 444, 100: 500, 101: 444, 102: 333, 103: 500, 104: 556, 105: 278, 106: 278, 107: 500, 108: 278, 109: 778, 110: 556, 111: 500, 112: 500, 113: 500, 114: 389, 115: 389, 116: 278, 117: 556, 118: 444, 119: 667, 120: 500, 121: 444, 122: 389, 123: 348, 124: 220, 125: 348, 126: 570, 161: 389, 162: 500, 163: 500, 164: 167, 165: 500, 166: 500, 167: 500, 168: 500, 169: 278, 170: 500, 171: 500, 172: 333, 173: 333, 174: 556, 175: 556, 177: 500, 178: 500, 179: 500, 180: 250, 182: 500, 183: 350, 184: 333, 185: 500, 186: 500, 187: 500, 188: 1000, 189: 1000, 191: 500, 193: 333, 194: 333, 195: 333, 196: 333, 197: 333, 198: 333, 199: 333, 200: 333, 202: 333, 203: 333, 205: 333, 206: 333, 207: 333, 208: 1000, 225: 944, 227: 266, 232: 611, 233: 722, 234: 944, 235: 300, 241: 722, 245: 278, 248: 278, 249: 500, 250: 722, 251: 500}),

View File

@ -2,7 +2,7 @@
# #
# glyphlist.py - mappings from Adobe glyph name to unicode. # glyphlist.py - mappings from Adobe glyph name to unicode.
# #
# The following data is taken from # The following data was taken from
# http://www.adobe.com/devnet/opentype/archives/glyphlist.txt # http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
# #