added license texts.

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@5 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2007-12-31 04:10:03 +00:00
parent 6d93b4a7f7
commit b853990a48
7 changed files with 59 additions and 29 deletions

View File

@ -4,6 +4,7 @@ PACKAGE=pdfminer
VERSION=20071231
TAR=tar
SVN=svn
PYTHON=python
WORKDIR=..
DISTNAME=$(PACKAGE)-dist-$(VERSION)
@ -11,6 +12,10 @@ DISTFILE=$(DISTNAME).tar.gz
all:
cdbcmap: CMap
-mkdir CDBCMap
$(PYTHON) conv_cmap.py CMap/*
clean:
-rm *.pyc *.pyo *~

14
README
View File

@ -1,7 +1,15 @@
Installation:
1. $ tar jxf CMap.tar.bz2
2. $ mkdir CDBCMap
3. $ ./conv_cmap CMap/*
1. Get http://www.unixuser.org/~euske/pub/CMap.tar.bz2
2. $ tar jxf CMap.tar.bz2
3. $ make cdbcmap
Dump the contents:
$ ./dumppdf.py foo.pdf
Extract the text:
$ ./pdf2txt.py foo.pdf > foo.xml

View File

@ -1,10 +0,0 @@
Adobe Core 35 AFM Files with 229 Glyph Entries - ReadMe
This file and the 35 PostScript(R) AFM files it accompanies may be
used, copied, and distributed for any purpose and without charge,
with or without modification, provided that all copyright notices
are retained; that the AFM files are not distributed without this
file; that all modifications to this file or any of the AFM files
are prominently noted in the modified file(s); and that this
paragraph is not modified. Adobe Systems has no responsibility or
obligation to support the use of the AFM files.

View File

@ -197,6 +197,8 @@ class CMapDB:
fp = file(fname)
CMapParser(cmap, fp).parse()
fp.close()
else:
raise KeyError(cmapname)
klass.cmapdb[cmapname] = cmap
return cmap

View File

@ -1,7 +1,5 @@
#!/usr/bin/env python
import sys
import fileinput
stdout = sys.stdout
import sys, os.path
stderr = sys.stderr
def dumpcdb(cmap, cdbfile, verbose=1):
@ -22,15 +20,10 @@ def dumpcdb(cmap, cdbfile, verbose=1):
m.finish()
return
def convert_cmap(args, cmapdir='CMap', cdbcmapdir='CDBCMap', force=False):
from pdfparser import CMapDB
import os.path
if not os.path.isdir(cmapdir):
raise ValueError('not directory: %r' % cmapdir)
if not os.path.isdir(cdbcmapdir):
raise ValueError('not directory: %r' % cdbcmapdir)
def convert_cmap(files, cmapdir, cdbcmapdir, force=False):
from cmap import CMapDB
CMapDB.initialize(cmapdir)
for fname in args:
for fname in files:
cmapname = os.path.basename(fname)
cdbname = os.path.join(cdbcmapdir, cmapname+'.cmap.cdb')
if not force and os.path.exists(cdbname):
@ -44,16 +37,24 @@ def convert_cmap(args, cmapdir='CMap', cdbcmapdir='CDBCMap', force=False):
def main(argv):
import getopt
def usage():
print 'usage: %s [-C cmapdir] file ...' % argv[0]
print 'usage: %s [-c cmapdir] [-C cdbcmapdir] [-f] file ...' % argv[0]
return 100
try:
(opts, args) = getopt.getopt(argv[1:], 'C:')
(opts, args) = getopt.getopt(argv[1:], 'c:C:f')
except getopt.GetoptError:
return usage()
if not args: usage()
cmapdir = 'CMap'
cdbcmapdir = 'CDBCMap'
force = False
for (k, v) in opts:
if k == '-C': cmapdir = v
return convert_cmap(args, cmapdir)
if k == '-f': force = True
elif k == '-c': cmapdir = v
elif k == '-C': cdbcmapdir = v
if not os.path.isdir(cmapdir):
raise ValueError('not directory: %r' % cmapdir)
if not os.path.isdir(cdbcmapdir):
raise ValueError('not directory: %r' % cdbcmapdir)
return convert_cmap(args, cmapdir, cdbcmapdir, force=force)
if __name__ == '__main__': sys.exit(main(sys.argv))

View File

@ -1,4 +1,28 @@
# -*- python -*-
#
# fontmetrics.py - font metrics for the Adobe core 14 fonts.
#
# The following data were extracted from the AFM files:
# http://www.ctan.org/tex-archive/fonts/adobe/afm/
#
### BEGIN Verbatim copy of the license part
#
# Adobe Core 35 AFM Files with 229 Glyph Entries - ReadMe
#
# This file and the 35 PostScript(R) AFM files it accompanies may be
# used, copied, and distributed for any purpose and without charge,
# with or without modification, provided that all copyright notices
# are retained; that the AFM files are not distributed without this
# file; that all modifications to this file or any of the AFM files
# are prominently noted in the modified file(s); and that this
# paragraph is not modified. Adobe Systems has no responsibility or
# obligation to support the use of the AFM files.
#
### END Verbatim copy of the license part
FONT_METRICS = {
'Courier-Oblique': ({'FontName': 'Courier-Oblique', 'Descent': -194.0, 'FontBBox': (-49.0, -249.0, 749.0, 803.0), 'FontWeight': 'Medium', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': -11.0, 'Ascent': 627.0}, {32: 600, 33: 600, 34: 600, 35: 600, 36: 600, 37: 600, 38: 600, 39: 600, 40: 600, 41: 600, 42: 600, 43: 600, 44: 600, 45: 600, 46: 600, 47: 600, 48: 600, 49: 600, 50: 600, 51: 600, 52: 600, 53: 600, 54: 600, 55: 600, 56: 600, 57: 600, 58: 600, 59: 600, 60: 600, 61: 600, 62: 600, 63: 600, 64: 600, 65: 600, 66: 600, 67: 600, 68: 600, 69: 600, 70: 600, 71: 600, 72: 600, 73: 600, 74: 600, 75: 600, 76: 600, 77: 600, 78: 600, 79: 600, 80: 600, 81: 600, 82: 600, 83: 600, 84: 600, 85: 600, 86: 600, 87: 600, 88: 600, 89: 600, 90: 600, 91: 600, 92: 600, 93: 600, 94: 600, 95: 600, 96: 600, 97: 600, 98: 600, 99: 600, 100: 600, 101: 600, 102: 600, 103: 600, 104: 600, 105: 600, 106: 600, 107: 600, 108: 600, 109: 600, 110: 600, 111: 600, 112: 600, 113: 600, 114: 600, 115: 600, 116: 600, 117: 600, 118: 600, 119: 600, 120: 600, 121: 600, 122: 600, 123: 600, 124: 600, 125: 600, 126: 600, 161: 600, 162: 600, 163: 600, 164: 600, 165: 600, 166: 600, 167: 600, 168: 600, 169: 600, 170: 600, 171: 600, 172: 600, 173: 600, 174: 600, 175: 600, 177: 600, 178: 600, 179: 600, 180: 600, 182: 600, 183: 600, 184: 600, 185: 600, 186: 600, 187: 600, 188: 600, 189: 600, 191: 600, 193: 600, 194: 600, 195: 600, 196: 600, 197: 600, 198: 600, 199: 600, 200: 600, 202: 600, 203: 600, 205: 600, 206: 600, 207: 600, 208: 600, 225: 600, 227: 600, 232: 600, 233: 600, 234: 600, 235: 600, 241: 600, 245: 600, 248: 600, 249: 600, 250: 600, 251: 600}),
'Times-BoldItalic': ({'FontName': 'Times-BoldItalic', 'Descent': -217.0, 'FontBBox': (-200.0, -218.0, 996.0, 921.0), 'FontWeight': 'Bold', 'CapHeight': 669.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 462.0, 'ItalicAngle': -15.0, 'Ascent': 683.0}, {32: 250, 33: 389, 34: 555, 35: 500, 36: 500, 37: 833, 38: 778, 39: 333, 40: 333, 41: 333, 42: 500, 43: 570, 44: 250, 45: 333, 46: 250, 47: 278, 48: 500, 49: 500, 50: 500, 51: 500, 52: 500, 53: 500, 54: 500, 55: 500, 56: 500, 57: 500, 58: 333, 59: 333, 60: 570, 61: 570, 62: 570, 63: 500, 64: 832, 65: 667, 66: 667, 67: 667, 68: 722, 69: 667, 70: 667, 71: 722, 72: 778, 73: 389, 74: 500, 75: 667, 76: 611, 77: 889, 78: 722, 79: 722, 80: 611, 81: 722, 82: 667, 83: 556, 84: 611, 85: 722, 86: 667, 87: 889, 88: 667, 89: 611, 90: 611, 91: 333, 92: 278, 93: 333, 94: 570, 95: 500, 96: 333, 97: 500, 98: 500, 99: 444, 100: 500, 101: 444, 102: 333, 103: 500, 104: 556, 105: 278, 106: 278, 107: 500, 108: 278, 109: 778, 110: 556, 111: 500, 112: 500, 113: 500, 114: 389, 115: 389, 116: 278, 117: 556, 118: 444, 119: 667, 120: 500, 121: 444, 122: 389, 123: 348, 124: 220, 125: 348, 126: 570, 161: 389, 162: 500, 163: 500, 164: 167, 165: 500, 166: 500, 167: 500, 168: 500, 169: 278, 170: 500, 171: 500, 172: 333, 173: 333, 174: 556, 175: 556, 177: 500, 178: 500, 179: 500, 180: 250, 182: 500, 183: 350, 184: 333, 185: 500, 186: 500, 187: 500, 188: 1000, 189: 1000, 191: 500, 193: 333, 194: 333, 195: 333, 196: 333, 197: 333, 198: 333, 199: 333, 200: 333, 202: 333, 203: 333, 205: 333, 206: 333, 207: 333, 208: 1000, 225: 944, 227: 266, 232: 611, 233: 722, 234: 944, 235: 300, 241: 722, 245: 278, 248: 278, 249: 500, 250: 722, 251: 500}),

View File

@ -2,7 +2,7 @@
#
# glyphlist.py - mappings from Adobe glyph name to unicode.
#
# The following data is taken from
# The following data was taken from
# http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
#