added license texts.
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@5 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
6d93b4a7f7
commit
b853990a48
5
Makefile
5
Makefile
|
@ -4,6 +4,7 @@ PACKAGE=pdfminer
|
||||||
VERSION=20071231
|
VERSION=20071231
|
||||||
TAR=tar
|
TAR=tar
|
||||||
SVN=svn
|
SVN=svn
|
||||||
|
PYTHON=python
|
||||||
|
|
||||||
WORKDIR=..
|
WORKDIR=..
|
||||||
DISTNAME=$(PACKAGE)-dist-$(VERSION)
|
DISTNAME=$(PACKAGE)-dist-$(VERSION)
|
||||||
|
@ -11,6 +12,10 @@ DISTFILE=$(DISTNAME).tar.gz
|
||||||
|
|
||||||
all:
|
all:
|
||||||
|
|
||||||
|
cdbcmap: CMap
|
||||||
|
-mkdir CDBCMap
|
||||||
|
$(PYTHON) conv_cmap.py CMap/*
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-rm *.pyc *.pyo *~
|
-rm *.pyc *.pyo *~
|
||||||
|
|
||||||
|
|
14
README
14
README
|
@ -1,7 +1,15 @@
|
||||||
|
|
||||||
Installation:
|
Installation:
|
||||||
|
|
||||||
1. $ tar jxf CMap.tar.bz2
|
1. Get http://www.unixuser.org/~euske/pub/CMap.tar.bz2
|
||||||
2. $ mkdir CDBCMap
|
2. $ tar jxf CMap.tar.bz2
|
||||||
3. $ ./conv_cmap CMap/*
|
3. $ make cdbcmap
|
||||||
|
|
||||||
|
|
||||||
|
Dump the contents:
|
||||||
|
|
||||||
|
$ ./dumppdf.py foo.pdf
|
||||||
|
|
||||||
|
Extract the text:
|
||||||
|
|
||||||
|
$ ./pdf2txt.py foo.pdf > foo.xml
|
||||||
|
|
10
README.AFM
10
README.AFM
|
@ -1,10 +0,0 @@
|
||||||
Adobe Core 35 AFM Files with 229 Glyph Entries - ReadMe
|
|
||||||
|
|
||||||
This file and the 35 PostScript(R) AFM files it accompanies may be
|
|
||||||
used, copied, and distributed for any purpose and without charge,
|
|
||||||
with or without modification, provided that all copyright notices
|
|
||||||
are retained; that the AFM files are not distributed without this
|
|
||||||
file; that all modifications to this file or any of the AFM files
|
|
||||||
are prominently noted in the modified file(s); and that this
|
|
||||||
paragraph is not modified. Adobe Systems has no responsibility or
|
|
||||||
obligation to support the use of the AFM files.
|
|
2
cmap.py
2
cmap.py
|
@ -197,6 +197,8 @@ class CMapDB:
|
||||||
fp = file(fname)
|
fp = file(fname)
|
||||||
CMapParser(cmap, fp).parse()
|
CMapParser(cmap, fp).parse()
|
||||||
fp.close()
|
fp.close()
|
||||||
|
else:
|
||||||
|
raise KeyError(cmapname)
|
||||||
klass.cmapdb[cmapname] = cmap
|
klass.cmapdb[cmapname] = cmap
|
||||||
return cmap
|
return cmap
|
||||||
|
|
||||||
|
|
31
conv_cmap.py
31
conv_cmap.py
|
@ -1,7 +1,5 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import sys
|
import sys, os.path
|
||||||
import fileinput
|
|
||||||
stdout = sys.stdout
|
|
||||||
stderr = sys.stderr
|
stderr = sys.stderr
|
||||||
|
|
||||||
def dumpcdb(cmap, cdbfile, verbose=1):
|
def dumpcdb(cmap, cdbfile, verbose=1):
|
||||||
|
@ -22,15 +20,10 @@ def dumpcdb(cmap, cdbfile, verbose=1):
|
||||||
m.finish()
|
m.finish()
|
||||||
return
|
return
|
||||||
|
|
||||||
def convert_cmap(args, cmapdir='CMap', cdbcmapdir='CDBCMap', force=False):
|
def convert_cmap(files, cmapdir, cdbcmapdir, force=False):
|
||||||
from pdfparser import CMapDB
|
from cmap import CMapDB
|
||||||
import os.path
|
|
||||||
if not os.path.isdir(cmapdir):
|
|
||||||
raise ValueError('not directory: %r' % cmapdir)
|
|
||||||
if not os.path.isdir(cdbcmapdir):
|
|
||||||
raise ValueError('not directory: %r' % cdbcmapdir)
|
|
||||||
CMapDB.initialize(cmapdir)
|
CMapDB.initialize(cmapdir)
|
||||||
for fname in args:
|
for fname in fiels:
|
||||||
cmapname = os.path.basename(fname)
|
cmapname = os.path.basename(fname)
|
||||||
cdbname = os.path.join(cdbcmapdir, cmapname+'.cmap.cdb')
|
cdbname = os.path.join(cdbcmapdir, cmapname+'.cmap.cdb')
|
||||||
if not force and os.path.exists(cdbname):
|
if not force and os.path.exists(cdbname):
|
||||||
|
@ -44,16 +37,24 @@ def convert_cmap(args, cmapdir='CMap', cdbcmapdir='CDBCMap', force=False):
|
||||||
def main(argv):
|
def main(argv):
|
||||||
import getopt
|
import getopt
|
||||||
def usage():
|
def usage():
|
||||||
print 'usage: %s [-C cmapdir] file ...' % argv[0]
|
print 'usage: %s [-c cmapdir] [-C cdbcmapdir] [-f] file ...' % argv[0]
|
||||||
return 100
|
return 100
|
||||||
try:
|
try:
|
||||||
(opts, args) = getopt.getopt(argv[1:], 'C:')
|
(opts, args) = getopt.getopt(argv[1:], 'c:C:f')
|
||||||
except getopt.GetoptError:
|
except getopt.GetoptError:
|
||||||
return usage()
|
return usage()
|
||||||
if not args: usage()
|
if not args: usage()
|
||||||
cmapdir = 'CMap'
|
cmapdir = 'CMap'
|
||||||
|
cdbcmapdir = 'CDBCMap'
|
||||||
|
force = False
|
||||||
for (k, v) in opts:
|
for (k, v) in opts:
|
||||||
if k == '-C': cmapdir = v
|
if k == '-f': force = True
|
||||||
return convert_cmap(args, cmapdir)
|
elif k == '-c': cmapdir = v
|
||||||
|
elif k == '-C': cdbcmapdir = v
|
||||||
|
if not os.path.isdir(cmapdir):
|
||||||
|
raise ValueError('not directory: %r' % cmapdir)
|
||||||
|
if not os.path.isdir(cdbcmapdir):
|
||||||
|
raise ValueError('not directory: %r' % cdbcmapdir)
|
||||||
|
return convert_cmap(args, cmapdir, cdbcmapdir, force=force)
|
||||||
|
|
||||||
if __name__ == '__main__': sys.exit(main(sys.argv))
|
if __name__ == '__main__': sys.exit(main(sys.argv))
|
||||||
|
|
|
@ -1,4 +1,28 @@
|
||||||
# -*- python -*-
|
# -*- python -*-
|
||||||
|
#
|
||||||
|
# fontmetrics.py - font metrics for the Adobe core 14 fonts.
|
||||||
|
#
|
||||||
|
# The following data were extracted from the AFM files:
|
||||||
|
# http://www.ctan.org/tex-archive/fonts/adobe/afm/
|
||||||
|
#
|
||||||
|
|
||||||
|
### BEGIN Verbatim copy of the license part
|
||||||
|
|
||||||
|
#
|
||||||
|
# Adobe Core 35 AFM Files with 229 Glyph Entries - ReadMe
|
||||||
|
#
|
||||||
|
# This file and the 35 PostScript(R) AFM files it accompanies may be
|
||||||
|
# used, copied, and distributed for any purpose and without charge,
|
||||||
|
# with or without modification, provided that all copyright notices
|
||||||
|
# are retained; that the AFM files are not distributed without this
|
||||||
|
# file; that all modifications to this file or any of the AFM files
|
||||||
|
# are prominently noted in the modified file(s); and that this
|
||||||
|
# paragraph is not modified. Adobe Systems has no responsibility or
|
||||||
|
# obligation to support the use of the AFM files.
|
||||||
|
#
|
||||||
|
|
||||||
|
### END Verbatim copy of the license part
|
||||||
|
|
||||||
FONT_METRICS = {
|
FONT_METRICS = {
|
||||||
'Courier-Oblique': ({'FontName': 'Courier-Oblique', 'Descent': -194.0, 'FontBBox': (-49.0, -249.0, 749.0, 803.0), 'FontWeight': 'Medium', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': -11.0, 'Ascent': 627.0}, {32: 600, 33: 600, 34: 600, 35: 600, 36: 600, 37: 600, 38: 600, 39: 600, 40: 600, 41: 600, 42: 600, 43: 600, 44: 600, 45: 600, 46: 600, 47: 600, 48: 600, 49: 600, 50: 600, 51: 600, 52: 600, 53: 600, 54: 600, 55: 600, 56: 600, 57: 600, 58: 600, 59: 600, 60: 600, 61: 600, 62: 600, 63: 600, 64: 600, 65: 600, 66: 600, 67: 600, 68: 600, 69: 600, 70: 600, 71: 600, 72: 600, 73: 600, 74: 600, 75: 600, 76: 600, 77: 600, 78: 600, 79: 600, 80: 600, 81: 600, 82: 600, 83: 600, 84: 600, 85: 600, 86: 600, 87: 600, 88: 600, 89: 600, 90: 600, 91: 600, 92: 600, 93: 600, 94: 600, 95: 600, 96: 600, 97: 600, 98: 600, 99: 600, 100: 600, 101: 600, 102: 600, 103: 600, 104: 600, 105: 600, 106: 600, 107: 600, 108: 600, 109: 600, 110: 600, 111: 600, 112: 600, 113: 600, 114: 600, 115: 600, 116: 600, 117: 600, 118: 600, 119: 600, 120: 600, 121: 600, 122: 600, 123: 600, 124: 600, 125: 600, 126: 600, 161: 600, 162: 600, 163: 600, 164: 600, 165: 600, 166: 600, 167: 600, 168: 600, 169: 600, 170: 600, 171: 600, 172: 600, 173: 600, 174: 600, 175: 600, 177: 600, 178: 600, 179: 600, 180: 600, 182: 600, 183: 600, 184: 600, 185: 600, 186: 600, 187: 600, 188: 600, 189: 600, 191: 600, 193: 600, 194: 600, 195: 600, 196: 600, 197: 600, 198: 600, 199: 600, 200: 600, 202: 600, 203: 600, 205: 600, 206: 600, 207: 600, 208: 600, 225: 600, 227: 600, 232: 600, 233: 600, 234: 600, 235: 600, 241: 600, 245: 600, 248: 600, 249: 600, 250: 600, 251: 600}),
|
'Courier-Oblique': ({'FontName': 'Courier-Oblique', 'Descent': -194.0, 'FontBBox': (-49.0, -249.0, 749.0, 803.0), 'FontWeight': 'Medium', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': -11.0, 'Ascent': 627.0}, {32: 600, 33: 600, 34: 600, 35: 600, 36: 600, 37: 600, 38: 600, 39: 600, 40: 600, 41: 600, 42: 600, 43: 600, 44: 600, 45: 600, 46: 600, 47: 600, 48: 600, 49: 600, 50: 600, 51: 600, 52: 600, 53: 600, 54: 600, 55: 600, 56: 600, 57: 600, 58: 600, 59: 600, 60: 600, 61: 600, 62: 600, 63: 600, 64: 600, 65: 600, 66: 600, 67: 600, 68: 600, 69: 600, 70: 600, 71: 600, 72: 600, 73: 600, 74: 600, 75: 600, 76: 600, 77: 600, 78: 600, 79: 600, 80: 600, 81: 600, 82: 600, 83: 600, 84: 600, 85: 600, 86: 600, 87: 600, 88: 600, 89: 600, 90: 600, 91: 600, 92: 600, 93: 600, 94: 600, 95: 600, 96: 600, 97: 600, 98: 600, 99: 600, 100: 600, 101: 600, 102: 600, 103: 600, 104: 600, 105: 600, 106: 600, 107: 600, 108: 600, 109: 600, 110: 600, 111: 600, 112: 600, 113: 600, 114: 600, 115: 600, 116: 600, 117: 600, 118: 600, 119: 600, 120: 600, 121: 600, 122: 600, 123: 600, 124: 600, 125: 600, 126: 600, 161: 600, 162: 600, 163: 600, 164: 600, 165: 600, 166: 600, 167: 600, 168: 600, 169: 600, 170: 600, 171: 600, 172: 600, 173: 600, 174: 600, 175: 600, 177: 600, 178: 600, 179: 600, 180: 600, 182: 600, 183: 600, 184: 600, 185: 600, 186: 600, 187: 600, 188: 600, 189: 600, 191: 600, 193: 600, 194: 600, 195: 600, 196: 600, 197: 600, 198: 600, 199: 600, 200: 600, 202: 600, 203: 600, 205: 600, 206: 600, 207: 600, 208: 600, 225: 600, 227: 600, 232: 600, 233: 600, 234: 600, 235: 600, 241: 600, 245: 600, 248: 600, 249: 600, 250: 600, 251: 600}),
|
||||||
'Times-BoldItalic': ({'FontName': 'Times-BoldItalic', 'Descent': -217.0, 'FontBBox': (-200.0, -218.0, 996.0, 921.0), 'FontWeight': 'Bold', 'CapHeight': 669.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 462.0, 'ItalicAngle': -15.0, 'Ascent': 683.0}, {32: 250, 33: 389, 34: 555, 35: 500, 36: 500, 37: 833, 38: 778, 39: 333, 40: 333, 41: 333, 42: 500, 43: 570, 44: 250, 45: 333, 46: 250, 47: 278, 48: 500, 49: 500, 50: 500, 51: 500, 52: 500, 53: 500, 54: 500, 55: 500, 56: 500, 57: 500, 58: 333, 59: 333, 60: 570, 61: 570, 62: 570, 63: 500, 64: 832, 65: 667, 66: 667, 67: 667, 68: 722, 69: 667, 70: 667, 71: 722, 72: 778, 73: 389, 74: 500, 75: 667, 76: 611, 77: 889, 78: 722, 79: 722, 80: 611, 81: 722, 82: 667, 83: 556, 84: 611, 85: 722, 86: 667, 87: 889, 88: 667, 89: 611, 90: 611, 91: 333, 92: 278, 93: 333, 94: 570, 95: 500, 96: 333, 97: 500, 98: 500, 99: 444, 100: 500, 101: 444, 102: 333, 103: 500, 104: 556, 105: 278, 106: 278, 107: 500, 108: 278, 109: 778, 110: 556, 111: 500, 112: 500, 113: 500, 114: 389, 115: 389, 116: 278, 117: 556, 118: 444, 119: 667, 120: 500, 121: 444, 122: 389, 123: 348, 124: 220, 125: 348, 126: 570, 161: 389, 162: 500, 163: 500, 164: 167, 165: 500, 166: 500, 167: 500, 168: 500, 169: 278, 170: 500, 171: 500, 172: 333, 173: 333, 174: 556, 175: 556, 177: 500, 178: 500, 179: 500, 180: 250, 182: 500, 183: 350, 184: 333, 185: 500, 186: 500, 187: 500, 188: 1000, 189: 1000, 191: 500, 193: 333, 194: 333, 195: 333, 196: 333, 197: 333, 198: 333, 199: 333, 200: 333, 202: 333, 203: 333, 205: 333, 206: 333, 207: 333, 208: 1000, 225: 944, 227: 266, 232: 611, 233: 722, 234: 944, 235: 300, 241: 722, 245: 278, 248: 278, 249: 500, 250: 722, 251: 500}),
|
'Times-BoldItalic': ({'FontName': 'Times-BoldItalic', 'Descent': -217.0, 'FontBBox': (-200.0, -218.0, 996.0, 921.0), 'FontWeight': 'Bold', 'CapHeight': 669.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 462.0, 'ItalicAngle': -15.0, 'Ascent': 683.0}, {32: 250, 33: 389, 34: 555, 35: 500, 36: 500, 37: 833, 38: 778, 39: 333, 40: 333, 41: 333, 42: 500, 43: 570, 44: 250, 45: 333, 46: 250, 47: 278, 48: 500, 49: 500, 50: 500, 51: 500, 52: 500, 53: 500, 54: 500, 55: 500, 56: 500, 57: 500, 58: 333, 59: 333, 60: 570, 61: 570, 62: 570, 63: 500, 64: 832, 65: 667, 66: 667, 67: 667, 68: 722, 69: 667, 70: 667, 71: 722, 72: 778, 73: 389, 74: 500, 75: 667, 76: 611, 77: 889, 78: 722, 79: 722, 80: 611, 81: 722, 82: 667, 83: 556, 84: 611, 85: 722, 86: 667, 87: 889, 88: 667, 89: 611, 90: 611, 91: 333, 92: 278, 93: 333, 94: 570, 95: 500, 96: 333, 97: 500, 98: 500, 99: 444, 100: 500, 101: 444, 102: 333, 103: 500, 104: 556, 105: 278, 106: 278, 107: 500, 108: 278, 109: 778, 110: 556, 111: 500, 112: 500, 113: 500, 114: 389, 115: 389, 116: 278, 117: 556, 118: 444, 119: 667, 120: 500, 121: 444, 122: 389, 123: 348, 124: 220, 125: 348, 126: 570, 161: 389, 162: 500, 163: 500, 164: 167, 165: 500, 166: 500, 167: 500, 168: 500, 169: 278, 170: 500, 171: 500, 172: 333, 173: 333, 174: 556, 175: 556, 177: 500, 178: 500, 179: 500, 180: 250, 182: 500, 183: 350, 184: 333, 185: 500, 186: 500, 187: 500, 188: 1000, 189: 1000, 191: 500, 193: 333, 194: 333, 195: 333, 196: 333, 197: 333, 198: 333, 199: 333, 200: 333, 202: 333, 203: 333, 205: 333, 206: 333, 207: 333, 208: 1000, 225: 944, 227: 266, 232: 611, 233: 722, 234: 944, 235: 300, 241: 722, 245: 278, 248: 278, 249: 500, 250: 722, 251: 500}),
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
#
|
#
|
||||||
# glyphlist.py - mappings from Adobe glyph name to unicode.
|
# glyphlist.py - mappings from Adobe glyph name to unicode.
|
||||||
#
|
#
|
||||||
# The following data is taken from
|
# The following data was taken from
|
||||||
# http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
|
# http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
|
||||||
#
|
#
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue