%s' % (len(k), len(v), k, v))
- for (k, v) in opts:
- if k == '-k': f = (lambda k,_: k)
- elif k == '-v': f = (lambda _,v: v)
- elif k == '-2': f = (lambda k,v: k+'\t'+v)
- for (k,v) in cdbdump(dbname):
- print f(k,v)
- print
- elif cmd == 'cmerge':
- dbs = [ cdbdump(fname) for fname in args ]
- m = CDBMaker(dbname, dbname+'.tmp')
- for (k,vs) in tcdbmerge(dbs):
- m.add(k, ' '.join(vs))
- m.finish()
- # tcdb
- elif cmd == 'tmake':
- TCDBMaker(dbname, dbname+'.tmp').txt2tcdb(fileinput.input(args)).finish()
- elif cmd == 'tget':
- print repr(TCDBReader(dbname).lookup(args))
- elif cmd == 'tdump':
- f = (lambda k,v: '%s%d,%d:%s->%s' % ('+'*len(k), len(k[-1]), len(v), k[-1], v))
- for (k, v) in opts:
- if k == '-k': f = (lambda k,_: '/'.join(k))
- elif k == '-v': f = (lambda _,v: v)
- elif k == '-2': f = (lambda k,v: '/'.join(k)+'\t'+v)
- for (k,v) in tcdbdump(dbname):
- print f(k,v)
- print
- elif cmd == 'tmerge':
- dbs = [ tcdbdump(fname) for fname in args ]
- m = TCDBMaker(dbname, dbname+'.tmp')
- for (k,vs) in tcdbmerge(dbs):
- m.put(len(k), k[-1], ' '.join(vs))
- m.finish()
-
- else:
- return usage()
- return
+ import getopt, fileinput
+ def usage():
+ print 'usage: %s {cmake,cget,cdump,cmerge} [options] cdbname [args ...]' % argv[0]
+ print 'usage: %s {tmake,tget,tdump,tmerge} [options] tcdbname [args ...]' % argv[0]
+ return 100
+ args = argv[1:]
+ if not args: return usage()
+ cmd = args.pop(0)
+ try:
+ (opts, args) = getopt.getopt(args, 'kv2')
+ except getopt.GetoptError:
+ return usage()
+ if not args: return usage()
+ dbname = args.pop(0)
+
+ # cdb
+ if cmd == 'cmake':
+ CDBMaker(dbname, dbname+'.tmp').txt2cdb(fileinput.input(args)).finish()
+ elif cmd == 'cget':
+ print repr(CDBReader(dbname).get(args[0]))
+ elif cmd == 'cdump':
+ f = (lambda k,v: '+%d,%d:%s->%s' % (len(k), len(v), k, v))
+ for (k, v) in opts:
+ if k == '-k': f = (lambda k,_: k)
+ elif k == '-v': f = (lambda _,v: v)
+ elif k == '-2': f = (lambda k,v: k+'\t'+v)
+ for (k,v) in cdbdump(dbname):
+ print f(k,v)
+ print
+ elif cmd == 'cmerge':
+ dbs = [ cdbdump(fname) for fname in args ]
+ m = CDBMaker(dbname, dbname+'.tmp')
+ for (k,vs) in tcdbmerge(dbs):
+ m.add(k, ' '.join(vs))
+ m.finish()
+ # tcdb
+ elif cmd == 'tmake':
+ TCDBMaker(dbname, dbname+'.tmp').txt2tcdb(fileinput.input(args)).finish()
+ elif cmd == 'tget':
+ print repr(TCDBReader(dbname).lookup(args))
+ elif cmd == 'tdump':
+ f = (lambda k,v: '%s%d,%d:%s->%s' % ('+'*len(k), len(k[-1]), len(v), k[-1], v))
+ for (k, v) in opts:
+ if k == '-k': f = (lambda k,_: '/'.join(k))
+ elif k == '-v': f = (lambda _,v: v)
+ elif k == '-2': f = (lambda k,v: '/'.join(k)+'\t'+v)
+ for (k,v) in tcdbdump(dbname):
+ print f(k,v)
+ print
+ elif cmd == 'tmerge':
+ dbs = [ tcdbdump(fname) for fname in args ]
+ m = TCDBMaker(dbname, dbname+'.tmp')
+ for (k,vs) in tcdbmerge(dbs):
+ m.put(len(k), k[-1], ' '.join(vs))
+ m.finish()
+
+ else:
+ return usage()
+ return
if __name__ == '__main__': sys.exit(main(sys.argv))
diff --git a/pdfminer/rijndael.py b/pdfminer/rijndael.py
index 630342a..0d53334 100644
--- a/pdfminer/rijndael.py
+++ b/pdfminer/rijndael.py
@@ -691,88 +691,88 @@ rcon = [
]
if len(pack('L',0)) == 4:
- # 32bit
- def GETU32(x): return unpack('>L', x)[0]
- def PUTU32(x): return pack('>L', x)
+ # 32bit
+ def GETU32(x): return unpack('>L', x)[0]
+ def PUTU32(x): return pack('>L', x)
else:
- # 64bit
- def GETU32(x): return unpack('>I', x)[0]
- def PUTU32(x): return pack('>I', x)
+ # 64bit
+ def GETU32(x): return unpack('>I', x)[0]
+ def PUTU32(x): return pack('>I', x)
# Expand the cipher key into the encryption key schedule.
#
# @return the number of rounds for the given cipher key size.
def rijndaelSetupEncrypt(key, keybits):
- i = p = 0
- rk = [0]*RKLENGTH(keybits)
- rk[0] = GETU32(key[0:4])
- rk[1] = GETU32(key[4:8])
- rk[2] = GETU32(key[8:12])
- rk[3] = GETU32(key[12:16])
- if keybits == 128:
- while 1:
- temp = rk[p+3]
- rk[p+4] = (rk[p+0] ^
- (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
- (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
- (Te4[(temp ) & 0xff] & 0x0000ff00) ^
- (Te4[(temp >> 24) ] & 0x000000ff) ^
- rcon[i])
- rk[p+5] = rk[p+1] ^ rk[p+4]
- rk[p+6] = rk[p+2] ^ rk[p+5]
- rk[p+7] = rk[p+3] ^ rk[p+6]
- i += 1
- if i == 10: return (rk, 10)
- p += 4
+ i = p = 0
+ rk = [0]*RKLENGTH(keybits)
+ rk[0] = GETU32(key[0:4])
+ rk[1] = GETU32(key[4:8])
+ rk[2] = GETU32(key[8:12])
+ rk[3] = GETU32(key[12:16])
+ if keybits == 128:
+ while 1:
+ temp = rk[p+3]
+ rk[p+4] = (rk[p+0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i])
+ rk[p+5] = rk[p+1] ^ rk[p+4]
+ rk[p+6] = rk[p+2] ^ rk[p+5]
+ rk[p+7] = rk[p+3] ^ rk[p+6]
+ i += 1
+ if i == 10: return (rk, 10)
+ p += 4
- rk[4] = GETU32(key[16:20])
- rk[5] = GETU32(key[20:24])
- if keybits == 192:
- while 1:
- temp = rk[p+5]
- rk[p+6] = (rk[p+0] ^
- (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
- (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
- (Te4[(temp ) & 0xff] & 0x0000ff00) ^
- (Te4[(temp >> 24) ] & 0x000000ff) ^
- rcon[i])
- rk[p+7] = rk[p+1] ^ rk[p+6]
- rk[p+8] = rk[p+2] ^ rk[p+7]
- rk[p+9] = rk[p+3] ^ rk[p+8]
- i += 1
- if i == 8: return (rk, 12)
- rk[p+10] = rk[p+4] ^ rk[p+9]
- rk[p+11] = rk[p+5] ^ rk[p+10]
- p += 6
+ rk[4] = GETU32(key[16:20])
+ rk[5] = GETU32(key[20:24])
+ if keybits == 192:
+ while 1:
+ temp = rk[p+5]
+ rk[p+6] = (rk[p+0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i])
+ rk[p+7] = rk[p+1] ^ rk[p+6]
+ rk[p+8] = rk[p+2] ^ rk[p+7]
+ rk[p+9] = rk[p+3] ^ rk[p+8]
+ i += 1
+ if i == 8: return (rk, 12)
+ rk[p+10] = rk[p+4] ^ rk[p+9]
+ rk[p+11] = rk[p+5] ^ rk[p+10]
+ p += 6
- rk[6] = GETU32(key[24:28])
- rk[7] = GETU32(key[28:32])
- if keybits == 256:
- while 1:
- temp = rk[p+7]
- rk[p+8] = (rk[p+0] ^
- (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
- (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
- (Te4[(temp ) & 0xff] & 0x0000ff00) ^
- (Te4[(temp >> 24) ] & 0x000000ff) ^
- rcon[i])
- rk[p+9] = rk[p+1] ^ rk[p+8]
- rk[p+10] = rk[p+2] ^ rk[p+9]
- rk[p+11] = rk[p+3] ^ rk[p+10]
- i += 1
- if i == 7: return (rk, 14)
- temp = rk[p+11]
- rk[p+12] = (rk[p+4] ^
- (Te4[(temp >> 24) ] & 0xff000000) ^
- (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(temp ) & 0xff] & 0x000000ff))
- rk[p+13] = rk[p+5] ^ rk[p+12]
- rk[p+14] = rk[p+6] ^ rk[p+13]
- rk[p+15] = rk[p+7] ^ rk[p+14]
- p += 8
+ rk[6] = GETU32(key[24:28])
+ rk[7] = GETU32(key[28:32])
+ if keybits == 256:
+ while 1:
+ temp = rk[p+7]
+ rk[p+8] = (rk[p+0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i])
+ rk[p+9] = rk[p+1] ^ rk[p+8]
+ rk[p+10] = rk[p+2] ^ rk[p+9]
+ rk[p+11] = rk[p+3] ^ rk[p+10]
+ i += 1
+ if i == 7: return (rk, 14)
+ temp = rk[p+11]
+ rk[p+12] = (rk[p+4] ^
+ (Te4[(temp >> 24) ] & 0xff000000) ^
+ (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(temp ) & 0xff] & 0x000000ff))
+ rk[p+13] = rk[p+5] ^ rk[p+12]
+ rk[p+14] = rk[p+6] ^ rk[p+13]
+ rk[p+15] = rk[p+7] ^ rk[p+14]
+ p += 8
- raise ValueError(keybits)
+ raise ValueError(keybits)
# Expand the cipher key into the decryption key schedule.
@@ -780,291 +780,291 @@ def rijndaelSetupEncrypt(key, keybits):
# @return the number of rounds for the given cipher key size.
def rijndaelSetupDecrypt(key, keybits):
- # expand the cipher key:
- (rk, nrounds) = rijndaelSetupEncrypt(key, keybits)
- # invert the order of the round keys:
- i = 0
- j = 4*nrounds
- while i < j:
- temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp
- temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp
- temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp
- temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp
- i += 4
- j -= 4
- # apply the inverse MixColumn transform to all round keys but the first and the last:
- p = 0
- for i in xrange(1, nrounds):
- p += 4
- rk[p+0] = (
- Td0[Te4[(rk[p+0] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[p+0] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[p+0] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[p+0] ) & 0xff] & 0xff])
- rk[p+1] = (
- Td0[Te4[(rk[p+1] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[p+1] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[p+1] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[p+1] ) & 0xff] & 0xff])
- rk[p+2] = (
- Td0[Te4[(rk[p+2] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[p+2] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[p+2] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[p+2] ) & 0xff] & 0xff])
- rk[p+3] = (
- Td0[Te4[(rk[p+3] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[p+3] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[p+3] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[p+3] ) & 0xff] & 0xff])
+ # expand the cipher key:
+ (rk, nrounds) = rijndaelSetupEncrypt(key, keybits)
+ # invert the order of the round keys:
+ i = 0
+ j = 4*nrounds
+ while i < j:
+ temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp
+ temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp
+ temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp
+ temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp
+ i += 4
+ j -= 4
+ # apply the inverse MixColumn transform to all round keys but the first and the last:
+ p = 0
+ for i in xrange(1, nrounds):
+ p += 4
+ rk[p+0] = (
+ Td0[Te4[(rk[p+0] >> 24) ] & 0xff] ^
+ Td1[Te4[(rk[p+0] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[p+0] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[(rk[p+0] ) & 0xff] & 0xff])
+ rk[p+1] = (
+ Td0[Te4[(rk[p+1] >> 24) ] & 0xff] ^
+ Td1[Te4[(rk[p+1] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[p+1] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[(rk[p+1] ) & 0xff] & 0xff])
+ rk[p+2] = (
+ Td0[Te4[(rk[p+2] >> 24) ] & 0xff] ^
+ Td1[Te4[(rk[p+2] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[p+2] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[(rk[p+2] ) & 0xff] & 0xff])
+ rk[p+3] = (
+ Td0[Te4[(rk[p+3] >> 24) ] & 0xff] ^
+ Td1[Te4[(rk[p+3] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[p+3] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[(rk[p+3] ) & 0xff] & 0xff])
- return (rk, nrounds)
+ return (rk, nrounds)
def rijndaelEncrypt(rk, nrounds, plaintext):
- assert len(plaintext) == 16
+ assert len(plaintext) == 16
- # map byte array block to cipher state
- # and add initial round key:
- s0 = GETU32(plaintext[0:4]) ^ rk[0]
- s1 = GETU32(plaintext[4:8]) ^ rk[1]
- s2 = GETU32(plaintext[8:12]) ^ rk[2]
- s3 = GETU32(plaintext[12:16]) ^ rk[3]
-
- # nrounds - 1 full rounds:
- r = nrounds >> 1
- p = 0
- while 1:
- t0 = (
- Te0[(s0 >> 24) ] ^
- Te1[(s1 >> 16) & 0xff] ^
- Te2[(s2 >> 8) & 0xff] ^
- Te3[(s3 ) & 0xff] ^
- rk[p+4])
- t1 = (
- Te0[(s1 >> 24) ] ^
- Te1[(s2 >> 16) & 0xff] ^
- Te2[(s3 >> 8) & 0xff] ^
- Te3[(s0 ) & 0xff] ^
- rk[p+5])
- t2 = (
- Te0[(s2 >> 24) ] ^
- Te1[(s3 >> 16) & 0xff] ^
- Te2[(s0 >> 8) & 0xff] ^
- Te3[(s1 ) & 0xff] ^
- rk[p+6])
- t3 = (
- Te0[(s3 >> 24) ] ^
- Te1[(s0 >> 16) & 0xff] ^
- Te2[(s1 >> 8) & 0xff] ^
- Te3[(s2 ) & 0xff] ^
- rk[p+7])
- p += 8
- r -= 1
- if r == 0: break
+ # map byte array block to cipher state
+ # and add initial round key:
+ s0 = GETU32(plaintext[0:4]) ^ rk[0]
+ s1 = GETU32(plaintext[4:8]) ^ rk[1]
+ s2 = GETU32(plaintext[8:12]) ^ rk[2]
+ s3 = GETU32(plaintext[12:16]) ^ rk[3]
+
+ # nrounds - 1 full rounds:
+ r = nrounds >> 1
+ p = 0
+ while 1:
+ t0 = (
+ Te0[(s0 >> 24) ] ^
+ Te1[(s1 >> 16) & 0xff] ^
+ Te2[(s2 >> 8) & 0xff] ^
+ Te3[(s3 ) & 0xff] ^
+ rk[p+4])
+ t1 = (
+ Te0[(s1 >> 24) ] ^
+ Te1[(s2 >> 16) & 0xff] ^
+ Te2[(s3 >> 8) & 0xff] ^
+ Te3[(s0 ) & 0xff] ^
+ rk[p+5])
+ t2 = (
+ Te0[(s2 >> 24) ] ^
+ Te1[(s3 >> 16) & 0xff] ^
+ Te2[(s0 >> 8) & 0xff] ^
+ Te3[(s1 ) & 0xff] ^
+ rk[p+6])
+ t3 = (
+ Te0[(s3 >> 24) ] ^
+ Te1[(s0 >> 16) & 0xff] ^
+ Te2[(s1 >> 8) & 0xff] ^
+ Te3[(s2 ) & 0xff] ^
+ rk[p+7])
+ p += 8
+ r -= 1
+ if r == 0: break
+ s0 = (
+ Te0[(t0 >> 24) ] ^
+ Te1[(t1 >> 16) & 0xff] ^
+ Te2[(t2 >> 8) & 0xff] ^
+ Te3[(t3 ) & 0xff] ^
+ rk[p+0])
+ s1 = (
+ Te0[(t1 >> 24) ] ^
+ Te1[(t2 >> 16) & 0xff] ^
+ Te2[(t3 >> 8) & 0xff] ^
+ Te3[(t0 ) & 0xff] ^
+ rk[p+1])
+ s2 = (
+ Te0[(t2 >> 24) ] ^
+ Te1[(t3 >> 16) & 0xff] ^
+ Te2[(t0 >> 8) & 0xff] ^
+ Te3[(t1 ) & 0xff] ^
+ rk[p+2])
+ s3 = (
+ Te0[(t3 >> 24) ] ^
+ Te1[(t0 >> 16) & 0xff] ^
+ Te2[(t1 >> 8) & 0xff] ^
+ Te3[(t2 ) & 0xff] ^
+ rk[p+3])
+
+ ciphertext = ''
+
+ # apply last round and
+ # map cipher state to byte array block:
s0 = (
- Te0[(t0 >> 24) ] ^
- Te1[(t1 >> 16) & 0xff] ^
- Te2[(t2 >> 8) & 0xff] ^
- Te3[(t3 ) & 0xff] ^
+ (Te4[(t0 >> 24) ] & 0xff000000) ^
+ (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t3 ) & 0xff] & 0x000000ff) ^
rk[p+0])
+ ciphertext += PUTU32(s0)
s1 = (
- Te0[(t1 >> 24) ] ^
- Te1[(t2 >> 16) & 0xff] ^
- Te2[(t3 >> 8) & 0xff] ^
- Te3[(t0 ) & 0xff] ^
+ (Te4[(t1 >> 24) ] & 0xff000000) ^
+ (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t0 ) & 0xff] & 0x000000ff) ^
rk[p+1])
+ ciphertext += PUTU32(s1)
s2 = (
- Te0[(t2 >> 24) ] ^
- Te1[(t3 >> 16) & 0xff] ^
- Te2[(t0 >> 8) & 0xff] ^
- Te3[(t1 ) & 0xff] ^
+ (Te4[(t2 >> 24) ] & 0xff000000) ^
+ (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t1 ) & 0xff] & 0x000000ff) ^
rk[p+2])
+ ciphertext += PUTU32(s2)
s3 = (
- Te0[(t3 >> 24) ] ^
- Te1[(t0 >> 16) & 0xff] ^
- Te2[(t1 >> 8) & 0xff] ^
- Te3[(t2 ) & 0xff] ^
+ (Te4[(t3 >> 24) ] & 0xff000000) ^
+ (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t2 ) & 0xff] & 0x000000ff) ^
rk[p+3])
+ ciphertext += PUTU32(s3)
- ciphertext = ''
-
- # apply last round and
- # map cipher state to byte array block:
- s0 = (
- (Te4[(t0 >> 24) ] & 0xff000000) ^
- (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t3 ) & 0xff] & 0x000000ff) ^
- rk[p+0])
- ciphertext += PUTU32(s0)
- s1 = (
- (Te4[(t1 >> 24) ] & 0xff000000) ^
- (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t0 ) & 0xff] & 0x000000ff) ^
- rk[p+1])
- ciphertext += PUTU32(s1)
- s2 = (
- (Te4[(t2 >> 24) ] & 0xff000000) ^
- (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t1 ) & 0xff] & 0x000000ff) ^
- rk[p+2])
- ciphertext += PUTU32(s2)
- s3 = (
- (Te4[(t3 >> 24) ] & 0xff000000) ^
- (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t2 ) & 0xff] & 0x000000ff) ^
- rk[p+3])
- ciphertext += PUTU32(s3)
-
- assert len(ciphertext) == 16
- return ciphertext
+ assert len(ciphertext) == 16
+ return ciphertext
def rijndaelDecrypt(rk, nrounds, ciphertext):
- assert len(ciphertext) == 16
-
- # map byte array block to cipher state
- # and add initial round key:
- s0 = GETU32(ciphertext[0:4]) ^ rk[0]
- s1 = GETU32(ciphertext[4:8]) ^ rk[1]
- s2 = GETU32(ciphertext[8:12]) ^ rk[2]
- s3 = GETU32(ciphertext[12:16]) ^ rk[3]
-
- # nrounds - 1 full rounds:
- r = nrounds >> 1
- p = 0
- while 1:
- t0 = (
- Td0[(s0 >> 24) ] ^
- Td1[(s3 >> 16) & 0xff] ^
- Td2[(s2 >> 8) & 0xff] ^
- Td3[(s1 ) & 0xff] ^
- rk[p+4])
- t1 = (
- Td0[(s1 >> 24) ] ^
- Td1[(s0 >> 16) & 0xff] ^
- Td2[(s3 >> 8) & 0xff] ^
- Td3[(s2 ) & 0xff] ^
- rk[p+5])
- t2 = (
- Td0[(s2 >> 24) ] ^
- Td1[(s1 >> 16) & 0xff] ^
- Td2[(s0 >> 8) & 0xff] ^
- Td3[(s3 ) & 0xff] ^
- rk[p+6])
- t3 = (
- Td0[(s3 >> 24) ] ^
- Td1[(s2 >> 16) & 0xff] ^
- Td2[(s1 >> 8) & 0xff] ^
- Td3[(s0 ) & 0xff] ^
- rk[p+7])
- p += 8
- r -= 1
- if r == 0: break
+ assert len(ciphertext) == 16
+
+ # map byte array block to cipher state
+ # and add initial round key:
+ s0 = GETU32(ciphertext[0:4]) ^ rk[0]
+ s1 = GETU32(ciphertext[4:8]) ^ rk[1]
+ s2 = GETU32(ciphertext[8:12]) ^ rk[2]
+ s3 = GETU32(ciphertext[12:16]) ^ rk[3]
+
+ # nrounds - 1 full rounds:
+ r = nrounds >> 1
+ p = 0
+ while 1:
+ t0 = (
+ Td0[(s0 >> 24) ] ^
+ Td1[(s3 >> 16) & 0xff] ^
+ Td2[(s2 >> 8) & 0xff] ^
+ Td3[(s1 ) & 0xff] ^
+ rk[p+4])
+ t1 = (
+ Td0[(s1 >> 24) ] ^
+ Td1[(s0 >> 16) & 0xff] ^
+ Td2[(s3 >> 8) & 0xff] ^
+ Td3[(s2 ) & 0xff] ^
+ rk[p+5])
+ t2 = (
+ Td0[(s2 >> 24) ] ^
+ Td1[(s1 >> 16) & 0xff] ^
+ Td2[(s0 >> 8) & 0xff] ^
+ Td3[(s3 ) & 0xff] ^
+ rk[p+6])
+ t3 = (
+ Td0[(s3 >> 24) ] ^
+ Td1[(s2 >> 16) & 0xff] ^
+ Td2[(s1 >> 8) & 0xff] ^
+ Td3[(s0 ) & 0xff] ^
+ rk[p+7])
+ p += 8
+ r -= 1
+ if r == 0: break
+ s0 = (
+ Td0[(t0 >> 24) ] ^
+ Td1[(t3 >> 16) & 0xff] ^
+ Td2[(t2 >> 8) & 0xff] ^
+ Td3[(t1 ) & 0xff] ^
+ rk[p+0])
+ s1 = (
+ Td0[(t1 >> 24) ] ^
+ Td1[(t0 >> 16) & 0xff] ^
+ Td2[(t3 >> 8) & 0xff] ^
+ Td3[(t2 ) & 0xff] ^
+ rk[p+1])
+ s2 = (
+ Td0[(t2 >> 24) ] ^
+ Td1[(t1 >> 16) & 0xff] ^
+ Td2[(t0 >> 8) & 0xff] ^
+ Td3[(t3 ) & 0xff] ^
+ rk[p+2])
+ s3 = (
+ Td0[(t3 >> 24) ] ^
+ Td1[(t2 >> 16) & 0xff] ^
+ Td2[(t1 >> 8) & 0xff] ^
+ Td3[(t0 ) & 0xff] ^
+ rk[p+3])
+
+ plaintext = ''
+
+ # apply last round and
+ # map cipher state to byte array block:
s0 = (
- Td0[(t0 >> 24) ] ^
- Td1[(t3 >> 16) & 0xff] ^
- Td2[(t2 >> 8) & 0xff] ^
- Td3[(t1 ) & 0xff] ^
+ (Td4[(t0 >> 24) ] & 0xff000000) ^
+ (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t1 ) & 0xff] & 0x000000ff) ^
rk[p+0])
+ plaintext += PUTU32(s0)
s1 = (
- Td0[(t1 >> 24) ] ^
- Td1[(t0 >> 16) & 0xff] ^
- Td2[(t3 >> 8) & 0xff] ^
- Td3[(t2 ) & 0xff] ^
+ (Td4[(t1 >> 24) ] & 0xff000000) ^
+ (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t2 ) & 0xff] & 0x000000ff) ^
rk[p+1])
+ plaintext += PUTU32(s1)
s2 = (
- Td0[(t2 >> 24) ] ^
- Td1[(t1 >> 16) & 0xff] ^
- Td2[(t0 >> 8) & 0xff] ^
- Td3[(t3 ) & 0xff] ^
+ (Td4[(t2 >> 24) ] & 0xff000000) ^
+ (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t3 ) & 0xff] & 0x000000ff) ^
rk[p+2])
+ plaintext += PUTU32(s2)
s3 = (
- Td0[(t3 >> 24) ] ^
- Td1[(t2 >> 16) & 0xff] ^
- Td2[(t1 >> 8) & 0xff] ^
- Td3[(t0 ) & 0xff] ^
+ (Td4[(t3 >> 24) ] & 0xff000000) ^
+ (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t0 ) & 0xff] & 0x000000ff) ^
rk[p+3])
+ plaintext += PUTU32(s3)
- plaintext = ''
-
- # apply last round and
- # map cipher state to byte array block:
- s0 = (
- (Td4[(t0 >> 24) ] & 0xff000000) ^
- (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t1 ) & 0xff] & 0x000000ff) ^
- rk[p+0])
- plaintext += PUTU32(s0)
- s1 = (
- (Td4[(t1 >> 24) ] & 0xff000000) ^
- (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t2 ) & 0xff] & 0x000000ff) ^
- rk[p+1])
- plaintext += PUTU32(s1)
- s2 = (
- (Td4[(t2 >> 24) ] & 0xff000000) ^
- (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t3 ) & 0xff] & 0x000000ff) ^
- rk[p+2])
- plaintext += PUTU32(s2)
- s3 = (
- (Td4[(t3 >> 24) ] & 0xff000000) ^
- (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t0 ) & 0xff] & 0x000000ff) ^
- rk[p+3])
- plaintext += PUTU32(s3)
-
- assert len(plaintext) == 16
- return plaintext
+ assert len(plaintext) == 16
+ return plaintext
# decrypt(key, fin, fout, keybits=256)
class RijndaelDecryptor(object):
-
- def __init__(self, key, keybits=256):
- assert len(key) == KEYLENGTH(keybits)
- (self.rk, self.nrounds) = rijndaelSetupDecrypt(key, keybits)
- assert len(self.rk) == RKLENGTH(keybits)
- assert self.nrounds == NROUNDS(keybits)
- return
- def decrypt(self, ciphertext):
- assert len(ciphertext) == 16
- return rijndaelDecrypt(self.rk, self.nrounds, ciphertext)
+ def __init__(self, key, keybits=256):
+ assert len(key) == KEYLENGTH(keybits)
+ (self.rk, self.nrounds) = rijndaelSetupDecrypt(key, keybits)
+ assert len(self.rk) == RKLENGTH(keybits)
+ assert self.nrounds == NROUNDS(keybits)
+ return
+
+ def decrypt(self, ciphertext):
+ assert len(ciphertext) == 16
+ return rijndaelDecrypt(self.rk, self.nrounds, ciphertext)
# encrypt(key, fin, fout, keybits=256)
class RijndaelEncryptor(object):
- def __init__(self, key, keybits=256):
- assert len(key) == KEYLENGTH(keybits)
- (self.rk, self.nrounds) = rijndaelSetupEncrypt(key, keybits)
- assert len(self.rk) == RKLENGTH(keybits)
- assert self.nrounds == NROUNDS(keybits)
- return
+ def __init__(self, key, keybits=256):
+ assert len(key) == KEYLENGTH(keybits)
+ (self.rk, self.nrounds) = rijndaelSetupEncrypt(key, keybits)
+ assert len(self.rk) == RKLENGTH(keybits)
+ assert self.nrounds == NROUNDS(keybits)
+ return
- def encrypt(self, plaintext):
- assert len(plaintext) == 16
- return rijndaelEncrypt(self.rk, self.nrounds, plaintext)
+ def encrypt(self, plaintext):
+ assert len(plaintext) == 16
+ return rijndaelEncrypt(self.rk, self.nrounds, plaintext)
def main(argv):
- # test
- key = '00010203050607080A0B0C0D0F101112'.decode('hex')
- plaintext = '506812A45F08C889B97F5980038B8359'.decode('hex')
- ciphertext = 'D8F532538289EF7D06B506A4FD5BE9C9'.decode('hex')
- e = RijndaelEncryptor(key, 128)
- text = e.encrypt(plaintext)
- assert text == ciphertext
- d = RijndaelDecryptor(key, 128)
- text = d.decrypt(ciphertext)
- assert text == plaintext
- return 0
+ # test
+ key = '00010203050607080A0B0C0D0F101112'.decode('hex')
+ plaintext = '506812A45F08C889B97F5980038B8359'.decode('hex')
+ ciphertext = 'D8F532538289EF7D06B506A4FD5BE9C9'.decode('hex')
+ e = RijndaelEncryptor(key, 128)
+ text = e.encrypt(plaintext)
+ assert text == ciphertext
+ d = RijndaelDecryptor(key, 128)
+ text = d.decrypt(ciphertext)
+ assert text == plaintext
+ return 0
if __name__ == '__main__': sys.exit(main(sys.argv))
diff --git a/pdfminer/utils.py b/pdfminer/utils.py
index 42aeef5..c743885 100644
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@@ -7,21 +7,21 @@ from struct import unpack
MATRIX_IDENTITY = (1, 0, 0, 1, 0, 0)
def mult_matrix((a1,b1,c1,d1,e1,f1), (a0,b0,c0,d0,e0,f0)):
- '''Multiplies two matrices.'''
- return (a0*a1+c0*b1, b0*a1+d0*b1,
- a0*c1+c0*d1, b0*c1+d0*d1,
- a0*e1+c0*f1+e0, b0*e1+d0*f1+f0)
+ '''Multiplies two matrices.'''
+ return (a0*a1+c0*b1, b0*a1+d0*b1,
+ a0*c1+c0*d1, b0*c1+d0*d1,
+ a0*e1+c0*f1+e0, b0*e1+d0*f1+f0)
def translate_matrix((a,b,c,d,e,f), (x,y)):
- return (a,b,c,d,x*a+y*c+e,x*b+y*d+f)
-
+ return (a,b,c,d,x*a+y*c+e,x*b+y*d+f)
+
def apply_matrix_pt((a,b,c,d,e,f), (x,y)):
- '''Applies a matrix to a point.'''
- return (a*x+c*y+e, b*x+d*y+f)
+ '''Applies a matrix to a point.'''
+ return (a*x+c*y+e, b*x+d*y+f)
def apply_matrix_norm((a,b,c,d,e,f), (p,q)):
- '''Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0))'''
- return (a*p+c*q, b*p+d*q)
+ '''Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0))'''
+ return (a*p+c*q, b*p+d*q)
## Utility functions
@@ -29,62 +29,62 @@ def apply_matrix_norm((a,b,c,d,e,f), (p,q)):
# pick
def pick(seq, func, maxobj=None):
- '''Picks the object that has the highest value of func(obj).'''
- maxscore = None
- for obj in seq:
- score = func(obj)
- if maxscore == None or maxscore < score:
- (maxscore,maxobj) = (score,obj)
- return maxobj
+ '''Picks the object that has the highest value of func(obj).'''
+ maxscore = None
+ for obj in seq:
+ score = func(obj)
+ if maxscore == None or maxscore < score:
+ (maxscore,maxobj) = (score,obj)
+ return maxobj
# bsearch
def bsearch(objs, v0):
- '''Tries to find the closest value to v0.'''
- i0 = 0
- i1 = len(objs)
- while i0 < i1:
- i = (i0+i1)/2
- (v, obj) = objs[i]
- if v0 == v:
- (i0,i1) = (i,i+1)
- while 0 < i0 and objs[i0-1][0] == v0:
- i0 -= 1
- while i1 < len(objs)-1 and objs[i1][0] == v0:
- i1 += 1
- break
- elif v0 < v:
- i1 = i
- else:
- i0 = i+1
- return (i0,i1)
+ '''Tries to find the closest value to v0.'''
+ i0 = 0
+ i1 = len(objs)
+ while i0 < i1:
+ i = (i0+i1)/2
+ (v, obj) = objs[i]
+ if v0 == v:
+ (i0,i1) = (i,i+1)
+ while 0 < i0 and objs[i0-1][0] == v0:
+ i0 -= 1
+ while i1 < len(objs)-1 and objs[i1][0] == v0:
+ i1 += 1
+ break
+ elif v0 < v:
+ i1 = i
+ else:
+ i0 = i+1
+ return (i0,i1)
# choplist
def choplist(n, seq):
- '''Groups every n elements of the list.'''
- r = []
- for x in seq:
- r.append(x)
- if len(r) == n:
- yield tuple(r)
- r = []
- return
+ '''Groups every n elements of the list.'''
+ r = []
+ for x in seq:
+ r.append(x)
+ if len(r) == n:
+ yield tuple(r)
+ r = []
+ return
# nunpack
def nunpack(s, default=0):
- '''Unpacks up to 4 bytes big endian.'''
- l = len(s)
- if not l:
- return default
- elif l == 1:
- return ord(s)
- elif l == 2:
- return unpack('>H', s)[0]
- elif l == 3:
- return unpack('>L', '\x00'+s)[0]
- elif l == 4:
- return unpack('>L', s)[0]
- else:
- return TypeError('invalid length: %d' % l)
+ '''Unpacks up to 4 bytes big endian.'''
+ l = len(s)
+ if not l:
+ return default
+ elif l == 1:
+ return ord(s)
+ elif l == 2:
+ return unpack('>H', s)[0]
+ elif l == 3:
+ return unpack('>L', '\x00'+s)[0]
+ elif l == 4:
+ return unpack('>L', s)[0]
+ else:
+ return TypeError('invalid length: %d' % l)
# decode_text
PDFDocEncoding = ''.join( unichr(x) for x in (
@@ -122,14 +122,14 @@ PDFDocEncoding = ''.join( unichr(x) for x in (
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
))
def decode_text(s):
- '''Decodes a PDFDocEncoding string to Unicode.'''
- if s.startswith('\xfe\xff'):
- return unicode(s[2:], 'utf-16be', 'ignore')
- else:
- return ''.join( PDFDocEncoding[ord(c)] for c in s )
+ '''Decodes a PDFDocEncoding string to Unicode.'''
+ if s.startswith('\xfe\xff'):
+ return unicode(s[2:], 'utf-16be', 'ignore')
+ else:
+ return ''.join( PDFDocEncoding[ord(c)] for c in s )
# enc
def enc(x, codec='ascii'):
- '''Encodes a string for SGML/XML/HTML'''
- x = x.replace('&','&').replace('>','>').replace('<','<').replace('"','"')
- return x.encode(codec, 'xmlcharrefreplace')
+ '''Encodes a string for SGML/XML/HTML'''
+ x = x.replace('&','&').replace('>','>').replace('<','<').replace('"','"')
+ return x.encode(codec, 'xmlcharrefreplace')
diff --git a/setup.py b/setup.py
index dbda6a6..5a7b2ab 100644
--- a/setup.py
+++ b/setup.py
@@ -3,10 +3,10 @@ from distutils.core import setup
from pdfminer import __version__
setup(
- name='pdfminer',
- version=__version__,
- description='PDF parser and analyzer',
- long_description='''PDFMiner is a suite of programs that help
+ name='pdfminer',
+ version=__version__,
+ description='PDF parser and analyzer',
+ long_description='''PDFMiner is a suite of programs that help
extracting and analyzing text data of PDF documents.
Unlike other PDF-related tools, it allows to obtain
the exact location of texts in a page, as well as
@@ -14,23 +14,23 @@ other extra information such as font information or ruled lines.
It includes a PDF converter that can transform PDF files
into other text formats (such as HTML). It has an extensible
PDF parser that can be used for other purposes instead of text analysis.''',
- license='MIT/X',
- author='Yusuke Shinyama',
- author_email='yusuke at cs dot nyu dot edu',
- url='http://www.unixuser.org/~euske/python/pdfminer/index.html',
- packages=[
+ license='MIT/X',
+ author='Yusuke Shinyama',
+ author_email='yusuke at cs dot nyu dot edu',
+ url='http://www.unixuser.org/~euske/python/pdfminer/index.html',
+ packages=[
'pdfminer'
- ],
- scripts=[
+ ],
+ scripts=[
'tools/pdf2txt.py',
'tools/dumppdf.py'
],
- keywords=['pdf parser', 'pdf converter', 'text mining'],
- classifiers=[
+ keywords=['pdf parser', 'pdf converter', 'text mining'],
+ classifiers=[
'Development Status :: 4 - Beta',
'Environment :: Console',
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: MIT License',
- ],
- )
+ ],
+ )
diff --git a/tools/conv_afm.py b/tools/conv_afm.py
index de76015..efc63ad 100755
--- a/tools/conv_afm.py
+++ b/tools/conv_afm.py
@@ -5,38 +5,38 @@ stdout = sys.stdout
stderr = sys.stderr
def main(argv):
- fonts = {}
- for line in fileinput.input():
- f = line.strip().split(' ')
- if not f: continue
- k = f[0]
- if k == 'FontName':
- fontname = f[1]
- props = {'FontName': fontname, 'Flags': 0}
- chars = {}
- fonts[fontname] = (props, chars)
- elif k == 'C':
- cid = int(f[1])
- if 0 <= cid and cid <= 255:
- width = int(f[4])
- chars[cid] = width
- elif k in ('CapHeight', 'XHeight', 'ItalicAngle',
- 'Ascender', 'Descender'):
- k = {'Ascender':'Ascent', 'Descender':'Descent'}.get(k,k)
- props[k] = float(f[1])
- elif k in ('FontName', 'FamilyName', 'Weight'):
- k = {'FamilyName':'FontFamily', 'Weight':'FontWeight'}.get(k,k)
- props[k] = f[1]
- elif k == 'IsFixedPitch':
- if f[1].lower() == 'true':
- props['Flags'] = 64
- elif k == 'FontBBox':
- props[k] = tuple(map(float, f[1:5]))
- print '# -*- python -*-'
- print 'FONT_METRICS = {'
- for (fontname,(props,chars)) in fonts.iteritems():
- print ' %r: %r,' % (fontname, (props,chars))
- print '}'
- return 0
+ fonts = {}
+ for line in fileinput.input():
+ f = line.strip().split(' ')
+ if not f: continue
+ k = f[0]
+ if k == 'FontName':
+ fontname = f[1]
+ props = {'FontName': fontname, 'Flags': 0}
+ chars = {}
+ fonts[fontname] = (props, chars)
+ elif k == 'C':
+ cid = int(f[1])
+ if 0 <= cid and cid <= 255:
+ width = int(f[4])
+ chars[cid] = width
+ elif k in ('CapHeight', 'XHeight', 'ItalicAngle',
+ 'Ascender', 'Descender'):
+ k = {'Ascender':'Ascent', 'Descender':'Descent'}.get(k,k)
+ props[k] = float(f[1])
+ elif k in ('FontName', 'FamilyName', 'Weight'):
+ k = {'FamilyName':'FontFamily', 'Weight':'FontWeight'}.get(k,k)
+ props[k] = f[1]
+ elif k == 'IsFixedPitch':
+ if f[1].lower() == 'true':
+ props['Flags'] = 64
+ elif k == 'FontBBox':
+ props[k] = tuple(map(float, f[1:5]))
+ print '# -*- python -*-'
+ print 'FONT_METRICS = {'
+ for (fontname,(props,chars)) in fonts.iteritems():
+ print ' %r: %r,' % (fontname, (props,chars))
+ print '}'
+ return 0
if __name__ == '__main__': sys.exit(main(sys.argv))
diff --git a/tools/dumppdf.py b/tools/dumppdf.py
index 3003555..b8fbbbf 100755
--- a/tools/dumppdf.py
+++ b/tools/dumppdf.py
@@ -13,173 +13,173 @@ from pdfminer.pdftypes import PDFStream, PDFObjRef, PSKeyword, PSLiteral, resolv
ESC_PAT = re.compile(r'[\000-\037&<>()\042\047\134\177-\377]')
def esc(s):
- return ESC_PAT.sub(lambda m:'%d;' % ord(m.group(0)), s)
+ return ESC_PAT.sub(lambda m:'%d;' % ord(m.group(0)), s)
# dumpxml
def dumpxml(out, obj, codec=None):
- if isinstance(obj, dict):
- out.write('\n' % len(obj))
- for (k,v) in obj.iteritems():
- out.write('%s\n' % k)
- out.write('')
- dumpxml(out, v)
- out.write('\n')
- out.write('')
- return
+ if isinstance(obj, dict):
+ out.write('\n' % len(obj))
+ for (k,v) in obj.iteritems():
+ out.write('%s\n' % k)
+ out.write('')
+ dumpxml(out, v)
+ out.write('\n')
+ out.write('')
+ return
- if isinstance(obj, list):
- out.write('\n' % len(obj))
- for v in obj:
- dumpxml(out, v)
- out.write('\n')
- out.write('
')
- return
+ if isinstance(obj, list):
+ out.write('\n' % len(obj))
+ for v in obj:
+ dumpxml(out, v)
+ out.write('\n')
+ out.write('
')
+ return
- if isinstance(obj, str):
- out.write('%s' % (len(obj), esc(obj)))
- return
+ if isinstance(obj, str):
+ out.write('%s' % (len(obj), esc(obj)))
+ return
- if isinstance(obj, PDFStream):
- out.write('\n\n')
- dumpxml(out, obj.dic)
- out.write('\n\n')
- if codec == 'text':
- data = obj.get_data()
- out.write('%s\n' % (len(data), esc(data)))
- out.write('')
- return
+ if isinstance(obj, PDFStream):
+ out.write('\n\n')
+ dumpxml(out, obj.dic)
+ out.write('\n\n')
+ if codec == 'text':
+ data = obj.get_data()
+ out.write('%s\n' % (len(data), esc(data)))
+ out.write('')
+ return
- if isinstance(obj, PDFObjRef):
- out.write('' % obj.objid)
- return
+ if isinstance(obj, PDFObjRef):
+ out.write('' % obj.objid)
+ return
- if isinstance(obj, PSKeyword):
- out.write('%s' % obj.name)
- return
+ if isinstance(obj, PSKeyword):
+ out.write('%s' % obj.name)
+ return
- if isinstance(obj, PSLiteral):
- out.write('%s' % obj.name)
- return
+ if isinstance(obj, PSLiteral):
+ out.write('%s' % obj.name)
+ return
- if isinstance(obj, int) or isinstance(obj, float):
- out.write('%s' % obj)
- return
+ if isinstance(obj, int) or isinstance(obj, float):
+ out.write('%s' % obj)
+ return
- raise TypeError(obj)
+ raise TypeError(obj)
# dumptrailers
def dumptrailers(out, doc):
- for xref in doc.xrefs:
- out.write('\n')
- dumpxml(out, xref.trailer)
- out.write('\n\n\n')
- return
+ for xref in doc.xrefs:
+ out.write('\n')
+ dumpxml(out, xref.trailer)
+ out.write('\n\n\n')
+ return
# dumpallobjs
def dumpallobjs(out, doc, codec=None):
- out.write('')
- for xref in doc.xrefs:
- for objid in xref.objids():
- try:
- obj = doc.getobj(objid)
- if obj == None: continue
- out.write('\n\n')
- except:
- raise
- dumptrailers(out, doc)
- out.write('')
- return
+ out.write('')
+ for xref in doc.xrefs:
+ for objid in xref.objids():
+ try:
+ obj = doc.getobj(objid)
+ if obj == None: continue
+ out.write('\n\n')
+ except:
+ raise
+ dumptrailers(out, doc)
+ out.write('')
+ return
# dumpoutline
def dumpoutline(outfp, fname, objids, pagenos, password='',
dumpall=False, codec=None):
- doc = PDFDocument()
- fp = file(fname, 'rb')
- parser = PDFParser(doc, fp)
- doc.initialize(password)
- pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) )
- for (level,title,dest,a,se) in doc.get_outlines():
- pageno = None
- if dest:
- dest = resolve1( doc.lookup_name('Dests', dest) )
- if isinstance(dest, dict):
- dest = dest['D']
- pageno = pages[dest[0].objid]
- outfp.write(repr((level,title,dest,pageno))+'\n')
- parser.close()
- fp.close()
- return
+ doc = PDFDocument()
+ fp = file(fname, 'rb')
+ parser = PDFParser(doc, fp)
+ doc.initialize(password)
+ pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) )
+ for (level,title,dest,a,se) in doc.get_outlines():
+ pageno = None
+ if dest:
+ dest = resolve1( doc.lookup_name('Dests', dest) )
+ if isinstance(dest, dict):
+ dest = dest['D']
+ pageno = pages[dest[0].objid]
+ outfp.write(repr((level,title,dest,pageno))+'\n')
+ parser.close()
+ fp.close()
+ return
# dumppdf
def dumppdf(outfp, fname, objids, pagenos, password='',
dumpall=False, codec=None):
- doc = PDFDocument()
- fp = file(fname, 'rb')
- parser = PDFParser(doc, fp)
- doc.initialize(password)
- if objids:
- for objid in objids:
- obj = doc.getobj(objid)
- if isinstance(obj, PDFStream) and codec == 'raw':
- outfp.write(obj.get_rawdata())
- elif isinstance(obj, PDFStream) and codec == 'binary':
- outfp.write(obj.get_data())
- else:
- dumpxml(outfp, obj, codec=codec)
- if pagenos:
- for (pageno,page) in enumerate(doc.get_pages()):
- if pageno in pagenos:
- dumpxml(outfp, page.attrs)
- if dumpall:
- dumpallobjs(outfp, doc, codec=codec)
- if (not objids) and (not pagenos) and (not dumpall):
- dumptrailers(outfp, doc)
- fp.close()
- if codec not in ('raw','binary'):
- outfp.write('\n')
- return
+ doc = PDFDocument()
+ fp = file(fname, 'rb')
+ parser = PDFParser(doc, fp)
+ doc.initialize(password)
+ if objids:
+ for objid in objids:
+ obj = doc.getobj(objid)
+ if isinstance(obj, PDFStream) and codec == 'raw':
+ outfp.write(obj.get_rawdata())
+ elif isinstance(obj, PDFStream) and codec == 'binary':
+ outfp.write(obj.get_data())
+ else:
+ dumpxml(outfp, obj, codec=codec)
+ if pagenos:
+ for (pageno,page) in enumerate(doc.get_pages()):
+ if pageno in pagenos:
+ dumpxml(outfp, page.attrs)
+ if dumpall:
+ dumpallobjs(outfp, doc, codec=codec)
+ if (not objids) and (not pagenos) and (not dumpall):
+ dumptrailers(outfp, doc)
+ fp.close()
+ if codec not in ('raw','binary'):
+ outfp.write('\n')
+ return
# main
def main(argv):
- import getopt
- def usage():
- print 'usage: %s [-d] [-a] [-p pageid] [-P password] [-r|-b|-t] [-T] [-i objid] file ...' % argv[0]
- return 100
- try:
- (opts, args) = getopt.getopt(argv[1:], 'dap:P:rbtTi:')
- except getopt.GetoptError:
- return usage()
- if not args: return usage()
- debug = 0
- objids = []
- pagenos = set()
- codec = None
- password = ''
- dumpall = False
- proc = dumppdf
- outfp = sys.stdout
- for (k, v) in opts:
- if k == '-d': debug += 1
- elif k == '-i': objids.extend( int(x) for x in v.split(',') )
- elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
- elif k == '-P': password = v
- elif k == '-a': dumpall = True
- elif k == '-r': codec = 'raw'
- elif k == '-b': codec = 'binary'
- elif k == '-t': codec = 'text'
- elif k == '-T': proc = dumpoutline
- elif k == '-o': outfp = file(v, 'wb')
- #
- PDFDocument.debug = debug
- PDFParser.debug = debug
- #
- for fname in args:
- proc(outfp, fname, objids, pagenos, password=password,
- dumpall=dumpall, codec=codec)
- return
+ import getopt
+ def usage():
+ print 'usage: %s [-d] [-a] [-p pageid] [-P password] [-r|-b|-t] [-T] [-i objid] file ...' % argv[0]
+ return 100
+ try:
+ (opts, args) = getopt.getopt(argv[1:], 'dap:P:rbtTi:')
+ except getopt.GetoptError:
+ return usage()
+ if not args: return usage()
+ debug = 0
+ objids = []
+ pagenos = set()
+ codec = None
+ password = ''
+ dumpall = False
+ proc = dumppdf
+ outfp = sys.stdout
+ for (k, v) in opts:
+ if k == '-d': debug += 1
+ elif k == '-i': objids.extend( int(x) for x in v.split(',') )
+ elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
+ elif k == '-P': password = v
+ elif k == '-a': dumpall = True
+ elif k == '-r': codec = 'raw'
+ elif k == '-b': codec = 'binary'
+ elif k == '-t': codec = 'text'
+ elif k == '-T': proc = dumpoutline
+ elif k == '-o': outfp = file(v, 'wb')
+ #
+ PDFDocument.debug = debug
+ PDFParser.debug = debug
+ #
+ for fname in args:
+ proc(outfp, fname, objids, pagenos, password=password,
+ dumpall=dumpall, codec=codec)
+ return
if __name__ == '__main__': sys.exit(main(sys.argv))
diff --git a/tools/pdf2html.cgi b/tools/pdf2html.cgi
index 15f9ecc..68de31c 100755
--- a/tools/pdf2html.cgi
+++ b/tools/pdf2html.cgi
@@ -12,7 +12,7 @@
# $ mkdir CGIDIR
# $ mkdir CGIDIR/var
# $ cp -a pdfminer/pdflib CGIDIR
-# $ PYTHONPATH=CGIDIR pdfminer/tools/pdf2html.cgi
+# $ PYTHONPATH=CGIDIR pdfminer/tools/pdf2html.cgi
#
import sys
@@ -27,16 +27,16 @@ from pdfminer.cmap import CMapDB
# quote HTML metacharacters
def q(x):
- return x.replace('&','&').replace('>','>').replace('<','<').replace('"','"')
+ return x.replace('&','&').replace('>','>').replace('<','<').replace('"','"')
# encode parameters as a URL
Q = re.compile(r'[^a-zA-Z0-9_.-=]')
def url(base, **kw):
- r = []
- for (k,v) in kw.iteritems():
- v = Q.sub(lambda m: '%%%02X' % ord(m.group(0)), encoder(q(v), 'replace')[0])
- r.append('%s=%s' % (k, v))
- return base+'&'.join(r)
+ r = []
+ for (k,v) in kw.iteritems():
+ v = Q.sub(lambda m: '%%%02X' % ord(m.group(0)), encoder(q(v), 'replace')[0])
+ r.append('%s=%s' % (k, v))
+ return base+'&'.join(r)
## convert
@@ -44,156 +44,156 @@ def url(base, **kw):
class FileSizeExceeded(ValueError): pass
def convert(outfp, infp, path, codec='utf-8', maxpages=10,
maxfilesize=5000000, pagenos=None, html=True):
- # save the input file.
- src = file(path, 'wb')
- nbytes = 0
- while 1:
- data = infp.read(4096)
- nbytes += len(data)
- if maxfilesize and maxfilesize < nbytes:
- raise FileSizeExceeded(maxfilesize)
- if not data: break
- src.write(data)
- src.close()
- infp.close()
- # perform conversion and
- # send the results over the network.
- CMapDB.initialize()
- rsrc = PDFResourceManager()
- laparams = LAParams()
- if html:
- device = HTMLConverter(rsrc, outfp, codec=codec, laparams=laparams)
- else:
- device = TextConverter(rsrc, outfp, codec=codec, laparams=laparams)
- fp = file(path, 'rb')
- process_pdf(rsrc, device, fp, pagenos, maxpages=maxpages)
- fp.close()
- return
+ # save the input file.
+ src = file(path, 'wb')
+ nbytes = 0
+ while 1:
+ data = infp.read(4096)
+ nbytes += len(data)
+ if maxfilesize and maxfilesize < nbytes:
+ raise FileSizeExceeded(maxfilesize)
+ if not data: break
+ src.write(data)
+ src.close()
+ infp.close()
+ # perform conversion and
+ # send the results over the network.
+ CMapDB.initialize()
+ rsrc = PDFResourceManager()
+ laparams = LAParams()
+ if html:
+ device = HTMLConverter(rsrc, outfp, codec=codec, laparams=laparams)
+ else:
+ device = TextConverter(rsrc, outfp, codec=codec, laparams=laparams)
+ fp = file(path, 'rb')
+ process_pdf(rsrc, device, fp, pagenos, maxpages=maxpages)
+ fp.close()
+ return
## PDF2HTMLApp
##
class PDF2HTMLApp(object):
- APPURL = '/convert'
- TMPDIR = './var/'
- LOGPATH = './var/log'
- MAXFILESIZE = 5000000
- MAXPAGES = 10
-
- def __init__(self, outfp, logpath=LOGPATH, loglevel=logging.DEBUG, codec='utf-8'):
- self.outfp = outfp
- self.codec = codec
- logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
- level=loglevel, filename=logpath, filemode='a')
- self.remote_addr = os.environ.get('REMOTE_ADDR')
- self.path_info = os.environ.get('PATH_INFO')
- self.method = os.environ.get('REQUEST_METHOD', 'GET')
- self.server = os.environ.get('SERVER_SOFTWARE', '')
- self.content_type = 'text/html; charset=%s' % codec
- self.cur_time = time.time()
- self.form = cgi.FieldStorage()
- return
+ APPURL = '/convert'
+ TMPDIR = './var/'
+ LOGPATH = './var/log'
+ MAXFILESIZE = 5000000
+ MAXPAGES = 10
- def put(self, *args):
- for x in args:
- if isinstance(x, str):
- self.outfp.write(x)
- elif isinstance(x, unicode):
- self.outfp.write(x.encode(self.codec, 'xmlcharrefreplace'))
- return
+ def __init__(self, outfp, logpath=LOGPATH, loglevel=logging.DEBUG, codec='utf-8'):
+ self.outfp = outfp
+ self.codec = codec
+ logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+ level=loglevel, filename=logpath, filemode='a')
+ self.remote_addr = os.environ.get('REMOTE_ADDR')
+ self.path_info = os.environ.get('PATH_INFO')
+ self.method = os.environ.get('REQUEST_METHOD', 'GET')
+ self.server = os.environ.get('SERVER_SOFTWARE', '')
+ self.content_type = 'text/html; charset=%s' % codec
+ self.cur_time = time.time()
+ self.form = cgi.FieldStorage()
+ return
- def http_200(self):
- if self.server.startswith('cgi-httpd'):
- # required for cgi-httpd
- self.outfp.write('HTTP/1.0 200 OK\r\n')
- self.outfp.write('Content-type: %s\r\n' % self.content_type)
- self.outfp.write('Connection: close\r\n\r\n')
- return
-
- def http_404(self):
- if self.server.startswith('cgi-httpd'):
- # required for cgi-httpd
- self.outfp.write('HTTP/1.0 404 Not Found\r\n')
- self.outfp.write('Content-type: text/html\r\n')
- self.outfp.write('Connection: close\r\n\r\n')
- self.outfp.write('page does not exist\n')
- return
-
- def http_301(self, url):
- if self.server.startswith('cgi-httpd'):
- # required for cgi-httpd
- self.outfp.write('HTTP/1.0 301 Moved\r\n')
- self.outfp.write('Location: %s\r\n\r\n' % url)
- return
+ def put(self, *args):
+ for x in args:
+ if isinstance(x, str):
+ self.outfp.write(x)
+ elif isinstance(x, unicode):
+ self.outfp.write(x.encode(self.codec, 'xmlcharrefreplace'))
+ return
- def coverpage(self):
- self.put(
- 'pdf2html demo\n',
- 'pdf2html demo
\n',
- '
\n',
- 'Powered by PDFMiner\n',
- '\n',
- )
- return
+ def http_200(self):
+ if self.server.startswith('cgi-httpd'):
+ # required for cgi-httpd
+ self.outfp.write('HTTP/1.0 200 OK\r\n')
+ self.outfp.write('Content-type: %s\r\n' % self.content_type)
+ self.outfp.write('Connection: close\r\n\r\n')
+ return
- def run(self, argv):
- if self.path_info == '/':
- self.http_200()
- self.coverpage()
- return
- if self.path_info != self.APPURL:
- self.http_404()
- return
- if not os.path.isdir(self.TMPDIR):
- self.bummer('error')
- return
- if 'f' not in self.form:
- self.http_301('/')
- return
- if 'c' not in self.form:
- self.http_301('/')
- return
- item = self.form['f']
- if not (item.file and item.filename):
- self.http_301('/')
- return
- cmd = self.form.getvalue('c')
- html = (cmd == 'Convert to HTML')
- pagenos = []
- if 'p' in self.form:
- for m in re.finditer(r'\d+', self.form.getvalue('p')):
+ def http_404(self):
+ if self.server.startswith('cgi-httpd'):
+ # required for cgi-httpd
+ self.outfp.write('HTTP/1.0 404 Not Found\r\n')
+ self.outfp.write('Content-type: text/html\r\n')
+ self.outfp.write('Connection: close\r\n\r\n')
+ self.outfp.write('
page does not exist\n')
+ return
+
+ def http_301(self, url):
+ if self.server.startswith('cgi-httpd'):
+ # required for cgi-httpd
+ self.outfp.write('HTTP/1.0 301 Moved\r\n')
+ self.outfp.write('Location: %s\r\n\r\n' % url)
+ return
+
+ def coverpage(self):
+ self.put(
+ 'pdf2html demo\n',
+ 'pdf2html demo
\n',
+ '
\n',
+ 'Powered by PDFMiner\n',
+ '\n',
+ )
+ return
+
+ def run(self, argv):
+ if self.path_info == '/':
+ self.http_200()
+ self.coverpage()
+ return
+ if self.path_info != self.APPURL:
+ self.http_404()
+ return
+ if not os.path.isdir(self.TMPDIR):
+ self.bummer('error')
+ return
+ if 'f' not in self.form:
+ self.http_301('/')
+ return
+ if 'c' not in self.form:
+ self.http_301('/')
+ return
+ item = self.form['f']
+ if not (item.file and item.filename):
+ self.http_301('/')
+ return
+ cmd = self.form.getvalue('c')
+ html = (cmd == 'Convert to HTML')
+ pagenos = []
+ if 'p' in self.form:
+ for m in re.finditer(r'\d+', self.form.getvalue('p')):
+ try:
+ pagenos.append(int(m.group(0)))
+ except ValueError:
+ pass
+ logging.info('process: host=%s, name=%r, pagenos=%r' % (self.remote_addr, item.filename, pagenos))
+ h = abs(hash((random.random(), self.remote_addr, item.filename)))
+ tmppath = os.path.join(self.TMPDIR, '%08x%08x.pdf' % (self.cur_time, h))
try:
- pagenos.append(int(m.group(0)))
- except ValueError:
- pass
- logging.info('process: host=%s, name=%r, pagenos=%r' % (self.remote_addr, item.filename, pagenos))
- h = abs(hash((random.random(), self.remote_addr, item.filename)))
- tmppath = os.path.join(self.TMPDIR, '%08x%08x.pdf' % (self.cur_time, h))
- try:
- try:
- if not html:
- self.content_type = 'text/plain; charset=%s' % self.codec
- self.http_200()
- convert(sys.stdout, item.file, tmppath, pagenos=pagenos, codec=self.codec,
- maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html)
- except Exception, e:
- self.put('
Sorry, an error has occured: %s' % q(repr(e)))
- logging.error('error: %r: path=%r: %s' % (e, tmppath, traceback.format_exc()))
- finally:
- try:
- os.remove(tmppath)
- except:
- pass
- return
+ try:
+ if not html:
+ self.content_type = 'text/plain; charset=%s' % self.codec
+ self.http_200()
+ convert(sys.stdout, item.file, tmppath, pagenos=pagenos, codec=self.codec,
+ maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html)
+ except Exception, e:
+ self.put('
Sorry, an error has occured: %s' % q(repr(e)))
+ logging.error('error: %r: path=%r: %s' % (e, tmppath, traceback.format_exc()))
+ finally:
+ try:
+ os.remove(tmppath)
+ except:
+ pass
+ return
# main
diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py
index 006e8ce..d61676f 100755
--- a/tools/pdf2txt.py
+++ b/tools/pdf2txt.py
@@ -9,85 +9,85 @@ from pdfminer.layout import LAParams
# main
def main(argv):
- import getopt
- def usage():
- print ('usage: %s [-d] [-p pagenos] [-P password] [-c codec] '
- '[-D direction] [-M char_margin] [-L line_margin] [-W word_margin] '
- '[-t text|html|sgml|tag] [-o output] file ...' % argv[0])
- return 100
- try:
- (opts, args) = getopt.getopt(argv[1:], 'dp:P:c:D:M:L:W:t:o:C:D:m:')
- except getopt.GetoptError:
- return usage()
- if not args: return usage()
- # debug option
- debug = 0
- # path option
- cmapdir = find_cmap_path()
- # input option
- password = ''
- pagenos = set()
- maxpages = 0
- # output option
- outfile = None
- outtype = None
- codec = 'utf-8'
- pageno = 1
- scale = 1
- showpageno = True
- laparams = LAParams()
- for (k, v) in opts:
- if k == '-d': debug += 1
- elif k == '-C': cmapdir = v
- elif k == '-P': password = v
- elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
- elif k == '-m': maxpages = int(v)
- elif k == '-t': outtype = v
- elif k == '-c': codec = v
- elif k == '-o': outfile = v
- elif k == '-s': scale = float(v)
- elif k == '-D': laparams.direction = v
- elif k == '-M': laparams.char_margin = float(v)
- elif k == '-L': laparams.line_margin = float(v)
- elif k == '-W': laparams.word_margin = float(v)
- #
- CMapDB.debug = debug
- PDFResourceManager.debug = debug
- PDFDocument.debug = debug
- PDFParser.debug = debug
- PDFPageInterpreter.debug = debug
- PDFDevice.debug = debug
- #
- CMapDB.initialize(cmapdir)
- rsrc = PDFResourceManager()
- if not outtype:
- outtype = 'text'
+ import getopt
+ def usage():
+ print ('usage: %s [-d] [-p pagenos] [-P password] [-c codec] '
+ '[-D direction] [-M char_margin] [-L line_margin] [-W word_margin] '
+ '[-t text|html|sgml|tag] [-o output] file ...' % argv[0])
+ return 100
+ try:
+ (opts, args) = getopt.getopt(argv[1:], 'dp:P:c:D:M:L:W:t:o:C:D:m:')
+ except getopt.GetoptError:
+ return usage()
+ if not args: return usage()
+ # debug option
+ debug = 0
+ # path option
+ cmapdir = find_cmap_path()
+ # input option
+ password = ''
+ pagenos = set()
+ maxpages = 0
+ # output option
+ outfile = None
+ outtype = None
+ codec = 'utf-8'
+ pageno = 1
+ scale = 1
+ showpageno = True
+ laparams = LAParams()
+ for (k, v) in opts:
+ if k == '-d': debug += 1
+ elif k == '-C': cmapdir = v
+ elif k == '-P': password = v
+ elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
+ elif k == '-m': maxpages = int(v)
+ elif k == '-t': outtype = v
+ elif k == '-c': codec = v
+ elif k == '-o': outfile = v
+ elif k == '-s': scale = float(v)
+ elif k == '-D': laparams.direction = v
+ elif k == '-M': laparams.char_margin = float(v)
+ elif k == '-L': laparams.line_margin = float(v)
+ elif k == '-W': laparams.word_margin = float(v)
+ #
+ CMapDB.debug = debug
+ PDFResourceManager.debug = debug
+ PDFDocument.debug = debug
+ PDFParser.debug = debug
+ PDFPageInterpreter.debug = debug
+ PDFDevice.debug = debug
+ #
+ CMapDB.initialize(cmapdir)
+ rsrc = PDFResourceManager()
+ if not outtype:
+ outtype = 'text'
+ if outfile:
+ if outfile.endswith('.htm') or outfile.endswith('.html'):
+ outtype = 'html'
+ elif outfile.endswith('.sgml'):
+ outtype = 'sgml'
+ elif outfile.endswith('.tag'):
+ outtype = 'tag'
if outfile:
- if outfile.endswith('.htm') or outfile.endswith('.html'):
- outtype = 'html'
- elif outfile.endswith('.sgml'):
- outtype = 'sgml'
- elif outfile.endswith('.tag'):
- outtype = 'tag'
- if outfile:
- outfp = file(outfile, 'w')
- else:
- outfp = sys.stdout
- if outtype == 'text':
- device = TextConverter(rsrc, outfp, codec=codec, laparams=laparams)
- elif outtype == 'sgml':
- device = SGMLConverter(rsrc, outfp, codec=codec, laparams=laparams)
- elif outtype == 'html':
- device = HTMLConverter(rsrc, outfp, codec=codec, scale=scale, laparams=laparams)
- elif outtype == 'tag':
- device = TagExtractor(rsrc, outfp, codec=codec)
- else:
- return usage()
- for fname in args:
- fp = file(fname, 'rb')
- process_pdf(rsrc, device, fp, pagenos, maxpages=maxpages, password=password)
- fp.close()
- device.close()
- return
+ outfp = file(outfile, 'w')
+ else:
+ outfp = sys.stdout
+ if outtype == 'text':
+ device = TextConverter(rsrc, outfp, codec=codec, laparams=laparams)
+ elif outtype == 'sgml':
+ device = SGMLConverter(rsrc, outfp, codec=codec, laparams=laparams)
+ elif outtype == 'html':
+ device = HTMLConverter(rsrc, outfp, codec=codec, scale=scale, laparams=laparams)
+ elif outtype == 'tag':
+ device = TagExtractor(rsrc, outfp, codec=codec)
+ else:
+ return usage()
+ for fname in args:
+ fp = file(fname, 'rb')
+ process_pdf(rsrc, device, fp, pagenos, maxpages=maxpages, password=password)
+ fp.close()
+ device.close()
+ return
if __name__ == '__main__': sys.exit(main(sys.argv))
diff --git a/tools/prof.py b/tools/prof.py
index 4041228..ae5d596 100644
--- a/tools/prof.py
+++ b/tools/prof.py
@@ -2,29 +2,29 @@
import sys
def prof_main(argv):
- import getopt
- import hotshot, hotshot.stats
- def usage():
- print 'usage: %s module.function [args ...]' % argv[0]
- return 100
- args = argv[1:]
- if len(args) < 1: return usage()
- name = args.pop(0)
- prof = name+'.prof'
- i = name.rindex('.')
- (modname, funcname) = (name[:i], name[i+1:])
- module = __import__(modname, fromlist=1)
- func = getattr(module, funcname)
- if args:
- args.insert(0, argv[0])
- prof = hotshot.Profile(prof)
- prof.runcall(lambda : func(args))
- prof.close()
- else:
- stats = hotshot.stats.load(prof)
- stats.strip_dirs()
- stats.sort_stats('time', 'calls')
- stats.print_stats(1000)
- return
-
+ import getopt
+ import hotshot, hotshot.stats
+ def usage():
+ print 'usage: %s module.function [args ...]' % argv[0]
+ return 100
+ args = argv[1:]
+ if len(args) < 1: return usage()
+ name = args.pop(0)
+ prof = name+'.prof'
+ i = name.rindex('.')
+ (modname, funcname) = (name[:i], name[i+1:])
+ module = __import__(modname, fromlist=1)
+ func = getattr(module, funcname)
+ if args:
+ args.insert(0, argv[0])
+ prof = hotshot.Profile(prof)
+ prof.runcall(lambda : func(args))
+ prof.close()
+ else:
+ stats = hotshot.stats.load(prof)
+ stats.strip_dirs()
+ stats.sort_stats('time', 'calls')
+ stats.print_stats(1000)
+ return
+
if __name__ == '__main__': sys.exit(prof_main(sys.argv))