canonicalize package names

pull/1/head
Yusuke Shinyama 2011-03-02 23:43:03 +09:00
parent bb26cf9180
commit 18e782f330
8 changed files with 77 additions and 81 deletions

View File

@@ -18,7 +18,7 @@ import os.path
import gzip
import cPickle as pickle
import cmap
from struct import pack, unpack
import struct
from psparser import PSStackParser
from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF
from psparser import PSLiteral, PSKeyword
@@ -98,7 +98,7 @@ class IdentityCMap(object):
def decode(self, code):
n = len(code)/2
if n:
return unpack('>%dH' % n, code)
return struct.unpack('>%dH' % n, code)
else:
return ()
@@ -348,7 +348,7 @@ class CMapParser(PSStackParser):
vlen = len(svar)
#assert s1 <= e1
for i in xrange(e1-s1+1):
x = sprefix+pack('>L',s1+i)[-vlen:]
x = sprefix+struct.pack('>L',s1+i)[-vlen:]
self.cmap.add_code2cid(x, cid+i)
return
@@ -382,7 +382,7 @@ class CMapParser(PSStackParser):
prefix = code[:-4]
vlen = len(var)
for i in xrange(e1-s1+1):
x = prefix+pack('>L',base+i)[-vlen:]
x = prefix+struct.pack('>L',base+i)[-vlen:]
self.cmap.add_cid2unichr(s1+i, x)
return

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python2
import sys
from sys import stderr
try:
from cStringIO import StringIO
except ImportError:
@@ -84,8 +83,8 @@ class LZWDecoder(object):
x = self.feed(code)
yield x
if self.debug:
print >>stderr, ('nbits=%d, code=%d, output=%r, table=%r' %
(self.nbits, code, x, self.table[258:]))
print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' %
(self.nbits, code, x, self.table[258:]))
return
# lzwdecode

View File

@@ -1,12 +1,12 @@
#!/usr/bin/env python2
import sys
import struct
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from cmapdb import CMapDB, CMapParser, FileUnicodeMap, CMap
from encodingdb import EncodingDB, name2unicode
from struct import pack, unpack
from psparser import PSStackParser
from psparser import PSSyntaxError, PSEOF
from psparser import LIT, KWD, STRICT
@@ -154,7 +154,7 @@ def getdict(data):
if b0 == 28:
value = b1<<8 | b2
else:
value = b1<<24 | b2<<16 | unpack('>H', fp.read(2))[0]
value = b1<<24 | b2<<16 | struct.unpack('>H', fp.read(2))[0]
stack.append(value)
return d
@@ -246,7 +246,7 @@ class CFFFont(object):
def __init__(self, fp):
self.fp = fp
self.offsets = []
(count, offsize) = unpack('>HB', self.fp.read(3))
(count, offsize) = struct.unpack('>HB', self.fp.read(3))
for i in xrange(count+1):
self.offsets.append(nunpack(self.fp.read(offsize)))
self.base = self.fp.tell()-1
@@ -270,7 +270,7 @@ class CFFFont(object):
self.name = name
self.fp = fp
# Header
(_major,_minor,hdrsize,offsize) = unpack('BBBB', self.fp.read(4))
(_major,_minor,hdrsize,offsize) = struct.unpack('BBBB', self.fp.read(4))
self.fp.read(hdrsize-4)
# Name INDEX
self.name_index = self.INDEX(self.fp)
@@ -296,16 +296,16 @@ class CFFFont(object):
format = self.fp.read(1)
if format == '\x00':
# Format 0
(n,) = unpack('B', self.fp.read(1))
for (code,gid) in enumerate(unpack('B'*n, self.fp.read(n))):
(n,) = struct.unpack('B', self.fp.read(1))
for (code,gid) in enumerate(struct.unpack('B'*n, self.fp.read(n))):
self.code2gid[code] = gid
self.gid2code[gid] = code
elif format == '\x01':
# Format 1
(n,) = unpack('B', self.fp.read(1))
(n,) = struct.unpack('B', self.fp.read(1))
code = 0
for i in xrange(n):
(first,nleft) = unpack('BB', self.fp.read(2))
(first,nleft) = struct.unpack('BB', self.fp.read(2))
for gid in xrange(first,first+nleft+1):
self.code2gid[code] = gid
self.gid2code[gid] = code
@@ -320,17 +320,17 @@ class CFFFont(object):
if format == '\x00':
# Format 0
n = self.nglyphs-1
for (gid,sid) in enumerate(unpack('>'+'H'*n, self.fp.read(2*n))):
for (gid,sid) in enumerate(struct.unpack('>'+'H'*n, self.fp.read(2*n))):
gid += 1
name = self.getstr(sid)
self.name2gid[name] = gid
self.gid2name[gid] = name
elif format == '\x01':
# Format 1
(n,) = unpack('B', self.fp.read(1))
(n,) = struct.unpack('B', self.fp.read(1))
sid = 0
for i in xrange(n):
(first,nleft) = unpack('BB', self.fp.read(2))
(first,nleft) = struct.unpack('BB', self.fp.read(2))
for gid in xrange(first,first+nleft+1):
name = self.getstr(sid)
self.name2gid[name] = gid
@@ -363,9 +363,9 @@ class TrueTypeFont(object):
self.fp = fp
self.tables = {}
self.fonttype = fp.read(4)
(ntables, _1, _2, _3) = unpack('>HHHH', fp.read(8))
(ntables, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8))
for _ in xrange(ntables):
(name, tsum, offset, length) = unpack('>4sLLL', fp.read(16))
(name, tsum, offset, length) = struct.unpack('>4sLLL', fp.read(16))
self.tables[name] = (offset, length)
return
@@ -375,50 +375,50 @@ class TrueTypeFont(object):
(base_offset, length) = self.tables['cmap']
fp = self.fp
fp.seek(base_offset)
(version, nsubtables) = unpack('>HH', fp.read(4))
(version, nsubtables) = struct.unpack('>HH', fp.read(4))
subtables = []
for i in xrange(nsubtables):
subtables.append(unpack('>HHL', fp.read(8)))
subtables.append(struct.unpack('>HHL', fp.read(8)))
char2gid = {}
# Only supports subtable type 0, 2 and 4.
for (_1, _2, st_offset) in subtables:
fp.seek(base_offset+st_offset)
(fmttype, fmtlen, fmtlang) = unpack('>HHH', fp.read(6))
(fmttype, fmtlen, fmtlang) = struct.unpack('>HHH', fp.read(6))
if fmttype == 0:
char2gid.update(enumerate(unpack('>256B', fp.read(256))))
char2gid.update(enumerate(struct.unpack('>256B', fp.read(256))))
elif fmttype == 2:
subheaderkeys = unpack('>256H', fp.read(512))
subheaderkeys = struct.unpack('>256H', fp.read(512))
firstbytes = [0]*8192
for (i,k) in enumerate(subheaderkeys):
firstbytes[k/8] = i
nhdrs = max(subheaderkeys)/8 + 1
hdrs = []
for i in xrange(nhdrs):
(firstcode,entcount,delta,offset) = unpack('>HHhH', fp.read(8))
(firstcode,entcount,delta,offset) = struct.unpack('>HHhH', fp.read(8))
hdrs.append((i,firstcode,entcount,delta,fp.tell()-2+offset))
for (i,firstcode,entcount,delta,pos) in hdrs:
if not entcount: continue
first = firstcode + (firstbytes[i] << 8)
fp.seek(pos)
for c in xrange(entcount):
gid = unpack('>H', fp.read(2))
gid = struct.unpack('>H', fp.read(2))
if gid:
gid += delta
char2gid[first+c] = gid
elif fmttype == 4:
(segcount, _1, _2, _3) = unpack('>HHHH', fp.read(8))
(segcount, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8))
segcount /= 2
ecs = unpack('>%dH' % segcount, fp.read(2*segcount))
ecs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
fp.read(2)
scs = unpack('>%dH' % segcount, fp.read(2*segcount))
idds = unpack('>%dh' % segcount, fp.read(2*segcount))
scs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
idds = struct.unpack('>%dh' % segcount, fp.read(2*segcount))
pos = fp.tell()
idrs = unpack('>%dH' % segcount, fp.read(2*segcount))
idrs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
for (ec,sc,idd,idr) in zip(ecs, scs, idds, idrs):
if idr:
fp.seek(pos+idr)
for c in xrange(sc, ec+1):
char2gid[c] = (unpack('>H', fp.read(2))[0] + idd) & 0xffff
char2gid[c] = (struct.unpack('>H', fp.read(2))[0] + idd) & 0xffff
else:
for c in xrange(sc, ec+1):
char2gid[c] = (c + idd) & 0xffff

View File

@@ -1,7 +1,6 @@
#!/usr/bin/env python2
import sys
import re
from sys import stderr
from struct import pack, unpack
try:
from cStringIO import StringIO
except ImportError:
@@ -159,7 +158,7 @@ class PDFResourceManager(object):
font = self.fonts[objid]
else:
if 2 <= self.debug:
print >>stderr, 'get_font: create: objid=%r, spec=%r' % (objid, spec)
print >>sys.stderr, 'get_font: create: objid=%r, spec=%r' % (objid, spec)
if STRICT:
if spec['Type'] is not LITERAL_FONT:
raise PDFFontError('Type is not /Font')
@@ -329,7 +328,7 @@ class PDFPageInterpreter(object):
return PREDEFINED_COLORSPACE[name]
for (k,v) in dict_value(resources).iteritems():
if 2 <= self.debug:
print >>stderr, 'Resource: %r: %r' % (k,v)
print >>sys.stderr, 'Resource: %r: %r' % (k,v)
if k == 'Font':
for (fontid,spec) in dict_value(v).iteritems():
objid = None
@@ -649,7 +648,7 @@ class PDFPageInterpreter(object):
(a,b,c,d,e,f) = self.textstate.matrix
self.textstate.matrix = (a,b,c,d,tx*a+ty*c+e,tx*b+ty*d+f)
self.textstate.linematrix = (0, 0)
#print >>stderr, 'Td(%r,%r): %r' % (tx,ty,self.textstate)
#print >>sys.stderr, 'Td(%r,%r): %r' % (tx,ty,self.textstate)
return
# text-move
def do_TD(self, tx, ty):
@@ -657,7 +656,7 @@ class PDFPageInterpreter(object):
self.textstate.matrix = (a,b,c,d,tx*a+ty*c+e,tx*b+ty*d+f)
self.textstate.leading = ty
self.textstate.linematrix = (0, 0)
#print >>stderr, 'TD(%r,%r): %r' % (tx,ty,self.textstate)
#print >>sys.stderr, 'TD(%r,%r): %r' % (tx,ty,self.textstate)
return
# textmatrix
def do_Tm(self, a,b,c,d,e,f):
@@ -673,7 +672,7 @@ class PDFPageInterpreter(object):
# show-pos
def do_TJ(self, seq):
#print >>stderr, 'TJ(%r): %r' % (seq,self.textstate)
#print >>sys.stderr, 'TJ(%r): %r' % (seq,self.textstate)
if self.textstate.font is None:
if STRICT:
raise PDFInterpreterError('No font specified!')
@@ -719,7 +718,7 @@ class PDFPageInterpreter(object):
raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
return
if 1 <= self.debug:
print >>stderr, 'Processing xobj: %r' % xobj
print >>sys.stderr, 'Processing xobj: %r' % xobj
subtype = xobj.get('Subtype')
if subtype is LITERAL_FORM and 'BBox' in xobj:
interpreter = self.dup()
@@ -743,7 +742,7 @@ class PDFPageInterpreter(object):
def process_page(self, page):
if 1 <= self.debug:
print >>stderr, 'Processing page: %r' % page
print >>sys.stderr, 'Processing page: %r' % page
(x0,y0,x1,y1) = page.mediabox
if page.rotate == 90:
ctm = (0,-1,1,0, -y0,x1)
@@ -763,7 +762,7 @@ class PDFPageInterpreter(object):
# This method may be called recursively.
def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY):
if 1 <= self.debug:
print >>stderr, ('render_contents: resources=%r, streams=%r, ctm=%r' %
print >>sys.stderr, ('render_contents: resources=%r, streams=%r, ctm=%r' %
(resources, streams, ctm))
self.init_resources(resources)
self.init_state(ctm)
@@ -790,12 +789,12 @@ class PDFPageInterpreter(object):
if nargs:
args = self.pop(nargs)
if 2 <= self.debug:
print >>stderr, 'exec: %s %r' % (name, args)
print >>sys.stderr, 'exec: %s %r' % (name, args)
if len(args) == nargs:
func(*args)
else:
if 2 <= self.debug:
print >>stderr, 'exec: %s' % (name)
print >>sys.stderr, 'exec: %s' % (name)
func()
else:
if STRICT:

View File

@@ -2,7 +2,6 @@
import sys
import re
import struct
from sys import stderr
try:
import hashlib as md5
except ImportError:
@@ -96,7 +95,7 @@ class PDFXRef(PDFBaseXRef):
if use != 'n': continue
self.offsets[objid] = (int(genno), long(pos))
if 1 <= debug:
print >>stderr, 'xref objects:', self.offsets
print >>sys.stderr, 'xref objects:', self.offsets
self.load_trailer(parser)
return
@@ -126,7 +125,7 @@ class PDFXRef(PDFBaseXRef):
parser.seek(pos)
self.load_trailer(parser)
if 1 <= debug:
print >>stderr, 'trailer: %r' % self.get_trailer()
print >>sys.stderr, 'trailer: %r' % self.get_trailer()
break
m = self.PDFOBJ_CUE.match(line)
if not m: continue
@@ -180,7 +179,7 @@ class PDFXRefStream(PDFBaseXRef):
self.entlen = self.fl1+self.fl2+self.fl3
self.trailer = stream.attrs
if 1 <= debug:
print >>stderr, ('xref stream: objid=%s, fields=%d,%d,%d' %
print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' %
(', '.join(map(repr, self.objid_ranges)),
self.fl1, self.fl2, self.fl3))
return
@@ -408,7 +407,7 @@ class PDFDocument(object):
if not self.xrefs:
raise PDFException('PDFDocument is not initialized')
if 2 <= self.debug:
print >>stderr, 'getobj: objid=%r' % (objid)
print >>sys.stderr, 'getobj: objid=%r' % (objid)
if objid in self.objs:
genno = 0
obj = self.objs[objid]
@@ -481,7 +480,7 @@ class PDFDocument(object):
except PSEOF:
return None
if 2 <= self.debug:
print >>stderr, 'register: objid=%r: %r' % (objid, obj)
print >>sys.stderr, 'register: objid=%r: %r' % (objid, obj)
self.objs[objid] = obj
if self.decipher:
obj = decipher_all(self.decipher, objid, genno, obj)
@@ -503,13 +502,13 @@ class PDFDocument(object):
tree[k] = v
if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree:
if 1 <= self.debug:
print >>stderr, 'Pages: Kids=%r' % tree['Kids']
print >>sys.stderr, 'Pages: Kids=%r' % tree['Kids']
for c in list_value(tree['Kids']):
for x in search(c, tree):
yield x
elif tree.get('Type') is LITERAL_PAGE:
if 1 <= self.debug:
print >>stderr, 'Page: %r' % tree
print >>sys.stderr, 'Page: %r' % tree
yield (objid, tree)
if 'Pages' not in self.catalog: return
for (pageid,tree) in search(self.catalog['Pages'], self.catalog):
@@ -672,7 +671,7 @@ class PDFParser(PSStackParser):
self.seek(pos+objlen)
# XXX limit objlen not to exceed object boundary
if 2 <= self.debug:
print >>stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
print >>sys.stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
(pos, objlen, dic, data[:10])
obj = PDFStream(dic, data, self.doc.decipher)
self.push((pos, obj))
@@ -690,14 +689,14 @@ class PDFParser(PSStackParser):
for line in self.revreadlines():
line = line.strip()
if 2 <= self.debug:
print >>stderr, 'find_xref: %r' % line
print >>sys.stderr, 'find_xref: %r' % line
if line == 'startxref': break
if line:
prev = line
else:
raise PDFNoValidXRef('Unexpected EOF')
if 1 <= self.debug:
print >>stderr, 'xref found: pos=%r' % prev
print >>sys.stderr, 'xref found: pos=%r' % prev
return long(prev)
# read xref table
@@ -710,7 +709,7 @@ class PDFParser(PSStackParser):
except PSEOF:
raise PDFNoValidXRef('Unexpected EOF')
if 2 <= self.debug:
print >>stderr, 'read_xref_from: start=%d, token=%r' % (start, token)
print >>sys.stderr, 'read_xref_from: start=%d, token=%r' % (start, token)
if isinstance(token, int):
# XRefStream: PDF-1.5
self.seek(pos)
@@ -725,7 +724,7 @@ class PDFParser(PSStackParser):
xrefs.append(xref)
trailer = xref.get_trailer()
if 1 <= self.debug:
print >>stderr, 'trailer: %r' % trailer
print >>sys.stderr, 'trailer: %r' % trailer
if 'XRefStm' in trailer:
pos = int_value(trailer['XRefStm'])
self.read_xref_from(pos, xrefs)
@@ -745,7 +744,7 @@ class PDFParser(PSStackParser):
except PDFNoValidXRef:
# fallback
if 1 <= self.debug:
print >>stderr, 'no xref, fallback'
print >>sys.stderr, 'no xref, fallback'
self.fallback = True
xref = PDFXRef()
xref.load_fallback(self)

View File

@@ -1,7 +1,6 @@
#!/usr/bin/env python2
import sys
import re
from sys import stderr
from utils import choplist
STRICT = 0
@@ -169,7 +168,7 @@ class PSBaseParser(object):
if not pos:
pos = self.bufpos+self.charpos
self.fp.seek(pos)
print >>stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
self.fp.seek(pos0)
return
@@ -177,7 +176,7 @@ class PSBaseParser(object):
"""Seeks the parser to the given position.
"""
if 2 <= self.debug:
print >>stderr, 'seek: %r' % pos
print >>sys.stderr, 'seek: %r' % pos
self.fp.seek(pos)
# reset the status for nextline()
self.bufpos = pos
@@ -227,7 +226,7 @@ class PSBaseParser(object):
linebuf += self.buf[self.charpos:]
self.charpos = len(self.buf)
if 2 <= self.debug:
print >>stderr, 'nextline: %r' % ((linepos, linebuf),)
print >>sys.stderr, 'nextline: %r' % ((linepos, linebuf),)
return (linepos, linebuf)
def revreadlines(self):
@@ -466,7 +465,7 @@ class PSBaseParser(object):
self.charpos = self._parse1(self.buf, self.charpos)
token = self._tokens.pop(0)
if 2 <= self.debug:
print >>stderr, 'nexttoken: %r' % (token,)
print >>sys.stderr, 'nexttoken: %r' % (token,)
return token
@@ -507,7 +506,7 @@ class PSStackParser(PSBaseParser):
def add_results(self, *objs):
if 2 <= self.debug:
print >>stderr, 'add_results: %r' % (objs,)
print >>sys.stderr, 'add_results: %r' % (objs,)
self.results.extend(objs)
return
@@ -515,7 +514,7 @@ class PSStackParser(PSBaseParser):
self.context.append((pos, self.curtype, self.curstack))
(self.curtype, self.curstack) = (type, [])
if 2 <= self.debug:
print >>stderr, 'start_type: pos=%r, type=%r' % (pos, type)
print >>sys.stderr, 'start_type: pos=%r, type=%r' % (pos, type)
return
def end_type(self, type):
@@ -524,7 +523,7 @@ class PSStackParser(PSBaseParser):
objs = [ obj for (_,obj) in self.curstack ]
(pos, self.curtype, self.curstack) = self.context.pop()
if 2 <= self.debug:
print >>stderr, 'end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs)
print >>sys.stderr, 'end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs)
return (pos, objs)
def do_keyword(self, pos, token):
@@ -580,7 +579,7 @@ class PSStackParser(PSBaseParser):
if STRICT: raise
else:
if 2 <= self.debug:
print >>stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \
print >>sys.stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \
(pos, token, self.curstack)
self.do_keyword(pos, token)
if self.context:
@@ -589,7 +588,7 @@ class PSStackParser(PSBaseParser):
self.flush()
obj = self.results.pop(0)
if 2 <= self.debug:
print >>stderr, 'nextobject: %r' % (obj,)
print >>sys.stderr, 'nextobject: %r' % (obj,)
return obj

View File

@@ -11,7 +11,7 @@ by Philip J. Erdelsky:
"""
import sys
from struct import pack, unpack
import struct
def KEYLENGTH(keybits): return (keybits)/8
def RKLENGTH(keybits): return (keybits)/8+28
@@ -694,14 +694,14 @@ rcon = [
# 128-bit blocks, Rijndael never uses more than 10 rcon values
]
if len(pack('L',0)) == 4:
if len(struct.pack('L',0)) == 4:
# 32bit
def GETU32(x): return unpack('>L', x)[0]
def PUTU32(x): return pack('>L', x)
def GETU32(x): return struct.unpack('>L', x)[0]
def PUTU32(x): return struct.pack('>L', x)
else:
# 64bit
def GETU32(x): return unpack('>I', x)[0]
def PUTU32(x): return pack('>I', x)
def GETU32(x): return struct.unpack('>I', x)[0]
def PUTU32(x): return struct.pack('>I', x)
# Expand the cipher key into the encryption key schedule.
#

View File

@@ -2,8 +2,8 @@
"""
Miscellaneous Routines.
"""
import struct
from sys import maxint as INF
from struct import pack, unpack
## Matrix operations
@@ -107,11 +107,11 @@ def nunpack(s, default=0):
elif l == 1:
return ord(s)
elif l == 2:
return unpack('>H', s)[0]
return struct.unpack('>H', s)[0]
elif l == 3:
return unpack('>L', '\x00'+s)[0]
return struct.unpack('>L', '\x00'+s)[0]
elif l == 4:
return unpack('>L', s)[0]
return struct.unpack('>L', s)[0]
else:
raise TypeError('invalid length: %d' % l)
@@ -247,9 +247,9 @@ class Plane(object):
# create_bmp
def create_bmp(data, bits, width, height):
info = pack('<IiiHHIIIIII', 40, width, height, 1, bits, 0, len(data), 0, 0, 0, 0)
info = struct.pack('<IiiHHIIIIII', 40, width, height, 1, bits, 0, len(data), 0, 0, 0, 0)
assert len(info) == 40, len(info)
header = pack('<ccIHHI', 'B', 'M', 14+40+len(data), 0, 0, 14+40)
header = struct.pack('<ccIHHI', 'B', 'M', 14+40+len(data), 0, 0, 14+40)
assert len(header) == 14, len(header)
# XXX re-rasterize every line
return header+info+data