diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index ffccff5..d2b0618 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -18,7 +18,7 @@ import os.path import gzip import cPickle as pickle import cmap -from struct import pack, unpack +import struct from psparser import PSStackParser from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF from psparser import PSLiteral, PSKeyword @@ -98,7 +98,7 @@ class IdentityCMap(object): def decode(self, code): n = len(code)/2 if n: - return unpack('>%dH' % n, code) + return struct.unpack('>%dH' % n, code) else: return () @@ -348,7 +348,7 @@ class CMapParser(PSStackParser): vlen = len(svar) #assert s1 <= e1 for i in xrange(e1-s1+1): - x = sprefix+pack('>L',s1+i)[-vlen:] + x = sprefix+struct.pack('>L',s1+i)[-vlen:] self.cmap.add_code2cid(x, cid+i) return @@ -382,7 +382,7 @@ class CMapParser(PSStackParser): prefix = code[:-4] vlen = len(var) for i in xrange(e1-s1+1): - x = prefix+pack('>L',base+i)[-vlen:] + x = prefix+struct.pack('>L',base+i)[-vlen:] self.cmap.add_cid2unichr(s1+i, x) return diff --git a/pdfminer/lzw.py b/pdfminer/lzw.py index 89b5f76..6d0eaf1 100644 --- a/pdfminer/lzw.py +++ b/pdfminer/lzw.py @@ -1,6 +1,5 @@ #!/usr/bin/env python2 import sys -from sys import stderr try: from cStringIO import StringIO except ImportError: @@ -84,8 +83,8 @@ class LZWDecoder(object): x = self.feed(code) yield x if self.debug: - print >>stderr, ('nbits=%d, code=%d, output=%r, table=%r' % - (self.nbits, code, x, self.table[258:])) + print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' % + (self.nbits, code, x, self.table[258:])) return # lzwdecode diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 2d45184..55e5c8a 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -1,12 +1,12 @@ #!/usr/bin/env python2 import sys +import struct try: from cStringIO import StringIO except ImportError: from StringIO import StringIO from cmapdb import CMapDB, CMapParser, FileUnicodeMap, CMap from encodingdb import EncodingDB, name2unicode -from struct import pack, unpack from psparser import PSStackParser from psparser import PSSyntaxError, PSEOF from psparser import LIT, KWD, STRICT @@ -154,7 +154,7 @@ def getdict(data): if b0 == 28: value = b1<<8 | b2 else: - value = b1<<24 | b2<<16 | unpack('>H', fp.read(2))[0] + value = b1<<24 | b2<<16 | struct.unpack('>H', fp.read(2))[0] stack.append(value) return d @@ -246,7 +246,7 @@ class CFFFont(object): def __init__(self, fp): self.fp = fp self.offsets = [] - (count, offsize) = unpack('>HB', self.fp.read(3)) + (count, offsize) = struct.unpack('>HB', self.fp.read(3)) for i in xrange(count+1): self.offsets.append(nunpack(self.fp.read(offsize))) self.base = self.fp.tell()-1 @@ -270,7 +270,7 @@ class CFFFont(object): self.name = name self.fp = fp # Header - (_major,_minor,hdrsize,offsize) = unpack('BBBB', self.fp.read(4)) + (_major,_minor,hdrsize,offsize) = struct.unpack('BBBB', self.fp.read(4)) self.fp.read(hdrsize-4) # Name INDEX self.name_index = self.INDEX(self.fp) @@ -296,16 +296,16 @@ class CFFFont(object): format = self.fp.read(1) if format == '\x00': # Format 0 - (n,) = unpack('B', self.fp.read(1)) - for (code,gid) in enumerate(unpack('B'*n, self.fp.read(n))): + (n,) = struct.unpack('B', self.fp.read(1)) + for (code,gid) in enumerate(struct.unpack('B'*n, self.fp.read(n))): self.code2gid[code] = gid self.gid2code[gid] = code elif format == '\x01': # Format 1 - (n,) = unpack('B', self.fp.read(1)) + (n,) = struct.unpack('B', self.fp.read(1)) code = 0 for i in xrange(n): - (first,nleft) = unpack('BB', self.fp.read(2)) + (first,nleft) = struct.unpack('BB', self.fp.read(2)) for gid in xrange(first,first+nleft+1): self.code2gid[code] = gid self.gid2code[gid] = code @@ -320,17 +320,17 @@ class CFFFont(object): if format == '\x00': # Format 0 n = self.nglyphs-1 - for (gid,sid) in enumerate(unpack('>'+'H'*n, self.fp.read(2*n))): + for (gid,sid) in enumerate(struct.unpack('>'+'H'*n, self.fp.read(2*n))): gid += 1 name = self.getstr(sid) self.name2gid[name] = gid self.gid2name[gid] = name elif format == '\x01': # Format 1 - (n,) = unpack('B', self.fp.read(1)) + (n,) = struct.unpack('B', self.fp.read(1)) sid = 0 for i in xrange(n): - (first,nleft) = unpack('BB', self.fp.read(2)) + (first,nleft) = struct.unpack('BB', self.fp.read(2)) for gid in xrange(first,first+nleft+1): name = self.getstr(sid) self.name2gid[name] = gid @@ -363,9 +363,9 @@ class TrueTypeFont(object): self.fp = fp self.tables = {} self.fonttype = fp.read(4) - (ntables, _1, _2, _3) = unpack('>HHHH', fp.read(8)) + (ntables, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8)) for _ in xrange(ntables): - (name, tsum, offset, length) = unpack('>4sLLL', fp.read(16)) + (name, tsum, offset, length) = struct.unpack('>4sLLL', fp.read(16)) self.tables[name] = (offset, length) return @@ -375,50 +375,50 @@ class TrueTypeFont(object): (base_offset, length) = self.tables['cmap'] fp = self.fp fp.seek(base_offset) - (version, nsubtables) = unpack('>HH', fp.read(4)) + (version, nsubtables) = struct.unpack('>HH', fp.read(4)) subtables = [] for i in xrange(nsubtables): - subtables.append(unpack('>HHL', fp.read(8))) + subtables.append(struct.unpack('>HHL', fp.read(8))) char2gid = {} # Only supports subtable type 0, 2 and 4. for (_1, _2, st_offset) in subtables: fp.seek(base_offset+st_offset) - (fmttype, fmtlen, fmtlang) = unpack('>HHH', fp.read(6)) + (fmttype, fmtlen, fmtlang) = struct.unpack('>HHH', fp.read(6)) if fmttype == 0: - char2gid.update(enumerate(unpack('>256B', fp.read(256)))) + char2gid.update(enumerate(struct.unpack('>256B', fp.read(256)))) elif fmttype == 2: - subheaderkeys = unpack('>256H', fp.read(512)) + subheaderkeys = struct.unpack('>256H', fp.read(512)) firstbytes = [0]*8192 for (i,k) in enumerate(subheaderkeys): firstbytes[k/8] = i nhdrs = max(subheaderkeys)/8 + 1 hdrs = [] for i in xrange(nhdrs): - (firstcode,entcount,delta,offset) = unpack('>HHhH', fp.read(8)) + (firstcode,entcount,delta,offset) = struct.unpack('>HHhH', fp.read(8)) hdrs.append((i,firstcode,entcount,delta,fp.tell()-2+offset)) for (i,firstcode,entcount,delta,pos) in hdrs: if not entcount: continue first = firstcode + (firstbytes[i] << 8) fp.seek(pos) for c in xrange(entcount): - gid = unpack('>H', fp.read(2)) + gid = struct.unpack('>H', fp.read(2)) if gid: gid += delta char2gid[first+c] = gid elif fmttype == 4: - (segcount, _1, _2, _3) = unpack('>HHHH', fp.read(8)) + (segcount, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8)) segcount /= 2 - ecs = unpack('>%dH' % segcount, fp.read(2*segcount)) + ecs = struct.unpack('>%dH' % segcount, fp.read(2*segcount)) fp.read(2) - scs = unpack('>%dH' % segcount, fp.read(2*segcount)) - idds = unpack('>%dh' % segcount, fp.read(2*segcount)) + scs = struct.unpack('>%dH' % segcount, fp.read(2*segcount)) + idds = struct.unpack('>%dh' % segcount, fp.read(2*segcount)) pos = fp.tell() - idrs = unpack('>%dH' % segcount, fp.read(2*segcount)) + idrs = struct.unpack('>%dH' % segcount, fp.read(2*segcount)) for (ec,sc,idd,idr) in zip(ecs, scs, idds, idrs): if idr: fp.seek(pos+idr) for c in xrange(sc, ec+1): - char2gid[c] = (unpack('>H', fp.read(2))[0] + idd) & 0xffff + char2gid[c] = (struct.unpack('>H', fp.read(2))[0] + idd) & 0xffff else: for c in xrange(sc, ec+1): char2gid[c] = (c + idd) & 0xffff diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 1c66f9e..d24300a 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -1,7 +1,6 @@ #!/usr/bin/env python2 +import sys import re -from sys import stderr -from struct import pack, unpack try: from cStringIO import StringIO except ImportError: @@ -159,7 +158,7 @@ class PDFResourceManager(object): font = self.fonts[objid] else: if 2 <= self.debug: - print >>stderr, 'get_font: create: objid=%r, spec=%r' % (objid, spec) + print >>sys.stderr, 'get_font: create: objid=%r, spec=%r' % (objid, spec) if STRICT: if spec['Type'] is not LITERAL_FONT: raise PDFFontError('Type is not /Font') @@ -329,7 +328,7 @@ class PDFPageInterpreter(object): return PREDEFINED_COLORSPACE[name] for (k,v) in dict_value(resources).iteritems(): if 2 <= self.debug: - print >>stderr, 'Resource: %r: %r' % (k,v) + print >>sys.stderr, 'Resource: %r: %r' % (k,v) if k == 'Font': for (fontid,spec) in dict_value(v).iteritems(): objid = None @@ -649,7 +648,7 @@ class PDFPageInterpreter(object): (a,b,c,d,e,f) = self.textstate.matrix self.textstate.matrix = (a,b,c,d,tx*a+ty*c+e,tx*b+ty*d+f) self.textstate.linematrix = (0, 0) - #print >>stderr, 'Td(%r,%r): %r' % (tx,ty,self.textstate) + #print >>sys.stderr, 'Td(%r,%r): %r' % (tx,ty,self.textstate) return # text-move def do_TD(self, tx, ty): @@ -657,7 +656,7 @@ class PDFPageInterpreter(object): self.textstate.matrix = (a,b,c,d,tx*a+ty*c+e,tx*b+ty*d+f) self.textstate.leading = ty self.textstate.linematrix = (0, 0) - #print >>stderr, 'TD(%r,%r): %r' % (tx,ty,self.textstate) + #print >>sys.stderr, 'TD(%r,%r): %r' % (tx,ty,self.textstate) return # textmatrix def do_Tm(self, a,b,c,d,e,f): @@ -673,7 +672,7 @@ class PDFPageInterpreter(object): # show-pos def do_TJ(self, seq): - #print >>stderr, 'TJ(%r): %r' % (seq,self.textstate) + #print >>sys.stderr, 'TJ(%r): %r' % (seq,self.textstate) if self.textstate.font is None: if STRICT: raise PDFInterpreterError('No font specified!') @@ -719,7 +718,7 @@ class PDFPageInterpreter(object): raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) return if 1 <= self.debug: - print >>stderr, 'Processing xobj: %r' % xobj + print >>sys.stderr, 'Processing xobj: %r' % xobj subtype = xobj.get('Subtype') if subtype is LITERAL_FORM and 'BBox' in xobj: interpreter = self.dup() @@ -743,7 +742,7 @@ class PDFPageInterpreter(object): def process_page(self, page): if 1 <= self.debug: - print >>stderr, 'Processing page: %r' % page + print >>sys.stderr, 'Processing page: %r' % page (x0,y0,x1,y1) = page.mediabox if page.rotate == 90: ctm = (0,-1,1,0, -y0,x1) @@ -763,7 +762,7 @@ class PDFPageInterpreter(object): # This method may be called recursively. def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY): if 1 <= self.debug: - print >>stderr, ('render_contents: resources=%r, streams=%r, ctm=%r' % + print >>sys.stderr, ('render_contents: resources=%r, streams=%r, ctm=%r' % (resources, streams, ctm)) self.init_resources(resources) self.init_state(ctm) @@ -790,12 +789,12 @@ class PDFPageInterpreter(object): if nargs: args = self.pop(nargs) if 2 <= self.debug: - print >>stderr, 'exec: %s %r' % (name, args) + print >>sys.stderr, 'exec: %s %r' % (name, args) if len(args) == nargs: func(*args) else: if 2 <= self.debug: - print >>stderr, 'exec: %s' % (name) + print >>sys.stderr, 'exec: %s' % (name) func() else: if STRICT: diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index a39b058..9ce7fdd 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -2,7 +2,6 @@ import sys import re import struct -from sys import stderr try: import hashlib as md5 except ImportError: @@ -96,7 +95,7 @@ class PDFXRef(PDFBaseXRef): if use != 'n': continue self.offsets[objid] = (int(genno), long(pos)) if 1 <= debug: - print >>stderr, 'xref objects:', self.offsets + print >>sys.stderr, 'xref objects:', self.offsets self.load_trailer(parser) return @@ -126,7 +125,7 @@ class PDFXRef(PDFBaseXRef): parser.seek(pos) self.load_trailer(parser) if 1 <= debug: - print >>stderr, 'trailer: %r' % self.get_trailer() + print >>sys.stderr, 'trailer: %r' % self.get_trailer() break m = self.PDFOBJ_CUE.match(line) if not m: continue @@ -180,7 +179,7 @@ class PDFXRefStream(PDFBaseXRef): self.entlen = self.fl1+self.fl2+self.fl3 self.trailer = stream.attrs if 1 <= debug: - print >>stderr, ('xref stream: objid=%s, fields=%d,%d,%d' % + print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' % (', '.join(map(repr, self.objid_ranges)), self.fl1, self.fl2, self.fl3)) return @@ -408,7 +407,7 @@ class PDFDocument(object): if not self.xrefs: raise PDFException('PDFDocument is not initialized') if 2 <= self.debug: - print >>stderr, 'getobj: objid=%r' % (objid) + print >>sys.stderr, 'getobj: objid=%r' % (objid) if objid in self.objs: genno = 0 obj = self.objs[objid] @@ -481,7 +480,7 @@ class PDFDocument(object): except PSEOF: return None if 2 <= self.debug: - print >>stderr, 'register: objid=%r: %r' % (objid, obj) + print >>sys.stderr, 'register: objid=%r: %r' % (objid, obj) self.objs[objid] = obj if self.decipher: obj = decipher_all(self.decipher, objid, genno, obj) @@ -503,13 +502,13 @@ class PDFDocument(object): tree[k] = v if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree: if 1 <= self.debug: - print >>stderr, 'Pages: Kids=%r' % tree['Kids'] + print >>sys.stderr, 'Pages: Kids=%r' % tree['Kids'] for c in list_value(tree['Kids']): for x in search(c, tree): yield x elif tree.get('Type') is LITERAL_PAGE: if 1 <= self.debug: - print >>stderr, 'Page: %r' % tree + print >>sys.stderr, 'Page: %r' % tree yield (objid, tree) if 'Pages' not in self.catalog: return for (pageid,tree) in search(self.catalog['Pages'], self.catalog): @@ -672,7 +671,7 @@ class PDFParser(PSStackParser): self.seek(pos+objlen) # XXX limit objlen not to exceed object boundary if 2 <= self.debug: - print >>stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \ + print >>sys.stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \ (pos, objlen, dic, data[:10]) obj = PDFStream(dic, data, self.doc.decipher) self.push((pos, obj)) @@ -690,14 +689,14 @@ class PDFParser(PSStackParser): for line in self.revreadlines(): line = line.strip() if 2 <= self.debug: - print >>stderr, 'find_xref: %r' % line + print >>sys.stderr, 'find_xref: %r' % line if line == 'startxref': break if line: prev = line else: raise PDFNoValidXRef('Unexpected EOF') if 1 <= self.debug: - print >>stderr, 'xref found: pos=%r' % prev + print >>sys.stderr, 'xref found: pos=%r' % prev return long(prev) # read xref table @@ -710,7 +709,7 @@ class PDFParser(PSStackParser): except PSEOF: raise PDFNoValidXRef('Unexpected EOF') if 2 <= self.debug: - print >>stderr, 'read_xref_from: start=%d, token=%r' % (start, token) + print >>sys.stderr, 'read_xref_from: start=%d, token=%r' % (start, token) if isinstance(token, int): # XRefStream: PDF-1.5 self.seek(pos) @@ -725,7 +724,7 @@ class PDFParser(PSStackParser): xrefs.append(xref) trailer = xref.get_trailer() if 1 <= self.debug: - print >>stderr, 'trailer: %r' % trailer + print >>sys.stderr, 'trailer: %r' % trailer if 'XRefStm' in trailer: pos = int_value(trailer['XRefStm']) self.read_xref_from(pos, xrefs) @@ -745,7 +744,7 @@ class PDFParser(PSStackParser): except PDFNoValidXRef: # fallback if 1 <= self.debug: - print >>stderr, 'no xref, fallback' + print >>sys.stderr, 'no xref, fallback' self.fallback = True xref = PDFXRef() xref.load_fallback(self) diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index 7dd9f28..d008fa2 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -1,7 +1,6 @@ #!/usr/bin/env python2 import sys import re -from sys import stderr from utils import choplist STRICT = 0 @@ -169,7 +168,7 @@ class PSBaseParser(object): if not pos: pos = self.bufpos+self.charpos self.fp.seek(pos) - print >>stderr, 'poll(%d): %r' % (pos, self.fp.read(n)) + print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n)) self.fp.seek(pos0) return @@ -177,7 +176,7 @@ class PSBaseParser(object): """Seeks the parser to the given position. """ if 2 <= self.debug: - print >>stderr, 'seek: %r' % pos + print >>sys.stderr, 'seek: %r' % pos self.fp.seek(pos) # reset the status for nextline() self.bufpos = pos @@ -227,7 +226,7 @@ class PSBaseParser(object): linebuf += self.buf[self.charpos:] self.charpos = len(self.buf) if 2 <= self.debug: - print >>stderr, 'nextline: %r' % ((linepos, linebuf),) + print >>sys.stderr, 'nextline: %r' % ((linepos, linebuf),) return (linepos, linebuf) def revreadlines(self): @@ -466,7 +465,7 @@ class PSBaseParser(object): self.charpos = self._parse1(self.buf, self.charpos) token = self._tokens.pop(0) if 2 <= self.debug: - print >>stderr, 'nexttoken: %r' % (token,) + print >>sys.stderr, 'nexttoken: %r' % (token,) return token @@ -507,7 +506,7 @@ class PSStackParser(PSBaseParser): def add_results(self, *objs): if 2 <= self.debug: - print >>stderr, 'add_results: %r' % (objs,) + print >>sys.stderr, 'add_results: %r' % (objs,) self.results.extend(objs) return @@ -515,7 +514,7 @@ class PSStackParser(PSBaseParser): self.context.append((pos, self.curtype, self.curstack)) (self.curtype, self.curstack) = (type, []) if 2 <= self.debug: - print >>stderr, 'start_type: pos=%r, type=%r' % (pos, type) + print >>sys.stderr, 'start_type: pos=%r, type=%r' % (pos, type) return def end_type(self, type): @@ -524,7 +523,7 @@ class PSStackParser(PSBaseParser): objs = [ obj for (_,obj) in self.curstack ] (pos, self.curtype, self.curstack) = self.context.pop() if 2 <= self.debug: - print >>stderr, 'end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs) + print >>sys.stderr, 'end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs) return (pos, objs) def do_keyword(self, pos, token): @@ -580,7 +579,7 @@ class PSStackParser(PSBaseParser): if STRICT: raise else: if 2 <= self.debug: - print >>stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \ + print >>sys.stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \ (pos, token, self.curstack) self.do_keyword(pos, token) if self.context: @@ -589,7 +588,7 @@ class PSStackParser(PSBaseParser): self.flush() obj = self.results.pop(0) if 2 <= self.debug: - print >>stderr, 'nextobject: %r' % (obj,) + print >>sys.stderr, 'nextobject: %r' % (obj,) return obj diff --git a/pdfminer/rijndael.py b/pdfminer/rijndael.py index 5bd8c49..85cac59 100644 --- a/pdfminer/rijndael.py +++ b/pdfminer/rijndael.py @@ -11,7 +11,7 @@ by Philip J. Erdelsky: """ import sys -from struct import pack, unpack +import struct def KEYLENGTH(keybits): return (keybits)/8 def RKLENGTH(keybits): return (keybits)/8+28 @@ -694,14 +694,14 @@ rcon = [ # 128-bit blocks, Rijndael never uses more than 10 rcon values ] -if len(pack('L',0)) == 4: +if len(struct.pack('L',0)) == 4: # 32bit - def GETU32(x): return unpack('>L', x)[0] - def PUTU32(x): return pack('>L', x) + def GETU32(x): return struct.unpack('>L', x)[0] + def PUTU32(x): return struct.pack('>L', x) else: # 64bit - def GETU32(x): return unpack('>I', x)[0] - def PUTU32(x): return pack('>I', x) + def GETU32(x): return struct.unpack('>I', x)[0] + def PUTU32(x): return struct.pack('>I', x) # Expand the cipher key into the encryption key schedule. # diff --git a/pdfminer/utils.py b/pdfminer/utils.py index feddfa1..d3d0b76 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -2,8 +2,8 @@ """ Miscellaneous Routines. """ +import struct from sys import maxint as INF -from struct import pack, unpack ## Matrix operations @@ -107,11 +107,11 @@ def nunpack(s, default=0): elif l == 1: return ord(s) elif l == 2: - return unpack('>H', s)[0] + return struct.unpack('>H', s)[0] elif l == 3: - return unpack('>L', '\x00'+s)[0] + return struct.unpack('>L', '\x00'+s)[0] elif l == 4: - return unpack('>L', s)[0] + return struct.unpack('>L', s)[0] else: raise TypeError('invalid length: %d' % l) @@ -247,9 +247,9 @@ class Plane(object): # create_bmp def create_bmp(data, bits, width, height): - info = pack('