String-Bytes distinction (first attempt).
parent
8791355e1d
commit
1ccfaff411
|
@ -12,11 +12,11 @@ This code is in the public domain.
|
||||||
class Arcfour(object):
|
class Arcfour(object):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
>>> Arcfour('Key').process('Plaintext').encode('hex')
|
>>> Arcfour(b'Key').process(b'Plaintext').encode('hex')
|
||||||
'bbf316e8d940af0ad3'
|
'bbf316e8d940af0ad3'
|
||||||
>>> Arcfour('Wiki').process('pedia').encode('hex')
|
>>> Arcfour(b'Wiki').process(b'pedia').encode('hex')
|
||||||
'1021bf0420'
|
'1021bf0420'
|
||||||
>>> Arcfour('Secret').process('Attack at dawn').encode('hex')
|
>>> Arcfour(b'Secret').process(b'Attack at dawn').encode('hex')
|
||||||
'45a01f645fc35b383552544b9bf5'
|
'45a01f645fc35b383552544b9bf5'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ class Arcfour(object):
|
||||||
def process(self, data):
|
def process(self, data):
|
||||||
(i, j) = (self.i, self.j)
|
(i, j) = (self.i, self.j)
|
||||||
s = self.s
|
s = self.s
|
||||||
r = ''
|
r = b''
|
||||||
for c in data:
|
for c in data:
|
||||||
i = (i+1) % 256
|
i = (i+1) % 256
|
||||||
j = (j+s[i]) % 256
|
j = (j+s[i]) % 256
|
||||||
|
|
|
@ -24,24 +24,24 @@ def ascii85decode(data):
|
||||||
The sample string is taken from:
|
The sample string is taken from:
|
||||||
http://en.wikipedia.org/w/index.php?title=Ascii85
|
http://en.wikipedia.org/w/index.php?title=Ascii85
|
||||||
|
|
||||||
>>> ascii85decode('9jqo^BlbD-BleB1DJ+*+F(f,q')
|
>>> ascii85decode(b'9jqo^BlbD-BleB1DJ+*+F(f,q')
|
||||||
'Man is distinguished'
|
'Man is distinguished'
|
||||||
>>> ascii85decode('E,9)oF*2M7/c~>')
|
>>> ascii85decode(b'E,9)oF*2M7/c~>')
|
||||||
'pleasure.'
|
'pleasure.'
|
||||||
"""
|
"""
|
||||||
n = b = 0
|
n = b = 0
|
||||||
out = ''
|
out = b''
|
||||||
for c in data:
|
for c in data:
|
||||||
if '!' <= c and c <= 'u':
|
if b'!' <= c and c <= b'u':
|
||||||
n += 1
|
n += 1
|
||||||
b = b*85+(ord(c)-33)
|
b = b*85+(ord(c)-33)
|
||||||
if n == 5:
|
if n == 5:
|
||||||
out += struct.pack('>L', b)
|
out += struct.pack('>L', b)
|
||||||
n = b = 0
|
n = b = 0
|
||||||
elif c == 'z':
|
elif c == b'z':
|
||||||
assert n == 0
|
assert n == 0
|
||||||
out += '\0\0\0\0'
|
out += b'\0\0\0\0'
|
||||||
elif c == '~':
|
elif c == b'~':
|
||||||
if n:
|
if n:
|
||||||
for _ in range(5-n):
|
for _ in range(5-n):
|
||||||
b = b*85+84
|
b = b*85+84
|
||||||
|
@ -64,19 +64,19 @@ def asciihexdecode(data):
|
||||||
the EOD marker after reading an odd number of hexadecimal digits, it
|
the EOD marker after reading an odd number of hexadecimal digits, it
|
||||||
will behave as if a 0 followed the last digit.
|
will behave as if a 0 followed the last digit.
|
||||||
|
|
||||||
>>> asciihexdecode('61 62 2e6364 65')
|
>>> asciihexdecode(b'61 62 2e6364 65')
|
||||||
'ab.cde'
|
'ab.cde'
|
||||||
>>> asciihexdecode('61 62 2e6364 657>')
|
>>> asciihexdecode(b'61 62 2e6364 657>')
|
||||||
'ab.cdep'
|
'ab.cdep'
|
||||||
>>> asciihexdecode('7>')
|
>>> asciihexdecode(b'7>')
|
||||||
'p'
|
'p'
|
||||||
"""
|
"""
|
||||||
decode = (lambda hx: chr(int(hx, 16)))
|
decode = (lambda hx: chr(int(hx, 16)))
|
||||||
out = map(decode, hex_re.findall(data))
|
out = map(decode, hex_re.findall(data))
|
||||||
m = trail_re.search(data)
|
m = trail_re.search(data)
|
||||||
if m:
|
if m:
|
||||||
out.append(decode("%c0" % m.group(1)))
|
out.append(decode('%c0' % m.group(1)))
|
||||||
return ''.join(out)
|
return b''.join(out)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -691,7 +691,7 @@ class CCITTFaxDecoder(CCITTG4Parser):
|
||||||
def __init__(self, width, bytealign=False, reversed=False):
|
def __init__(self, width, bytealign=False, reversed=False):
|
||||||
CCITTG4Parser.__init__(self, width, bytealign=bytealign)
|
CCITTG4Parser.__init__(self, width, bytealign=bytealign)
|
||||||
self.reversed = reversed
|
self.reversed = reversed
|
||||||
self._buf = ''
|
self._buf = b''
|
||||||
return
|
return
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
|
|
|
@ -237,7 +237,7 @@ class CMapDB(object):
|
||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
gzfile = gzip.open(path)
|
gzfile = gzip.open(path)
|
||||||
try:
|
try:
|
||||||
return type(name, (), pickle.loads(gzfile.read()))
|
return type(str(name), (), pickle.loads(gzfile.read()))
|
||||||
finally:
|
finally:
|
||||||
gzfile.close()
|
gzfile.close()
|
||||||
else:
|
else:
|
||||||
|
@ -288,17 +288,17 @@ class CMapParser(PSStackParser):
|
||||||
|
|
||||||
def do_keyword(self, pos, token):
|
def do_keyword(self, pos, token):
|
||||||
name = token.name
|
name = token.name
|
||||||
if name == 'begincmap':
|
if name == b'begincmap':
|
||||||
self._in_cmap = True
|
self._in_cmap = True
|
||||||
self.popall()
|
self.popall()
|
||||||
return
|
return
|
||||||
elif name == 'endcmap':
|
elif name == b'endcmap':
|
||||||
self._in_cmap = False
|
self._in_cmap = False
|
||||||
return
|
return
|
||||||
if not self._in_cmap:
|
if not self._in_cmap:
|
||||||
return
|
return
|
||||||
#
|
#
|
||||||
if name == 'def':
|
if name == b'def':
|
||||||
try:
|
try:
|
||||||
((_, k), (_, v)) = self.pop(2)
|
((_, k), (_, v)) = self.pop(2)
|
||||||
self.cmap.set_attr(literal_name(k), v)
|
self.cmap.set_attr(literal_name(k), v)
|
||||||
|
@ -306,7 +306,7 @@ class CMapParser(PSStackParser):
|
||||||
pass
|
pass
|
||||||
return
|
return
|
||||||
|
|
||||||
if name == 'usecmap':
|
if name == b'usecmap':
|
||||||
try:
|
try:
|
||||||
((_, cmapname),) = self.pop(1)
|
((_, cmapname),) = self.pop(1)
|
||||||
self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
|
self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
|
||||||
|
@ -316,17 +316,17 @@ class CMapParser(PSStackParser):
|
||||||
pass
|
pass
|
||||||
return
|
return
|
||||||
|
|
||||||
if name == 'begincodespacerange':
|
if name == b'begincodespacerange':
|
||||||
self.popall()
|
self.popall()
|
||||||
return
|
return
|
||||||
if name == 'endcodespacerange':
|
if name == b'endcodespacerange':
|
||||||
self.popall()
|
self.popall()
|
||||||
return
|
return
|
||||||
|
|
||||||
if name == 'begincidrange':
|
if name == b'begincidrange':
|
||||||
self.popall()
|
self.popall()
|
||||||
return
|
return
|
||||||
if name == 'endcidrange':
|
if name == b'endcidrange':
|
||||||
objs = [obj for (__, obj) in self.popall()]
|
objs = [obj for (__, obj) in self.popall()]
|
||||||
for (s, e, cid) in choplist(3, objs):
|
for (s, e, cid) in choplist(3, objs):
|
||||||
if (not isinstance(s, str) or not isinstance(e, str) or
|
if (not isinstance(s, str) or not isinstance(e, str) or
|
||||||
|
@ -347,20 +347,20 @@ class CMapParser(PSStackParser):
|
||||||
self.cmap.add_code2cid(x, cid+i)
|
self.cmap.add_code2cid(x, cid+i)
|
||||||
return
|
return
|
||||||
|
|
||||||
if name == 'begincidchar':
|
if name == b'begincidchar':
|
||||||
self.popall()
|
self.popall()
|
||||||
return
|
return
|
||||||
if name == 'endcidchar':
|
if name == b'endcidchar':
|
||||||
objs = [obj for (__, obj) in self.popall()]
|
objs = [obj for (__, obj) in self.popall()]
|
||||||
for (cid, code) in choplist(2, objs):
|
for (cid, code) in choplist(2, objs):
|
||||||
if isinstance(code, str) and isinstance(cid, str):
|
if isinstance(code, str) and isinstance(cid, str):
|
||||||
self.cmap.add_code2cid(code, nunpack(cid))
|
self.cmap.add_code2cid(code, nunpack(cid))
|
||||||
return
|
return
|
||||||
|
|
||||||
if name == 'beginbfrange':
|
if name == b'beginbfrange':
|
||||||
self.popall()
|
self.popall()
|
||||||
return
|
return
|
||||||
if name == 'endbfrange':
|
if name == b'endbfrange':
|
||||||
objs = [obj for (__, obj) in self.popall()]
|
objs = [obj for (__, obj) in self.popall()]
|
||||||
for (s, e, code) in choplist(3, objs):
|
for (s, e, code) in choplist(3, objs):
|
||||||
if (not isinstance(s, str) or not isinstance(e, str) or
|
if (not isinstance(s, str) or not isinstance(e, str) or
|
||||||
|
@ -382,20 +382,20 @@ class CMapParser(PSStackParser):
|
||||||
self.cmap.add_cid2unichr(s1+i, x)
|
self.cmap.add_cid2unichr(s1+i, x)
|
||||||
return
|
return
|
||||||
|
|
||||||
if name == 'beginbfchar':
|
if name == b'beginbfchar':
|
||||||
self.popall()
|
self.popall()
|
||||||
return
|
return
|
||||||
if name == 'endbfchar':
|
if name == b'endbfchar':
|
||||||
objs = [obj for (__, obj) in self.popall()]
|
objs = [obj for (__, obj) in self.popall()]
|
||||||
for (cid, code) in choplist(2, objs):
|
for (cid, code) in choplist(2, objs):
|
||||||
if isinstance(cid, str) and isinstance(code, str):
|
if isinstance(cid, str) and isinstance(code, str):
|
||||||
self.cmap.add_cid2unichr(nunpack(cid), code)
|
self.cmap.add_cid2unichr(nunpack(cid), code)
|
||||||
return
|
return
|
||||||
|
|
||||||
if name == 'beginnotdefrange':
|
if name == b'beginnotdefrange':
|
||||||
self.popall()
|
self.popall()
|
||||||
return
|
return
|
||||||
if name == 'endnotdefrange':
|
if name == b'endnotdefrange':
|
||||||
self.popall()
|
self.popall()
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ class BMPWriter(object):
|
||||||
headersize = 14+40+ncols*4
|
headersize = 14+40+ncols*4
|
||||||
info = struct.pack('<IiiHHIIIIII', 40, self.width, self.height, 1, self.bits, 0, self.datasize, 0, 0, ncols, 0)
|
info = struct.pack('<IiiHHIIIIII', 40, self.width, self.height, 1, self.bits, 0, self.datasize, 0, 0, ncols, 0)
|
||||||
assert len(info) == 40, len(info)
|
assert len(info) == 40, len(info)
|
||||||
header = struct.pack('<ccIHHI', 'B', 'M', headersize+self.datasize, 0, 0, headersize)
|
header = struct.pack('<ccIHHI', b'B', b'M', headersize+self.datasize, 0, 0, headersize)
|
||||||
assert len(header) == 14, len(header)
|
assert len(header) == 14, len(header)
|
||||||
self.fp.write(header)
|
self.fp.write(header)
|
||||||
self.fp.write(info)
|
self.fp.write(info)
|
||||||
|
|
|
@ -45,12 +45,12 @@ class LZWDecoder(object):
|
||||||
return v
|
return v
|
||||||
|
|
||||||
def feed(self, code):
|
def feed(self, code):
|
||||||
x = ''
|
x = b''
|
||||||
if code == 256:
|
if code == 256:
|
||||||
self.table = [chr(c) for c in xrange(256)] # 0-255
|
self.table = [chr(c) for c in xrange(256)] # 0-255
|
||||||
self.table.append(None) # 256
|
self.table.append(None) # 256
|
||||||
self.table.append(None) # 257
|
self.table.append(None) # 257
|
||||||
self.prevbuf = ''
|
self.prevbuf = b''
|
||||||
self.nbits = 9
|
self.nbits = 9
|
||||||
elif code == 257:
|
elif code == 257:
|
||||||
pass
|
pass
|
||||||
|
@ -95,11 +95,11 @@ class LZWDecoder(object):
|
||||||
# lzwdecode
|
# lzwdecode
|
||||||
def lzwdecode(data):
|
def lzwdecode(data):
|
||||||
"""
|
"""
|
||||||
>>> lzwdecode('\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01')
|
>>> lzwdecode(b'\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01')
|
||||||
'\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42'
|
'\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42'
|
||||||
"""
|
"""
|
||||||
fp = BytesIO(data)
|
fp = BytesIO(data)
|
||||||
return ''.join(LZWDecoder(fp).run())
|
return b''.join(LZWDecoder(fp).run())
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import doctest
|
import doctest
|
||||||
|
|
|
@ -100,10 +100,10 @@ class PDFXRef(PDFBaseXRef):
|
||||||
raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
|
raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
|
||||||
if not line:
|
if not line:
|
||||||
raise PDFNoValidXRef('Premature eof: %r' % parser)
|
raise PDFNoValidXRef('Premature eof: %r' % parser)
|
||||||
if line.startswith('trailer'):
|
if line.startswith(b'trailer'):
|
||||||
parser.seek(pos)
|
parser.seek(pos)
|
||||||
break
|
break
|
||||||
f = line.strip().split(' ')
|
f = line.strip().split(b' ')
|
||||||
if len(f) != 2:
|
if len(f) != 2:
|
||||||
raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
|
raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
|
||||||
try:
|
try:
|
||||||
|
@ -115,11 +115,11 @@ class PDFXRef(PDFBaseXRef):
|
||||||
(_, line) = parser.nextline()
|
(_, line) = parser.nextline()
|
||||||
except PSEOF:
|
except PSEOF:
|
||||||
raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
|
raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
|
||||||
f = line.strip().split(' ')
|
f = line.strip().split(b' ')
|
||||||
if len(f) != 3:
|
if len(f) != 3:
|
||||||
raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
|
raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
|
||||||
(pos, genno, use) = f
|
(pos, genno, use) = f
|
||||||
if use != 'n':
|
if use != b'n':
|
||||||
continue
|
continue
|
||||||
self.offsets[objid] = (None, long(pos), int(genno))
|
self.offsets[objid] = (None, long(pos), int(genno))
|
||||||
logging.info('xref objects: %r' % self.offsets)
|
logging.info('xref objects: %r' % self.offsets)
|
||||||
|
@ -170,7 +170,7 @@ class PDFXRefFallback(PDFXRef):
|
||||||
(pos, line) = parser.nextline()
|
(pos, line) = parser.nextline()
|
||||||
except PSEOF:
|
except PSEOF:
|
||||||
break
|
break
|
||||||
if line.startswith('trailer'):
|
if line.startswith(b'trailer'):
|
||||||
parser.seek(pos)
|
parser.seek(pos)
|
||||||
self.load_trailer(parser)
|
self.load_trailer(parser)
|
||||||
logging.info('trailer: %r' % self.get_trailer())
|
logging.info('trailer: %r' % self.get_trailer())
|
||||||
|
@ -284,10 +284,10 @@ class PDFXRefStream(PDFBaseXRef):
|
||||||
##
|
##
|
||||||
class PDFStandardSecurityHandler(object):
|
class PDFStandardSecurityHandler(object):
|
||||||
|
|
||||||
PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
|
PASSWORD_PADDING = b'(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
|
||||||
supported_revisions = (2, 3)
|
supported_revisions = (2, 3)
|
||||||
|
|
||||||
def __init__(self, docid, param, password=''):
|
def __init__(self, docid, param, password=b''):
|
||||||
self.docid = docid
|
self.docid = docid
|
||||||
self.param = param
|
self.param = param
|
||||||
self.password = password
|
self.password = password
|
||||||
|
@ -331,7 +331,7 @@ class PDFStandardSecurityHandler(object):
|
||||||
hash.update(self.docid[0]) # 3
|
hash.update(self.docid[0]) # 3
|
||||||
result = ARC4.new(key).encrypt(hash.digest()) # 4
|
result = ARC4.new(key).encrypt(hash.digest()) # 4
|
||||||
for i in range(1, 20): # 5
|
for i in range(1, 20): # 5
|
||||||
k = ''.join(chr(ord(c) ^ i) for c in key)
|
k = b''.join(chr(ord(c) ^ i) for c in key)
|
||||||
result = ARC4.new(k).encrypt(result)
|
result = ARC4.new(k).encrypt(result)
|
||||||
result += result # 6
|
result += result # 6
|
||||||
return result
|
return result
|
||||||
|
@ -345,7 +345,7 @@ class PDFStandardSecurityHandler(object):
|
||||||
hash.update(self.docid[0]) # 5
|
hash.update(self.docid[0]) # 5
|
||||||
if self.r >= 4:
|
if self.r >= 4:
|
||||||
if not self.encrypt_metadata:
|
if not self.encrypt_metadata:
|
||||||
hash.update('\xff\xff\xff\xff')
|
hash.update(b'\xff\xff\xff\xff')
|
||||||
result = hash.digest()
|
result = hash.digest()
|
||||||
n = 5
|
n = 5
|
||||||
if self.r >= 3:
|
if self.r >= 3:
|
||||||
|
@ -388,7 +388,7 @@ class PDFStandardSecurityHandler(object):
|
||||||
else:
|
else:
|
||||||
user_password = self.o
|
user_password = self.o
|
||||||
for i in range(19, -1, -1):
|
for i in range(19, -1, -1):
|
||||||
k = ''.join(chr(ord(c) ^ i) for c in key)
|
k = b''.join(chr(ord(c) ^ i) for c in key)
|
||||||
user_password = ARC4.new(k).decrypt(user_password)
|
user_password = ARC4.new(k).decrypt(user_password)
|
||||||
return self.authenticate_user_password(user_password)
|
return self.authenticate_user_password(user_password)
|
||||||
|
|
||||||
|
@ -444,7 +444,7 @@ class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def decrypt_aes128(self, objid, genno, data):
|
def decrypt_aes128(self, objid, genno, data):
|
||||||
key = self.key + struct.pack('<L', objid)[:3] + struct.pack('<L', genno)[:2] + "sAlT"
|
key = self.key + struct.pack('<L', objid)[:3] + struct.pack('<L', genno)[:2] + b'sAlT'
|
||||||
hash = md5.md5(key)
|
hash = md5.md5(key)
|
||||||
key = hash.digest()[:min(len(key), 16)]
|
key = hash.digest()[:min(len(key), 16)]
|
||||||
return AES.new(key, mode=AES.MODE_CBC, IV=data[:16]).decrypt(data[16:])
|
return AES.new(key, mode=AES.MODE_CBC, IV=data[:16]).decrypt(data[16:])
|
||||||
|
@ -479,13 +479,13 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
|
||||||
hash = SHA256.new(password)
|
hash = SHA256.new(password)
|
||||||
hash.update(self.o_key_salt)
|
hash.update(self.o_key_salt)
|
||||||
hash.update(self.u)
|
hash.update(self.u)
|
||||||
return AES.new(hash.digest(), mode=AES.MODE_CBC, IV='\x00' * 16).decrypt(self.oe)
|
return AES.new(hash.digest(), mode=AES.MODE_CBC, IV=b'\x00' * 16).decrypt(self.oe)
|
||||||
hash = SHA256.new(password)
|
hash = SHA256.new(password)
|
||||||
hash.update(self.u_validation_salt)
|
hash.update(self.u_validation_salt)
|
||||||
if hash.digest() == self.u_hash:
|
if hash.digest() == self.u_hash:
|
||||||
hash = SHA256.new(password)
|
hash = SHA256.new(password)
|
||||||
hash.update(self.u_key_salt)
|
hash.update(self.u_key_salt)
|
||||||
return AES.new(hash.digest(), mode=AES.MODE_CBC, IV='\x00' * 16).decrypt(self.ue)
|
return AES.new(hash.digest(), mode=AES.MODE_CBC, IV=b'\x00' * 16).decrypt(self.ue)
|
||||||
|
|
||||||
def decrypt_aes256(self, objid, genno, data):
|
def decrypt_aes256(self, objid, genno, data):
|
||||||
return AES.new(self.key, mode=AES.MODE_CBC, IV=data[:16]).decrypt(data[16:])
|
return AES.new(self.key, mode=AES.MODE_CBC, IV=data[:16]).decrypt(data[16:])
|
||||||
|
@ -517,7 +517,7 @@ class PDFDocument(object):
|
||||||
security_handler_registry[5] = PDFStandardSecurityHandlerV5
|
security_handler_registry[5] = PDFStandardSecurityHandlerV5
|
||||||
debug = 0
|
debug = 0
|
||||||
|
|
||||||
def __init__(self, parser, password='', caching=True, fallback=True):
|
def __init__(self, parser, password=b'', caching=True, fallback=True):
|
||||||
"Set the document to use a given PDFParser object."
|
"Set the document to use a given PDFParser object."
|
||||||
self.caching = caching
|
self.caching = caching
|
||||||
self.xrefs = []
|
self.xrefs = []
|
||||||
|
@ -566,9 +566,9 @@ class PDFDocument(object):
|
||||||
raise PDFSyntaxError('Catalog not found!')
|
raise PDFSyntaxError('Catalog not found!')
|
||||||
return
|
return
|
||||||
|
|
||||||
# _initialize_password(password='')
|
# _initialize_password(password=b'')
|
||||||
# Perform the initialization with a given password.
|
# Perform the initialization with a given password.
|
||||||
def _initialize_password(self, password=''):
|
def _initialize_password(self, password=b''):
|
||||||
(docid, param) = self.encryption
|
(docid, param) = self.encryption
|
||||||
if literal_name(param.get('Filter')) != 'Standard':
|
if literal_name(param.get('Filter')) != 'Standard':
|
||||||
raise PDFEncryptionError('Unknown filter: param=%r' % param)
|
raise PDFEncryptionError('Unknown filter: param=%r' % param)
|
||||||
|
@ -740,7 +740,7 @@ class PDFDocument(object):
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if self.debug:
|
if self.debug:
|
||||||
logging.debug('find_xref: %r' % line)
|
logging.debug('find_xref: %r' % line)
|
||||||
if line == 'startxref':
|
if line == b'startxref':
|
||||||
break
|
break
|
||||||
if line:
|
if line:
|
||||||
prev = line
|
prev = line
|
||||||
|
|
|
@ -88,15 +88,15 @@ class FontMetricsDB(object):
|
||||||
##
|
##
|
||||||
class Type1FontHeaderParser(PSStackParser):
|
class Type1FontHeaderParser(PSStackParser):
|
||||||
|
|
||||||
KEYWORD_BEGIN = KWD('begin')
|
KEYWORD_BEGIN = KWD(b'begin')
|
||||||
KEYWORD_END = KWD('end')
|
KEYWORD_END = KWD(b'end')
|
||||||
KEYWORD_DEF = KWD('def')
|
KEYWORD_DEF = KWD(b'def')
|
||||||
KEYWORD_PUT = KWD('put')
|
KEYWORD_PUT = KWD(b'put')
|
||||||
KEYWORD_DICT = KWD('dict')
|
KEYWORD_DICT = KWD(b'dict')
|
||||||
KEYWORD_ARRAY = KWD('array')
|
KEYWORD_ARRAY = KWD(b'array')
|
||||||
KEYWORD_READONLY = KWD('readonly')
|
KEYWORD_READONLY = KWD(b'readonly')
|
||||||
KEYWORD_FOR = KWD('for')
|
KEYWORD_FOR = KWD(b'for')
|
||||||
KEYWORD_FOR = KWD('for')
|
KEYWORD_FOR = KWD(b'for')
|
||||||
|
|
||||||
def __init__(self, data):
|
def __init__(self, data):
|
||||||
PSStackParser.__init__(self, data)
|
PSStackParser.__init__(self, data)
|
||||||
|
@ -311,13 +311,13 @@ class CFFFont(object):
|
||||||
self.gid2code = {}
|
self.gid2code = {}
|
||||||
self.fp.seek(encoding_pos)
|
self.fp.seek(encoding_pos)
|
||||||
format = self.fp.read(1)
|
format = self.fp.read(1)
|
||||||
if format == '\x00':
|
if format == b'\x00':
|
||||||
# Format 0
|
# Format 0
|
||||||
(n,) = struct.unpack('B', self.fp.read(1))
|
(n,) = struct.unpack('B', self.fp.read(1))
|
||||||
for (code, gid) in enumerate(struct.unpack('B'*n, self.fp.read(n))):
|
for (code, gid) in enumerate(struct.unpack('B'*n, self.fp.read(n))):
|
||||||
self.code2gid[code] = gid
|
self.code2gid[code] = gid
|
||||||
self.gid2code[gid] = code
|
self.gid2code[gid] = code
|
||||||
elif format == '\x01':
|
elif format == b'\x01':
|
||||||
# Format 1
|
# Format 1
|
||||||
(n,) = struct.unpack('B', self.fp.read(1))
|
(n,) = struct.unpack('B', self.fp.read(1))
|
||||||
code = 0
|
code = 0
|
||||||
|
@ -334,7 +334,7 @@ class CFFFont(object):
|
||||||
self.gid2name = {}
|
self.gid2name = {}
|
||||||
self.fp.seek(charset_pos)
|
self.fp.seek(charset_pos)
|
||||||
format = self.fp.read(1)
|
format = self.fp.read(1)
|
||||||
if format == '\x00':
|
if format == b'\x00':
|
||||||
# Format 0
|
# Format 0
|
||||||
n = self.nglyphs-1
|
n = self.nglyphs-1
|
||||||
for (gid, sid) in enumerate(struct.unpack('>'+'H'*n, self.fp.read(2*n))):
|
for (gid, sid) in enumerate(struct.unpack('>'+'H'*n, self.fp.read(2*n))):
|
||||||
|
@ -342,7 +342,7 @@ class CFFFont(object):
|
||||||
name = self.getstr(sid)
|
name = self.getstr(sid)
|
||||||
self.name2gid[name] = gid
|
self.name2gid[name] = gid
|
||||||
self.gid2name[gid] = name
|
self.gid2name[gid] = name
|
||||||
elif format == '\x01':
|
elif format == b'\x01':
|
||||||
# Format 1
|
# Format 1
|
||||||
(n,) = struct.unpack('B', self.fp.read(1))
|
(n,) = struct.unpack('B', self.fp.read(1))
|
||||||
sid = 0
|
sid = 0
|
||||||
|
@ -353,7 +353,7 @@ class CFFFont(object):
|
||||||
self.name2gid[name] = gid
|
self.name2gid[name] = gid
|
||||||
self.gid2name[gid] = name
|
self.gid2name[gid] = name
|
||||||
sid += 1
|
sid += 1
|
||||||
elif format == '\x02':
|
elif format == b'\x02':
|
||||||
# Format 2
|
# Format 2
|
||||||
assert 0
|
assert 0
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -246,10 +246,10 @@ class PDFContentParser(PSStackParser):
|
||||||
self.charpos = 0
|
self.charpos = 0
|
||||||
return
|
return
|
||||||
|
|
||||||
def get_inline_data(self, pos, target='EI'):
|
def get_inline_data(self, pos, target=b'EI'):
|
||||||
self.seek(pos)
|
self.seek(pos)
|
||||||
i = 0
|
i = 0
|
||||||
data = ''
|
data = b''
|
||||||
while i <= len(target):
|
while i <= len(target):
|
||||||
self.fillbuf()
|
self.fillbuf()
|
||||||
if i:
|
if i:
|
||||||
|
@ -273,16 +273,16 @@ class PDFContentParser(PSStackParser):
|
||||||
data += self.buf[self.charpos:]
|
data += self.buf[self.charpos:]
|
||||||
self.charpos = len(self.buf)
|
self.charpos = len(self.buf)
|
||||||
data = data[:-(len(target)+1)] # strip the last part
|
data = data[:-(len(target)+1)] # strip the last part
|
||||||
data = re.sub(r'(\x0d\x0a|[\x0d\x0a])$', '', data)
|
data = re.sub(br'(\x0d\x0a|[\x0d\x0a])$', b'', data)
|
||||||
return (pos, data)
|
return (pos, data)
|
||||||
|
|
||||||
def flush(self):
|
def flush(self):
|
||||||
self.add_results(*self.popall())
|
self.add_results(*self.popall())
|
||||||
return
|
return
|
||||||
|
|
||||||
KEYWORD_BI = KWD('BI')
|
KEYWORD_BI = KWD(b'BI')
|
||||||
KEYWORD_ID = KWD('ID')
|
KEYWORD_ID = KWD(b'ID')
|
||||||
KEYWORD_EI = KWD('EI')
|
KEYWORD_EI = KWD(b'EI')
|
||||||
|
|
||||||
def do_keyword(self, pos, token):
|
def do_keyword(self, pos, token):
|
||||||
if token is self.KEYWORD_BI:
|
if token is self.KEYWORD_BI:
|
||||||
|
@ -294,7 +294,7 @@ class PDFContentParser(PSStackParser):
|
||||||
if len(objs) % 2 != 0:
|
if len(objs) % 2 != 0:
|
||||||
raise PSTypeError('Invalid dictionary construct: %r' % objs)
|
raise PSTypeError('Invalid dictionary construct: %r' % objs)
|
||||||
d = dict((literal_name(k), v) for (k, v) in choplist(2, objs))
|
d = dict((literal_name(k), v) for (k, v) in choplist(2, objs))
|
||||||
(pos, data) = self.get_inline_data(pos+len('ID '))
|
(pos, data) = self.get_inline_data(pos+len(b'ID '))
|
||||||
obj = PDFStream(d, data)
|
obj = PDFStream(d, data)
|
||||||
self.push((pos, obj))
|
self.push((pos, obj))
|
||||||
self.push((pos, self.KEYWORD_EI))
|
self.push((pos, self.KEYWORD_EI))
|
||||||
|
|
|
@ -112,7 +112,7 @@ class PDFPage(object):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_pages(klass, fp,
|
def get_pages(klass, fp,
|
||||||
pagenos=None, maxpages=0, password='',
|
pagenos=None, maxpages=0, password=b'',
|
||||||
caching=True, check_extractable=True):
|
caching=True, check_extractable=True):
|
||||||
# Create a PDF parser object associated with the file object.
|
# Create a PDF parser object associated with the file object.
|
||||||
parser = PDFParser(fp)
|
parser = PDFParser(fp)
|
||||||
|
|
|
@ -50,12 +50,12 @@ class PDFParser(PSStackParser):
|
||||||
self.doc = doc
|
self.doc = doc
|
||||||
return
|
return
|
||||||
|
|
||||||
KEYWORD_R = KWD('R')
|
KEYWORD_R = KWD(b'R')
|
||||||
KEYWORD_NULL = KWD('null')
|
KEYWORD_NULL = KWD(b'null')
|
||||||
KEYWORD_ENDOBJ = KWD('endobj')
|
KEYWORD_ENDOBJ = KWD(b'endobj')
|
||||||
KEYWORD_STREAM = KWD('stream')
|
KEYWORD_STREAM = KWD(b'stream')
|
||||||
KEYWORD_XREF = KWD('xref')
|
KEYWORD_XREF = KWD(b'xref')
|
||||||
KEYWORD_STARTXREF = KWD('startxref')
|
KEYWORD_STARTXREF = KWD(b'startxref')
|
||||||
|
|
||||||
def do_keyword(self, pos, token):
|
def do_keyword(self, pos, token):
|
||||||
"""Handles PDF-related keywords."""
|
"""Handles PDF-related keywords."""
|
||||||
|
@ -109,8 +109,8 @@ class PDFParser(PSStackParser):
|
||||||
if STRICT:
|
if STRICT:
|
||||||
raise PDFSyntaxError('Unexpected EOF')
|
raise PDFSyntaxError('Unexpected EOF')
|
||||||
break
|
break
|
||||||
if 'endstream' in line:
|
if b'endstream' in line:
|
||||||
i = line.index('endstream')
|
i = line.index(b'endstream')
|
||||||
objlen += i
|
objlen += i
|
||||||
if self.fallback:
|
if self.fallback:
|
||||||
data += line[:i]
|
data += line[:i]
|
||||||
|
@ -153,7 +153,7 @@ class PDFStreamParser(PDFParser):
|
||||||
self.add_results(*self.popall())
|
self.add_results(*self.popall())
|
||||||
return
|
return
|
||||||
|
|
||||||
KEYWORD_OBJ = KWD('obj')
|
KEYWORD_OBJ = KWD(b'obj')
|
||||||
def do_keyword(self, pos, token):
|
def do_keyword(self, pos, token):
|
||||||
if token is self.KEYWORD_R:
|
if token is self.KEYWORD_R:
|
||||||
# reference to indirect object
|
# reference to indirect object
|
||||||
|
@ -169,7 +169,7 @@ class PDFStreamParser(PDFParser):
|
||||||
if STRICT:
|
if STRICT:
|
||||||
# See PDF Spec 3.4.6: Only the object values are stored in the
|
# See PDF Spec 3.4.6: Only the object values are stored in the
|
||||||
# stream; the obj and endobj keywords are not used.
|
# stream; the obj and endobj keywords are not used.
|
||||||
raise PDFSyntaxError("Keyword endobj found in stream")
|
raise PDFSyntaxError('Keyword endobj found in stream')
|
||||||
return
|
return
|
||||||
# others
|
# others
|
||||||
self.push((pos, token))
|
self.push((pos, token))
|
||||||
|
|
|
@ -244,7 +244,7 @@ class PDFStream(PDFObject):
|
||||||
except zlib.error as e:
|
except zlib.error as e:
|
||||||
if STRICT:
|
if STRICT:
|
||||||
raise PDFException('Invalid zlib bytes: %r, %r' % (e, data))
|
raise PDFException('Invalid zlib bytes: %r, %r' % (e, data))
|
||||||
data = ''
|
data = b''
|
||||||
elif f in LITERALS_LZW_DECODE:
|
elif f in LITERALS_LZW_DECODE:
|
||||||
data = lzwdecode(data)
|
data = lzwdecode(data)
|
||||||
elif f in LITERALS_ASCII85_DECODE:
|
elif f in LITERALS_ASCII85_DECODE:
|
||||||
|
|
|
@ -111,12 +111,12 @@ PSLiteralTable = PSSymbolTable(PSLiteral)
|
||||||
PSKeywordTable = PSSymbolTable(PSKeyword)
|
PSKeywordTable = PSSymbolTable(PSKeyword)
|
||||||
LIT = PSLiteralTable.intern
|
LIT = PSLiteralTable.intern
|
||||||
KWD = PSKeywordTable.intern
|
KWD = PSKeywordTable.intern
|
||||||
KEYWORD_PROC_BEGIN = KWD('{')
|
KEYWORD_PROC_BEGIN = KWD(b'{')
|
||||||
KEYWORD_PROC_END = KWD('}')
|
KEYWORD_PROC_END = KWD(b'}')
|
||||||
KEYWORD_ARRAY_BEGIN = KWD('[')
|
KEYWORD_ARRAY_BEGIN = KWD(b'[')
|
||||||
KEYWORD_ARRAY_END = KWD(']')
|
KEYWORD_ARRAY_END = KWD(b']')
|
||||||
KEYWORD_DICT_BEGIN = KWD('<<')
|
KEYWORD_DICT_BEGIN = KWD(b'<<')
|
||||||
KEYWORD_DICT_END = KWD('>>')
|
KEYWORD_DICT_END = KWD(b'>>')
|
||||||
|
|
||||||
|
|
||||||
def literal_name(x):
|
def literal_name(x):
|
||||||
|
@ -139,18 +139,18 @@ def keyword_name(x):
|
||||||
|
|
||||||
## PSBaseParser
|
## PSBaseParser
|
||||||
##
|
##
|
||||||
EOL = re.compile(r'[\r\n]')
|
EOL = re.compile(br'[\r\n]')
|
||||||
SPC = re.compile(r'\s')
|
SPC = re.compile(br'\s')
|
||||||
NONSPC = re.compile(r'\S')
|
NONSPC = re.compile(br'\S')
|
||||||
HEX = re.compile(r'[0-9a-fA-F]')
|
HEX = re.compile(br'[0-9a-fA-F]')
|
||||||
END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]')
|
END_LITERAL = re.compile(br'[#/%\[\]()<>{}\s]')
|
||||||
END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]')
|
END_HEX_STRING = re.compile(br'[^\s0-9a-fA-F]')
|
||||||
HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.')
|
HEX_PAIR = re.compile(br'[0-9a-fA-F]{2}|.')
|
||||||
END_NUMBER = re.compile(r'[^0-9]')
|
END_NUMBER = re.compile(br'[^0-9]')
|
||||||
END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')
|
END_KEYWORD = re.compile(br'[#/%\[\]()<>{}\s]')
|
||||||
END_STRING = re.compile(r'[()\134]')
|
END_STRING = re.compile(br'[()\134]')
|
||||||
OCT_STRING = re.compile(r'[0-7]')
|
OCT_STRING = re.compile(br'[0-7]')
|
||||||
ESC_STRING = {'b': 8, 't': 9, 'n': 10, 'f': 12, 'r': 13, '(': 40, ')': 41, '\\': 92}
|
ESC_STRING = {b'b': 8, b't': 9, b'n': 10, b'f': 12, b'r': 13, b'(': 40, b')': 41, b'\\': 92}
|
||||||
|
|
||||||
|
|
||||||
class PSBaseParser(object):
|
class PSBaseParser(object):
|
||||||
|
@ -196,11 +196,11 @@ class PSBaseParser(object):
|
||||||
self.fp.seek(pos)
|
self.fp.seek(pos)
|
||||||
# reset the status for nextline()
|
# reset the status for nextline()
|
||||||
self.bufpos = pos
|
self.bufpos = pos
|
||||||
self.buf = ''
|
self.buf = b''
|
||||||
self.charpos = 0
|
self.charpos = 0
|
||||||
# reset the status for nexttoken()
|
# reset the status for nexttoken()
|
||||||
self._parse1 = self._parse_main
|
self._parse1 = self._parse_main
|
||||||
self._curtoken = ''
|
self._curtoken = b''
|
||||||
self._curtokenpos = 0
|
self._curtokenpos = 0
|
||||||
self._tokens = []
|
self._tokens = []
|
||||||
return
|
return
|
||||||
|
@ -219,15 +219,15 @@ class PSBaseParser(object):
|
||||||
def nextline(self):
|
def nextline(self):
|
||||||
"""Fetches a next line that ends either with \\r or \\n.
|
"""Fetches a next line that ends either with \\r or \\n.
|
||||||
"""
|
"""
|
||||||
linebuf = ''
|
linebuf = b''
|
||||||
linepos = self.bufpos + self.charpos
|
linepos = self.bufpos + self.charpos
|
||||||
eol = False
|
eol = False
|
||||||
while 1:
|
while 1:
|
||||||
self.fillbuf()
|
self.fillbuf()
|
||||||
if eol:
|
if eol:
|
||||||
c = self.buf[self.charpos]
|
c = self.buf[self.charpos]
|
||||||
# handle '\r\n'
|
# handle b'\r\n'
|
||||||
if c == '\n':
|
if c == b'\n':
|
||||||
linebuf += c
|
linebuf += c
|
||||||
self.charpos += 1
|
self.charpos += 1
|
||||||
break
|
break
|
||||||
|
@ -235,7 +235,7 @@ class PSBaseParser(object):
|
||||||
if m:
|
if m:
|
||||||
linebuf += self.buf[self.charpos:m.end(0)]
|
linebuf += self.buf[self.charpos:m.end(0)]
|
||||||
self.charpos = m.end(0)
|
self.charpos = m.end(0)
|
||||||
if linebuf[-1] == '\r':
|
if linebuf[-1] == b'\r':
|
||||||
eol = True
|
eol = True
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
@ -253,7 +253,7 @@ class PSBaseParser(object):
|
||||||
"""
|
"""
|
||||||
self.fp.seek(0, 2)
|
self.fp.seek(0, 2)
|
||||||
pos = self.fp.tell()
|
pos = self.fp.tell()
|
||||||
buf = ''
|
buf = b''
|
||||||
while 0 < pos:
|
while 0 < pos:
|
||||||
prevpos = pos
|
prevpos = pos
|
||||||
pos = max(0, pos-self.BUFSIZ)
|
pos = max(0, pos-self.BUFSIZ)
|
||||||
|
@ -262,13 +262,13 @@ class PSBaseParser(object):
|
||||||
if not s:
|
if not s:
|
||||||
break
|
break
|
||||||
while 1:
|
while 1:
|
||||||
n = max(s.rfind('\r'), s.rfind('\n'))
|
n = max(s.rfind(b'\r'), s.rfind(b'\n'))
|
||||||
if n == -1:
|
if n == -1:
|
||||||
buf = s + buf
|
buf = s + buf
|
||||||
break
|
break
|
||||||
yield s[n:]+buf
|
yield s[n:]+buf
|
||||||
s = s[:n]
|
s = s[:n]
|
||||||
buf = ''
|
buf = b''
|
||||||
return
|
return
|
||||||
|
|
||||||
def _parse_main(self, s, i):
|
def _parse_main(self, s, i):
|
||||||
|
@ -278,19 +278,19 @@ class PSBaseParser(object):
|
||||||
j = m.start(0)
|
j = m.start(0)
|
||||||
c = s[j]
|
c = s[j]
|
||||||
self._curtokenpos = self.bufpos+j
|
self._curtokenpos = self.bufpos+j
|
||||||
if c == '%':
|
if c == b'%':
|
||||||
self._curtoken = '%'
|
self._curtoken = b'%'
|
||||||
self._parse1 = self._parse_comment
|
self._parse1 = self._parse_comment
|
||||||
return j+1
|
return j+1
|
||||||
elif c == '/':
|
elif c == b'/':
|
||||||
self._curtoken = ''
|
self._curtoken = b''
|
||||||
self._parse1 = self._parse_literal
|
self._parse1 = self._parse_literal
|
||||||
return j+1
|
return j+1
|
||||||
elif c in '-+' or c.isdigit():
|
elif c in b'-+' or c.isdigit():
|
||||||
self._curtoken = c
|
self._curtoken = c
|
||||||
self._parse1 = self._parse_number
|
self._parse1 = self._parse_number
|
||||||
return j+1
|
return j+1
|
||||||
elif c == '.':
|
elif c == b'.':
|
||||||
self._curtoken = c
|
self._curtoken = c
|
||||||
self._parse1 = self._parse_float
|
self._parse1 = self._parse_float
|
||||||
return j+1
|
return j+1
|
||||||
|
@ -298,17 +298,17 @@ class PSBaseParser(object):
|
||||||
self._curtoken = c
|
self._curtoken = c
|
||||||
self._parse1 = self._parse_keyword
|
self._parse1 = self._parse_keyword
|
||||||
return j+1
|
return j+1
|
||||||
elif c == '(':
|
elif c == b'(':
|
||||||
self._curtoken = ''
|
self._curtoken = b''
|
||||||
self.paren = 1
|
self.paren = 1
|
||||||
self._parse1 = self._parse_string
|
self._parse1 = self._parse_string
|
||||||
return j+1
|
return j+1
|
||||||
elif c == '<':
|
elif c == b'<':
|
||||||
self._curtoken = ''
|
self._curtoken = b''
|
||||||
self._parse1 = self._parse_wopen
|
self._parse1 = self._parse_wopen
|
||||||
return j+1
|
return j+1
|
||||||
elif c == '>':
|
elif c == b'>':
|
||||||
self._curtoken = ''
|
self._curtoken = b''
|
||||||
self._parse1 = self._parse_wclose
|
self._parse1 = self._parse_wclose
|
||||||
return j+1
|
return j+1
|
||||||
else:
|
else:
|
||||||
|
@ -339,11 +339,11 @@ class PSBaseParser(object):
|
||||||
j = m.start(0)
|
j = m.start(0)
|
||||||
self._curtoken += s[i:j]
|
self._curtoken += s[i:j]
|
||||||
c = s[j]
|
c = s[j]
|
||||||
if c == '#':
|
if c == b'#':
|
||||||
self.hex = ''
|
self.hex = b''
|
||||||
self._parse1 = self._parse_literal_hex
|
self._parse1 = self._parse_literal_hex
|
||||||
return j+1
|
return j+1
|
||||||
self._add_token(LIT(self._curtoken))
|
self._add_token(LIT(unicode(self._curtoken)))
|
||||||
self._parse1 = self._parse_main
|
self._parse1 = self._parse_main
|
||||||
return j
|
return j
|
||||||
|
|
||||||
|
@ -365,7 +365,7 @@ class PSBaseParser(object):
|
||||||
j = m.start(0)
|
j = m.start(0)
|
||||||
self._curtoken += s[i:j]
|
self._curtoken += s[i:j]
|
||||||
c = s[j]
|
c = s[j]
|
||||||
if c == '.':
|
if c == b'.':
|
||||||
self._curtoken += c
|
self._curtoken += c
|
||||||
self._parse1 = self._parse_float
|
self._parse1 = self._parse_float
|
||||||
return j+1
|
return j+1
|
||||||
|
@ -397,9 +397,9 @@ class PSBaseParser(object):
|
||||||
return len(s)
|
return len(s)
|
||||||
j = m.start(0)
|
j = m.start(0)
|
||||||
self._curtoken += s[i:j]
|
self._curtoken += s[i:j]
|
||||||
if self._curtoken == 'true':
|
if self._curtoken == b'true':
|
||||||
token = True
|
token = True
|
||||||
elif self._curtoken == 'false':
|
elif self._curtoken == b'false':
|
||||||
token = False
|
token = False
|
||||||
else:
|
else:
|
||||||
token = KWD(self._curtoken)
|
token = KWD(self._curtoken)
|
||||||
|
@ -415,20 +415,20 @@ class PSBaseParser(object):
|
||||||
j = m.start(0)
|
j = m.start(0)
|
||||||
self._curtoken += s[i:j]
|
self._curtoken += s[i:j]
|
||||||
c = s[j]
|
c = s[j]
|
||||||
if c == '\\':
|
if c == b'\\':
|
||||||
self.oct = ''
|
self.oct = b''
|
||||||
self._parse1 = self._parse_string_1
|
self._parse1 = self._parse_string_1
|
||||||
return j+1
|
return j+1
|
||||||
if c == '(':
|
if c == b'(':
|
||||||
self.paren += 1
|
self.paren += 1
|
||||||
self._curtoken += c
|
self._curtoken += c
|
||||||
return j+1
|
return j+1
|
||||||
if c == ')':
|
if c == b')':
|
||||||
self.paren -= 1
|
self.paren -= 1
|
||||||
if self.paren: # WTF, they said balanced parens need no special treatment.
|
if self.paren: # WTF, they said balanced parens need no special treatment.
|
||||||
self._curtoken += c
|
self._curtoken += c
|
||||||
return j+1
|
return j+1
|
||||||
self._add_token(self._curtoken)
|
self._add_token(str(self._curtoken))
|
||||||
self._parse1 = self._parse_main
|
self._parse1 = self._parse_main
|
||||||
return j+1
|
return j+1
|
||||||
|
|
||||||
|
@ -448,7 +448,7 @@ class PSBaseParser(object):
|
||||||
|
|
||||||
def _parse_wopen(self, s, i):
|
def _parse_wopen(self, s, i):
|
||||||
c = s[i]
|
c = s[i]
|
||||||
if c == '<':
|
if c == b'<':
|
||||||
self._add_token(KEYWORD_DICT_BEGIN)
|
self._add_token(KEYWORD_DICT_BEGIN)
|
||||||
self._parse1 = self._parse_main
|
self._parse1 = self._parse_main
|
||||||
i += 1
|
i += 1
|
||||||
|
@ -458,7 +458,7 @@ class PSBaseParser(object):
|
||||||
|
|
||||||
def _parse_wclose(self, s, i):
|
def _parse_wclose(self, s, i):
|
||||||
c = s[i]
|
c = s[i]
|
||||||
if c == '>':
|
if c == b'>':
|
||||||
self._add_token(KEYWORD_DICT_END)
|
self._add_token(KEYWORD_DICT_END)
|
||||||
i += 1
|
i += 1
|
||||||
self._parse1 = self._parse_main
|
self._parse1 = self._parse_main
|
||||||
|
@ -472,7 +472,7 @@ class PSBaseParser(object):
|
||||||
j = m.start(0)
|
j = m.start(0)
|
||||||
self._curtoken += s[i:j]
|
self._curtoken += s[i:j]
|
||||||
token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)),
|
token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)),
|
||||||
SPC.sub('', self._curtoken))
|
SPC.sub(b'', self._curtoken))
|
||||||
self._add_token(token)
|
self._add_token(token)
|
||||||
self._parse1 = self._parse_main
|
self._parse1 = self._parse_main
|
||||||
return j
|
return j
|
||||||
|
@ -616,7 +616,7 @@ import unittest
|
||||||
##
|
##
|
||||||
class TestPSBaseParser(unittest.TestCase):
|
class TestPSBaseParser(unittest.TestCase):
|
||||||
|
|
||||||
TESTDATA = r'''%!PS
|
TESTDATA = br'''%!PS
|
||||||
begin end
|
begin end
|
||||||
" @ #
|
" @ #
|
||||||
/a/BCD /Some_Name /foo#5f#xbaa
|
/a/BCD /Some_Name /foo#5f#xbaa
|
||||||
|
@ -637,18 +637,18 @@ func/a/b{(c)do*}def
|
||||||
'''
|
'''
|
||||||
|
|
||||||
TOKENS = [
|
TOKENS = [
|
||||||
(5, KWD('begin')), (11, KWD('end')), (16, KWD('"')), (19, KWD('@')),
|
(5, KWD(b'begin')), (11, KWD(b'end')), (16, KWD(b'"')), (19, KWD(b'@')),
|
||||||
(21, KWD('#')), (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')),
|
(21, KWD(b'#')), (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')),
|
||||||
(41, LIT('foo_xbaa')), (54, 0), (56, 1), (59, -2), (62, 0.5),
|
(41, LIT('foo_xbaa')), (54, 0), (56, 1), (59, -2), (62, 0.5),
|
||||||
(65, 1.234), (71, 'abc'), (77, ''), (80, 'abc ( def ) ghi'),
|
(65, 1.234), (71, b'abc'), (77, b''), (80, b'abc ( def ) ghi'),
|
||||||
(98, 'def \x00 4ghi'), (118, 'bach\\slask'), (132, 'foo\nbaa'),
|
(98, b'def \x00 4ghi'), (118, b'bach\\slask'), (132, b'foo\nbaa'),
|
||||||
(143, 'this % is not a comment.'), (170, 'foo\nbaa'), (180, 'foobaa'),
|
(143, b'this % is not a comment.'), (170, b'foo\nbaa'), (180, b'foobaa'),
|
||||||
(191, ''), (194, ' '), (199, '@@ '), (211, '\xab\xcd\x00\x124\x05'),
|
(191, b''), (194, b' '), (199, b'@@ '), (211, b'\xab\xcd\x00\x124\x05'),
|
||||||
(226, KWD('func')), (230, LIT('a')), (232, LIT('b')),
|
(226, KWD(b'func')), (230, LIT('a')), (232, LIT('b')),
|
||||||
(234, KWD('{')), (235, 'c'), (238, KWD('do*')), (241, KWD('}')),
|
(234, KWD(b'{')), (235, b'c'), (238, KWD(b'do*')), (241, KWD(b'}')),
|
||||||
(242, KWD('def')), (246, KWD('[')), (248, 1), (250, 'z'), (254, KWD('!')),
|
(242, KWD(b'def')), (246, KWD(b'[')), (248, 1), (250, b'z'), (254, KWD(b'!')),
|
||||||
(256, KWD(']')), (258, KWD('<<')), (261, LIT('foo')), (266, 'bar'),
|
(256, KWD(b']')), (258, KWD(b'<<')), (261, LIT('foo')), (266, b'bar'),
|
||||||
(272, KWD('>>'))
|
(272, KWD(b'>>'))
|
||||||
]
|
]
|
||||||
|
|
||||||
OBJS = [
|
OBJS = [
|
||||||
|
|
|
@ -898,7 +898,7 @@ def rijndaelEncrypt(rk, nrounds, plaintext):
|
||||||
Te3[(t2 ) & 0xff] ^
|
Te3[(t2 ) & 0xff] ^
|
||||||
rk[p+3])
|
rk[p+3])
|
||||||
|
|
||||||
ciphertext = ''
|
ciphertext = b''
|
||||||
|
|
||||||
# apply last round and
|
# apply last round and
|
||||||
# map cipher state to byte array block:
|
# map cipher state to byte array block:
|
||||||
|
@ -1001,7 +1001,7 @@ def rijndaelDecrypt(rk, nrounds, ciphertext):
|
||||||
Td3[(t0 ) & 0xff] ^
|
Td3[(t0 ) & 0xff] ^
|
||||||
rk[p+3])
|
rk[p+3])
|
||||||
|
|
||||||
plaintext = ''
|
plaintext = b''
|
||||||
|
|
||||||
# apply last round and
|
# apply last round and
|
||||||
# map cipher state to byte array block:
|
# map cipher state to byte array block:
|
||||||
|
@ -1042,8 +1042,8 @@ def rijndaelDecrypt(rk, nrounds, ciphertext):
|
||||||
class RijndaelDecryptor(object):
|
class RijndaelDecryptor(object):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
>>> key = '00010203050607080a0b0c0d0f101112'.decode('hex')
|
>>> key = b'00010203050607080a0b0c0d0f101112'.decode('hex')
|
||||||
>>> ciphertext = 'd8f532538289ef7d06b506a4fd5be9c9'.decode('hex')
|
>>> ciphertext = b'd8f532538289ef7d06b506a4fd5be9c9'.decode('hex')
|
||||||
>>> RijndaelDecryptor(key, 128).decrypt(ciphertext).encode('hex')
|
>>> RijndaelDecryptor(key, 128).decrypt(ciphertext).encode('hex')
|
||||||
'506812a45f08c889b97f5980038b8359'
|
'506812a45f08c889b97f5980038b8359'
|
||||||
"""
|
"""
|
||||||
|
@ -1064,8 +1064,8 @@ class RijndaelDecryptor(object):
|
||||||
class RijndaelEncryptor(object):
|
class RijndaelEncryptor(object):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
>>> key = '00010203050607080a0b0c0d0f101112'.decode('hex')
|
>>> key = b'00010203050607080a0b0c0d0f101112'.decode('hex')
|
||||||
>>> plaintext = '506812a45f08c889b97f5980038b8359'.decode('hex')
|
>>> plaintext = b'506812a45f08c889b97f5980038b8359'.decode('hex')
|
||||||
>>> RijndaelEncryptor(key, 128).encrypt(plaintext).encode('hex')
|
>>> RijndaelEncryptor(key, 128).encrypt(plaintext).encode('hex')
|
||||||
'd8f532538289ef7d06b506a4fd5be9c9'
|
'd8f532538289ef7d06b506a4fd5be9c9'
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -19,28 +19,28 @@ def rldecode(data):
|
||||||
129 to 255, the following single byte is to be copied 257 - length
|
129 to 255, the following single byte is to be copied 257 - length
|
||||||
(2 to 128) times during decompression. A length value of 128
|
(2 to 128) times during decompression. A length value of 128
|
||||||
denotes EOD.
|
denotes EOD.
|
||||||
>>> s = "\x05123456\xfa7\x04abcde\x80junk"
|
>>> s = b'\x05123456\xfa7\x04abcde\x80junk'
|
||||||
>>> rldecode(s)
|
>>> rldecode(s)
|
||||||
'1234567777777abcde'
|
'1234567777777abcde'
|
||||||
"""
|
"""
|
||||||
decoded = []
|
decoded = []
|
||||||
i = 0
|
i = 0
|
||||||
while i < len(data):
|
while i < len(data):
|
||||||
#print "data[%d]=:%d:" % (i,ord(data[i]))
|
#print 'data[%d]=:%d:' % (i,ord(data[i]))
|
||||||
length = ord(data[i])
|
length = ord(data[i])
|
||||||
if length == 128:
|
if length == 128:
|
||||||
break
|
break
|
||||||
if length >= 0 and length < 128:
|
if length >= 0 and length < 128:
|
||||||
run = data[i+1:(i+1)+(length+1)]
|
run = data[i+1:(i+1)+(length+1)]
|
||||||
#print "length=%d, run=%s" % (length+1,run)
|
#print 'length=%d, run=%s' % (length+1,run)
|
||||||
decoded.append(run)
|
decoded.append(run)
|
||||||
i = (i+1) + (length+1)
|
i = (i+1) + (length+1)
|
||||||
if length > 128:
|
if length > 128:
|
||||||
run = data[i+1]*(257-length)
|
run = data[i+1]*(257-length)
|
||||||
#print "length=%d, run=%s" % (257-length,run)
|
#print 'length=%d, run=%s' % (257-length,run)
|
||||||
decoded.append(run)
|
decoded.append(run)
|
||||||
i = (i+1) + 1
|
i = (i+1) + 1
|
||||||
return ''.join(decoded)
|
return b''.join(decoded)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -14,28 +14,28 @@ def apply_png_predictor(pred, colors, columns, bitspercomponent, data):
|
||||||
raise ValueError(bitspercomponent)
|
raise ValueError(bitspercomponent)
|
||||||
nbytes = colors*columns*bitspercomponent//8
|
nbytes = colors*columns*bitspercomponent//8
|
||||||
i = 0
|
i = 0
|
||||||
buf = ''
|
buf = b''
|
||||||
line0 = '\x00' * columns
|
line0 = b'\x00' * columns
|
||||||
for i in xrange(0, len(data), nbytes+1):
|
for i in xrange(0, len(data), nbytes+1):
|
||||||
ft = data[i]
|
ft = data[i]
|
||||||
i += 1
|
i += 1
|
||||||
line1 = data[i:i+nbytes]
|
line1 = data[i:i+nbytes]
|
||||||
line2 = ''
|
line2 = b''
|
||||||
if ft == '\x00':
|
if ft == b'\x00':
|
||||||
# PNG none
|
# PNG none
|
||||||
line2 += line1
|
line2 += line1
|
||||||
elif ft == '\x01':
|
elif ft == b'\x01':
|
||||||
# PNG sub (UNTESTED)
|
# PNG sub (UNTESTED)
|
||||||
c = 0
|
c = 0
|
||||||
for b in line1:
|
for b in line1:
|
||||||
c = (c+ord(b)) & 255
|
c = (c+ord(b)) & 255
|
||||||
line2 += chr(c)
|
line2 += chr(c)
|
||||||
elif ft == '\x02':
|
elif ft == b'\x02':
|
||||||
# PNG up
|
# PNG up
|
||||||
for (a, b) in zip(line0, line1):
|
for (a, b) in zip(line0, line1):
|
||||||
c = (ord(a)+ord(b)) & 255
|
c = (ord(a)+ord(b)) & 255
|
||||||
line2 += chr(c)
|
line2 += chr(c)
|
||||||
elif ft == '\x03':
|
elif ft == b'\x03':
|
||||||
# PNG average (UNTESTED)
|
# PNG average (UNTESTED)
|
||||||
c = 0
|
c = 0
|
||||||
for (a, b) in zip(line0, line1):
|
for (a, b) in zip(line0, line1):
|
||||||
|
@ -176,7 +176,7 @@ def nunpack(s, default=0):
|
||||||
elif l == 2:
|
elif l == 2:
|
||||||
return struct.unpack('>H', s)[0]
|
return struct.unpack('>H', s)[0]
|
||||||
elif l == 3:
|
elif l == 3:
|
||||||
return struct.unpack('>L', '\x00'+s)[0]
|
return struct.unpack('>L', b'\x00'+s)[0]
|
||||||
elif l == 4:
|
elif l == 4:
|
||||||
return struct.unpack('>L', s)[0]
|
return struct.unpack('>L', s)[0]
|
||||||
else:
|
else:
|
||||||
|
@ -222,7 +222,7 @@ PDFDocEncoding = ''.join(unichr(x) for x in (
|
||||||
|
|
||||||
def decode_text(s):
|
def decode_text(s):
|
||||||
"""Decodes a PDFDocEncoding string to Unicode."""
|
"""Decodes a PDFDocEncoding string to Unicode."""
|
||||||
if s.startswith('\xfe\xff'):
|
if s.startswith(b'\xfe\xff'):
|
||||||
return unicode(s[2:], 'utf-16be', 'ignore')
|
return unicode(s[2:], 'utf-16be', 'ignore')
|
||||||
else:
|
else:
|
||||||
return ''.join(PDFDocEncoding[ord(c)] for c in s)
|
return ''.join(PDFDocEncoding[ord(c)] for c in s)
|
||||||
|
|
Loading…
Reference in New Issue