2013-10-17 14:05:27 +00:00
|
|
|
#!/usr/bin/env python
|
2009-11-04 11:28:32 +00:00
|
|
|
|
|
|
|
""" Python implementation of ASCII85/ASCIIHex decoder (Adobe version).
|
|
|
|
|
|
|
|
This code is in the public domain.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
import re
|
|
|
|
import struct
|
2008-08-30 07:40:52 +00:00
|
|
|
|
2013-11-07 08:35:04 +00:00
|
|
|
|
2008-08-30 07:40:52 +00:00
|
|
|
# ascii85decode(data)
|
|
|
|
def ascii85decode(data):
    """
    Decode ASCII85-encoded data (Adobe variant) and return the raw bytes.

    In ASCII85 encoding, every four bytes are encoded with five ASCII
    letters, using 85 different types of characters (as 256**4 < 85**5).
    When the length of the original bytes is not a multiple of 4, a special
    rule is used for round up.

    Adobe's ASCII85 implementation is slightly different from the original
    in how it handles the last characters: a '~' marks the end of the data,
    and a trailing group of n characters (n < 5) yields n-1 bytes.

    Decoding rules implemented here:

    * characters '!' through 'u' are base-85 digits;
    * 'z' stands for four zero bytes and is only valid between groups;
    * '~' terminates decoding; anything after it is ignored;
    * every other character (whitespace, '>', ...) is skipped;
    * an incomplete group that is not followed by '~' is discarded.

    ``data`` is normally a byte string.  A text string is accepted as well
    and is reduced to its ASCII characters first (non-ASCII characters
    would be skipped by the decoder anyway).

    Raises AssertionError if 'z' appears inside a group, and struct.error
    if a group decodes to a value that does not fit in 32 bits.

    The sample strings are taken from:
      http://en.wikipedia.org/w/index.php?title=Ascii85

    >>> ascii85decode(b'9jqo^BlbD-BleB1DJ+*+F(f,q') == b'Man is distinguished'
    True
    >>> ascii85decode(b'E,9)oF*2M7/c~>') == b'pleasure.'
    True
    """
    if not isinstance(data, (bytes, bytearray)):
        data = data.encode('ascii', 'ignore')

    chunks = []
    n = b = 0
    # bytearray() yields integer code points on both Python 2 and Python 3,
    # whereas iterating bytes yields 1-char strings on 2 and ints on 3.
    for c in bytearray(data):
        if 33 <= c <= 117:  # '!' .. 'u': one base-85 digit
            n += 1
            b = b * 85 + (c - 33)
            if n == 5:
                chunks.append(struct.pack('>L', b))
                n = b = 0
        elif c == 122:  # 'z': shorthand for an all-zero group
            assert n == 0, "'z' inside an ASCII85 group"
            chunks.append(b'\0\0\0\0')
        elif c == 126:  # '~': end-of-data marker
            if n:
                # Pad the short group with the highest digit ('u' == 84)
                # and keep only the bytes that carry real data.
                for _ in range(5 - n):
                    b = b * 85 + 84
                chunks.append(struct.pack('>L', b)[:n - 1])
            break
    return b''.join(chunks)
|
2008-08-30 07:40:52 +00:00
|
|
|
|
2009-04-08 10:55:01 +00:00
|
|
|
# asciihexdecode(data)
|
2009-11-04 11:28:32 +00:00
|
|
|
# Byte patterns, so they can be applied to byte strings on Python 3 as well.
# hex_re matches one pair of adjacent hex digits; trail_re captures a single
# left-over digit at the end of data that otherwise consists only of digit
# pairs and whitespace (optionally followed by whitespace and '>').
hex_re = re.compile(br'([a-f\d]{2})', re.IGNORECASE)
trail_re = re.compile(br'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)


def asciihexdecode(data):
    """
    ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1

    For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
    ASCIIHexDecode filter produces one byte of binary data. All white-space
    characters are ignored. A right angle bracket character (>) indicates
    EOD. Any other characters will cause an error. If the filter encounters
    the EOD marker after reading an odd number of hexadecimal digits, it
    will behave as if a 0 followed the last digit.

    NOTE: this implementation is more lenient than the specification text
    above. It collects every pair of *adjacent* hex digits found anywhere
    in the data, silently skips all other characters, and does not join
    two digits that are separated by whitespace.

    ``data`` is normally a byte string; a text string is accepted too and
    is converted to ASCII first. The result is always a byte string.

    >>> asciihexdecode(b'61 62 2e6364 65') == b'ab.cde'
    True
    >>> asciihexdecode(b'61 62 2e6364 657>') == b'ab.cdep'
    True
    >>> asciihexdecode(b'7>') == b'p'
    True
    """
    if isinstance(data, bytearray):
        data = bytes(data)
    elif not isinstance(data, bytes):
        # 'replace' keeps character positions intact, so dropping a
        # non-ASCII character can never glue two hex digits together.
        data = data.encode('ascii', 'replace')

    out = bytearray(int(pair, 16) for pair in hex_re.findall(data))
    m = trail_re.search(data)
    if m:
        # Odd number of digits: behave as if a 0 followed the last one.
        out.append(int(m.group(1) + b'0', 16))
    return bytes(out)
|
2009-04-08 10:55:01 +00:00
|
|
|
|
|
|
|
|
2008-08-30 07:40:52 +00:00
|
|
|
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod
    testmod()
|