73 lines
2.0 KiB
Python
73 lines
2.0 KiB
Python
""" Python implementation of ASCII85/ASCIIHex decoder (Adobe version).
|
|
|
|
This code is in the public domain.
|
|
|
|
"""
|
|
|
|
import re
|
|
import struct
|
|
|
|
|
|
# ascii85decode(data)
|
|
def ascii85decode(data: bytes) -> bytes:
    """Decode ASCII85 data as produced by Adobe's encoder.

    In ASCII85 encoding, every four bytes are encoded with five ASCII
    letters, using 85 different types of characters (as 256**4 < 85**5).
    When the length of the original bytes is not a multiple of 4, a special
    rule is used for round up.

    Adobe's ASCII85 implementation is slightly different from the
    original in how it handles the last characters.

    Decoding rules implemented here:

    * Bytes from ``!`` to ``u`` are base-85 digits; five of them yield four
      output bytes (big-endian).
    * ``z`` is shorthand for four zero bytes and is only valid between
      groups.
    * ``~`` ends the data. A pending partial group of ``n`` digits is padded
      with the highest digit (``u``) and contributes ``n - 1`` bytes.
    * Every other byte (e.g. whitespace) is skipped.
    * If the input ends without ``~``, a pending partial group is dropped.

    Args:
        data: The ASCII85-encoded payload.

    Returns:
        The decoded bytes.

    Raises:
        AssertionError: If ``z`` appears in the middle of a five-digit group.
        struct.error: If a group decodes to a value that does not fit in an
            unsigned 32-bit integer.
    """
    first_digit, last_digit = ord("!"), ord("u")
    zero_marker, end_marker = ord("z"), ord("~")

    digits_seen = 0  # base-85 digits collected for the current group
    group_value = 0  # numeric value of the current group so far
    # A bytearray keeps accumulation linear; repeated ``bytes +=`` is quadratic.
    decoded = bytearray()

    for code in data:
        if first_digit <= code <= last_digit:
            digits_seen += 1
            group_value = group_value * 85 + (code - first_digit)
            if digits_seen == 5:
                decoded += struct.pack(">L", group_value)
                digits_seen = group_value = 0
        elif code == zero_marker:
            # Raised explicitly (instead of ``assert``) so the check still
            # runs under ``python -O``; the exception type is unchanged.
            if digits_seen != 0:
                raise AssertionError(str(digits_seen))
            decoded += b"\0\0\0\0"
        elif code == end_marker:
            if digits_seen:
                # Pad the short group with the maximum digit, then keep only
                # the bytes that the real digits actually determine.
                for _ in range(5 - digits_seen):
                    group_value = group_value * 85 + 84
                decoded += struct.pack(">L", group_value)[: digits_seen - 1]
            break

    return bytes(decoded)
|
|
|
|
|
|
# asciihexdecode(data)
|
|
# One complete pair of hexadecimal digits (case-insensitive).
hex_re = re.compile(rb"([a-f\d]{2})", re.IGNORECASE)
# A lone hexadecimal digit left over after whole pairs / whitespace,
# optionally followed by whitespace and ``>`` up to the end of the data.
trail_re = re.compile(rb"^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$", re.IGNORECASE)


def asciihexdecode(data: bytes) -> bytes:
    """Decode ASCIIHex data.

    ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
    For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
    ASCIIHexDecode filter produces one byte of binary data. All white-space
    characters are ignored. A right angle bracket character (>) indicates
    EOD. If the filter encounters the EOD marker after reading an odd number
    of hexadecimal digits, it will behave as if a 0 followed the last digit.

    This implementation is lenient: characters that are neither hexadecimal
    digits nor white-space are silently skipped instead of raising an error.

    Args:
        data: The ASCIIHex-encoded payload.

    Returns:
        The decoded bytes.
    """
    # Honour the EOD marker: nothing after the first ``>`` belongs to the data.
    payload = data.partition(b">")[0]
    # Drop white-space up front so that digits separated by white-space
    # (e.g. b"6 1") are still paired correctly.
    payload = b"".join(payload.split())

    decoded = bytearray(int(pair, 16) for pair in hex_re.findall(payload))

    # An odd number of digits: treat the dangling digit as if "0" followed it.
    dangling = trail_re.search(payload)
    if dangling:
        decoded.append(int(dangling.group(1) + b"0", 16))
    return bytes(decoded)
|