AsciiHexDecode filter patch incorporated. Thanks to Troy Bollinger.
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@86 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
d11012d9f7
commit
f8510edffc
|
@ -1,13 +1,12 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
#
|
#
|
||||||
# ASCII85 decoder (Adobe version) implementation
|
# ASCII85/ASCIIHex decoder (Adobe version) implementation
|
||||||
# * public domain *
|
# * public domain *
|
||||||
#
|
#
|
||||||
|
|
||||||
import struct
|
|
||||||
|
|
||||||
# ascii85decode(data)
|
# ascii85decode(data)
|
||||||
def ascii85decode(data):
|
def ascii85decode(data):
|
||||||
|
import struct
|
||||||
n = b = 0
|
n = b = 0
|
||||||
out = ''
|
out = ''
|
||||||
for c in data:
|
for c in data:
|
||||||
|
@ -28,6 +27,34 @@ def ascii85decode(data):
|
||||||
break
|
break
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
# asciihexdecode(data)
|
||||||
|
def asciihexdecode(data):
    """
    ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
    For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
    ASCIIHexDecode filter produces one byte of binary data. All white-space
    characters are ignored. A right angle bracket character (>) indicates
    EOD. Any other characters will cause an error. If the filter encounters
    the EOD marker after reading an odd number of hexadecimal digits, it
    will behave as if a 0 followed the last digit.

    This implementation is lenient: characters that are neither hex digits
    nor the EOD marker are skipped instead of being reported as an error.
    Everything after the first '>' is discarded, and an odd number of
    digits is padded with a trailing '0' whether or not '>' is present.

    data is the encoded text; the return value is a string holding one
    character per decoded byte.

    >>> asciihexdecode("61 62 2e6364 65")
    'ab.cde'
    >>> asciihexdecode("61 62 2e6364 657>")
    'ab.cdep'
    >>> asciihexdecode("7>")
    'p'
    >>> asciihexdecode("6 1")
    'a'
    >>> asciihexdecode("61>62")
    'a'
    """
    import re
    # '>' is the EOD marker: nothing after the first one belongs to the stream.
    encoded = data.split('>', 1)[0]
    # Remove white space (and any other stray character) *before* pairing, so
    # that digits separated by white space still form a single byte.
    digits = re.sub(r'[^0-9a-fA-F]', '', encoded)
    # An odd digit count is completed with an implicit trailing zero.
    if len(digits) % 2:
        digits += '0'
    return ''.join(chr(int(digits[i:i + 2], 16))
                   for i in range(0, len(digits), 2))
|
||||||
|
|
||||||
|
|
||||||
# test
|
# test
|
||||||
# sample taken from: http://en.wikipedia.org/w/index.php?title=Ascii85
|
# sample taken from: http://en.wikipedia.org/w/index.php?title=Ascii85
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -44,4 +71,7 @@ if __name__ == '__main__':
|
||||||
'continued and indefatigable generation of knowledge, exceeds the short vehemence of '\
|
'continued and indefatigable generation of knowledge, exceeds the short vehemence of '\
|
||||||
'any carnal pleasure.'
|
'any carnal pleasure.'
|
||||||
assert ascii85decode(orig) == data
|
assert ascii85decode(orig) == data
|
||||||
print 'test succeeded'
|
print 'ascii85decode test succeeded'
|
||||||
|
|
||||||
|
import doctest
|
||||||
|
doctest.testmod()
|
||||||
|
|
|
@ -10,6 +10,7 @@ LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
|
||||||
LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl'))
|
LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl'))
|
||||||
LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW'))
|
LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW'))
|
||||||
LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85'))
|
LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85'))
|
||||||
|
LITERALS_ASCIIHEX_DECODE = (PSLiteralTable.intern('ASCIIHexDecode'), PSLiteralTable.intern('AHx'))
|
||||||
|
|
||||||
|
|
||||||
## PDF Objects
|
## PDF Objects
|
||||||
|
@ -199,6 +200,9 @@ class PDFStream(PDFObject):
|
||||||
elif f in LITERALS_ASCII85_DECODE:
|
elif f in LITERALS_ASCII85_DECODE:
|
||||||
import ascii85
|
import ascii85
|
||||||
data = ascii85.ascii85decode(data)
|
data = ascii85.ascii85decode(data)
|
||||||
|
elif f in LITERALS_ASCIIHEX_DECODE:
|
||||||
|
import ascii85
|
||||||
|
data = ascii85.asciihexdecode(data)
|
||||||
elif f == LITERAL_CRYPT:
|
elif f == LITERAL_CRYPT:
|
||||||
raise PDFNotImplementedError('/Crypt filter is unsupported')
|
raise PDFNotImplementedError('/Crypt filter is unsupported')
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -139,6 +139,7 @@ def dumppdf(outfp, fname, objids, pagenos, password='',
|
||||||
if (not objids) and (not pagenos) and (not dumpall):
|
if (not objids) and (not pagenos) and (not dumpall):
|
||||||
dumptrailers(outfp, doc)
|
dumptrailers(outfp, doc)
|
||||||
fp.close()
|
fp.close()
|
||||||
|
if codec not in ('raw','binary'):
|
||||||
outfp.write('\n')
|
outfp.write('\n')
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue