Added RunLengthDecode filter by Troy Bollinger.
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@167 1aa58f4a-7d42-0410-adbc-911cccaed67cpull/1/head
parent
6590ad42f5
commit
7093bdbdfa
|
@ -3,6 +3,7 @@ import sys
|
||||||
import zlib
|
import zlib
|
||||||
from lzw import lzwdecode
|
from lzw import lzwdecode
|
||||||
from ascii85 import ascii85decode, asciihexdecode
|
from ascii85 import ascii85decode, asciihexdecode
|
||||||
|
from runlength import rldecode
|
||||||
from psparser import PSException, PSObject
|
from psparser import PSException, PSObject
|
||||||
from psparser import LIT, KWD, STRICT
|
from psparser import LIT, KWD, STRICT
|
||||||
|
|
||||||
|
@ -11,6 +12,7 @@ LITERALS_FLATE_DECODE = (LIT('FlateDecode'), LIT('Fl'))
|
||||||
LITERALS_LZW_DECODE = (LIT('LZWDecode'), LIT('LZW'))
|
LITERALS_LZW_DECODE = (LIT('LZWDecode'), LIT('LZW'))
|
||||||
LITERALS_ASCII85_DECODE = (LIT('ASCII85Decode'), LIT('A85'))
|
LITERALS_ASCII85_DECODE = (LIT('ASCII85Decode'), LIT('A85'))
|
||||||
LITERALS_ASCIIHEX_DECODE = (LIT('ASCIIHexDecode'), LIT('AHx'))
|
LITERALS_ASCIIHEX_DECODE = (LIT('ASCIIHexDecode'), LIT('AHx'))
|
||||||
|
LITERALS_RUNLENGTH_DECODE = (LIT('RunLengthDecode'), LIT('RL'))
|
||||||
|
|
||||||
|
|
||||||
## PDF Objects
|
## PDF Objects
|
||||||
|
@ -196,7 +198,10 @@ class PDFStream(PDFObject):
|
||||||
data = ascii85decode(data)
|
data = ascii85decode(data)
|
||||||
elif f in LITERALS_ASCIIHEX_DECODE:
|
elif f in LITERALS_ASCIIHEX_DECODE:
|
||||||
data = asciihexdecode(data)
|
data = asciihexdecode(data)
|
||||||
|
elif f in LITERALS_RUNLENGTH_DECODE:
|
||||||
|
data = rldecode(data)
|
||||||
elif f == LITERAL_CRYPT:
|
elif f == LITERAL_CRYPT:
|
||||||
|
# not yet..
|
||||||
raise PDFNotImplementedError('/Crypt filter is unsupported')
|
raise PDFNotImplementedError('/Crypt filter is unsupported')
|
||||||
else:
|
else:
|
||||||
raise PDFNotImplementedError('Unsupported filter: %r' % f)
|
raise PDFNotImplementedError('Unsupported filter: %r' % f)
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# RunLength decoder (Adobe version) implementation based on PDF Reference
|
||||||
|
# version 1.4 section 3.3.4.
|
||||||
|
#
|
||||||
|
# * public domain *
|
||||||
|
#
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def rldecode(data):
    r"""
    Decode RunLength-encoded data (Adobe version).

    Implementation based on PDF Reference version 1.4 section 3.3.4:
        The RunLengthDecode filter decodes data that has been encoded in a
        simple byte-oriented format based on run length. The encoded data
        is a sequence of runs, where each run consists of a length byte
        followed by 1 to 128 bytes of data. If the length byte is in the
        range 0 to 127, the following length + 1 (1 to 128) bytes are
        copied literally during decompression. If length is in the range
        129 to 255, the following single byte is to be copied 257 - length
        (2 to 128) times during decompression. A length value of 128
        denotes EOD.

    data is the encoded sequence, either a str or a bytes object; the
    decoded result is returned as the same type. Anything after the EOD
    marker is ignored. Truncated input is tolerated: a literal run that is
    cut short yields the bytes that are present, and a repeat run whose
    data byte is missing yields nothing.

    >>> s = "\x05123456\xfa7\x04abcde\x80junk"
    >>> rldecode(s)
    '1234567777777abcde'
    """
    decoded = []
    size = len(data)
    i = 0
    while i < size:
        # Slice instead of indexing so that ord() receives a length-1
        # sequence for both str and bytes input.
        length = ord(data[i:i+1])
        if length == 128:
            # EOD marker: stop and ignore any trailing data.
            break
        if length < 128:
            # Literal run: copy the next length + 1 bytes verbatim.
            end = i + 2 + length
            decoded.append(data[i+1:end])
            i = end
        else:
            # Repeat run: the next single byte is copied 257 - length times.
            # Slicing yields an empty run (instead of raising IndexError)
            # when the input ends right after the length byte.
            decoded.append(data[i+1:i+2] * (257 - length))
            i += 2
    # Join with an empty sequence of the input's own type.
    return data[:0].join(decoded)
|
||||||
|
|
||||||
|
|
||||||
|
# When executed directly as a script, run the doctest examples embedded in
# this module's docstrings.
if __name__ == '__main__':
    from doctest import testmod
    testmod()
|
Loading…
Reference in New Issue