From 7093bdbdfa1537f18bda7b7cffbadec987ff889b Mon Sep 17 00:00:00 2001
From: "yusuke.shinyama.dummy"
 <yusuke.shinyama.dummy@1aa58f4a-7d42-0410-adbc-911cccaed67c>
Date: Thu, 24 Dec 2009 11:51:43 +0000
Subject: [PATCH] Added RunLengthDecode filter by Troy Bollinger.

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@167 1aa58f4a-7d42-0410-adbc-911cccaed67c
---
 pdfminer/pdftypes.py  |  5 +++++
 pdfminer/runlength.py | 50 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 pdfminer/runlength.py

diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py
index 4abd307..b102b1d 100644
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@@ -3,6 +3,7 @@ import sys
 import zlib
 from lzw import lzwdecode
 from ascii85 import ascii85decode, asciihexdecode
+from runlength import rldecode
 from psparser import PSException, PSObject
 from psparser import LIT, KWD, STRICT
 
@@ -11,6 +12,7 @@ LITERALS_FLATE_DECODE = (LIT('FlateDecode'), LIT('Fl'))
 LITERALS_LZW_DECODE = (LIT('LZWDecode'), LIT('LZW'))
 LITERALS_ASCII85_DECODE = (LIT('ASCII85Decode'), LIT('A85'))
 LITERALS_ASCIIHEX_DECODE = (LIT('ASCIIHexDecode'), LIT('AHx'))
+LITERALS_RUNLENGTH_DECODE = (LIT('RunLengthDecode'), LIT('RL'))
 
 
 ##  PDF Objects
@@ -196,7 +198,10 @@ class PDFStream(PDFObject):
                 data = ascii85decode(data)
             elif f in LITERALS_ASCIIHEX_DECODE:
                 data = asciihexdecode(data)
+            elif f in LITERALS_RUNLENGTH_DECODE:
+                data = rldecode(data)
             elif f == LITERAL_CRYPT:
+                # not yet..
                 raise PDFNotImplementedError('/Crypt filter is unsupported')
             else:
                 raise PDFNotImplementedError('Unsupported filter: %r' % f)
diff --git a/pdfminer/runlength.py b/pdfminer/runlength.py
new file mode 100644
index 0000000..e17389e
--- /dev/null
+++ b/pdfminer/runlength.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+#
+# RunLength decoder (Adobe version) implementation based on PDF Reference
+# version 1.4 section 3.3.4.
+#
+#  * public domain *
+#
+
+import sys
+
+def rldecode(data):
+    """
+    Decode the RunLength-encoded string `data` and return the decoded string.
+    Implemented after PDF Reference version 1.4 section 3.3.4, which says:
+        The RunLengthDecode filter decodes data that has been encoded in a
+        simple byte-oriented format based on run length. The encoded data
+        is a sequence of runs, where each run consists of a length byte
+        followed by 1 to 128 bytes of data. If the length byte is in the
+        range 0 to 127, the following length + 1 (1 to 128) bytes are
+        copied literally during decompression. If length is in the range
+        129 to 255, the following single byte is to be copied 257 - length
+        (2 to 128) times during decompression. A length value of 128
+        denotes EOD.
+    >>> s = "\x05123456\xfa7\x04abcde\x80junk"
+    >>> rldecode(s)
+    '1234567777777abcde'
+    """
+    decoded = []
+    i=0
+    while i < len(data):
+        # data[i] is the length byte of the next run; 128 (EOD) stops decoding.
+        length = ord(data[i])
+        if length == 128:
+            break
+        if length >= 0 and length < 128:
+            run = data[i+1:(i+1)+(length+1)]
+            # Literal run of length+1 bytes; the slice is shorter if data ends early.
+            decoded.append(run)
+            i = (i+1) + (length+1)
+        if length > 128:
+            run = data[i+1]*(257-length)
+            # NOTE(review): data[i+1] raises IndexError if data ends at the length byte.
+            decoded.append(run)
+            i = (i+1) + 1
+    return ''.join(decoded)
+
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()  # self-test: runs the example in rldecode's docstring