Merged.

2013-11-07 19:50:41 +09:00 · 2013-11-07 19:50:41 +09:00 · 2b56b2eedf
parent 76b9275853 2caa5edc25
commit 2b56b2eedf
28 changed files with 1484 additions and 1216 deletions
--- a/pdfminer/init.py
+++ b/pdfminer/init.py
@ -1,4 +1,5 @@
 #!/usr/bin/env python
 __version__ = '20131022'
-if __name__ == '__main__': print __version__
+if __name__ == '__main__':
    print __version__
--- a/pdfminer/arcfour.py
+++ b/pdfminer/arcfour.py
@ -6,6 +6,7 @@ This code is in the public domain.
 """
 ##  Arcfour
 ##
 class Arcfour(object):
--- a/pdfminer/ascii85.py
+++ b/pdfminer/ascii85.py
@ -9,6 +9,7 @@ This code is in the public domain.
 import re
 import struct
 # ascii85decode(data)
 def ascii85decode(data):
    """
@ -16,13 +17,13 @@ def ascii85decode(data):
    letters, using 85 different types of characters (as 256**4 < 85**5).
    When the length of the original bytes is not a multiple of 4, a special
    rule is used for round up.
-    
+
    The Adobe's ASCII85 implementation is slightly different from
    its original in handling the last characters.
-    
+
    The sample string is taken from:
      http://en.wikipedia.org/w/index.php?title=Ascii85
-    
+
    >>> ascii85decode('9jqo^BlbD-BleB1DJ+*+F(f,q')
    'Man is distinguished'
    >>> ascii85decode('E,9)oF*2M7/c~>')
@ -35,7 +36,7 @@ def ascii85decode(data):
            n += 1
            b = b*85+(ord(c)-33)
            if n == 5:
-                out += struct.pack('>L',b)
+                out += struct.pack('>L', b)
                n = b = 0
        elif c == 'z':
            assert n == 0
@ -44,13 +45,15 @@ def ascii85decode(data):
            if n:
                for _ in range(5-n):
                    b = b*85+84
-                out += struct.pack('>L',b)[:n-1]
+                out += struct.pack('>L', b)[:n-1]
            break
    return out
 # asciihexdecode(data)
 hex_re = re.compile(r'([a-f\d]{2})', re.IGNORECASE)
 trail_re = re.compile(r'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
 def asciihexdecode(data):
    """
    ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
@ -60,7 +63,7 @@ def asciihexdecode(data):
    EOD. Any other characters will cause an error. If the filter encounters
    the EOD marker after reading an odd number of hexadecimal digits, it
    will behave as if a 0 followed the last digit.
-    
+
    >>> asciihexdecode('61 62 2e6364   65')
    'ab.cde'
    >>> asciihexdecode('61 62 2e6364   657>')
--- a/pdfminer/ccitt.py
+++ b/pdfminer/ccitt.py
@ -29,7 +29,7 @@ class BitParser(object):
        for i in xrange(len(bits)):
            if 0 < i:
                if p[b] is None:
-                    p[b] = [None,None]
+                    p[b] = [None, None]
                p = p[b]
            if bits[i] == '1':
                b = 1
@ -41,7 +41,7 @@ class BitParser(object):
    def feedbytes(self, data):
        for c in data:
            b = ord(c)
-            for m in (128,64,32,16,8,4,2,1):
+            for m in (128, 64, 32, 16, 8, 4, 2, 1):
                self._parse_bit(b & m)
        return
@ -62,7 +62,7 @@ class BitParser(object):
 ##
 class CCITTG4Parser(BitParser):
-    MODE = [None,None]
+    MODE = [None, None]
    BitParser.add(MODE, 0,   '1')
    BitParser.add(MODE, +1,  '011')
    BitParser.add(MODE, -1,  '010')
@ -82,7 +82,7 @@ class CCITTG4Parser(BitParser):
    BitParser.add(MODE, 'x7', '0000001110')
    BitParser.add(MODE, 'e', '000000000001000000000001')
-    WHITE = [None,None]
+    WHITE = [None, None]
    BitParser.add(WHITE, 0   , '00110101')
    BitParser.add(WHITE, 1   , '000111')
    BitParser.add(WHITE, 2   , '0111')
@ -188,7 +188,7 @@ class CCITTG4Parser(BitParser):
    BitParser.add(WHITE, 2496, '000000011110')
    BitParser.add(WHITE, 2560, '000000011111')
-    BLACK = [None,None]
+    BLACK = [None, None]
    BitParser.add(BLACK, 0   , '0000110111')
    BitParser.add(BLACK, 1   , '010')
    BitParser.add(BLACK, 2   , '11')
@ -294,25 +294,30 @@ class CCITTG4Parser(BitParser):
    BitParser.add(BLACK, 2496, '000000011110')
    BitParser.add(BLACK, 2560, '000000011111')
-    UNCOMPRESSED = [None,None]
+    UNCOMPRESSED = [None, None]
-    BitParser.add(UNCOMPRESSED, '1' , '1')
+    BitParser.add(UNCOMPRESSED, '1', '1')
-    BitParser.add(UNCOMPRESSED, '01' , '01')
+    BitParser.add(UNCOMPRESSED, '01', '01')
-    BitParser.add(UNCOMPRESSED, '001' , '001')
+    BitParser.add(UNCOMPRESSED, '001', '001')
-    BitParser.add(UNCOMPRESSED, '0001' , '0001')
+    BitParser.add(UNCOMPRESSED, '0001', '0001')
-    BitParser.add(UNCOMPRESSED, '00001' , '00001')
+    BitParser.add(UNCOMPRESSED, '00001', '00001')
-    BitParser.add(UNCOMPRESSED, '00000' , '000001')
+    BitParser.add(UNCOMPRESSED, '00000', '000001')
-    BitParser.add(UNCOMPRESSED, 'T00' , '00000011')
+    BitParser.add(UNCOMPRESSED, 'T00', '00000011')
-    BitParser.add(UNCOMPRESSED, 'T10' , '00000010')
+    BitParser.add(UNCOMPRESSED, 'T10', '00000010')
-    BitParser.add(UNCOMPRESSED, 'T000' , '000000011')
+    BitParser.add(UNCOMPRESSED, 'T000', '000000011')
-    BitParser.add(UNCOMPRESSED, 'T100' , '000000010')
+    BitParser.add(UNCOMPRESSED, 'T100', '000000010')
-    BitParser.add(UNCOMPRESSED, 'T0000' , '0000000011')
+    BitParser.add(UNCOMPRESSED, 'T0000', '0000000011')
-    BitParser.add(UNCOMPRESSED, 'T1000' , '0000000010')
+    BitParser.add(UNCOMPRESSED, 'T1000', '0000000010')
-    BitParser.add(UNCOMPRESSED, 'T00000' , '00000000011')
+    BitParser.add(UNCOMPRESSED, 'T00000', '00000000011')
-    BitParser.add(UNCOMPRESSED, 'T10000' , '00000000010')
+    BitParser.add(UNCOMPRESSED, 'T10000', '00000000010')
-    
+
-    class EOFB(Exception): pass
+    class EOFB(Exception):
-    class InvalidData(Exception): pass
+        pass
-    class ByteSkip(Exception): pass
+
    class InvalidData(Exception):
        pass
    class ByteSkip(Exception):
        pass
    def __init__(self, width, bytealign=False):
        BitParser.__init__(self)
@ -325,7 +330,7 @@ class CCITTG4Parser(BitParser):
        for c in data:
            b = ord(c)
            try:
-                for m in (128,64,32,16,8,4,2,1):
+                for m in (128, 64, 32, 16, 8, 4, 2, 1):
                    self._parse_bit(b & m)
            except self.ByteSkip:
                self._accept = self._parse_mode
@ -359,7 +364,8 @@ class CCITTG4Parser(BitParser):
            raise self.InvalidData(mode)
    def _parse_horiz1(self, n):
-        if n is None: raise self.InvalidData
+        if n is None:
            raise self.InvalidData
        self._n1 += n
        if n < 64:
            self._n2 = 0
@ -371,7 +377,8 @@ class CCITTG4Parser(BitParser):
            return self.BLACK
    def _parse_horiz2(self, n):
-        if n is None: raise self.InvalidData
+        if n is None:
            raise self.InvalidData
        self._n2 += n
        if n < 64:
            self._color = 1-self._color
@ -385,9 +392,10 @@ class CCITTG4Parser(BitParser):
            return self.BLACK
    def _parse_uncompressed(self, bits):
-        if not bits: raise self.InvalidData
+        if not bits:
            raise self.InvalidData
        if bits.startswith('T'):
-            self._accept = self._parse_mode            
+            self._accept = self._parse_mode
            self._color = int(bits[1])
            self._do_uncompressed(bits[2:])
            return self.MODE
@ -396,17 +404,17 @@ class CCITTG4Parser(BitParser):
            return self.UNCOMPRESSED
    def _get_bits(self):
-        return ''.join( str(b) for b in self._curline[:self._curpos] )
+        return ''.join(str(b) for b in self._curline[:self._curpos])
    def _get_refline(self, i):
        if i < 0:
-            return '[]'+''.join( str(b) for b in self._refline )
+            return '[]'+''.join(str(b) for b in self._refline)
        elif len(self._refline) <= i:
-            return ''.join( str(b) for b in self._refline )+'[]'
+            return ''.join(str(b) for b in self._refline)+'[]'
        else:
-            return (''.join( str(b) for b in self._refline[:i] )+
+            return (''.join(str(b) for b in self._refline[:i]) +
-                    '['+str(self._refline[i])+']'+
+                    '['+str(self._refline[i])+']' +
-                    ''.join( str(b) for b in self._refline[i+1:] ))
+                    ''.join(str(b) for b in self._refline[i+1:]))
    def reset(self):
        self._y = 0
@ -417,16 +425,16 @@ class CCITTG4Parser(BitParser):
        return
    def output_line(self, y, bits):
-        print y, ''.join( str(b) for b in bits )
+        print y, ''.join(str(b) for b in bits)
        return
-    
+
    def _reset_line(self):
        self._refline = self._curline
        self._curline = array.array('b', [1]*self.width)
        self._curpos = -1
        self._color = 1
        return
-        
+
    def _flush_line(self):
        if self.width <= self._curpos:
            self.output_line(self._y, self._curline)
@ -442,12 +450,13 @@ class CCITTG4Parser(BitParser):
        x1 = self._curpos+1
        while 1:
            if x1 == 0:
-                if (self._color == 1 and
+                if (self._color == 1 and self._refline[x1] != self._color):
-                    self._refline[x1] != self._color): break
+                    break
            elif x1 == len(self._refline):
                break
            elif (self._refline[x1-1] == self._color and
-                  self._refline[x1] != self._color): break
+                  self._refline[x1] != self._color):
                break
            x1 += 1
        x1 += dx
        x0 = max(0, self._curpos)
@ -461,50 +470,54 @@ class CCITTG4Parser(BitParser):
        self._curpos = x1
        self._color = 1-self._color
        return
-    
+
    def _do_pass(self):
        #print '* pass: curpos=%r, color=%r' % (self._curpos, self._color)
        #print '  refline:', self._get_refline(self._curpos+1)
        x1 = self._curpos+1
        while 1:
            if x1 == 0:
-                if (self._color == 1 and
+                if (self._color == 1 and self._refline[x1] != self._color):
-                    self._refline[x1] != self._color): break
+                    break
            elif x1 == len(self._refline):
                break
            elif (self._refline[x1-1] == self._color and
-                  self._refline[x1] != self._color): break
+                  self._refline[x1] != self._color):
                break
            x1 += 1
        while 1:
            if x1 == 0:
-                if (self._color == 0 and
+                if (self._color == 0 and self._refline[x1] == self._color):
-                    self._refline[x1] == self._color): break
+                    break
            elif x1 == len(self._refline):
                break
            elif (self._refline[x1-1] != self._color and
-                  self._refline[x1] == self._color): break
+                  self._refline[x1] == self._color):
                break
            x1 += 1
        for x in xrange(self._curpos, x1):
            self._curline[x] = self._color
        self._curpos = x1
        return
-    
+
    def _do_horizontal(self, n1, n2):
        #print '* horizontal(%d,%d): curpos=%r, color=%r' % (n1, n2, self._curpos, self._color)
        if self._curpos < 0:
            self._curpos = 0
        x = self._curpos
        for _ in xrange(n1):
-            if len(self._curline) <= x: break
+            if len(self._curline) <= x:
                break
            self._curline[x] = self._color
            x += 1
        for _ in xrange(n2):
-            if len(self._curline) <= x: break
+            if len(self._curline) <= x:
                break
            self._curline[x] = 1-self._color
            x += 1
        self._curpos = x
        return
-    
+
    def _do_uncompressed(self, bits):
        #print '* uncompressed(%r): curpos=%r' % (bits, self._curpos)
        for c in bits:
@ -513,15 +526,16 @@ class CCITTG4Parser(BitParser):
            self._flush_line()
        return
 import unittest
 ##  Test cases
 ##
 import unittest
 class TestCCITTG4Parser(unittest.TestCase):
    def get_parser(self, bits):
        parser = CCITTG4Parser(len(bits))
-        parser._curline = [ int(c) for c in bits ]
+        parser._curline = [int(c) for c in bits]
        parser._reset_line()
        return parser
@ -656,7 +670,7 @@ class TestCCITTG4Parser(unittest.TestCase):
        parser._do_vertical(-1)
        parser._do_vertical(-1)
        parser._do_vertical(1)
-        parser._do_horizontal(1,1)
+        parser._do_horizontal(1, 1)
        self.assertEqual(parser._get_bits(), '011101')
        return
@ -673,23 +687,23 @@ class TestCCITTG4Parser(unittest.TestCase):
 ##  CCITTFaxDecoder
 ##
 class CCITTFaxDecoder(CCITTG4Parser):
-    
+
    def __init__(self, width, bytealign=False, reversed=False):
        CCITTG4Parser.__init__(self, width, bytealign=bytealign)
        self.reversed = reversed
        self._buf = ''
        return
-    
+
    def close(self):
        return self._buf
-    
+
    def output_line(self, y, bits):
        bytes = array.array('B', [0]*((len(bits)+7)/8))
        if self.reversed:
-            bits = [ 1-b for b in bits ]
+            bits = [1-b for b in bits]
-        for (i,b) in enumerate(bits):
+        for (i, b) in enumerate(bits):
            if b:
-                bytes[i/8] += (128,64,32,16,8,4,2,1)[i%8]
+                bytes[i/8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8]
        self._buf += bytes.tostring()
        return
@ -705,35 +719,39 @@ def ccittfaxdecode(data, params):
        raise ValueError(K)
    parser.feedbytes(data)
    return parser.close()
-    
+
-    
+
 # test
 def main(argv):
    import pygame
    if not argv[1:]:
        return unittest.main()
    class Parser(CCITTG4Parser):
        def __init__(self, width, bytealign=False):
            CCITTG4Parser.__init__(self, width, bytealign=bytealign)
-            self.img = pygame.Surface((self.width,1000))
+            self.img = pygame.Surface((self.width, 1000))
            return
        def output_line(self, y, bits):
-            for (x,b) in enumerate(bits):
+            for (x, b) in enumerate(bits):
                if b:
-                    self.img.set_at((x,y), (255,255,255))
+                    self.img.set_at((x, y), (255, 255, 255))
                else:
-                    self.img.set_at((x,y), (0,0,0))
+                    self.img.set_at((x, y), (0, 0, 0))
            return
        def close(self):
            pygame.image.save(self.img, 'out.bmp')
            return
    for path in argv[1:]:
-        fp = file(path,'rb')
+        fp = file(path, 'rb')
-        (_,_,k,w,h,_) = path.split('.')
+        (_, _, k, w, h, _) = path.split('.')
        parser = Parser(int(w))
        parser.feedbytes(fp.read())
        parser.close()
        fp.close()
    return
-if __name__ == '__main__': sys.exit(main(sys.argv))
+if __name__ == '__main__':
    sys.exit(main(sys.argv))
--- a/pdfminer/cmapdb.py
+++ b/pdfminer/cmapdb.py
@ -25,7 +25,8 @@ from encodingdb import name2unicode
 from utils import choplist, nunpack
-class CMapError(Exception): pass
+class CMapError(Exception):
    pass
 ##  CMap
@ -43,8 +44,9 @@ class CMap(object):
    def use_cmap(self, cmap):
        assert isinstance(cmap, CMap)
        def copy(dst, src):
-            for (k,v) in src.iteritems():
+            for (k, v) in src.iteritems():
                if isinstance(v, dict):
                    d = {}
                    dst[k] = d
@ -73,14 +75,14 @@ class CMap(object):
        if code2cid is None:
            code2cid = self.code2cid
            code = ()
-        for (k,v) in sorted(code2cid.iteritems()):
+        for (k, v) in sorted(code2cid.iteritems()):
            c = code+(k,)
            if isinstance(v, int):
-                out.write('code %r = cid %d\n' % (c,v))
+                out.write('code %r = cid %d\n' % (c, v))
            else:
                self.dump(out=out, code2cid=v, code=c)
        return
-    
+
 ##  IdentityCMap
 ##
@ -99,8 +101,7 @@ class IdentityCMap(object):
            return struct.unpack('>%dH' % n, code)
        else:
            return ()
-        
+
 ##  UnicodeMap
 ##
@ -118,8 +119,8 @@ class UnicodeMap(object):
        return self.cid2unichr[cid]
    def dump(self, out=sys.stdout):
-        for (k,v) in sorted(self.cid2unichr.iteritems()):
+        for (k, v) in sorted(self.cid2unichr.iteritems()):
-            out.write('cid %d = unicode %r\n' % (k,v))
+            out.write('cid %d = unicode %r\n' % (k, v))
        return
@ -152,7 +153,7 @@ class FileCMap(CMap):
            else:
                t = {}
                d[c] = t
-                d =t
+                d = t
        c = ord(code[-1])
        d[c] = cid
        return
@ -161,7 +162,7 @@ class FileCMap(CMap):
 ##  FileUnicodeMap
 ##
 class FileUnicodeMap(UnicodeMap):
-    
+
    def __init__(self):
        UnicodeMap.__init__(self)
        self.attrs = {}
@ -204,12 +205,12 @@ class PyCMap(CMap):
    def is_vertical(self):
        return self._is_vertical
-    
+
 ##  PyUnicodeMap
 ##
 class PyUnicodeMap(UnicodeMap):
-    
+
    def __init__(self, name, module, vertical):
        if vertical:
            cid2unichr = module.CID2UNICHR_V
@ -230,18 +231,17 @@ class CMapDB(object):
    debug = 0
    _cmap_cache = {}
    _umap_cache = {}
-    
+
-    class CMapNotFound(CMapError): pass
+    class CMapNotFound(CMapError):
        pass
    @classmethod
    def _load_data(klass, name):
        filename = '%s.pickle.gz' % name
        if klass.debug:
            print >>sys.stderr, 'loading:', name
-        cmap_paths = (
+        cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'),
-            os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'),
+                      os.path.join(os.path.dirname(__file__), 'cmap'),)
            os.path.join(os.path.dirname(__file__), 'cmap'),
            )
        for directory in cmap_paths:
            path = os.path.join(directory, filename)
            if os.path.exists(path):
@ -305,11 +305,12 @@ class CMapParser(PSStackParser):
        elif name == 'endcmap':
            self._in_cmap = False
            return
-        if not self._in_cmap: return
+        if not self._in_cmap:
            return
        #
        if name == 'def':
            try:
-                ((_,k),(_,v)) = self.pop(2)
+                ((_, k), (_, v)) = self.pop(2)
                self.cmap.set_attr(literal_name(k), v)
            except PSSyntaxError:
                pass
@ -317,7 +318,7 @@ class CMapParser(PSStackParser):
        if name == 'usecmap':
            try:
-                ((_,cmapname),) = self.pop(1)
+                ((_, cmapname),) = self.pop(1)
                self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
            except PSSyntaxError:
                pass
@ -336,13 +337,15 @@ class CMapParser(PSStackParser):
            self.popall()
            return
        if name == 'endcidrange':
-            objs = [ obj for (_,obj) in self.popall() ]
+            objs = [obj for (__, obj) in self.popall()]
-            for (s,e,cid) in choplist(3, objs):
+            for (s, e, cid) in choplist(3, objs):
                if (not isinstance(s, str) or not isinstance(e, str) or
-                    not isinstance(cid, int) or len(s) != len(e)): continue
+                   not isinstance(cid, int) or len(s) != len(e)):
                    continue
                sprefix = s[:-4]
                eprefix = e[:-4]
-                if sprefix != eprefix: continue
+                if sprefix != eprefix:
                    continue
                svar = s[-4:]
                evar = e[-4:]
                s1 = nunpack(svar)
@ -350,7 +353,7 @@ class CMapParser(PSStackParser):
                vlen = len(svar)
                #assert s1 <= e1
                for i in xrange(e1-s1+1):
-                    x = sprefix+struct.pack('>L',s1+i)[-vlen:]
+                    x = sprefix+struct.pack('>L', s1+i)[-vlen:]
                    self.cmap.add_code2cid(x, cid+i)
            return
@ -358,8 +361,8 @@ class CMapParser(PSStackParser):
            self.popall()
            return
        if name == 'endcidchar':
-            objs = [ obj for (_,obj) in self.popall() ]
+            objs = [obj for (__, obj) in self.popall()]
-            for (cid,code) in choplist(2, objs):
+            for (cid, code) in choplist(2, objs):
                if isinstance(code, str) and isinstance(cid, str):
                    self.cmap.add_code2cid(code, nunpack(cid))
            return
@ -368,10 +371,11 @@ class CMapParser(PSStackParser):
            self.popall()
            return
        if name == 'endbfrange':
-            objs = [ obj for (_,obj) in self.popall() ]
+            objs = [obj for (__, obj) in self.popall()]
-            for (s,e,code) in choplist(3, objs):
+            for (s, e, code) in choplist(3, objs):
                if (not isinstance(s, str) or not isinstance(e, str) or
-                    len(s) != len(e)): continue
+                   len(s) != len(e)):
                        continue
                s1 = nunpack(s)
                e1 = nunpack(e)
                #assert s1 <= e1
@ -384,7 +388,7 @@ class CMapParser(PSStackParser):
                    prefix = code[:-4]
                    vlen = len(var)
                    for i in xrange(e1-s1+1):
-                        x = prefix+struct.pack('>L',base+i)[-vlen:]
+                        x = prefix+struct.pack('>L', base+i)[-vlen:]
                        self.cmap.add_cid2unichr(s1+i, x)
            return
@ -392,8 +396,8 @@ class CMapParser(PSStackParser):
            self.popall()
            return
        if name == 'endbfchar':
-            objs = [ obj for (_,obj) in self.popall() ]
+            objs = [obj for (__, obj) in self.popall()]
-            for (cid,code) in choplist(2, objs):
+            for (cid, code) in choplist(2, objs):
                if isinstance(cid, str) and isinstance(code, str):
                    self.cmap.add_cid2unichr(nunpack(cid), code)
            return
@ -408,6 +412,7 @@ class CMapParser(PSStackParser):
        self.push((pos, token))
        return
 # test
 def main(argv):
    args = argv[1:]
@ -420,4 +425,5 @@ def main(argv):
        cmap.dump()
    return
-if __name__ == '__main__': sys.exit(main(sys.argv))
+if __name__ == '__main__':
    sys.exit(main(sys.argv))
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@ -21,9 +21,9 @@ class PDFLayoutAnalyzer(PDFTextDevice):
        return
    def begin_page(self, page, ctm):
-        (x0,y0,x1,y1) = page.mediabox
+        (x0, y0, x1, y1) = page.mediabox
-        (x0,y0) = apply_matrix_pt(ctm, (x0,y0))
+        (x0, y0) = apply_matrix_pt(ctm, (x0, y0))
-        (x1,y1) = apply_matrix_pt(ctm, (x1,y1))
+        (x1, y1) = apply_matrix_pt(ctm, (x1, y1))
        mediabox = (0, 0, abs(x0-x1), abs(y0-y1))
        self.cur_item = LTPage(self.pageno, mediabox)
        return
@ -61,26 +61,26 @@ class PDFLayoutAnalyzer(PDFTextDevice):
        shape = ''.join(x[0] for x in path)
        if shape == 'ml':
            # horizontal/vertical line
-            (_,x0,y0) = path[0]
+            (_, x0, y0) = path[0]
-            (_,x1,y1) = path[1]
+            (_, x1, y1) = path[1]
-            (x0,y0) = apply_matrix_pt(self.ctm, (x0,y0))
+            (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
-            (x1,y1) = apply_matrix_pt(self.ctm, (x1,y1))
+            (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
            if x0 == x1 or y0 == y1:
-                self.cur_item.add(LTLine(gstate.linewidth, (x0,y0), (x1,y1)))
+                self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1)))
                return
        if shape == 'mlllh':
            # rectangle
-            (_,x0,y0) = path[0]
+            (_, x0, y0) = path[0]
-            (_,x1,y1) = path[1]
+            (_, x1, y1) = path[1]
-            (_,x2,y2) = path[2]
+            (_, x2, y2) = path[2]
-            (_,x3,y3) = path[3]
+            (_, x3, y3) = path[3]
-            (x0,y0) = apply_matrix_pt(self.ctm, (x0,y0))
+            (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
-            (x1,y1) = apply_matrix_pt(self.ctm, (x1,y1))
+            (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
-            (x2,y2) = apply_matrix_pt(self.ctm, (x2,y2))
+            (x2, y2) = apply_matrix_pt(self.ctm, (x2, y2))
-            (x3,y3) = apply_matrix_pt(self.ctm, (x3,y3))
+            (x3, y3) = apply_matrix_pt(self.ctm, (x3, y3))
            if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or
                (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)):
-                self.cur_item.add(LTRect(gstate.linewidth, (x0,y0,x2,y2)))
+                self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2)))
                return
        # other shapes
        pts = []
@ -119,7 +119,7 @@ class PDFPageAggregator(PDFLayoutAnalyzer):
        PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams)
        self.result = None
        return
-    
+
    def receive_layout(self, ltpage):
        self.result = ltpage
        return
@ -137,7 +137,7 @@ class PDFConverter(PDFLayoutAnalyzer):
        self.outfp = outfp
        self.codec = codec
        return
-    
+
 ##  TextConverter
 ##
@ -176,10 +176,11 @@ class TextConverter(PDFConverter):
    # is text.  This stops all the image and drawing ouput from being
    # recorded and taking up RAM.
    def render_image(self, name, stream):
-        if self.imagewriter is None: return
+        if self.imagewriter is None:
            return
        PDFConverter.render_image(self, name, stream)
        return
-    
+
    def paint_path(self, gstate, stroke, fill, evenodd, path):
        return
@ -196,18 +197,18 @@ class HTMLConverter(PDFConverter):
        'textgroup': 'red',
        'curve': 'black',
        'page': 'gray',
-        }
+    }
-    
+
    TEXT_COLORS = {
        'textbox': 'blue',
        'char': 'black',
-        }
+    }
-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None, 
+    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
                 scale=1, fontscale=1.0, layoutmode='normal', showpageno=True,
                 pagemargin=50, imagewriter=None,
-                 rect_colors={'curve':'black', 'page':'gray'},
+                 rect_colors={'curve': 'black', 'page': 'gray'},
-                 text_colors={'char':'black'}):
+                 text_colors={'char': 'black'}):
        PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams)
        self.scale = scale
        self.fontscale = fontscale
@ -238,7 +239,7 @@ class HTMLConverter(PDFConverter):
    def write_footer(self):
        self.write('<div style="position:absolute; top:0px;">Page: %s</div>\n' %
-                   ', '.join('<a href="#%s">%s</a>' % (i,i) for i in xrange(1,self.pageno)))
+                   ', '.join('<a href="#%s">%s</a>' % (i, i) for i in xrange(1, self.pageno)))
        self.write('</body></html>\n')
        return
@ -295,7 +296,7 @@ class HTMLConverter(PDFConverter):
        self._font = self._fontstack.pop()
        self.write('</div>')
        return
-    
+
    def put_text(self, text, fontname, fontsize):
        font = (fontname, fontsize)
        if font != self._font:
@ -318,6 +319,7 @@ class HTMLConverter(PDFConverter):
                for child in item:
                    show_group(child)
            return
        def render(item):
            if isinstance(item, LTPage):
                self._yoffset += item.y1
@ -399,7 +401,7 @@ class XMLConverter(PDFConverter):
    def write_footer(self):
        self.outfp.write('</pages>\n')
        return
-    
+
    def write_text(self, text):
        self.outfp.write(enc(text, self.codec))
        return
@ -415,6 +417,7 @@ class XMLConverter(PDFConverter):
                    show_group(child)
                self.outfp.write('</textgroup>\n')
            return
        def render(item):
            if isinstance(item, LTPage):
                self.outfp.write('<page id="%s" bbox="%s" rotate="%d">\n' %
--- a/pdfminer/encodingdb.py
+++ b/pdfminer/encodingdb.py
@ -6,15 +6,18 @@ from glyphlist import glyphname2unicode
 from latin_enc import ENCODING
 STRIP_NAME = re.compile(r'[0-9]+')
 ##  name2unicode
 ##
 STRIP_NAME = re.compile(r'[0-9]+')
 def name2unicode(name):
    """Converts Adobe glyph names to Unicode numbers."""
    if name in glyphname2unicode:
        return glyphname2unicode[name]
    m = STRIP_NAME.search(name)
-    if not m: raise KeyError(name)
+    if not m:
        raise KeyError(name)
    return unichr(int(m.group(0)))
@ -26,19 +29,23 @@ class EncodingDB(object):
    mac2unicode = {}
    win2unicode = {}
    pdf2unicode = {}
-    for (name,std,mac,win,pdf) in ENCODING:
+    for (name, std, mac, win, pdf) in ENCODING:
        c = name2unicode(name)
-        if std: std2unicode[std] = c
+        if std:
-        if mac: mac2unicode[mac] = c
+            std2unicode[std] = c
-        if win: win2unicode[win] = c
+        if mac:
-        if pdf: pdf2unicode[pdf] = c
+            mac2unicode[mac] = c
        if win:
            win2unicode[win] = c
        if pdf:
            pdf2unicode[pdf] = c
    encodings = {
-      'StandardEncoding': std2unicode,
+        'StandardEncoding': std2unicode,
-      'MacRomanEncoding': mac2unicode,
+        'MacRomanEncoding': mac2unicode,
-      'WinAnsiEncoding': win2unicode,
+        'WinAnsiEncoding': win2unicode,
-      'PDFDocEncoding': pdf2unicode,
+        'PDFDocEncoding': pdf2unicode,
-      }
+    }
    @classmethod
    def get_encoding(klass, name, diff=None):
--- a/pdfminer/fontmetrics.py
+++ b/pdfminer/fontmetrics.py
@ -8,7 +8,7 @@ written with a proportional font.
 The following data were extracted from the AFM files:
  http://www.ctan.org/tex-archive/fonts/adobe/afm/
-  
+
 """
 ###  BEGIN Verbatim copy of the license part
--- a/pdfminer/image.py
+++ b/pdfminer/image.py
@ -5,9 +5,11 @@ import os, os.path
 from pdftypes import LITERALS_DCT_DECODE
 from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB, LITERAL_DEVICE_CMYK
 def align32(x):
    return ((x+3)/4)*4
 ##  BMPWriter
 ##
 class BMPWriter(object):
@ -36,12 +38,12 @@ class BMPWriter(object):
        self.fp.write(info)
        if ncols == 2:
            # B&W color table
-            for i in (0,255):
+            for i in (0, 255):
-                self.fp.write(struct.pack('BBBx', i,i,i))
+                self.fp.write(struct.pack('BBBx', i, i, i))
        elif ncols == 256:
            # grayscale color table
            for i in xrange(256):
-                self.fp.write(struct.pack('BBBx', i,i,i))
+                self.fp.write(struct.pack('BBBx', i, i, i))
        self.pos0 = self.fp.tell()
        self.pos1 = self.pos0 + self.datasize
        return
@ -68,7 +70,7 @@ class ImageWriter(object):
        (width, height) = image.srcsize
        if len(filters) == 1 and filters[0] in LITERALS_DCT_DECODE:
            ext = '.jpg'
-        elif (image.bits == 1 or 
+        elif (image.bits == 1 or
              image.bits == 8 and image.colorspace in (LITERAL_DEVICE_RGB, LITERAL_DEVICE_GRAY)):
            ext = '.%dx%d.bmp' % (width, height)
        else:
@ -82,7 +84,7 @@ class ImageWriter(object):
                from PIL import Image
                from PIL import ImageChops
                ifp = cStringIO.StringIO(raw_data)
-                i = Image.open(ifp) 
+                i = Image.open(ifp)
                i = ImageChops.invert(i)
                i = i.convert('RGB')
                i.save(fp, 'JPEG')
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@ -81,7 +81,7 @@ class LTComponent(LTItem):
        return ('<%s %s>' %
                (self.__class__.__name__, bbox2str(self.bbox)))
-    def set_bbox(self, (x0,y0,x1,y1)):
+    def set_bbox(self, (x0, y0, x1, y1)):
        self.x0 = x0
        self.y0 = y0
        self.x1 = x1
@ -93,7 +93,7 @@ class LTComponent(LTItem):
    def is_empty(self):
        return self.width <= 0 or self.height <= 0
-        
+
    def is_hoverlap(self, obj):
        assert isinstance(obj, LTComponent)
        return obj.x0 <= self.x1 and self.x0 <= obj.x1
@ -142,7 +142,7 @@ class LTCurve(LTComponent):
        return
    def get_pts(self):
-        return ','.join( '%.3f,%.3f' % p for p in self.pts )
+        return ','.join('%.3f,%.3f' % p for p in self.pts)
 ##  LTLine
@ -158,8 +158,8 @@ class LTLine(LTCurve):
 ##
 class LTRect(LTCurve):
-    def __init__(self, linewidth, (x0,y0,x1,y1)):
+    def __init__(self, linewidth, (x0, y0, x1, y1)):
-        LTCurve.__init__(self, linewidth, [(x0,y0), (x1,y0), (x1,y1), (x0,y1)])
+        LTCurve.__init__(self, linewidth, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)])
        return
@ -212,7 +212,7 @@ class LTChar(LTComponent, LTText):
        if font.is_vertical():
            # vertical
            width = font.get_width() * fontsize
-            (vx,vy) = textdisp
+            (vx, vy) = textdisp
            if vx is None:
                vx = width/2
            else:
@ -229,15 +229,15 @@ class LTChar(LTComponent, LTText):
            ty = descent + rise
            bll = (0, ty)
            bur = (self.adv, ty+height)
-        (a,b,c,d,e,f) = self.matrix
+        (a, b, c, d, e, f) = self.matrix
        self.upright = (0 < a*d*scaling and b*c <= 0)
-        (x0,y0) = apply_matrix_pt(self.matrix, bll)
+        (x0, y0) = apply_matrix_pt(self.matrix, bll)
-        (x1,y1) = apply_matrix_pt(self.matrix, bur)
+        (x1, y1) = apply_matrix_pt(self.matrix, bur)
        if x1 < x0:
-            (x0,x1) = (x1,x0)
+            (x0, x1) = (x1, x0)
        if y1 < y0:
-            (y0,y1) = (y1,y0)
+            (y0, y1) = (y1, y0)
-        LTComponent.__init__(self, (x0,y0,x1,y1))
+        LTComponent.__init__(self, (x0, y0, x1, y1))
        if font.is_vertical():
            self.size = self.width
        else:
@ -246,7 +246,7 @@ class LTChar(LTComponent, LTText):
    def __repr__(self):
        return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
-                (self.__class__.__name__, bbox2str(self.bbox), 
+                (self.__class__.__name__, bbox2str(self.bbox),
                 matrix2str(self.matrix), self.fontname, self.adv,
                 self.get_text()))
@ -257,7 +257,7 @@ class LTChar(LTComponent, LTText):
        """Returns True if two characters can coexist in the same line."""
        return True
-    
+
 ##  LTContainer
 ##
 class LTContainer(LTComponent):
@ -286,14 +286,14 @@ class LTContainer(LTComponent):
        for obj in self._objs:
            obj.analyze(laparams)
        return
-    
+
 ##  LTExpandableContainer
 ##
 class LTExpandableContainer(LTContainer):
    def __init__(self):
-        LTContainer.__init__(self, (+INF,+INF,-INF,-INF))
+        LTContainer.__init__(self, (+INF, +INF, -INF, -INF))
        return
    def add(self, obj):
@ -313,8 +313,8 @@ class LTTextContainer(LTExpandableContainer, LTText):
        return
    def get_text(self):
-        return ''.join( obj.get_text() for obj in self if isinstance(obj, LTText) )
+        return ''.join(obj.get_text() for obj in self if isinstance(obj, LTText))
-    
+
 ##  LTTextLine
 ##
@ -338,6 +338,7 @@ class LTTextLine(LTTextContainer):
    def find_neighbors(self, plane, ratio):
        raise NotImplementedError
 class LTTextLineHorizontal(LTTextLine):
    def __init__(self, word_margin):
@ -357,12 +358,13 @@ class LTTextLineHorizontal(LTTextLine):
    def find_neighbors(self, plane, ratio):
        d = ratio*self.height
        objs = plane.find((self.x0, self.y0-d, self.x1, self.y1+d))
-        return [ obj for obj in objs
+        return [obj for obj in objs
-                 if (isinstance(obj, LTTextLineHorizontal) and
+                if (isinstance(obj, LTTextLineHorizontal) and
-                     abs(obj.height-self.height) < d and
+                    abs(obj.height-self.height) < d and
-                     (abs(obj.x0-self.x0) < d or
+                    (abs(obj.x0-self.x0) < d or
-                      abs(obj.x1-self.x1) < d)) ]
+                     abs(obj.x1-self.x1) < d))]
-    
+
 class LTTextLineVertical(LTTextLine):
    def __init__(self, word_margin):
@ -378,16 +380,16 @@ class LTTextLineVertical(LTTextLine):
        self._y0 = obj.y0
        LTTextLine.add(self, obj)
        return
-        
+
    def find_neighbors(self, plane, ratio):
        d = ratio*self.width
        objs = plane.find((self.x0-d, self.y0, self.x1+d, self.y1))
-        return [ obj for obj in objs
+        return [obj for obj in objs
-                 if (isinstance(obj, LTTextLineVertical) and
+                if (isinstance(obj, LTTextLineVertical) and
-                     abs(obj.width-self.width) < d and
+                    abs(obj.width-self.width) < d and
-                     (abs(obj.y0-self.y0) < d or
+                    (abs(obj.y0-self.y0) < d or
-                      abs(obj.y1-self.y1) < d)) ]                     
+                     abs(obj.y1-self.y1) < d))]
-    
+
 ##  LTTextBox
 ##
@ -406,8 +408,9 @@ class LTTextBox(LTTextContainer):
                (self.__class__.__name__,
                 self.index, bbox2str(self.bbox), self.get_text()))
 class LTTextBoxHorizontal(LTTextBox):
-    
+
    def analyze(self, laparams):
        LTTextBox.analyze(self, laparams)
        self._objs = csort(self._objs, key=lambda obj: -obj.y1)
@ -416,6 +419,7 @@ class LTTextBoxHorizontal(LTTextBox):
    def get_writing_mode(self):
        return 'lr-tb'
 class LTTextBoxVertical(LTTextBox):
    def analyze(self, laparams):
@ -436,8 +440,9 @@ class LTTextGroup(LTTextContainer):
        self.extend(objs)
        return
 class LTTextGroupLRTB(LTTextGroup):
-    
+
    def analyze(self, laparams):
        LTTextGroup.analyze(self, laparams)
        # reorder the objects from top-left to bottom-right.
@ -446,14 +451,15 @@ class LTTextGroupLRTB(LTTextGroup):
                           (1+laparams.boxes_flow)*(obj.y0+obj.y1))
        return
 class LTTextGroupTBRL(LTTextGroup):
-    
+
    def analyze(self, laparams):
        LTTextGroup.analyze(self, laparams)
        # reorder the objects from top-right to bottom-left.
        self._objs = csort(self._objs, key=lambda obj:
                           -(1+laparams.boxes_flow)*(obj.x0+obj.x1)
-                           -(1-laparams.boxes_flow)*(obj.y1))
+                           - (1-laparams.boxes_flow)*(obj.y1))
        return
@ -465,14 +471,14 @@ class LTLayoutContainer(LTContainer):
        LTContainer.__init__(self, bbox)
        self.groups = None
        return
-        
+
    def get_textlines(self, laparams, objs):
        obj0 = None
        line = None
        for obj1 in objs:
            if obj0 is not None:
                k = 0
-                if (obj0.is_compatible(obj1) and obj0.is_voverlap(obj1) and 
+                if (obj0.is_compatible(obj1) and obj0.is_voverlap(obj1) and
                    min(obj0.height, obj1.height) * laparams.line_overlap < obj0.voverlap(obj1) and
                    obj0.hdistance(obj1) < max(obj0.width, obj1.width) * laparams.char_margin):
                    # obj0 and obj1 is horizontally aligned:
@ -487,7 +493,7 @@ class LTLayoutContainer(LTContainer):
                    #        (char_margin)
                    k |= 1
                if (laparams.detect_vertical and
-                    obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and 
+                    obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and
                    min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1) and
                    obj0.vdistance(obj1) < max(obj0.height, obj1.height) * laparams.char_margin):
                    # obj0 and obj1 is vertically aligned:
@ -505,8 +511,8 @@ class LTLayoutContainer(LTContainer):
                    #     |<-->|
                    #   (line_overlap)
                    k |= 2
-                if ( (k & 1 and isinstance(line, LTTextLineHorizontal)) or
+                if ((k & 1 and isinstance(line, LTTextLineHorizontal)) or
-                     (k & 2 and isinstance(line, LTTextLineVertical)) ):
+                    (k & 2 and isinstance(line, LTTextLineVertical))):
                    line.add(obj1)
                elif line is not None:
                    yield line
@ -554,7 +560,8 @@ class LTLayoutContainer(LTContainer):
        done = set()
        for line in lines:
            box = boxes[line]
-            if box in done: continue
+            if box in done:
+                continue
            done.add(box)
            if not box.is_empty():
                yield box
@ -562,32 +569,34 @@ class LTLayoutContainer(LTContainer):
    def group_textboxes(self, laparams, boxes):
        assert boxes
        def dist(obj1, obj2):
            """A distance function between two TextBoxes.
-            
+
            Consider the bounding rectangle for obj1 and obj2.
-            Return its area less the areas of obj1 and obj2, 
+            Return its area less the areas of obj1 and obj2,
            shown as 'www' below. This value may be negative.
-                    +------+..........+ (x1,y1)
+                    +------+..........+ (x1, y1)
                    | obj1 |wwwwwwwwww:
                    +------+www+------+
                    :wwwwwwwwww| obj2 |
-            (x0,y0) +..........+------+
+            (x0, y0) +..........+------+
            """
-            x0 = min(obj1.x0,obj2.x0)
+            x0 = min(obj1.x0, obj2.x0)
-            y0 = min(obj1.y0,obj2.y0)
+            y0 = min(obj1.y0, obj2.y0)
-            x1 = max(obj1.x1,obj2.x1)
+            x1 = max(obj1.x1, obj2.x1)
-            y1 = max(obj1.y1,obj2.y1)
+            y1 = max(obj1.y1, obj2.y1)
            return ((x1-x0)*(y1-y0) - obj1.width*obj1.height - obj2.width*obj2.height)
        def isany(obj1, obj2):
            """Check if there's any other object between obj1 and obj2.
            """
-            x0 = min(obj1.x0,obj2.x0)
+            x0 = min(obj1.x0, obj2.x0)
-            y0 = min(obj1.y0,obj2.y0)
+            y0 = min(obj1.y0, obj2.y0)
-            x1 = max(obj1.x1,obj2.x1)
+            x1 = max(obj1.x1, obj2.x1)
-            y1 = max(obj1.y1,obj2.y1)
+            y1 = max(obj1.y1, obj2.y1)
-            objs = set(plane.find((x0,y0,x1,y1)))
+            objs = set(plane.find((x0, y0, x1, y1)))
-            return objs.difference((obj1,obj2))
+            return objs.difference((obj1, obj2))
        # XXX this still takes O(n^2)  :(
        dists = []
        for i in xrange(len(boxes)):
@ -599,49 +608,50 @@ class LTLayoutContainer(LTContainer):
        plane = Plane(self.bbox)
        plane.extend(boxes)
        while dists:
-            (c,d,obj1,obj2) = dists.pop(0)
+            (c, d, obj1, obj2) = dists.pop(0)
            if c == 0 and isany(obj1, obj2):
-                dists.append((1,d,obj1,obj2))
+                dists.append((1, d, obj1, obj2))
                continue
            if (isinstance(obj1, LTTextBoxVertical) or
                isinstance(obj1, LTTextGroupTBRL) or
                isinstance(obj2, LTTextBoxVertical) or
                isinstance(obj2, LTTextGroupTBRL)):
-                group = LTTextGroupTBRL([obj1,obj2])
+                group = LTTextGroupTBRL([obj1, obj2])
            else:
-                group = LTTextGroupLRTB([obj1,obj2])
+                group = LTTextGroupLRTB([obj1, obj2])
            plane.remove(obj1)
            plane.remove(obj2)
            # this line is optimized -- don't change without profiling
-            dists = [ n for n in dists if n[2] in plane._objs and n[3] in plane._objs ]
+            dists = [n for n in dists if n[2] in plane._objs and n[3] in plane._objs]
            for other in plane:
-                dists.append((0, dist(group,other), group, other))
+                dists.append((0, dist(group, other), group, other))
            dists.sort()
            plane.add(group)
        assert len(plane) == 1
        return list(plane)
-    
+
    def analyze(self, laparams):
        # textobjs is a list of LTChar objects, i.e.
        # it has all the individual characters in the page.
        (textobjs, otherobjs) = fsplit(lambda obj: isinstance(obj, LTChar), self._objs)
        for obj in otherobjs:
            obj.analyze(laparams)
-        if not textobjs: return
+        if not textobjs:
+            return
        textlines = list(self.get_textlines(laparams, textobjs))
-        assert len(textobjs) <= sum( len(line._objs) for line in textlines )
+        assert len(textobjs) <= sum(len(line._objs) for line in textlines)
        (empties, textlines) = fsplit(lambda obj: obj.is_empty(), textlines)
        for obj in empties:
            obj.analyze(laparams)
        textboxes = list(self.get_textboxes(laparams, textlines))
-        assert len(textlines) == sum( len(box._objs) for box in textboxes )
+        assert len(textlines) == sum(len(box._objs) for box in textboxes)
        if textboxes:
            self.groups = self.group_textboxes(laparams, textboxes)
            assigner = IndexAssigner()
            for group in self.groups:
                group.analyze(laparams)
                assigner.run(group)
-            textboxes.sort(key=lambda box:box.index)
+            textboxes.sort(key=lambda box: box.index)
        self._objs = textboxes + otherobjs + empties
        return
@ -653,9 +663,9 @@ class LTFigure(LTLayoutContainer):
    def __init__(self, name, bbox, matrix):
        self.name = name
        self.matrix = matrix
-        (x,y,w,h) = bbox
+        (x, y, w, h) = bbox
-        bbox = get_bound( apply_matrix_pt(matrix, (p,q))
+        bbox = get_bound(apply_matrix_pt(matrix, (p, q))
-                          for (p,q) in ((x,y), (x+w,y), (x,y+h), (x+w,y+h)) )
+                         for (p, q) in ((x, y), (x+w, y), (x, y+h), (x+w, y+h)))
        LTLayoutContainer.__init__(self, bbox)
        return
@ -665,9 +675,10 @@ class LTFigure(LTLayoutContainer):
                 bbox2str(self.bbox), matrix2str(self.matrix)))
    def analyze(self, laparams):
-        if not laparams.all_texts: return
+        if not laparams.all_texts:
+            return
        LTLayoutContainer.analyze(self, laparams)
-        return 
+        return
 ##  LTPage
--- a/pdfminer/lzw.py
+++ b/pdfminer/lzw.py
@ -34,17 +34,18 @@ class LZWDecoder(object):
                # |-----8-bits-----|
                # |-bpos-|-bits-|  |
                # |      |----r----|
-                v = (v<<bits) | ((self.buff>>(r-bits)) & ((1<<bits)-1))
+                v = (v << bits) | ((self.buff >> (r-bits)) & ((1 << bits)-1))
                self.bpos += bits
                break
            else:
                # |-----8-bits-----|
                # |-bpos-|---bits----...
                # |      |----r----|
-                v = (v<<r) | (self.buff & ((1<<r)-1))
+                v = (v << r) | (self.buff & ((1 << r)-1))
                bits -= r
                x = self.fp.read(1)
-                if not x: raise EOFError
+                if not x:
+                    raise EOFError
                self.buff = ord(x)
                self.bpos = 0
        return v
@ -52,9 +53,9 @@ class LZWDecoder(object):
    def feed(self, code):
        x = ''
        if code == 256:
-            self.table = [ chr(c) for c in xrange(256) ] # 0-255
+            self.table = [chr(c) for c in xrange(256)]  # 0-255
-            self.table.append(None) # 256
+            self.table.append(None)  # 256
-            self.table.append(None) # 257
+            self.table.append(None)  # 257
            self.prevbuf = ''
            self.nbits = 9
        elif code == 257:
@ -97,6 +98,7 @@ class LZWDecoder(object):
                                     (self.nbits, code, x, self.table[258:]))
        return
 # lzwdecode
 def lzwdecode(data):
    """
--- a/pdfminer/pdfcolor.py
+++ b/pdfminer/pdfcolor.py
@ -8,6 +8,7 @@ LITERAL_DEVICE_GRAY = LIT('DeviceGray')
 LITERAL_DEVICE_RGB = LIT('DeviceRGB')
 LITERAL_DEVICE_CMYK = LIT('DeviceCMYK')
 class PDFColorSpace(object):
    def __init__(self, name, ncomponents):
@ -20,14 +21,14 @@ class PDFColorSpace(object):
 PREDEFINED_COLORSPACE = dict(
-  (name, PDFColorSpace(name,n)) for (name,n) in {
+    (name, PDFColorSpace(name, n)) for (name, n) in {
-  'CalRGB': 3,
+        'CalRGB': 3,
-  'CalGray': 1,
+        'CalGray': 1,
-  'Lab': 3,
+        'Lab': 3,
-  'DeviceRGB': 3,
+        'DeviceRGB': 3,
-  'DeviceCMYK': 4,
+        'DeviceCMYK': 4,
-  'DeviceGray': 1,
+        'DeviceGray': 1,
-  'Separation': 1,
+        'Separation': 1,
-  'Indexed': 1,
+        'Indexed': 1,
-  'Pattern': 1,
+        'Pattern': 1,
-  }.iteritems())
+    }.iteritems())
--- a/pdfminer/pdfdevice.py
+++ b/pdfminer/pdfdevice.py
@ -27,24 +27,31 @@ class PDFDevice(object):
    def begin_tag(self, tag, props=None):
        return
    def end_tag(self):
        return
    def do_tag(self, tag, props=None):
        return
    def begin_page(self, page, ctm):
        return
    def end_page(self, page):
        return
    def begin_figure(self, name, bbox, matrix):
        return
    def end_figure(self, name):
        return
    def paint_path(self, graphicstate, stroke, fill, evenodd, path):
        return
    def render_image(self, name, stream):
        return
    def render_string(self, textstate, seq):
        return
@ -73,8 +80,8 @@ class PDFTextDevice(PDFDevice):
                seq, matrix, textstate.linematrix, font, fontsize,
                scaling, charspace, wordspace, rise, dxscale)
        return
-    
+
-    def render_string_horizontal(self, seq, matrix, (x,y), 
+    def render_string_horizontal(self, seq, matrix, (x, y),
                                 font, fontsize, scaling, charspace, wordspace, rise, dxscale):
        needcharspace = False
        for obj in seq:
@ -85,14 +92,14 @@ class PDFTextDevice(PDFDevice):
                for cid in font.decode(obj):
                    if needcharspace:
                        x += charspace
-                    x += self.render_char(translate_matrix(matrix, (x,y)),
+                    x += self.render_char(translate_matrix(matrix, (x, y)),
                                          font, fontsize, scaling, rise, cid)
                    if cid == 32 and wordspace:
                        x += wordspace
                    needcharspace = True
        return (x, y)
-    def render_string_vertical(self, seq, matrix, (x,y), 
+    def render_string_vertical(self, seq, matrix, (x, y),
                               font, fontsize, scaling, charspace, wordspace, rise, dxscale):
        needcharspace = False
        for obj in seq:
@ -103,7 +110,7 @@ class PDFTextDevice(PDFDevice):
                for cid in font.decode(obj):
                    if needcharspace:
                        y += charspace
-                    y += self.render_char(translate_matrix(matrix, (x,y)), 
+                    y += self.render_char(translate_matrix(matrix, (x, y)),
                                          font, fontsize, scaling, rise, cid)
                    if cid == 32 and wordspace:
                        y += wordspace
@ -131,7 +138,8 @@ class TagExtractor(PDFDevice):
        font = textstate.font
        text = ''
        for obj in seq:
-            if not isinstance(obj, str): continue
+            if not isinstance(obj, str):
+                continue
            chars = font.decode(obj)
            for cid in chars:
                try:
@ -155,8 +163,8 @@ class TagExtractor(PDFDevice):
    def begin_tag(self, tag, props=None):
        s = ''
        if isinstance(props, dict):
-            s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
+            s = ''.join(' %s="%s"' % (enc(k), enc(str(v))) for (k, v)
-                         in sorted(props.iteritems()) )
+                        in sorted(props.iteritems()))
        self.outfp.write('<%s%s>' % (enc(tag.name), s))
        self._stack.append(tag)
        return
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@ -23,11 +23,24 @@ from utils import decode_text
 ##  Exceptions
 ##
-class PDFNoValidXRef(PDFSyntaxError): pass
+class PDFNoValidXRef(PDFSyntaxError):
-class PDFNoOutlines(PDFException): pass
+    pass
-class PDFDestinationNotFound(PDFException): pass
+
-class PDFEncryptionError(PDFException): pass
+
-class PDFPasswordIncorrect(PDFEncryptionError): pass
+class PDFNoOutlines(PDFException):
+    pass
+class PDFDestinationNotFound(PDFException):
+    pass
+class PDFEncryptionError(PDFException):
+    pass
+class PDFPasswordIncorrect(PDFEncryptionError):
+    pass
 # some predefined literals and keywords.
 LITERAL_OBJSTM = LIT('ObjStm')
@ -68,7 +81,8 @@ class PDFXRef(PDFBaseXRef):
        while 1:
            try:
                (pos, line) = parser.nextline()
-                if not line.strip(): continue
+                if not line.strip():
+                    continue
            except PSEOF:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
            if not line:
@ -92,7 +106,8 @@ class PDFXRef(PDFBaseXRef):
                if len(f) != 3:
                    raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
                (pos, genno, use) = f
-                if use != 'n': continue
+                if use != 'n':
+                    continue
                self.offsets[objid] = (None, long(pos), int(genno))
        if 1 <= debug:
            print >>sys.stderr, 'xref objects:', self.offsets
@ -100,16 +115,17 @@ class PDFXRef(PDFBaseXRef):
        return
    KEYWORD_TRAILER = KWD('trailer')
    def load_trailer(self, parser):
        try:
-            (_,kwd) = parser.nexttoken()
+            (_, kwd) = parser.nexttoken()
            assert kwd is self.KEYWORD_TRAILER
-            (_,dic) = parser.nextobject()
+            (_, dic) = parser.nextobject()
        except PSEOF:
            x = parser.pop(1)
            if not x:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted')
-            (_,dic) = x[0]
+            (_, dic) = x[0]
        self.trailer.update(dict_value(dic))
        return
@ -134,6 +150,7 @@ class PDFXRefFallback(PDFXRef):
        return '<PDFXRefFallback: offsets=%r>' % (self.offsets.keys())
    PDFOBJ_CUE = re.compile(r'^(\d+)\s+(\d+)\s+obj\b')
    def load(self, parser, debug=0):
        parser.seek(0)
        while 1:
@ -148,14 +165,15 @@ class PDFXRefFallback(PDFXRef):
                    print >>sys.stderr, 'trailer: %r' % self.get_trailer()
                break
            m = self.PDFOBJ_CUE.match(line)
-            if not m: continue
+            if not m:
+                continue
            (objid, genno) = m.groups()
            objid = int(objid)
            genno = int(genno)
            self.offsets[objid] = (None, pos, genno)
            # expand ObjStm.
            parser.seek(pos)
-            (_,obj) = parser.nextobject()
+            (_, obj) = parser.nextobject()
            if isinstance(obj, PDFStream) and obj.get('Type') is LITERAL_OBJSTM:
                stream = stream_value(obj)
                try:
@ -168,7 +186,7 @@ class PDFXRefFallback(PDFXRef):
                objs = []
                try:
                    while 1:
-                        (_,obj) = parser1.nextobject()
+                        (_, obj) = parser1.nextobject()
                        objs.append(obj)
                except PSEOF:
                    pass
@ -193,14 +211,14 @@ class PDFXRefStream(PDFBaseXRef):
        return '<PDFXRefStream: ranges=%r>' % (self.ranges)
    def load(self, parser, debug=0):
-        (_,objid) = parser.nexttoken() # ignored
+        (_, objid) = parser.nexttoken()  # ignored
-        (_,genno) = parser.nexttoken() # ignored
+        (_, genno) = parser.nexttoken()  # ignored
-        (_,kwd) = parser.nexttoken()
+        (_, kwd) = parser.nexttoken()
-        (_,stream) = parser.nextobject()
+        (_, stream) = parser.nextobject()
        if not isinstance(stream, PDFStream) or stream['Type'] is not LITERAL_XREF:
            raise PDFNoValidXRef('Invalid PDF stream spec.')
        size = stream['Size']
-        index_array = stream.get('Index', (1,size))
+        index_array = stream.get('Index', (1, size))
        if len(index_array) % 2 != 0:
            raise PDFSyntaxError('Invalid index number')
        self.ranges.extend(choplist(2, index_array))
@ -210,22 +228,22 @@ class PDFXRefStream(PDFBaseXRef):
        self.trailer = stream.attrs
        if 1 <= debug:
            print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' %
-                             (', '.join(map(repr, self.ranges)),
+                                 (', '.join(map(repr, self.ranges)),
-                              self.fl1, self.fl2, self.fl3))
+                                 self.fl1, self.fl2, self.fl3))
        return
    def get_trailer(self):
        return self.trailer
    def get_objids(self):
-        for (start,nobjs) in self.ranges:
+        for (start, nobjs) in self.ranges:
            for i in xrange(nobjs):
                yield start+i
        return
    def get_pos(self, objid):
        index = 0
-        for (start,nobjs) in self.ranges:
+        for (start, nobjs) in self.ranges:
            if start <= objid and objid < start+nobjs:
                index += objid - start
            else:
@ -260,7 +278,7 @@ class PDFDocument(object):
      doc = PDFDocument(parser)
      doc.initialize(password)
      obj = doc.getobj(objid)
-    
+
    """
    debug = 0
@ -292,7 +310,8 @@ class PDFDocument(object):
            self.xrefs.append(xref)
        for xref in self.xrefs:
            trailer = xref.get_trailer()
-            if not trailer: continue
+            if not trailer:
+                continue
            # If there's an encryption info, remember it.
            if 'Encrypt' in trailer:
                #assert not self.encryption
@ -316,6 +335,7 @@ class PDFDocument(object):
    #   This step is mandatory even if there's no password associated
    #   with the document.
    PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
    def initialize(self, password=''):
        if not self.encryption:
            self.is_printable = self.is_modifiable = self.is_extractable = True
@ -326,9 +346,9 @@ class PDFDocument(object):
        V = int_value(param.get('V', 0))
        if not (V == 1 or V == 2):
            raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
-        length = int_value(param.get('Length', 40)) # Key length (bits)
+        length = int_value(param.get('Length', 40))  # Key length (bits)
        O = str_value(param['O'])
-        R = int_value(param['R']) # Revision
+        R = int_value(param['R'])  # Revision
        if 5 <= R:
            raise PDFEncryptionError('Unknown revision: %r' % R)
        U = str_value(param['U'])
@ -337,11 +357,11 @@ class PDFDocument(object):
        self.is_modifiable = bool(P & 8)
        self.is_extractable = bool(P & 16)
        # Algorithm 3.2
-        password = (password+self.PASSWORD_PADDING)[:32] # 1
+        password = (password+self.PASSWORD_PADDING)[:32]  # 1
-        hash = md5.md5(password) # 2
+        hash = md5.md5(password)  # 2
-        hash.update(O) # 3
+        hash.update(O)  # 3
-        hash.update(struct.pack('<l', P)) # 4
+        hash.update(struct.pack('<l', P))  # 4
-        hash.update(docid[0]) # 5
+        hash.update(docid[0])  # 5
        if 4 <= R:
            # 6
            raise PDFNotImplementedError('Revision 4 encryption is currently unsupported')
@ -355,13 +375,13 @@ class PDFDocument(object):
            u1 = Arcfour(key).process(self.PASSWORD_PADDING)
        elif R == 3:
            # Algorithm 3.5
-            hash = md5.md5(self.PASSWORD_PADDING) # 2
+            hash = md5.md5(self.PASSWORD_PADDING)  # 2
-            hash.update(docid[0]) # 3
+            hash.update(docid[0])  # 3
-            x = Arcfour(key).process(hash.digest()[:16]) # 4
+            x = Arcfour(key).process(hash.digest()[:16])  # 4
-            for i in xrange(1,19+1):
+            for i in xrange(1, 19+1):
-                k = ''.join( chr(ord(c) ^ i) for c in key )
+                k = ''.join(chr(ord(c) ^ i) for c in key)
                x = Arcfour(k).process(x)
-            u1 = x+x # 32bytes total
+            u1 = x+x  # 32bytes total
        if R == 2:
            is_authenticated = (u1 == U)
        else:
@ -373,18 +393,18 @@ class PDFDocument(object):
        return
    def decrypt_rc4(self, objid, genno, data):
-        key = self.decrypt_key + struct.pack('<L',objid)[:3]+struct.pack('<L',genno)[:2]
+        key = self.decrypt_key + struct.pack('<L', objid)[:3]+struct.pack('<L', genno)[:2]
        hash = md5.md5(key)
-        key = hash.digest()[:min(len(key),16)]
+        key = hash.digest()[:min(len(key), 16)]
        return Arcfour(key).process(data)
    def _getobj_objstm(self, stream, index, objid):
        if stream.objid in self._parsed_objs:
-            (objs,n) = self._parsed_objs[stream.objid]
+            (objs, n) = self._parsed_objs[stream.objid]
        else:
-            (objs,n) = self._get_objects(stream)
+            (objs, n) = self._get_objects(stream)
            if self.caching:
-                self._parsed_objs[stream.objid] = (objs,n)
+                self._parsed_objs[stream.objid] = (objs, n)
        i = n*2+index
        try:
            obj = objs[i]
@ -407,25 +427,26 @@ class PDFDocument(object):
        objs = []
        try:
            while 1:
-                (_,obj) = parser.nextobject()
+                (_, obj) = parser.nextobject()
                objs.append(obj)
        except PSEOF:
            pass
        return (objs, n)
    KEYWORD_OBJ = KWD('obj')
    def _getobj_parse(self, pos, objid):
        self._parser.seek(pos)
-        (_,objid1) = self._parser.nexttoken() # objid
+        (_, objid1) = self._parser.nexttoken()  # objid
        if objid1 != objid:
            raise PDFSyntaxError('objid mismatch: %r=%r' % (objid1, objid))
-        (_,genno) = self._parser.nexttoken() # genno
+        (_, genno) = self._parser.nexttoken()  # genno
-        (_,kwd) = self._parser.nexttoken()
+        (_, kwd) = self._parser.nexttoken()
        if kwd is not self.KEYWORD_OBJ:
            raise PDFSyntaxError('Invalid object spec: offset=%r' % pos)
-        (_,obj) = self._parser.nextobject()
+        (_, obj) = self._parser.nextobject()
        return obj
-        
+
    # can raise PDFObjectNotFound
    def getobj(self, objid):
        assert objid != 0
@ -465,6 +486,7 @@ class PDFDocument(object):
    def get_outlines(self):
        if 'Outlines' not in self.catalog:
            raise PDFNoOutlines
        def search(entry, level):
            entry = dict_value(entry)
            if 'Title' in entry:
@ -487,13 +509,15 @@ class PDFDocument(object):
        try:
            names = dict_value(self.catalog['Names'])
        except (PDFTypeError, KeyError):
-            raise KeyError((cat,key))
+            raise KeyError((cat, key))
        # may raise KeyError
        d0 = dict_value(names[cat])
        def lookup(d):
            if 'Limits' in d:
-                (k1,k2) = list_value(d['Limits'])
+                (k1, k2) = list_value(d['Limits'])
-                if key < k1 or k2 < key: return None
+                if key < k1 or k2 < key:
+                    return None
            if 'Names' in d:
                objs = list_value(d['Names'])
                names = dict(choplist(2, objs))
@ -501,8 +525,9 @@ class PDFDocument(object):
            if 'Kids' in d:
                for c in list_value(d['Kids']):
                    v = lookup(dict_value(c))
-                    if v: return v
+                    if v:
-            raise KeyError((cat,key))
+                        return v
+            raise KeyError((cat, key))
        return lookup(d0)
    def get_dest(self, name):
@ -528,7 +553,8 @@ class PDFDocument(object):
            line = line.strip()
            if 2 <= self.debug:
                print >>sys.stderr, 'find_xref: %r' % line
-            if line == 'startxref': break
+            if line == 'startxref':
                break
            if line:
                prev = line
        else:
--- a/pdfminer/pdffont.py
+++ b/pdfminer/pdffont.py
@ -25,13 +25,13 @@ def get_widths(seq):
        if isinstance(v, list):
            if r:
                char1 = r[-1]
-                for (i,w) in enumerate(v):
+                for (i, w) in enumerate(v):
                    widths[char1+i] = w
                r = []
        elif isinstance(v, int):
            r.append(v)
            if len(r) == 3:
-                (char1,char2,w) = r
+                (char1, char2, w) = r
                for i in xrange(char1, char2+1):
                    widths[i] = w
                r = []
@ -40,6 +40,7 @@ def get_widths(seq):
 #assert get_widths([1,2,3]) == {1:3, 2:3}
 #assert get_widths([1,[2,3],6,[7,8]]) == {1:2,2:3, 6:7,7:8}
 def get_widths2(seq):
    widths = {}
    r = []
@ -47,20 +48,20 @@ def get_widths2(seq):
        if isinstance(v, list):
            if r:
                char1 = r[-1]
-                for (i,(w,vx,vy)) in enumerate(choplist(3,v)):
+                for (i, (w, vx, vy)) in enumerate(choplist(3, v)):
-                    widths[char1+i] = (w,(vx,vy))
+                    widths[char1+i] = (w, (vx, vy))
                r = []
        elif isinstance(v, int):
            r.append(v)
            if len(r) == 5:
-                (char1,char2,w,vx,vy) = r
+                (char1, char2, w, vx, vy) = r
                for i in xrange(char1, char2+1):
-                    widths[i] = (w,(vx,vy))
+                    widths[i] = (w, (vx, vy))
                r = []
    return widths
 #assert get_widths2([1]) == {}
-#assert get_widths2([1,2,3,4,5]) == {1:(3,(4,5)), 2:(3,(4,5))}
+#assert get_widths2([1,2,3,4,5]) == {1:(3, (4,5)), 2:(3, (4,5))}
-#assert get_widths2([1,[2,3,4,5],6,[7,8,9]]) == {1:(2,(3,4)), 6:(7,(8,9))}
+#assert get_widths2([1,[2,3,4,5],6,[7,8,9]]) == {1:(2, (3,4)), 6:(7, (8,9))}
 ##  FontMetricsDB
@ -94,7 +95,7 @@ class Type1FontHeaderParser(PSStackParser):
    def get_encoding(self):
        while 1:
            try:
-                (cid,name) = self.nextobject()
+                (cid, name) = self.nextobject()
            except PSEOF:
                break
            try:
@ -102,28 +103,31 @@ class Type1FontHeaderParser(PSStackParser):
            except KeyError:
                pass
        return self._cid2unicode
-    
+
    def do_keyword(self, pos, token):
        if token is self.KEYWORD_PUT:
-            ((_,key),(_,value)) = self.pop(2)
+            ((_, key), (_, value)) = self.pop(2)
            if (isinstance(key, int) and
                isinstance(value, PSLiteral)):
                self.add_results((key, literal_name(value)))
        return
-    
+
 NIBBLES = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', 'e', 'e-', None, '-')
 ##  CFFFont
 ##  (Format specified in Adobe Technical Note: #5176
 ##   "The Compact Font Format Specification")
 ##
 NIBBLES = ('0','1','2','3','4','5','6','7','8','9','.','e','e-',None,'-')
 def getdict(data):
    d = {}
    fp = StringIO(data)
    stack = []
    while 1:
        c = fp.read(1)
-        if not c: break
+        if not c:
            break
        b0 = ord(c)
        if b0 <= 21:
            d[b0] = stack
@ -145,19 +149,21 @@ def getdict(data):
        else:
            b1 = ord(fp.read(1))
            if 247 <= b0 and b0 <= 250:
-                value = ((b0-247)<<8)+b1+108
+                value = ((b0-247) << 8)+b1+108
            elif 251 <= b0 and b0 <= 254:
-                value = -((b0-251)<<8)-b1-108
+                value = -((b0-251) << 8)-b1-108
            else:
                b2 = ord(fp.read(1))
-                if 128 <= b1: b1 -= 256
+                if 128 <= b1:
                    b1 -= 256
                if b0 == 28:
-                    value = b1<<8 | b2
+                    value = b1 << 8 | b2
                else:
-                    value = b1<<24 | b2<<16 | struct.unpack('>H', fp.read(2))[0]
+                    value = b1 << 24 | b2 << 16 | struct.unpack('>H', fp.read(2))[0]
        stack.append(value)
    return d
 class CFFFont(object):
    STANDARD_STRINGS = (
@ -239,7 +245,7 @@ class CFFFont(object):
      'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000',
      '001.001', '001.002', '001.003', 'Black', 'Bold', 'Book',
      'Light', 'Medium', 'Regular', 'Roman', 'Semibold',
-      )
+    )
    class INDEX(object):
@ -264,13 +270,13 @@ class CFFFont(object):
            return self.fp.read(self.offsets[i+1]-self.offsets[i])
        def __iter__(self):
-            return iter( self[i] for i in xrange(len(self)) )
+            return iter(self[i] for i in xrange(len(self)))
    def __init__(self, name, fp):
        self.name = name
        self.fp = fp
        # Header
-        (_major,_minor,hdrsize,offsize) = struct.unpack('BBBB', self.fp.read(4))
+        (_major, _minor, hdrsize, offsize) = struct.unpack('BBBB', self.fp.read(4))
        self.fp.read(hdrsize-4)
        # Name INDEX
        self.name_index = self.INDEX(self.fp)
@ -297,7 +303,7 @@ class CFFFont(object):
        if format == '\x00':
            # Format 0
            (n,) = struct.unpack('B', self.fp.read(1))
-            for (code,gid) in enumerate(struct.unpack('B'*n, self.fp.read(n))):
+            for (code, gid) in enumerate(struct.unpack('B'*n, self.fp.read(n))):
                self.code2gid[code] = gid
                self.gid2code[gid] = code
        elif format == '\x01':
@ -305,8 +311,8 @@ class CFFFont(object):
            (n,) = struct.unpack('B', self.fp.read(1))
            code = 0
            for i in xrange(n):
-                (first,nleft) = struct.unpack('BB', self.fp.read(2))
+                (first, nleft) = struct.unpack('BB', self.fp.read(2))
-                for gid in xrange(first,first+nleft+1):
+                for gid in xrange(first, first+nleft+1):
                    self.code2gid[code] = gid
                    self.gid2code[gid] = code
                    code += 1
@ -320,7 +326,7 @@ class CFFFont(object):
        if format == '\x00':
            # Format 0
            n = self.nglyphs-1
-            for (gid,sid) in enumerate(struct.unpack('>'+'H'*n, self.fp.read(2*n))):
+            for (gid, sid) in enumerate(struct.unpack('>'+'H'*n, self.fp.read(2*n))):
                gid += 1
                name = self.getstr(sid)
                self.name2gid[name] = gid
@ -330,8 +336,8 @@ class CFFFont(object):
            (n,) = struct.unpack('B', self.fp.read(1))
            sid = 0
            for i in xrange(n):
-                (first,nleft) = struct.unpack('BB', self.fp.read(2))
+                (first, nleft) = struct.unpack('BB', self.fp.read(2))
-                for gid in xrange(first,first+nleft+1):
+                for gid in xrange(first, first+nleft+1):
                    name = self.getstr(sid)
                    self.name2gid[name] = gid
                    self.gid2name[gid] = name
@ -356,7 +362,8 @@ class CFFFont(object):
 ##
 class TrueTypeFont(object):
-    class CMapNotFound(Exception): pass
+    class CMapNotFound(Exception):
        pass
    def __init__(self, name, fp):
        self.name = name
@ -389,15 +396,16 @@ class TrueTypeFont(object):
            elif fmttype == 2:
                subheaderkeys = struct.unpack('>256H', fp.read(512))
                firstbytes = [0]*8192
-                for (i,k) in enumerate(subheaderkeys):
+                for (i, k) in enumerate(subheaderkeys):
                    firstbytes[k/8] = i
                nhdrs = max(subheaderkeys)/8 + 1
                hdrs = []
                for i in xrange(nhdrs):
-                    (firstcode,entcount,delta,offset) = struct.unpack('>HHhH', fp.read(8))
+                    (firstcode, entcount, delta, offset) = struct.unpack('>HHhH', fp.read(8))
-                    hdrs.append((i,firstcode,entcount,delta,fp.tell()-2+offset))
+                    hdrs.append((i, firstcode, entcount, delta, fp.tell()-2+offset))
-                for (i,firstcode,entcount,delta,pos) in hdrs:
+                for (i, firstcode, entcount, delta, pos) in hdrs:
-                    if not entcount: continue
+                    if not entcount:
                        continue
                    first = firstcode + (firstbytes[i] << 8)
                    fp.seek(pos)
                    for c in xrange(entcount):
@ -414,7 +422,7 @@ class TrueTypeFont(object):
                idds = struct.unpack('>%dh' % segcount, fp.read(2*segcount))
                pos = fp.tell()
                idrs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
-                for (ec,sc,idd,idr) in zip(ecs, scs, idds, idrs):
+                for (ec, sc, idd, idr) in zip(ecs, scs, idds, idrs):
                    if idr:
                        fp.seek(pos+idr)
                        for c in xrange(sc, ec+1):
@ -426,16 +434,19 @@ class TrueTypeFont(object):
                assert 0
        # create unicode map
        unicode_map = FileUnicodeMap()
-        for (char,gid) in char2gid.iteritems():
+        for (char, gid) in char2gid.iteritems():
            unicode_map.add_cid2unichr(gid, char)
        return unicode_map
 ##  Fonts
 ##
 class PDFFontError(PDFException):
    pass
-class PDFFontError(PDFException): pass
+
-class PDFUnicodeNotDefined(PDFFontError): pass
+class PDFUnicodeNotDefined(PDFFontError):
    pass
 LITERAL_STANDARD_ENCODING = LIT('StandardEncoding')
 LITERAL_TYPE1C = LIT('Type1C')
@ -456,7 +467,7 @@ class PDFFont(object):
        self.italic_angle = num_value(descriptor.get('ItalicAngle', 0))
        self.default_width = default_width or num_value(descriptor.get('MissingWidth', 0))
        self.leading = num_value(descriptor.get('Leading', 0))
-        self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0)))
+        self.bbox = list_value(descriptor.get('FontBBox', (0, 0, 0, 0)))
        self.hscale = self.vscale = .001
        return
@ -474,6 +485,7 @@ class PDFFont(object):
    def get_ascent(self):
        return self.ascent * self.vscale
    def get_descent(self):
        return self.descent * self.vscale
@ -482,6 +494,7 @@ class PDFFont(object):
        if w == 0:
            w = -self.default_width
        return w * self.hscale
    def get_height(self):
        h = self.bbox[3]-self.bbox[1]
        if h == 0:
@ -501,7 +514,7 @@ class PDFFont(object):
        return 0
    def string_width(self, s):
-        return sum( self.char_width(cid) for cid in self.decode(s) )
+        return sum(self.char_width(cid) for cid in self.decode(s))
 # PDFSimpleFont
@ -540,6 +553,7 @@ class PDFSimpleFont(PDFFont):
        except KeyError:
            raise PDFUnicodeNotDefined(None, cid)
 # PDFType1Font
 class PDFType1Font(PDFSimpleFont):
@ -557,7 +571,7 @@ class PDFType1Font(PDFSimpleFont):
            firstchar = int_value(spec.get('FirstChar', 0))
            lastchar = int_value(spec.get('LastChar', 255))
            widths = list_value(spec.get('Widths', [0]*256))
-            widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths) )
+            widths = dict((i+firstchar, w) for (i, w) in enumerate(widths))
        PDFSimpleFont.__init__(self, descriptor, widths, spec)
        if 'Encoding' not in spec and 'FontFile' in descriptor:
            # try to recover the missing encoding info from the font file.
@ -571,12 +585,14 @@ class PDFType1Font(PDFSimpleFont):
    def __repr__(self):
        return '<PDFType1Font: basefont=%r>' % self.basefont
 # PDFTrueTypeFont
 class PDFTrueTypeFont(PDFType1Font):
    def __repr__(self):
        return '<PDFTrueTypeFont: basefont=%r>' % self.basefont
 # PDFType3Font
 class PDFType3Font(PDFSimpleFont):
@ -584,16 +600,16 @@ class PDFType3Font(PDFSimpleFont):
        firstchar = int_value(spec.get('FirstChar', 0))
        lastchar = int_value(spec.get('LastChar', 0))
        widths = list_value(spec.get('Widths', [0]*256))
-        widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths))
+        widths = dict((i+firstchar, w) for (i, w) in enumerate(widths))
        if 'FontDescriptor' in spec:
            descriptor = dict_value(spec['FontDescriptor'])
        else:
-            descriptor = {'Ascent':0, 'Descent':0,
+            descriptor = {'Ascent': 0, 'Descent': 0,
-                          'FontBBox':spec['FontBBox']}
+                          'FontBBox': spec['FontBBox']}
        PDFSimpleFont.__init__(self, descriptor, widths, spec)
        self.matrix = tuple(list_value(spec.get('FontMatrix')))
-        (_,self.descent,_,self.ascent) = self.bbox
+        (_, self.descent, _, self.ascent) = self.bbox
-        (self.hscale,self.vscale) = apply_matrix_norm(self.matrix, (1,1))
+        (self.hscale, self.vscale) = apply_matrix_norm(self.matrix, (1, 1))
        return
    def __repr__(self):
@ -657,10 +673,10 @@ class PDFCIDFont(PDFFont):
        if self.vertical:
            # writing mode: vertical
            widths = get_widths2(list_value(spec.get('W2', [])))
-            self.disps = dict( (cid,(vx,vy)) for (cid,(_,(vx,vy))) in widths.iteritems() )
+            self.disps = dict((cid, (vx, vy)) for (cid, (_, (vx, vy))) in widths.iteritems())
-            (vy,w) = spec.get('DW2', [880, -1000])
+            (vy, w) = spec.get('DW2', [880, -1000])
-            self.default_disp = (None,vy)
+            self.default_disp = (None, vy)
-            widths = dict( (cid,w) for (cid,(w,_)) in widths.iteritems() )
+            widths = dict((cid, w) for (cid, (w, _)) in widths.iteritems())
            default_width = w
        else:
            # writing mode: horizontal
@ -689,7 +705,8 @@ class PDFCIDFont(PDFFont):
    def to_unichr(self, cid):
        try:
-            if not self.unicode_map: raise KeyError(cid)
+            if not self.unicode_map:
                raise KeyError(cid)
            return self.unicode_map.get_unichr(cid)
        except KeyError:
            raise PDFUnicodeNotDefined(self.cidcoding, cid)
@ -705,4 +722,5 @@ def main(argv):
        fp.close()
    return
-if __name__ == '__main__': sys.exit(main(sys.argv))
+if __name__ == '__main__':
    sys.exit(main(sys.argv))
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@ -26,8 +26,12 @@ from utils import mult_matrix, MATRIX_IDENTITY
 ##  Exceptions
 ##
-class PDFResourceError(PDFException): pass
+class PDFResourceError(PDFException):
-class PDFInterpreterError(PDFException): pass
+    pass
 class PDFInterpreterError(PDFException):
    pass
 ##  Constants
@ -116,12 +120,13 @@ class PDFGraphicState(object):
                (self.linewidth, self.linecap, self.linejoin,
                 self.miterlimit, self.dash, self.intent, self.flatness))
 ##  Resource Manager
 ##
 class PDFResourceManager(object):
    """Repository of shared resources.
-    
+
    ResourceManager facilitates reuse of shared resources
    such as fonts and images so that large objects are not
    allocated multiple times.
@ -148,7 +153,8 @@ class PDFResourceManager(object):
        try:
            return CMapDB.get_cmap(cmapname)
        except CMapDB.CMapNotFound:
-            if strict: raise
+            if strict:
                raise
            return CMap()
    def get_font(self, objid, spec):
@ -191,7 +197,7 @@ class PDFResourceManager(object):
            else:
                if STRICT:
                    raise PDFFontError('Invalid Font spec: %r' % spec)
-                font = PDFType1Font(self, spec) # this is so wrong!
+                font = PDFType1Font(self, spec)  # this is so wrong!
            if objid and self.caching:
                self._cached_fonts[objid] = font
        return font
@ -223,12 +229,14 @@ class PDFContentParser(PSStackParser):
        return
    def fillbuf(self):
-        if self.charpos < len(self.buf): return
+        if self.charpos < len(self.buf):
            return
        while 1:
            self.fillfp()
            self.bufpos = self.fp.tell()
            self.buf = self.fp.read(self.BUFSIZ)
-            if self.buf: break
+            if self.buf:
                break
            self.fp = None
        self.charpos = 0
        return
@ -259,7 +267,7 @@ class PDFContentParser(PSStackParser):
                except ValueError:
                    data += self.buf[self.charpos:]
                    self.charpos = len(self.buf)
-        data = data[:-(len(target)+1)] # strip the last part
+        data = data[:-(len(target)+1)]  # strip the last part
        data = re.sub(r'(\x0d\x0a|[\x0d\x0a])$', '', data)
        return (pos, data)
@ -270,6 +278,7 @@ class PDFContentParser(PSStackParser):
    KEYWORD_BI = KWD('BI')
    KEYWORD_ID = KWD('ID')
    KEYWORD_EI = KWD('EI')
    def do_keyword(self, pos, token):
        if token is self.KEYWORD_BI:
            # inline image within a content stream
@ -279,13 +288,14 @@ class PDFContentParser(PSStackParser):
                (_, objs) = self.end_type('inline')
                if len(objs) % 2 != 0:
                    raise PSTypeError('Invalid dictionary construct: %r' % objs)
-                d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) )
+                d = dict((literal_name(k), v) for (k, v) in choplist(2, objs))
                (pos, data) = self.get_inline_data(pos+len('ID '))
                obj = PDFStream(d, data)
                self.push((pos, obj))
                self.push((pos, self.KEYWORD_EI))
            except PSTypeError:
-                if STRICT: raise
+                if STRICT:
                    raise
        else:
            self.push((pos, token))
        return
@ -312,7 +322,9 @@ class PDFPageInterpreter(object):
        self.fontmap = {}
        self.xobjmap = {}
        self.csmap = PREDEFINED_COLORSPACE.copy()
-        if not resources: return
+        if not resources:
            return
        def get_colorspace(spec):
            if isinstance(spec, list):
                name = literal_name(spec[0])
@ -324,23 +336,23 @@ class PDFPageInterpreter(object):
                return PDFColorSpace(name, len(list_value(spec[1])))
            else:
                return PREDEFINED_COLORSPACE.get(name)
-        for (k,v) in dict_value(resources).iteritems():
+        for (k, v) in dict_value(resources).iteritems():
            if 2 <= self.debug:
-                print >>sys.stderr, 'Resource: %r: %r' % (k,v)
+                print >>sys.stderr, 'Resource: %r: %r' % (k, v)
            if k == 'Font':
-                for (fontid,spec) in dict_value(v).iteritems():
+                for (fontid, spec) in dict_value(v).iteritems():
                    objid = None
                    if isinstance(spec, PDFObjRef):
                        objid = spec.objid
                    spec = dict_value(spec)
                    self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec)
            elif k == 'ColorSpace':
-                for (csid,spec) in dict_value(v).iteritems():
+                for (csid, spec) in dict_value(v).iteritems():
                    self.csmap[csid] = get_colorspace(resolve1(spec))
            elif k == 'ProcSet':
                self.rsrcmgr.get_procset(list_value(v))
            elif k == 'XObject':
-                for (xobjid,xobjstrm) in dict_value(v).iteritems():
+                for (xobjid, xobjstrm) in dict_value(v).iteritems():
                    self.xobjmap[xobjid] = xobjstrm
        return
@ -367,7 +379,8 @@ class PDFPageInterpreter(object):
        return
    def pop(self, n):
-        if n == 0: return []
+        if n == 0:
            return []
        x = self.argstack[-n:]
        self.argstack = self.argstack[:-n]
        return x
@ -384,6 +397,7 @@ class PDFPageInterpreter(object):
    def do_q(self):
        self.gstack.append(self.get_current_state())
        return
    # grestore
    def do_Q(self):
        if self.gstack:
@ -392,7 +406,7 @@ class PDFPageInterpreter(object):
    # concat-matrix
    def do_cm(self, a1, b1, c1, d1, e1, f1):
-        self.ctm = mult_matrix((a1,b1,c1,d1,e1,f1), self.ctm)
+        self.ctm = mult_matrix((a1, b1, c1, d1, e1, f1), self.ctm)
        self.device.set_ctm(self.ctm)
        return
@ -400,30 +414,37 @@ class PDFPageInterpreter(object):
    def do_w(self, linewidth):
        self.graphicstate.linewidth = linewidth
        return
    # setlinecap
    def do_J(self, linecap):
        self.graphicstate.linecap = linecap
        return
    # setlinejoin
    def do_j(self, linejoin):
        self.graphicstate.linejoin = linejoin
        return
    # setmiterlimit
    def do_M(self, miterlimit):
        self.graphicstate.miterlimit = miterlimit
        return
    # setdash
    def do_d(self, dash, phase):
        self.graphicstate.dash = (dash, phase)
        return
    # setintent
    def do_ri(self, intent):
        self.graphicstate.intent = intent
        return
    # setflatness
    def do_i(self, flatness):
        self.graphicstate.flatness = flatness
        return
    # load-gstate
    def do_gs(self, name):
        #XXX
@ -431,34 +452,40 @@ class PDFPageInterpreter(object):
    # moveto
    def do_m(self, x, y):
-        self.curpath.append(('m',x,y))
+        self.curpath.append(('m', x, y))
        return
    # lineto
    def do_l(self, x, y):
-        self.curpath.append(('l',x,y))
+        self.curpath.append(('l', x, y))
        return
    # curveto
    def do_c(self, x1, y1, x2, y2, x3, y3):
-        self.curpath.append(('c',x1,y1,x2,y2,x3,y3))
+        self.curpath.append(('c', x1, y1, x2, y2, x3, y3))
        return
    # urveto
    def do_v(self, x2, y2, x3, y3):
-        self.curpath.append(('v',x2,y2,x3,y3))
+        self.curpath.append(('v', x2, y2, x3, y3))
        return
    # rveto
    def do_y(self, x1, y1, x3, y3):
-        self.curpath.append(('y',x1,y1,x3,y3))
+        self.curpath.append(('y', x1, y1, x3, y3))
        return
    # closepath
    def do_h(self):
        self.curpath.append(('h',))
        return
    # rectangle
    def do_re(self, x, y, w, h):
-        self.curpath.append(('m',x,y))
+        self.curpath.append(('m', x, y))
-        self.curpath.append(('l',x+w,y))
+        self.curpath.append(('l', x+w, y))
-        self.curpath.append(('l',x+w,y+h))
+        self.curpath.append(('l', x+w, y+h))
-        self.curpath.append(('l',x,y+h))
+        self.curpath.append(('l', x, y+h))
        self.curpath.append(('h',))
        return
@ -467,11 +494,13 @@ class PDFPageInterpreter(object):
        self.device.paint_path(self.graphicstate, True, False, False, self.curpath)
        self.curpath = []
        return
    # close-and-stroke
    def do_s(self):
        self.do_h()
        self.do_S()
        return
    # fill
    def do_f(self):
        self.device.paint_path(self.graphicstate, False, True, False, self.curpath)
@ -479,68 +508,85 @@ class PDFPageInterpreter(object):
        return
    # fill (obsolete)
    do_F = do_f
    # fill-even-odd
    def do_f_a(self):
        self.device.paint_path(self.graphicstate, False, True, True, self.curpath)
        self.curpath = []
        return
    # fill-and-stroke
    def do_B(self):
        self.device.paint_path(self.graphicstate, True, True, False, self.curpath)
        self.curpath = []
        return
    # fill-and-stroke-even-odd
    def do_B_a(self):
        self.device.paint_path(self.graphicstate, True, True, True, self.curpath)
        self.curpath = []
        return
    # close-fill-and-stroke
    def do_b(self):
        self.do_h()
        self.do_B()
        return
    # close-fill-and-stroke-even-odd
    def do_b_a(self):
        self.do_h()
        self.do_B_a()
        return
    # close-only
    def do_n(self):
        self.curpath = []
        return
    # clip
-    def do_W(self): return
+    def do_W(self):
        return
    # clip-even-odd
-    def do_W_a(self): return
+    def do_W_a(self):
        return
    # setcolorspace-stroking
    def do_CS(self, name):
        self.scs = self.csmap[literal_name(name)]
        return
    # setcolorspace-non-strokine
    def do_cs(self, name):
        self.ncs = self.csmap[literal_name(name)]
        return
    # setgray-stroking
    def do_G(self, gray):
        #self.do_CS(LITERAL_DEVICE_GRAY)
        return
    # setgray-non-stroking
    def do_g(self, gray):
        #self.do_cs(LITERAL_DEVICE_GRAY)
        return
    # setrgb-stroking
    def do_RG(self, r, g, b):
        #self.do_CS(LITERAL_DEVICE_RGB)
        return
    # setrgb-non-stroking
    def do_rg(self, r, g, b):
        #self.do_cs(LITERAL_DEVICE_RGB)
        return
    # setcmyk-stroking
    def do_K(self, c, m, y, k):
        #self.do_CS(LITERAL_DEVICE_CMYK)
        return
    # setcmyk-non-stroking
    def do_k(self, c, m, y, k):
        #self.do_cs(LITERAL_DEVICE_CMYK)
@ -556,6 +602,7 @@ class PDFPageInterpreter(object):
            n = 1
        self.pop(n)
        return
    def do_scn(self):
        if self.ncs:
            n = self.ncs.ncomponents
@ -565,42 +612,53 @@ class PDFPageInterpreter(object):
            n = 1
        self.pop(n)
        return
    def do_SC(self):
        self.do_SCN()
        return
    def do_sc(self):
        self.do_scn()
        return
    # sharing-name
-    def do_sh(self, name): return
+    def do_sh(self, name):
        return
    # begin-text
    def do_BT(self):
        self.textstate.reset()
        return
    # end-text
    def do_ET(self):
        return
    # begin-compat
-    def do_BX(self): return
+    def do_BX(self):
        return
    # end-compat
-    def do_EX(self): return
+    def do_EX(self):
        return
    # marked content operators
    def do_MP(self, tag):
        self.device.do_tag(tag)
        return
    def do_DP(self, tag, props):
        self.device.do_tag(tag, props)
        return
    def do_BMC(self, tag):
        self.device.begin_tag(tag)
        return
    def do_BDC(self, tag, props):
        self.device.begin_tag(tag, props)
        return
    def do_EMC(self):
        self.device.end_tag()
        return
@ -609,18 +667,22 @@ class PDFPageInterpreter(object):
    def do_Tc(self, space):
        self.textstate.charspace = space
        return
    # setwordspace
    def do_Tw(self, space):
        self.textstate.wordspace = space
        return
    # textscale
    def do_Tz(self, scale):
        self.textstate.scaling = scale
        return
    # setleading
    def do_TL(self, leading):
        self.textstate.leading = -leading
        return
    # selectfont
    def do_Tf(self, fontid, fontsize):
        try:
@ -631,10 +693,12 @@ class PDFPageInterpreter(object):
            self.textstate.font = self.rsrcmgr.get_font(None, {})
        self.textstate.fontsize = fontsize
        return
    # setrendering
    def do_Tr(self, render):
        self.textstate.render = render
        return
    # settextrise
    def do_Ts(self, rise):
        self.textstate.rise = rise
@ -642,49 +706,55 @@ class PDFPageInterpreter(object):
    # text-move
    def do_Td(self, tx, ty):
-        (a,b,c,d,e,f) = self.textstate.matrix
+        (a, b, c, d, e, f) = self.textstate.matrix
-        self.textstate.matrix = (a,b,c,d,tx*a+ty*c+e,tx*b+ty*d+f)
+        self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f)
        self.textstate.linematrix = (0, 0)
-        #print >>sys.stderr, 'Td(%r,%r): %r' % (tx,ty,self.textstate)
+        #print >>sys.stderr, 'Td(%r,%r): %r' % (tx, ty, self.textstate)
        return
    # text-move
    def do_TD(self, tx, ty):
-        (a,b,c,d,e,f) = self.textstate.matrix
+        (a, b, c, d, e, f) = self.textstate.matrix
-        self.textstate.matrix = (a,b,c,d,tx*a+ty*c+e,tx*b+ty*d+f)
+        self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f)
        self.textstate.leading = ty
        self.textstate.linematrix = (0, 0)
-        #print >>sys.stderr, 'TD(%r,%r): %r' % (tx,ty,self.textstate)
+        #print >>sys.stderr, 'TD(%r,%r): %r' % (tx, ty, self.textstate)
        return
    # textmatrix
-    def do_Tm(self, a,b,c,d,e,f):
+    def do_Tm(self, a, b, c, d, e, f):
-        self.textstate.matrix = (a,b,c,d,e,f)
+        self.textstate.matrix = (a, b, c, d, e, f)
        self.textstate.linematrix = (0, 0)
        return
    # nextline
    def do_T_a(self):
-        (a,b,c,d,e,f) = self.textstate.matrix
+        (a, b, c, d, e, f) = self.textstate.matrix
-        self.textstate.matrix = (a,b,c,d,self.textstate.leading*c+e,self.textstate.leading*d+f)
+        self.textstate.matrix = (a, b, c, d, self.textstate.leading*c+e, self.textstate.leading*d+f)
        self.textstate.linematrix = (0, 0)
        return
    # show-pos
    def do_TJ(self, seq):
-        #print >>sys.stderr, 'TJ(%r): %r' % (seq,self.textstate)
+        #print >>sys.stderr, 'TJ(%r): %r' % (seq, self.textstate)
        if self.textstate.font is None:
            if STRICT:
                raise PDFInterpreterError('No font specified!')
            return
        self.device.render_string(self.textstate, seq)
        return
    # show
    def do_Tj(self, s):
        self.do_TJ([s])
        return
    # quote
    def do__q(self, s):
        self.do_T_a()
        self.do_TJ([s])
        return
    # doublequote
    def do__w(self, aw, ac, s):
        self.do_Tw(aw)
@ -693,14 +763,16 @@ class PDFPageInterpreter(object):
        return
    # inline image
-    def do_BI(self): # never called
+    def do_BI(self):  # never called
        return
-    def do_ID(self): # never called
+
    def do_ID(self):  # never called
        return
    def do_EI(self, obj):
        if 'W' in obj and 'H' in obj:
            iobjid = str(id(obj))
-            self.device.begin_figure(iobjid, (0,0,1,1), MATRIX_IDENTITY)
+            self.device.begin_figure(iobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
            self.device.render_image(iobjid, obj)
            self.device.end_figure(iobjid)
        return
@ -721,7 +793,7 @@ class PDFPageInterpreter(object):
            interpreter = self.dup()
            bbox = list_value(xobj['BBox'])
            matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
-            # According to PDF reference 1.7 section 4.9.1, XObjects in 
+            # According to PDF reference 1.7 section 4.9.1, XObjects in
            # earlier PDFs (prior to v1.2) use the page's Resources entry
            # instead of having their own Resources entry.
            resources = dict_value(xobj.get('Resources')) or self.resources.copy()
@ -729,7 +801,7 @@ class PDFPageInterpreter(object):
            interpreter.render_contents(resources, [xobj], ctm=mult_matrix(matrix, self.ctm))
            self.device.end_figure(xobjid)
        elif subtype is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
-            self.device.begin_figure(xobjid, (0,0,1,1), MATRIX_IDENTITY)
+            self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
            self.device.render_image(xobjid, xobj)
            self.device.end_figure(xobjid)
        else:
@ -740,15 +812,15 @@ class PDFPageInterpreter(object):
    def process_page(self, page):
        if 1 <= self.debug:
            print >>sys.stderr, 'Processing page: %r' % page
-        (x0,y0,x1,y1) = page.mediabox
+        (x0, y0, x1, y1) = page.mediabox
        if page.rotate == 90:
-            ctm = (0,-1,1,0, -y0,x1)
+            ctm = (0, -1, 1, 0, -y0, x1)
        elif page.rotate == 180:
-            ctm = (-1,0,0,-1, x1,y1)
+            ctm = (-1, 0, 0, -1, x1, y1)
        elif page.rotate == 270:
-            ctm = (0,1,-1,0, y1,-x0)
+            ctm = (0, 1, -1, 0, y1, -x0)
        else:
-            ctm = (1,0,0,1, -x0,-y0)
+            ctm = (1, 0, 0, 1, -x0, -y0)
        self.device.begin_page(page, ctm)
        self.render_contents(page.resources, page.contents, ctm=ctm)
        self.device.end_page(page)
@ -760,7 +832,7 @@ class PDFPageInterpreter(object):
    def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY):
        if 1 <= self.debug:
            print >>sys.stderr, ('render_contents: resources=%r, streams=%r, ctm=%r' %
-                             (resources, streams, ctm))
+                                 (resources, streams, ctm))
        self.init_resources(resources)
        self.init_state(ctm)
        self.execute(list_value(streams))
@ -774,12 +846,12 @@ class PDFPageInterpreter(object):
            return
        while 1:
            try:
-                (_,obj) = parser.nextobject()
+                (_, obj) = parser.nextobject()
            except PSEOF:
                break
            if isinstance(obj, PSKeyword):
                name = keyword_name(obj)
-                method = 'do_%s' % name.replace('*','_a').replace('"','_w').replace("'",'_q')
+                method = 'do_%s' % name.replace('*', '_a').replace('"', '_w').replace("'", '_q')
                if hasattr(self, method):
                    func = getattr(self, method)
                    nargs = func.func_code.co_argcount-1
--- a/pdfminer/pdfpage.py
+++ b/pdfminer/pdfpage.py
@ -39,7 +39,7 @@ class PDFPage(object):
    def __init__(self, doc, pageid, attrs):
        """Initialize a page object.
-        
+
        doc: a PDFDocument object.
        pageid: any Python object that can uniquely identify the page.
        attrs: a dictionary of page attributes.
@ -62,7 +62,7 @@ class PDFPage(object):
        else:
            contents = []
        if not isinstance(contents, list):
-            contents = [ contents ]
+            contents = [contents]
        self.contents = contents
        return
@ -70,6 +70,7 @@ class PDFPage(object):
        return '<PDFPage: Resources=%r, MediaBox=%r>' % (self.resources, self.mediabox)
    INHERITABLE_ATTRS = set(['Resources', 'MediaBox', 'CropBox', 'Rotate'])
    @classmethod
    def create_pages(klass, document, debug=0):
        def search(obj, parent):
@ -79,7 +80,7 @@ class PDFPage(object):
            else:
                objid = obj.objid
                tree = dict_value(obj).copy()
-            for (k,v) in parent.iteritems():
+            for (k, v) in parent.iteritems():
                if k in klass.INHERITABLE_ATTRS and k not in tree:
                    tree[k] = v
            if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree:
@ -94,7 +95,7 @@ class PDFPage(object):
                yield (objid, tree)
        pages = False
        if 'Pages' in document.catalog:
-            for (objid,tree) in search(document.catalog['Pages'], document.catalog):
+            for (objid, tree) in search(document.catalog['Pages'], document.catalog):
                yield klass(document, objid, tree)
                pages = True
        if not pages:
@ -109,7 +110,8 @@ class PDFPage(object):
                        pass
        return
-    class PDFTextExtractionNotAllowed(PDFEncryptionError): pass
+    class PDFTextExtractionNotAllowed(PDFEncryptionError):
        pass
    @classmethod
    def get_pages(klass, fp,
@ -126,8 +128,10 @@ class PDFPage(object):
        if check_extractable and not doc.is_extractable:
            raise klass.PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp)
        # Process each page contained in the document.
-        for (pageno,page) in enumerate(klass.create_pages(doc)):
+        for (pageno, page) in enumerate(klass.create_pages(doc)):
-            if pagenos and (pageno not in pagenos): continue
+            if pagenos and (pageno not in pagenos):
                continue
            yield page
-            if maxpages and maxpages <= pageno+1: break
+            if maxpages and maxpages <= pageno+1:
                break
        return
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@ -15,7 +15,8 @@ from pdftypes import dict_value
 ##  Exceptions
 ##
-class PDFSyntaxError(PDFException): pass
+class PDFSyntaxError(PDFException):
    pass
 ##  PDFParser
@ -35,7 +36,7 @@ class PDFParser(PSStackParser):
      parser.set_document(doc)
      parser.seek(offset)
      parser.nextobject()
-    
+
    """
    def __init__(self, fp):
@ -55,12 +56,13 @@ class PDFParser(PSStackParser):
    KEYWORD_STREAM = KWD('stream')
    KEYWORD_XREF = KWD('xref')
    KEYWORD_STARTXREF = KWD('startxref')
    def do_keyword(self, pos, token):
        """Handles PDF-related keywords."""
-        
+
        if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
            self.add_results(*self.pop(1))
-        
+
        elif token is self.KEYWORD_ENDOBJ:
            self.add_results(*self.pop(4))
@ -71,7 +73,7 @@ class PDFParser(PSStackParser):
        elif token is self.KEYWORD_R:
            # reference to indirect object
            try:
-                ((_,objid), (_,genno)) = self.pop(2)
+                ((_, objid), (_, genno)) = self.pop(2)
                (objid, genno) = (int(objid), int(genno))
                obj = PDFObjRef(self.doc, objid, genno)
                self.push((pos, obj))
@ -80,7 +82,7 @@ class PDFParser(PSStackParser):
        elif token is self.KEYWORD_STREAM:
            # stream object
-            ((_,dic),) = self.pop(1)
+            ((_, dic),) = self.pop(1)
            dic = dict_value(dic)
            objlen = 0
            if not self.fallback:
@ -118,14 +120,14 @@ class PDFParser(PSStackParser):
            # XXX limit objlen not to exceed object boundary
            if 2 <= self.debug:
                print >>sys.stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
-                      (pos, objlen, dic, data[:10])
+                                    (pos, objlen, dic, data[:10])
            obj = PDFStream(dic, data, self.doc.decipher)
            self.push((pos, obj))
        else:
            # others
            self.push((pos, token))
-        
+
        return
@ -153,7 +155,7 @@ class PDFStreamParser(PDFParser):
        if token is self.KEYWORD_R:
            # reference to indirect object
            try:
-                ((_,objid), (_,genno)) = self.pop(2)
+                ((_, objid), (_, genno)) = self.pop(2)
                (objid, genno) = (int(objid), int(genno))
                obj = PDFObjRef(self.doc, objid, genno)
                self.push((pos, obj))
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@ -22,13 +22,28 @@ LITERALS_DCT_DECODE = (LIT('DCTDecode'), LIT('DCT'))
 ##  PDF Objects
 ##
-class PDFObject(PSObject): pass
+class PDFObject(PSObject):
    pass
-class PDFException(PSException): pass
+
-class PDFTypeError(PDFException): pass
+class PDFException(PSException):
-class PDFValueError(PDFException): pass
+    pass
-class PDFObjectNotFound(PDFException): pass
+
-class PDFNotImplementedError(PDFException): pass
+
 class PDFTypeError(PDFException):
    pass
 class PDFValueError(PDFException):
    pass
 class PDFObjectNotFound(PDFException):
    pass
 class PDFNotImplementedError(PDFException):
    pass
 ##  PDFObjRef
@ -65,33 +80,36 @@ def resolve1(x, default=None):
        x = x.resolve(default=default)
    return x
 def resolve_all(x, default=None):
    """Recursively resolves the given object and all the internals.
-    
+
    Make sure there is no indirect reference within the nested object.
    This procedure might be slow.
    """
    while isinstance(x, PDFObjRef):
        x = x.resolve(default=default)
    if isinstance(x, list):
-        x = [ resolve_all(v, default=default) for v in x ]
+        x = [resolve_all(v, default=default) for v in x]
    elif isinstance(x, dict):
-        for (k,v) in x.iteritems():
+        for (k, v) in x.iteritems():
            x[k] = resolve_all(v, default=default)
    return x
 def decipher_all(decipher, objid, genno, x):
    """Recursively deciphers the given object.
    """
    if isinstance(x, str):
        return decipher(objid, genno, x)
    if isinstance(x, list):
-        x = [ decipher_all(decipher, objid, genno, v) for v in x ]
+        x = [decipher_all(decipher, objid, genno, v) for v in x]
    elif isinstance(x, dict):
-        for (k,v) in x.iteritems():
+        for (k, v) in x.iteritems():
            x[k] = decipher_all(decipher, objid, genno, v)
    return x
 # Type cheking
 def int_value(x):
    x = resolve1(x)
@ -101,6 +119,7 @@ def int_value(x):
        return 0
    return x
 def float_value(x):
    x = resolve1(x)
    if not isinstance(x, float):
@ -109,6 +128,7 @@ def float_value(x):
        return 0.0
    return x
 def num_value(x):
    x = resolve1(x)
    if not (isinstance(x, int) or isinstance(x, float)):
@ -117,6 +137,7 @@ def num_value(x):
        return 0
    return x
 def str_value(x):
    x = resolve1(x)
    if not isinstance(x, str):
@ -125,6 +146,7 @@ def str_value(x):
        return ''
    return x
 def list_value(x):
    x = resolve1(x)
    if not (isinstance(x, list) or isinstance(x, tuple)):
@ -133,6 +155,7 @@ def list_value(x):
        return []
    return x
 def dict_value(x):
    x = resolve1(x)
    if not isinstance(x, dict):
@ -141,6 +164,7 @@ def dict_value(x):
        return {}
    return x
 def stream_value(x):
    x = resolve1(x)
    if not isinstance(x, PDFStream):
@ -179,13 +203,13 @@ class PDFStream(PDFObject):
    def __contains__(self, name):
        return name in self.attrs
-    
+
    def __getitem__(self, name):
        return self.attrs[name]
-    
+
    def get(self, name, default=None):
        return self.attrs.get(name, default)
-    
+
    def get_any(self, names, default=None):
        for name in names:
            if name in self.attrs:
@ -194,12 +218,14 @@ class PDFStream(PDFObject):
    def get_filters(self):
        filters = self.get_any(('F', 'Filter'))
-        if not filters: return []
+        if not filters:
-        if isinstance(filters, list): return filters
+            return []
-        return [ filters ]
+        if isinstance(filters, list):
            return filters
        return [filters]
    def decode(self):
-        assert self.data is None and self.rawdata != None
+        assert self.data is None and self.rawdata is not None
        data = self.rawdata
        if self.decipher:
            # Handle encryption
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@ -8,11 +8,24 @@ STRICT = 0
 ##  PS Exceptions
 ##
-class PSException(Exception): pass
+class PSException(Exception):
-class PSEOF(PSException): pass
+    pass
-class PSSyntaxError(PSException): pass
+
-class PSTypeError(PSException): pass
+
-class PSValueError(PSException): pass
+class PSEOF(PSException):
    pass
 class PSSyntaxError(PSException):
    pass
 class PSTypeError(PSException):
    pass
 class PSValueError(PSException):
    pass
 ##  Basic PostScript Types
@ -32,7 +45,7 @@ class PSObject(object):
 class PSLiteral(PSObject):
    """A class that represents a PostScript literal.
-    
+
    Postscript literals are used as identifiers, such as
    variable names, property names and dictionary keys.
    Literals are case sensitive and denoted by a preceding
@ -55,11 +68,11 @@ class PSLiteral(PSObject):
 class PSKeyword(PSObject):
    """A class that represents a PostScript keyword.
-    
+
    PostScript keywords are a dozen of predefined words.
    Commands and directives in PostScript are expressed by keywords.
    They are also used to denote the content boundaries.
-    
+
    Note: Do not create an instance of PSKeyword directly.
    Always use PSKeywordTable.intern().
    """
@ -80,7 +93,7 @@ class PSSymbolTable(object):
    Interned objects can be checked its identity with "is" operator.
    """
-    
+
    def __init__(self, klass):
        self.dict = {}
        self.klass = klass
@ -114,6 +127,7 @@ def literal_name(x):
            return str(x)
    return x.name
 def keyword_name(x):
    if not isinstance(x, PSKeyword):
        if STRICT:
@ -136,7 +150,9 @@ END_NUMBER = re.compile(r'[^0-9]')
 END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')
 END_STRING = re.compile(r'[()\134]')
 OCT_STRING = re.compile(r'[0-7]')
-ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
+ESC_STRING = {'b': 8, 't': 9, 'n': 10, 'f': 12, 'r': 13, '(': 40, ')': 41, '\\': 92}
 class PSBaseParser(object):
    """Most basic PostScript parser that performs only tokenization.
@ -190,7 +206,8 @@ class PSBaseParser(object):
        return
    def fillbuf(self):
-        if self.charpos < len(self.buf): return
+        if self.charpos < len(self.buf):
            return
        # fetch next chunk.
        self.bufpos = self.fp.tell()
        self.buf = self.fp.read(self.BUFSIZ)
@ -242,7 +259,8 @@ class PSBaseParser(object):
            pos = max(0, pos-self.BUFSIZ)
            self.fp.seek(pos)
            s = self.fp.read(prevpos-pos)
-            if not s: break
+            if not s:
                break
            while 1:
                n = max(s.rfind('\r'), s.rfind('\n'))
                if n == -1:
@ -357,7 +375,7 @@ class PSBaseParser(object):
            pass
        self._parse1 = self._parse_main
        return j
-    
+
    def _parse_float(self, s, i):
        m = END_NUMBER.search(s, i)
        if not m:
@ -407,7 +425,7 @@ class PSBaseParser(object):
            return j+1
        if c == ')':
            self.paren -= 1
-            if self.paren: # WTF, they said balanced parens need no special treatment.
+            if self.paren:  # WTF, they said balanced parens need no special treatment.
                self._curtoken += c
                return j+1
        self._add_token(self._curtoken)
@ -493,17 +511,17 @@ class PSStackParser(PSBaseParser):
    def push(self, *objs):
        self.curstack.extend(objs)
        return
-    
+
    def pop(self, n):
        objs = self.curstack[-n:]
        self.curstack[-n:] = []
        return objs
-    
+
    def popall(self):
        objs = self.curstack
        self.curstack = []
        return objs
-    
+
    def add_results(self, *objs):
        if 2 <= self.debug:
            print >>sys.stderr, 'add_results: %r' % (objs,)
@ -516,11 +534,11 @@ class PSStackParser(PSBaseParser):
        if 2 <= self.debug:
            print >>sys.stderr, 'start_type: pos=%r, type=%r' % (pos, type)
        return
-    
+
    def end_type(self, type):
        if self.curtype != type:
            raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
-        objs = [ obj for (_,obj) in self.curstack ]
+        objs = [obj for (_, obj) in self.curstack]
        (pos, self.curtype, self.curstack) = self.context.pop()
        if 2 <= self.debug:
            print >>sys.stderr, 'end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs)
@ -553,7 +571,8 @@ class PSStackParser(PSBaseParser):
                try:
                    self.push(self.end_type('a'))
                except PSTypeError:
-                    if STRICT: raise
+                    if STRICT:
                        raise
            elif token == KEYWORD_DICT_BEGIN:
                # begin dictionary
                self.start_type(pos, 'd')
@ -564,10 +583,11 @@ class PSStackParser(PSBaseParser):
                    if len(objs) % 2 != 0:
                        raise PSSyntaxError('Invalid dictionary construct: %r' % objs)
                    # construct a Python dictionary.
-                    d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) if v is not None )
+                    d = dict((literal_name(k), v) for (k, v) in choplist(2, objs) if v is not None)
                    self.push((pos, d))
                except PSTypeError:
-                    if STRICT: raise
+                    if STRICT:
                        raise
            elif token == KEYWORD_PROC_BEGIN:
                # begin proc
                self.start_type(pos, 'p')
@ -576,7 +596,8 @@ class PSStackParser(PSBaseParser):
                try:
                    self.push(self.end_type('p'))
                except PSTypeError:
-                    if STRICT: raise
+                    if STRICT:
                        raise
            else:
                if 2 <= self.debug:
                    print >>sys.stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \
@ -592,9 +613,11 @@ class PSStackParser(PSBaseParser):
        return obj
 import unittest
 ##  Simplistic Test cases
 ##
 import unittest
 class TestPSBaseParser(unittest.TestCase):
    TESTDATA = r'''%!PS
@ -630,7 +653,7 @@ func/a/b{(c)do*}def
      (242, KWD('def')), (246, KWD('[')), (248, 1), (250, 'z'), (254, KWD('!')),
      (256, KWD(']')), (258, KWD('<<')), (261, LIT('foo')), (266, 'bar'),
      (272, KWD('>>'))
-      ]
+    ]
    OBJS = [
      (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')),
@ -641,10 +664,11 @@ func/a/b{(c)do*}def
      (191, ''), (194, ' '), (199, '@@ '), (211, '\xab\xcd\x00\x124\x05'),
      (230, LIT('a')), (232, LIT('b')), (234, ['c']), (246, [1, 'z']),
      (258, {'foo': 'bar'}),
-      ]
+    ]
    def get_tokens(self, s):
        import StringIO
        class MyParser(PSBaseParser):
            def flush(self):
                self.add_results(*self.popall())
@ -659,6 +683,7 @@ func/a/b{(c)do*}def
    def get_objects(self, s):
        import StringIO
        class MyParser(PSStackParser):
            def flush(self):
                self.add_results(*self.popall())
@ -683,4 +708,5 @@ func/a/b{(c)do*}def
        self.assertEqual(objs, self.OBJS)
        return
-if __name__ == '__main__': unittest.main()
+if __name__ == '__main__':
    unittest.main()
--- a/pdfminer/rijndael.py
+++ b/pdfminer/rijndael.py
--- a/pdfminer/runlength.py
+++ b/pdfminer/runlength.py
@ -24,7 +24,7 @@ def rldecode(data):
    '1234567777777abcde'
    """
    decoded = []
-    i=0
+    i = 0
    while i < len(data):
        #print "data[%d]=:%d:" % (i,ord(data[i]))
        length = ord(data[i])
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@ -32,13 +32,13 @@ def apply_png_predictor(pred, colors, columns, bitspercomponent, data):
                buf += chr(c)
        elif pred == '\x02':
            # PNG up
-            for (a,b) in zip(line0,line1):
+            for (a, b) in zip(line0, line1):
                c = (ord(a)+ord(b)) & 255
                buf += chr(c)
        elif pred == '\x03':
            # PNG average (UNTESTED)
            c = 0
-            for (a,b) in zip(line0,line1):
+            for (a, b) in zip(line0, line1):
                c = ((c+ord(a)+ord(b))/2) & 255
                buf += chr(c)
        else:
@ -52,21 +52,25 @@ def apply_png_predictor(pred, colors, columns, bitspercomponent, data):
 ##
 MATRIX_IDENTITY = (1, 0, 0, 1, 0, 0)
-def mult_matrix((a1,b1,c1,d1,e1,f1), (a0,b0,c0,d0,e0,f0)):
+
 def mult_matrix((a1, b1, c1, d1, e1, f1), (a0, b0, c0, d0, e0, f0)):
    """Returns the multiplication of two matrices."""
    return (a0*a1+c0*b1,    b0*a1+d0*b1,
            a0*c1+c0*d1,    b0*c1+d0*d1,
            a0*e1+c0*f1+e0, b0*e1+d0*f1+f0)
 def translate_matrix((a,b,c,d,e,f), (x,y)):
    """Translates a matrix by (x,y)."""
    return (a,b,c,d,x*a+y*c+e,x*b+y*d+f)
-def apply_matrix_pt((a,b,c,d,e,f), (x,y)):
+def translate_matrix((a, b, c, d, e, f), (x, y)):
    """Translates a matrix by (x, y)."""
    return (a, b, c, d, x*a+y*c+e, x*b+y*d+f)
 def apply_matrix_pt((a, b, c, d, e, f), (x, y)):
    """Applies a matrix to a point."""
    return (a*x+c*y+e, b*x+d*y+f)
-def apply_matrix_norm((a,b,c,d,e,f), (p,q)):
+
 def apply_matrix_norm((a, b, c, d, e, f), (p, q)):
    """Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0))"""
    return (a*p+c*q, b*p+d*q)
@ -79,17 +83,20 @@ def uniq(objs):
    """Eliminates duplicated elements."""
    done = set()
    for obj in objs:
-        if obj in done: continue
+        if obj in done:
            continue
        done.add(obj)
        yield obj
    return
 # csort
-def csort(objs, key=lambda x:x):
+def csort(objs, key=lambda x: x):
    """Order-preserving sorting function."""
-    idxs = dict( (obj,i) for (i,obj) in enumerate(objs) )
+    idxs = dict((obj, i) for (i, obj) in enumerate(objs))
    return sorted(objs, key=lambda obj: (key(obj), idxs[obj]))
 # fsplit
 def fsplit(pred, objs):
    """Split a list into two classes according to the predicate."""
@ -100,7 +107,8 @@ def fsplit(pred, objs):
            t.append(obj)
        else:
            f.append(obj)
-    return (t,f)
+    return (t, f)
 # drange
 def drange(v0, v1, d):
@ -108,16 +116,18 @@ def drange(v0, v1, d):
    assert v0 < v1
    return xrange(int(v0)/d, int(v1+d)/d)
 # get_bound
 def get_bound(pts):
    """Compute a minimal rectangle that covers all the points."""
    (x0, y0, x1, y1) = (INF, INF, -INF, -INF)
-    for (x,y) in pts:
+    for (x, y) in pts:
        x0 = min(x0, x)
        y0 = min(y0, y)
        x1 = max(x1, x)
        y1 = max(y1, y)
-    return (x0,y0,x1,y1)
+    return (x0, y0, x1, y1)
 # pick
 def pick(seq, func, maxobj=None):
@ -126,9 +136,10 @@ def pick(seq, func, maxobj=None):
    for obj in seq:
        score = func(obj)
        if maxscore is None or maxscore < score:
-            (maxscore,maxobj) = (score,obj)
+            (maxscore, maxobj) = (score, obj)
    return maxobj
 # choplist
 def choplist(n, seq):
    """Groups every n elements of the list."""
@ -140,6 +151,7 @@ def choplist(n, seq):
            r = []
    return
 # nunpack
 def nunpack(s, default=0):
    """Unpacks 1 to 4 byte integers (big endian)."""
@ -157,59 +169,65 @@ def nunpack(s, default=0):
    else:
        raise TypeError('invalid length: %d' % l)
 # decode_text
-PDFDocEncoding = ''.join( unichr(x) for x in (
+PDFDocEncoding = ''.join(unichr(x) for x in (
-  0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
-  0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
+    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
-  0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0017, 0x0017,
+    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0017, 0x0017,
-  0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc,
+    0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc,
-  0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
-  0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
+    0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
-  0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
-  0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
+    0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
-  0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
-  0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
+    0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
-  0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
-  0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
+    0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
-  0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
-  0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
+    0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
-  0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
-  0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000,
+    0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000,
-  0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,
+    0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,
-  0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
+    0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
-  0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160,
+    0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160,
-  0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000,
+    0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000,
-  0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
+    0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
-  0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af,
+    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af,
-  0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
+    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
-  0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
+    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
-  0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
+    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
-  0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
+    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
-  0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
+    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
-  0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
+    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
-  0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
+    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
-  0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
-  0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
+    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
-  0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
+    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
 ))
 def decode_text(s):
    """Decodes a PDFDocEncoding string to Unicode."""
    if s.startswith('\xfe\xff'):
        return unicode(s[2:], 'utf-16be', 'ignore')
    else:
-        return ''.join( PDFDocEncoding[ord(c)] for c in s )
+        return ''.join(PDFDocEncoding[ord(c)] for c in s)
 # enc
 def enc(x, codec='ascii'):
    """Encodes a string for SGML/XML/HTML"""
-    x = x.replace('&','&amp;').replace('>','&gt;').replace('<','&lt;').replace('"','&quot;')
+    x = x.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;').replace('"', '&quot;')
    return x.encode(codec, 'xmlcharrefreplace')
-def bbox2str((x0,y0,x1,y1)):
+
 def bbox2str((x0, y0, x1, y1)):
    return '%.3f,%.3f,%.3f,%.3f' % (x0, y0, x1, y1)
-def matrix2str((a,b,c,d,e,f)):
+
-    return '[%.2f,%.2f,%.2f,%.2f, (%.2f,%.2f)]' % (a,b,c,d,e,f)
+def matrix2str((a, b, c, d, e, f)):
    return '[%.2f,%.2f,%.2f,%.2f, (%.2f,%.2f)]' % (a, b, c, d, e, f)
 ##  Plane
@ -240,14 +258,14 @@ class Plane(object):
    def __contains__(self, obj):
        return obj in self._objs
-    def _getrange(self, (x0,y0,x1,y1)):
+    def _getrange(self, (x0, y0, x1, y1)):
        x0 = max(self.x0, x0)
        y0 = max(self.y0, y0)
        x1 = min(self.x1, x1)
        y1 = min(self.y1, y1)
        for y in drange(y0, y1, self.gridsize):
            for x in drange(x0, x1, self.gridsize):
-                yield (x,y)
+                yield (x, y)
        return
    # extend(objs)
@ -255,7 +273,7 @@ class Plane(object):
        for obj in objs:
            self.add(obj)
        return
-    
+
    # add(obj): place an object.
    def add(self, obj):
        for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
@ -279,14 +297,17 @@ class Plane(object):
        return
    # find(): finds objects that are in a certain area.
-    def find(self, (x0,y0,x1,y1)):
+    def find(self, (x0, y0, x1, y1)):
        done = set()
-        for k in self._getrange((x0,y0,x1,y1)):
+        for k in self._getrange((x0, y0, x1, y1)):
-            if k not in self._grid: continue
+            if k not in self._grid:
                continue
            for obj in self._grid[k]:
-                if obj in done: continue
+                if obj in done:
                    continue
                done.add(obj)
                if (obj.x1 <= x0 or x1 <= obj.x0 or
-                    obj.y1 <= y0 or y1 <= obj.y0): continue
+                    obj.y1 <= y0 or y1 <= obj.y0):
                    continue
                yield obj
        return
--- a/setup.py
+++ b/setup.py
@ -7,9 +7,9 @@ setup(
    version=__version__,
    description='PDF parser and analyzer',
    long_description='''PDFMiner is a tool for extracting information from PDF documents.
-Unlike other PDF-related tools, it focuses entirely on getting 
+Unlike other PDF-related tools, it focuses entirely on getting
 and analyzing text data. PDFMiner allows to obtain
-the exact location of texts in a page, as well as 
+the exact location of texts in a page, as well as
 other information such as fonts or lines.
 It includes a PDF converter that can transform PDF files
 into other text formats (such as HTML). It has an extensible
--- a/tools/conv_cmap.py
+++ b/tools/conv_cmap.py
@ -50,7 +50,7 @@ class CMapConverter(object):
                assert values[0] == 'CID'
                encs = values
                continue
-            
+
            def put(dmap, code, cid, force=False):
                for b in code[:-1]:
                    b = ord(b)
@ -64,7 +64,7 @@ class CMapConverter(object):
                if force or ((b not in dmap) or dmap[b] == cid):
                    dmap[b] = cid
                return
-            
+
            def add(unimap, enc, code):
                try:
                    codec = self.enc2codec[enc]
@ -78,20 +78,20 @@ class CMapConverter(object):
                except UnicodeError:
                    pass
                return
-                
+
            def pick(unimap):
                chars = unimap.items()
                chars.sort(key=(lambda (c,n):(n,-ord(c))), reverse=True)
                (c,_) = chars[0]
                return c
-                
+
            cid = int(values[0])
            unimap_h = {}
            unimap_v = {}
            for (enc,value) in zip(encs, values):
                if enc == 'CID': continue
                if value == '*': continue
-                
+
                # hcodes, vcodes: encoded bytes for each writing mode.
                hcodes = []
                vcodes = []
@ -121,7 +121,7 @@ class CMapConverter(object):
                    for code in hcodes:
                        put(hmap, code, cid)
                        put(vmap, code, cid)
-            
+
            # Determine the "most popular" candidate.
            if unimap_h:
                self.cid2unichr_h[cid] = pick(unimap_h)
@ -137,7 +137,7 @@ class CMapConverter(object):
        )
        fp.write(pickle.dumps(data))
        return
-        
+
    def dump_unicodemap(self, fp):
        data = dict(
            CID2UNICHR_H=self.cid2unichr_h,
@ -151,7 +151,7 @@ def main(argv):
    import getopt
    import gzip
    import os.path
-    
+
    def usage():
        print 'usage: %s [-c enc=codec] output_dir regname [cid2code.txt ...]' % argv[0]
        return 100
--- a/tools/dumppdf.py
+++ b/tools/dumppdf.py
@ -25,7 +25,7 @@ def dumpxml(out, obj, codec=None):
    if obj is None:
        out.write('<null />')
        return
-    
+
    if isinstance(obj, dict):
        out.write('<dict size="%d">\n' % len(obj))
        for (k,v) in obj.iteritems():
@ -179,7 +179,7 @@ def extractembedded(outfp, fname, objids, pagenos, password='',
        out.write(fileobj.get_data())
        out.close()
        return
-    
+
    fp = file(fname, 'rb')
    parser = PDFParser(fp)
    doc = PDFDocument(parser)
--- a/tools/latin2ascii.py
+++ b/tools/latin2ascii.py
@ -14,7 +14,7 @@ This is an in-house mapping table for some Latin-1 characters
 LATIN2ASCII = {
  #0x00a0: '',
  #0x00a7: '',
-  
+
  # iso-8859-1
  0x00c0: 'A`',
  0x00c1: "A'",
--- a/tools/pdf2html.cgi
+++ b/tools/pdf2html.cgi
@ -159,7 +159,7 @@ class WebApp(object):
    def convert(self):
        self.form = cgi.FieldStorage(fp=self.infp, environ=self.environ)
-        if (self.method != 'POST' or 
+        if (self.method != 'POST' or
            'c' not in self.form or
            'f' not in self.form):
            self.response_200()