diff --git a/pdfminer/__init__.py b/pdfminer/__init__.py index 4244f11..a2f2971 100644 --- a/pdfminer/__init__.py +++ b/pdfminer/__init__.py @@ -1,4 +1,5 @@ #!/usr/bin/env python __version__ = '20131022' -if __name__ == '__main__': print __version__ +if __name__ == '__main__': + print __version__ diff --git a/pdfminer/arcfour.py b/pdfminer/arcfour.py index 0675ee9..7adcf60 100644 --- a/pdfminer/arcfour.py +++ b/pdfminer/arcfour.py @@ -6,6 +6,7 @@ This code is in the public domain. """ + ## Arcfour ## class Arcfour(object): diff --git a/pdfminer/ascii85.py b/pdfminer/ascii85.py index a78d2bf..80df0ba 100644 --- a/pdfminer/ascii85.py +++ b/pdfminer/ascii85.py @@ -9,6 +9,7 @@ This code is in the public domain. import re import struct + # ascii85decode(data) def ascii85decode(data): """ @@ -16,13 +17,13 @@ def ascii85decode(data): letters, using 85 different types of characters (as 256**4 < 85**5). When the length of the original bytes is not a multiple of 4, a special rule is used for round up. - + The Adobe's ASCII85 implementation is slightly different from its original in handling the last characters. - + The sample string is taken from: http://en.wikipedia.org/w/index.php?title=Ascii85 - + >>> ascii85decode('9jqo^BlbD-BleB1DJ+*+F(f,q') 'Man is distinguished' >>> ascii85decode('E,9)oF*2M7/c~>') @@ -35,7 +36,7 @@ def ascii85decode(data): n += 1 b = b*85+(ord(c)-33) if n == 5: - out += struct.pack('>L',b) + out += struct.pack('>L', b) n = b = 0 elif c == 'z': assert n == 0 @@ -44,13 +45,15 @@ def ascii85decode(data): if n: for _ in range(5-n): b = b*85+84 - out += struct.pack('>L',b)[:n-1] + out += struct.pack('>L', b)[:n-1] break return out # asciihexdecode(data) hex_re = re.compile(r'([a-f\d]{2})', re.IGNORECASE) trail_re = re.compile(r'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE) + + def asciihexdecode(data): """ ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1 @@ -60,7 +63,7 @@ def asciihexdecode(data): EOD. Any other characters will cause an error. If the filter encounters the EOD marker after reading an odd number of hexadecimal digits, it will behave as if a 0 followed the last digit. - + >>> asciihexdecode('61 62 2e6364 65') 'ab.cde' >>> asciihexdecode('61 62 2e6364 657>') diff --git a/pdfminer/ccitt.py b/pdfminer/ccitt.py index 9ce08b2..5c7f695 100644 --- a/pdfminer/ccitt.py +++ b/pdfminer/ccitt.py @@ -29,7 +29,7 @@ class BitParser(object): for i in xrange(len(bits)): if 0 < i: if p[b] is None: - p[b] = [None,None] + p[b] = [None, None] p = p[b] if bits[i] == '1': b = 1 @@ -41,7 +41,7 @@ class BitParser(object): def feedbytes(self, data): for c in data: b = ord(c) - for m in (128,64,32,16,8,4,2,1): + for m in (128, 64, 32, 16, 8, 4, 2, 1): self._parse_bit(b & m) return @@ -62,7 +62,7 @@ class BitParser(object): ## class CCITTG4Parser(BitParser): - MODE = [None,None] + MODE = [None, None] BitParser.add(MODE, 0, '1') BitParser.add(MODE, +1, '011') BitParser.add(MODE, -1, '010') @@ -82,7 +82,7 @@ class CCITTG4Parser(BitParser): BitParser.add(MODE, 'x7', '0000001110') BitParser.add(MODE, 'e', '000000000001000000000001') - WHITE = [None,None] + WHITE = [None, None] BitParser.add(WHITE, 0 , '00110101') BitParser.add(WHITE, 1 , '000111') BitParser.add(WHITE, 2 , '0111') @@ -188,7 +188,7 @@ class CCITTG4Parser(BitParser): BitParser.add(WHITE, 2496, '000000011110') BitParser.add(WHITE, 2560, '000000011111') - BLACK = [None,None] + BLACK = [None, None] BitParser.add(BLACK, 0 , '0000110111') BitParser.add(BLACK, 1 , '010') BitParser.add(BLACK, 2 , '11') @@ -294,25 +294,30 @@ class CCITTG4Parser(BitParser): BitParser.add(BLACK, 2496, '000000011110') BitParser.add(BLACK, 2560, '000000011111') - UNCOMPRESSED = [None,None] - BitParser.add(UNCOMPRESSED, '1' , '1') - BitParser.add(UNCOMPRESSED, '01' , '01') - BitParser.add(UNCOMPRESSED, '001' , '001') - BitParser.add(UNCOMPRESSED, '0001' , '0001') - BitParser.add(UNCOMPRESSED, '00001' , '00001') - BitParser.add(UNCOMPRESSED, '00000' , '000001') - BitParser.add(UNCOMPRESSED, 'T00' , '00000011') - BitParser.add(UNCOMPRESSED, 'T10' , '00000010') - BitParser.add(UNCOMPRESSED, 'T000' , '000000011') - BitParser.add(UNCOMPRESSED, 'T100' , '000000010') - BitParser.add(UNCOMPRESSED, 'T0000' , '0000000011') - BitParser.add(UNCOMPRESSED, 'T1000' , '0000000010') - BitParser.add(UNCOMPRESSED, 'T00000' , '00000000011') - BitParser.add(UNCOMPRESSED, 'T10000' , '00000000010') - - class EOFB(Exception): pass - class InvalidData(Exception): pass - class ByteSkip(Exception): pass + UNCOMPRESSED = [None, None] + BitParser.add(UNCOMPRESSED, '1', '1') + BitParser.add(UNCOMPRESSED, '01', '01') + BitParser.add(UNCOMPRESSED, '001', '001') + BitParser.add(UNCOMPRESSED, '0001', '0001') + BitParser.add(UNCOMPRESSED, '00001', '00001') + BitParser.add(UNCOMPRESSED, '00000', '000001') + BitParser.add(UNCOMPRESSED, 'T00', '00000011') + BitParser.add(UNCOMPRESSED, 'T10', '00000010') + BitParser.add(UNCOMPRESSED, 'T000', '000000011') + BitParser.add(UNCOMPRESSED, 'T100', '000000010') + BitParser.add(UNCOMPRESSED, 'T0000', '0000000011') + BitParser.add(UNCOMPRESSED, 'T1000', '0000000010') + BitParser.add(UNCOMPRESSED, 'T00000', '00000000011') + BitParser.add(UNCOMPRESSED, 'T10000', '00000000010') + + class EOFB(Exception): + pass + + class InvalidData(Exception): + pass + + class ByteSkip(Exception): + pass def __init__(self, width, bytealign=False): BitParser.__init__(self) @@ -325,7 +330,7 @@ class CCITTG4Parser(BitParser): for c in data: b = ord(c) try: - for m in (128,64,32,16,8,4,2,1): + for m in (128, 64, 32, 16, 8, 4, 2, 1): self._parse_bit(b & m) except self.ByteSkip: self._accept = self._parse_mode @@ -359,7 +364,8 @@ class CCITTG4Parser(BitParser): raise self.InvalidData(mode) def _parse_horiz1(self, n): - if n is None: raise self.InvalidData + if n is None: + raise self.InvalidData self._n1 += n if n < 64: self._n2 = 0 @@ -371,7 +377,8 @@ class CCITTG4Parser(BitParser): return self.BLACK def _parse_horiz2(self, n): - if n is None: raise self.InvalidData + if n is None: + raise self.InvalidData self._n2 += n if n < 64: self._color = 1-self._color @@ -385,9 +392,10 @@ class CCITTG4Parser(BitParser): return self.BLACK def _parse_uncompressed(self, bits): - if not bits: raise self.InvalidData + if not bits: + raise self.InvalidData if bits.startswith('T'): - self._accept = self._parse_mode + self._accept = self._parse_mode self._color = int(bits[1]) self._do_uncompressed(bits[2:]) return self.MODE @@ -396,17 +404,17 @@ class CCITTG4Parser(BitParser): return self.UNCOMPRESSED def _get_bits(self): - return ''.join( str(b) for b in self._curline[:self._curpos] ) + return ''.join(str(b) for b in self._curline[:self._curpos]) def _get_refline(self, i): if i < 0: - return '[]'+''.join( str(b) for b in self._refline ) + return '[]'+''.join(str(b) for b in self._refline) elif len(self._refline) <= i: - return ''.join( str(b) for b in self._refline )+'[]' + return ''.join(str(b) for b in self._refline)+'[]' else: - return (''.join( str(b) for b in self._refline[:i] )+ - '['+str(self._refline[i])+']'+ - ''.join( str(b) for b in self._refline[i+1:] )) + return (''.join(str(b) for b in self._refline[:i]) + + '['+str(self._refline[i])+']' + + ''.join(str(b) for b in self._refline[i+1:])) def reset(self): self._y = 0 @@ -417,16 +425,16 @@ class CCITTG4Parser(BitParser): return def output_line(self, y, bits): - print y, ''.join( str(b) for b in bits ) + print y, ''.join(str(b) for b in bits) return - + def _reset_line(self): self._refline = self._curline self._curline = array.array('b', [1]*self.width) self._curpos = -1 self._color = 1 return - + def _flush_line(self): if self.width <= self._curpos: self.output_line(self._y, self._curline) @@ -442,12 +450,13 @@ class CCITTG4Parser(BitParser): x1 = self._curpos+1 while 1: if x1 == 0: - if (self._color == 1 and - self._refline[x1] != self._color): break + if (self._color == 1 and self._refline[x1] != self._color): + break elif x1 == len(self._refline): break elif (self._refline[x1-1] == self._color and - self._refline[x1] != self._color): break + self._refline[x1] != self._color): + break x1 += 1 x1 += dx x0 = max(0, self._curpos) @@ -461,50 +470,54 @@ class CCITTG4Parser(BitParser): self._curpos = x1 self._color = 1-self._color return - + def _do_pass(self): #print '* pass: curpos=%r, color=%r' % (self._curpos, self._color) #print ' refline:', self._get_refline(self._curpos+1) x1 = self._curpos+1 while 1: if x1 == 0: - if (self._color == 1 and - self._refline[x1] != self._color): break + if (self._color == 1 and self._refline[x1] != self._color): + break elif x1 == len(self._refline): break elif (self._refline[x1-1] == self._color and - self._refline[x1] != self._color): break + self._refline[x1] != self._color): + break x1 += 1 while 1: if x1 == 0: - if (self._color == 0 and - self._refline[x1] == self._color): break + if (self._color == 0 and self._refline[x1] == self._color): + break elif x1 == len(self._refline): break elif (self._refline[x1-1] != self._color and - self._refline[x1] == self._color): break + self._refline[x1] == self._color): + break x1 += 1 for x in xrange(self._curpos, x1): self._curline[x] = self._color self._curpos = x1 return - + def _do_horizontal(self, n1, n2): #print '* horizontal(%d,%d): curpos=%r, color=%r' % (n1, n2, self._curpos, self._color) if self._curpos < 0: self._curpos = 0 x = self._curpos for _ in xrange(n1): - if len(self._curline) <= x: break + if len(self._curline) <= x: + break self._curline[x] = self._color x += 1 for _ in xrange(n2): - if len(self._curline) <= x: break + if len(self._curline) <= x: + break self._curline[x] = 1-self._color x += 1 self._curpos = x return - + def _do_uncompressed(self, bits): #print '* uncompressed(%r): curpos=%r' % (bits, self._curpos) for c in bits: @@ -513,15 +526,16 @@ class CCITTG4Parser(BitParser): self._flush_line() return +import unittest + ## Test cases ## -import unittest class TestCCITTG4Parser(unittest.TestCase): def get_parser(self, bits): parser = CCITTG4Parser(len(bits)) - parser._curline = [ int(c) for c in bits ] + parser._curline = [int(c) for c in bits] parser._reset_line() return parser @@ -656,7 +670,7 @@ class TestCCITTG4Parser(unittest.TestCase): parser._do_vertical(-1) parser._do_vertical(-1) parser._do_vertical(1) - parser._do_horizontal(1,1) + parser._do_horizontal(1, 1) self.assertEqual(parser._get_bits(), '011101') return @@ -673,23 +687,23 @@ class TestCCITTG4Parser(unittest.TestCase): ## CCITTFaxDecoder ## class CCITTFaxDecoder(CCITTG4Parser): - + def __init__(self, width, bytealign=False, reversed=False): CCITTG4Parser.__init__(self, width, bytealign=bytealign) self.reversed = reversed self._buf = '' return - + def close(self): return self._buf - + def output_line(self, y, bits): bytes = array.array('B', [0]*((len(bits)+7)/8)) if self.reversed: - bits = [ 1-b for b in bits ] - for (i,b) in enumerate(bits): + bits = [1-b for b in bits] + for (i, b) in enumerate(bits): if b: - bytes[i/8] += (128,64,32,16,8,4,2,1)[i%8] + bytes[i/8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8] self._buf += bytes.tostring() return @@ -705,35 +719,39 @@ def ccittfaxdecode(data, params): raise ValueError(K) parser.feedbytes(data) return parser.close() - - + + # test def main(argv): import pygame if not argv[1:]: return unittest.main() + class Parser(CCITTG4Parser): def __init__(self, width, bytealign=False): CCITTG4Parser.__init__(self, width, bytealign=bytealign) - self.img = pygame.Surface((self.width,1000)) + self.img = pygame.Surface((self.width, 1000)) return + def output_line(self, y, bits): - for (x,b) in enumerate(bits): + for (x, b) in enumerate(bits): if b: - self.img.set_at((x,y), (255,255,255)) + self.img.set_at((x, y), (255, 255, 255)) else: - self.img.set_at((x,y), (0,0,0)) + self.img.set_at((x, y), (0, 0, 0)) return + def close(self): pygame.image.save(self.img, 'out.bmp') return for path in argv[1:]: - fp = file(path,'rb') - (_,_,k,w,h,_) = path.split('.') + fp = file(path, 'rb') + (_, _, k, w, h, _) = path.split('.') parser = Parser(int(w)) parser.feedbytes(fp.read()) parser.close() fp.close() return -if __name__ == '__main__': sys.exit(main(sys.argv)) +if __name__ == '__main__': + sys.exit(main(sys.argv)) diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 5945cf5..b18df98 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -25,7 +25,8 @@ from encodingdb import name2unicode from utils import choplist, nunpack -class CMapError(Exception): pass +class CMapError(Exception): + pass ## CMap @@ -43,8 +44,9 @@ class CMap(object): def use_cmap(self, cmap): assert isinstance(cmap, CMap) + def copy(dst, src): - for (k,v) in src.iteritems(): + for (k, v) in src.iteritems(): if isinstance(v, dict): d = {} dst[k] = d @@ -73,14 +75,14 @@ class CMap(object): if code2cid is None: code2cid = self.code2cid code = () - for (k,v) in sorted(code2cid.iteritems()): + for (k, v) in sorted(code2cid.iteritems()): c = code+(k,) if isinstance(v, int): - out.write('code %r = cid %d\n' % (c,v)) + out.write('code %r = cid %d\n' % (c, v)) else: self.dump(out=out, code2cid=v, code=c) return - + ## IdentityCMap ## @@ -99,8 +101,7 @@ class IdentityCMap(object): return struct.unpack('>%dH' % n, code) else: return () - - + ## UnicodeMap ## @@ -118,8 +119,8 @@ class UnicodeMap(object): return self.cid2unichr[cid] def dump(self, out=sys.stdout): - for (k,v) in sorted(self.cid2unichr.iteritems()): - out.write('cid %d = unicode %r\n' % (k,v)) + for (k, v) in sorted(self.cid2unichr.iteritems()): + out.write('cid %d = unicode %r\n' % (k, v)) return @@ -152,7 +153,7 @@ class FileCMap(CMap): else: t = {} d[c] = t - d =t + d = t c = ord(code[-1]) d[c] = cid return @@ -161,7 +162,7 @@ class FileCMap(CMap): ## FileUnicodeMap ## class FileUnicodeMap(UnicodeMap): - + def __init__(self): UnicodeMap.__init__(self) self.attrs = {} @@ -204,12 +205,12 @@ class PyCMap(CMap): def is_vertical(self): return self._is_vertical - + ## PyUnicodeMap ## class PyUnicodeMap(UnicodeMap): - + def __init__(self, name, module, vertical): if vertical: cid2unichr = module.CID2UNICHR_V @@ -230,18 +231,17 @@ class CMapDB(object): debug = 0 _cmap_cache = {} _umap_cache = {} - - class CMapNotFound(CMapError): pass + + class CMapNotFound(CMapError): + pass @classmethod def _load_data(klass, name): filename = '%s.pickle.gz' % name if klass.debug: print >>sys.stderr, 'loading:', name - cmap_paths = ( - os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'), - os.path.join(os.path.dirname(__file__), 'cmap'), - ) + cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'), + os.path.join(os.path.dirname(__file__), 'cmap'),) for directory in cmap_paths: path = os.path.join(directory, filename) if os.path.exists(path): @@ -305,11 +305,12 @@ class CMapParser(PSStackParser): elif name == 'endcmap': self._in_cmap = False return - if not self._in_cmap: return + if not self._in_cmap: + return # if name == 'def': try: - ((_,k),(_,v)) = self.pop(2) + ((_, k), (_, v)) = self.pop(2) self.cmap.set_attr(literal_name(k), v) except PSSyntaxError: pass @@ -317,7 +318,7 @@ class CMapParser(PSStackParser): if name == 'usecmap': try: - ((_,cmapname),) = self.pop(1) + ((_, cmapname),) = self.pop(1) self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname))) except PSSyntaxError: pass @@ -336,13 +337,15 @@ class CMapParser(PSStackParser): self.popall() return if name == 'endcidrange': - objs = [ obj for (_,obj) in self.popall() ] - for (s,e,cid) in choplist(3, objs): + objs = [obj for (__, obj) in self.popall()] + for (s, e, cid) in choplist(3, objs): if (not isinstance(s, str) or not isinstance(e, str) or - not isinstance(cid, int) or len(s) != len(e)): continue + not isinstance(cid, int) or len(s) != len(e)): + continue sprefix = s[:-4] eprefix = e[:-4] - if sprefix != eprefix: continue + if sprefix != eprefix: + continue svar = s[-4:] evar = e[-4:] s1 = nunpack(svar) @@ -350,7 +353,7 @@ class CMapParser(PSStackParser): vlen = len(svar) #assert s1 <= e1 for i in xrange(e1-s1+1): - x = sprefix+struct.pack('>L',s1+i)[-vlen:] + x = sprefix+struct.pack('>L', s1+i)[-vlen:] self.cmap.add_code2cid(x, cid+i) return @@ -358,8 +361,8 @@ class CMapParser(PSStackParser): self.popall() return if name == 'endcidchar': - objs = [ obj for (_,obj) in self.popall() ] - for (cid,code) in choplist(2, objs): + objs = [obj for (__, obj) in self.popall()] + for (cid, code) in choplist(2, objs): if isinstance(code, str) and isinstance(cid, str): self.cmap.add_code2cid(code, nunpack(cid)) return @@ -368,10 +371,11 @@ class CMapParser(PSStackParser): self.popall() return if name == 'endbfrange': - objs = [ obj for (_,obj) in self.popall() ] - for (s,e,code) in choplist(3, objs): + objs = [obj for (__, obj) in self.popall()] + for (s, e, code) in choplist(3, objs): if (not isinstance(s, str) or not isinstance(e, str) or - len(s) != len(e)): continue + len(s) != len(e)): + continue s1 = nunpack(s) e1 = nunpack(e) #assert s1 <= e1 @@ -384,7 +388,7 @@ class CMapParser(PSStackParser): prefix = code[:-4] vlen = len(var) for i in xrange(e1-s1+1): - x = prefix+struct.pack('>L',base+i)[-vlen:] + x = prefix+struct.pack('>L', base+i)[-vlen:] self.cmap.add_cid2unichr(s1+i, x) return @@ -392,8 +396,8 @@ class CMapParser(PSStackParser): self.popall() return if name == 'endbfchar': - objs = [ obj for (_,obj) in self.popall() ] - for (cid,code) in choplist(2, objs): + objs = [obj for (__, obj) in self.popall()] + for (cid, code) in choplist(2, objs): if isinstance(cid, str) and isinstance(code, str): self.cmap.add_cid2unichr(nunpack(cid), code) return @@ -408,6 +412,7 @@ class CMapParser(PSStackParser): self.push((pos, token)) return + # test def main(argv): args = argv[1:] @@ -420,4 +425,5 @@ def main(argv): cmap.dump() return -if __name__ == '__main__': sys.exit(main(sys.argv)) +if __name__ == '__main__': + sys.exit(main(sys.argv)) diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 3788d5a..e0d487c 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -21,9 +21,9 @@ class PDFLayoutAnalyzer(PDFTextDevice): return def begin_page(self, page, ctm): - (x0,y0,x1,y1) = page.mediabox - (x0,y0) = apply_matrix_pt(ctm, (x0,y0)) - (x1,y1) = apply_matrix_pt(ctm, (x1,y1)) + (x0, y0, x1, y1) = page.mediabox + (x0, y0) = apply_matrix_pt(ctm, (x0, y0)) + (x1, y1) = apply_matrix_pt(ctm, (x1, y1)) mediabox = (0, 0, abs(x0-x1), abs(y0-y1)) self.cur_item = LTPage(self.pageno, mediabox) return @@ -61,26 +61,26 @@ class PDFLayoutAnalyzer(PDFTextDevice): shape = ''.join(x[0] for x in path) if shape == 'ml': # horizontal/vertical line - (_,x0,y0) = path[0] - (_,x1,y1) = path[1] - (x0,y0) = apply_matrix_pt(self.ctm, (x0,y0)) - (x1,y1) = apply_matrix_pt(self.ctm, (x1,y1)) + (_, x0, y0) = path[0] + (_, x1, y1) = path[1] + (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0)) + (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1)) if x0 == x1 or y0 == y1: - self.cur_item.add(LTLine(gstate.linewidth, (x0,y0), (x1,y1))) + self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1))) return if shape == 'mlllh': # rectangle - (_,x0,y0) = path[0] - (_,x1,y1) = path[1] - (_,x2,y2) = path[2] - (_,x3,y3) = path[3] - (x0,y0) = apply_matrix_pt(self.ctm, (x0,y0)) - (x1,y1) = apply_matrix_pt(self.ctm, (x1,y1)) - (x2,y2) = apply_matrix_pt(self.ctm, (x2,y2)) - (x3,y3) = apply_matrix_pt(self.ctm, (x3,y3)) + (_, x0, y0) = path[0] + (_, x1, y1) = path[1] + (_, x2, y2) = path[2] + (_, x3, y3) = path[3] + (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0)) + (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1)) + (x2, y2) = apply_matrix_pt(self.ctm, (x2, y2)) + (x3, y3) = apply_matrix_pt(self.ctm, (x3, y3)) if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)): - self.cur_item.add(LTRect(gstate.linewidth, (x0,y0,x2,y2))) + self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2))) return # other shapes pts = [] @@ -119,7 +119,7 @@ class PDFPageAggregator(PDFLayoutAnalyzer): PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams) self.result = None return - + def receive_layout(self, ltpage): self.result = ltpage return @@ -137,7 +137,7 @@ class PDFConverter(PDFLayoutAnalyzer): self.outfp = outfp self.codec = codec return - + ## TextConverter ## @@ -176,10 +176,11 @@ class TextConverter(PDFConverter): # is text. This stops all the image and drawing ouput from being # recorded and taking up RAM. def render_image(self, name, stream): - if self.imagewriter is None: return + if self.imagewriter is None: + return PDFConverter.render_image(self, name, stream) return - + def paint_path(self, gstate, stroke, fill, evenodd, path): return @@ -196,18 +197,18 @@ class HTMLConverter(PDFConverter): 'textgroup': 'red', 'curve': 'black', 'page': 'gray', - } - + } + TEXT_COLORS = { 'textbox': 'blue', 'char': 'black', - } + } - def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None, + def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None, scale=1, fontscale=1.0, layoutmode='normal', showpageno=True, pagemargin=50, imagewriter=None, - rect_colors={'curve':'black', 'page':'gray'}, - text_colors={'char':'black'}): + rect_colors={'curve': 'black', 'page': 'gray'}, + text_colors={'char': 'black'}): PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams) self.scale = scale self.fontscale = fontscale @@ -238,7 +239,7 @@ class HTMLConverter(PDFConverter): def write_footer(self): self.write('