PEP8: Remove trailing whitespace

pull/1/head
Matthew Duggan 2013-11-07 16:14:53 +09:00
parent 9ff6aa0463
commit c1da8b835c
22 changed files with 96 additions and 96 deletions

View File

@ -16,13 +16,13 @@ def ascii85decode(data):
letters, using 85 different types of characters (as 256**4 < 85**5).
When the length of the original bytes is not a multiple of 4, a special
rule is used for round up.
The Adobe's ASCII85 implementation is slightly different from
its original in handling the last characters.
The sample string is taken from:
http://en.wikipedia.org/w/index.php?title=Ascii85
>>> ascii85decode('9jqo^BlbD-BleB1DJ+*+F(f,q')
'Man is distinguished'
>>> ascii85decode('E,9)oF*2M7/c~>')
@ -60,7 +60,7 @@ def asciihexdecode(data):
EOD. Any other characters will cause an error. If the filter encounters
the EOD marker after reading an odd number of hexadecimal digits, it
will behave as if a 0 followed the last digit.
>>> asciihexdecode('61 62 2e6364 65')
'ab.cde'
>>> asciihexdecode('61 62 2e6364 657>')

View File

@ -308,7 +308,7 @@ class CCITTG4Parser(BitParser):
BitParser.add(UNCOMPRESSED, 'T1000' , '0000000010')
BitParser.add(UNCOMPRESSED, 'T00000' , '00000000011')
BitParser.add(UNCOMPRESSED, 'T10000' , '00000000010')
# Exception types nested in the parser class; code in the class refers to
# them through the instance (e.g. `raise self.InvalidData`).
# NOTE(review): EOFB presumably stands for the CCITT "end of facsimile block"
# marker -- confirm against its raise site.
class EOFB(Exception): pass
# Raised when the bit stream cannot be interpreted, e.g. by
# _parse_uncompressed when it is handed an empty bit string.
class InvalidData(Exception): pass
# NOTE(review): ByteSkip presumably asks the caller to skip to the next byte
# boundary -- confirm against its raise/except sites.
class ByteSkip(Exception): pass
@ -386,7 +386,7 @@ class CCITTG4Parser(BitParser):
def _parse_uncompressed(self, bits):
if not bits: raise self.InvalidData
if bits.startswith('T'):
self._accept = self._parse_mode
self._accept = self._parse_mode
self._color = int(bits[1])
self._do_uncompressed(bits[2:])
return self.MODE
@ -418,14 +418,14 @@ class CCITTG4Parser(BitParser):
def output_line(self, y, bits):
print y, ''.join( str(b) for b in bits )
return
def _reset_line(self):
    """Begin a fresh current line.

    The line that was current becomes the reference line, the current line
    is re-created as ``self.width`` signed-byte entries that are all 1, the
    position is set to -1 and the color to 1.
    """
    finished = self._curline
    self._refline = finished
    # Repeating a one-element array builds the same all-ones line as
    # converting a list of ones.
    self._curline = array.array('b', [1]) * self.width
    self._curpos = -1
    self._color = 1
def _flush_line(self):
if self.width <= self._curpos:
self.output_line(self._y, self._curline)
@ -460,7 +460,7 @@ class CCITTG4Parser(BitParser):
self._curpos = x1
self._color = 1-self._color
return
def _do_pass(self):
#print '* pass: curpos=%r, color=%r' % (self._curpos, self._color)
#print ' refline:', self._get_refline(self._curpos+1)
@ -487,7 +487,7 @@ class CCITTG4Parser(BitParser):
self._curline[x] = self._color
self._curpos = x1
return
def _do_horizontal(self, n1, n2):
#print '* horizontal(%d,%d): curpos=%r, color=%r' % (n1, n2, self._curpos, self._color)
if self._curpos < 0:
@ -503,7 +503,7 @@ class CCITTG4Parser(BitParser):
x += 1
self._curpos = x
return
def _do_uncompressed(self, bits):
#print '* uncompressed(%r): curpos=%r' % (bits, self._curpos)
for c in bits:
@ -672,16 +672,16 @@ class TestCCITTG4Parser(unittest.TestCase):
## CCITTFaxDecoder
##
class CCITTFaxDecoder(CCITTG4Parser):
def __init__(self, width, bytealign=False, reversed=False):
    """Initialize the decoder with an empty output buffer.

    ``width`` and ``bytealign`` are forwarded to ``CCITTG4Parser.__init__``;
    ``reversed`` is stored unchanged on the instance.
    """
    # The base initializer runs first so that the attributes set below are
    # assigned after whatever state it establishes.
    CCITTG4Parser.__init__(self, width, bytealign=bytealign)
    self.reversed = reversed
    self._buf = ''
def close(self):
    """Return the decoder's output buffer (``self._buf``)."""
    buffered = self._buf
    return buffered
def output_line(self, y, bits):
bytes = array.array('B', [0]*((len(bits)+7)/8))
if self.reversed:
@ -704,8 +704,8 @@ def ccittfaxdecode(data, params):
raise ValueError(K)
parser.feedbytes(data)
return parser.close()
# test
def main(argv):
import pygame

View File

@ -81,7 +81,7 @@ class CMap(object):
else:
self.dump(out=out, code2cid=v, code=c)
return
## IdentityCMap
##
@ -100,8 +100,8 @@ class IdentityCMap(object):
return struct.unpack('>%dH' % n, code)
else:
return ()
## UnicodeMap
##
@ -162,7 +162,7 @@ class FileCMap(CMap):
## FileUnicodeMap
##
class FileUnicodeMap(UnicodeMap):
def __init__(self):
UnicodeMap.__init__(self)
self.attrs = {}
@ -205,12 +205,12 @@ class PyCMap(CMap):
def is_vertical(self):
    """Return the stored ``_is_vertical`` flag of this CMap."""
    flag = self._is_vertical
    return flag
## PyUnicodeMap
##
class PyUnicodeMap(UnicodeMap):
def __init__(self, name, module, vertical):
if vertical:
cid2unichr = module.CID2UNICHR_V
@ -231,7 +231,7 @@ class CMapDB(object):
debug = 0
_cmap_cache = {}
_umap_cache = {}
# Specialization of CMapError nested in the class.
# NOTE(review): presumably raised when a requested CMap cannot be located --
# confirm against the lookup methods.
class CMapNotFound(CMapError): pass
@classmethod

View File

@ -119,7 +119,7 @@ class PDFPageAggregator(PDFLayoutAnalyzer):
PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams)
self.result = None
return
def receive_layout(self, ltpage):
    """Store ``ltpage`` as the aggregator's current ``result``.

    Any previously stored result is replaced; nothing is returned.
    """
    self.result = ltpage
@ -137,7 +137,7 @@ class PDFConverter(PDFLayoutAnalyzer):
self.outfp = outfp
self.codec = codec
return
## TextConverter
##
@ -179,7 +179,7 @@ class TextConverter(PDFConverter):
if self.imagewriter is None: return
PDFConverter.render_image(self, name, stream)
return
def paint_path(self, gstate, stroke, fill, evenodd, path):
    """Do nothing: this converter ignores path-painting requests.

    All arguments are accepted and discarded; the result is ``None``.
    """
    pass
@ -197,13 +197,13 @@ class HTMLConverter(PDFConverter):
'curve': 'black',
'page': 'gray',
}
TEXT_COLORS = {
'textbox': 'blue',
'char': 'black',
}
def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
scale=1, fontscale=1.0, layoutmode='normal', showpageno=True,
pagemargin=50, imagewriter=None,
rect_colors={'curve':'black', 'page':'gray'},
@ -295,7 +295,7 @@ class HTMLConverter(PDFConverter):
self._font = self._fontstack.pop()
self.write('</div>')
return
def put_text(self, text, fontname, fontsize):
font = (fontname, fontsize)
if font != self._font:
@ -399,7 +399,7 @@ class XMLConverter(PDFConverter):
def write_footer(self):
    """Write the closing ``</pages>`` tag and a newline to ``self.outfp``."""
    closing_tag = '</pages>\n'
    self.outfp.write(closing_tag)
def write_text(self, text):
    """Write ``text`` to ``self.outfp`` after passing it through ``enc``.

    ``enc`` is called with the text and ``self.codec``; its result is what
    gets written.
    """
    emit = self.outfp.write
    emit(enc(text, self.codec))

View File

@ -8,7 +8,7 @@ written with a proportional font.
The following data were extracted from the AFM files:
http://www.ctan.org/tex-archive/fonts/adobe/afm/
"""
### BEGIN Verbatim copy of the license part

View File

@ -70,7 +70,7 @@ class ImageWriter(object):
(width, height) = image.srcsize
if len(filters) == 1 and filters[0] in LITERALS_DCT_DECODE:
ext = '.jpg'
elif (image.bits == 1 or
elif (image.bits == 1 or
image.bits == 8 and image.colorspace in (LITERAL_DEVICE_RGB, LITERAL_DEVICE_GRAY)):
ext = '.%dx%d.bmp' % (width, height)
else:
@ -84,7 +84,7 @@ class ImageWriter(object):
from PIL import Image
from PIL import ImageChops
ifp = cStringIO.StringIO(raw_data)
i = Image.open(ifp)
i = Image.open(ifp)
i = ImageChops.invert(i)
i = i.convert('RGB')
i.save(fp, 'JPEG')

View File

@ -94,7 +94,7 @@ class LTComponent(LTItem):
def is_empty(self):
    """Return True when the width or the height is zero or negative."""
    if self.width <= 0:
        return True
    return self.height <= 0
def is_hoverlap(self, obj):
    """Return True when ``obj`` and this component overlap on the x axis.

    The two x-ranges overlap when each one starts no later than the other
    ends; ranges that merely touch count as overlapping.  ``obj`` must be
    an ``LTComponent``.
    """
    assert isinstance(obj, LTComponent)
    other_starts_in_time = obj.x0 <= self.x1
    return other_starts_in_time and self.x0 <= obj.x1
@ -247,7 +247,7 @@ class LTChar(LTComponent, LTText):
def __repr__(self):
    """Return a debugging representation of the character.

    The string contains the class name, the bounding box (via ``bbox2str``),
    the matrix (via ``matrix2str``), the font name, the ``adv`` value and
    the character's text.
    """
    # The tuple is opened exactly once; a repeated opening line would leave
    # the parentheses unbalanced.
    return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
            (self.__class__.__name__, bbox2str(self.bbox),
             matrix2str(self.matrix), self.fontname, self.adv,
             self.get_text()))
@ -258,7 +258,7 @@ class LTChar(LTComponent, LTText):
"""Returns True if two characters can coexist in the same line."""
return True
## LTContainer
##
class LTContainer(LTComponent):
@ -287,7 +287,7 @@ class LTContainer(LTComponent):
for obj in self._objs:
obj.analyze(laparams)
return
## LTExpandableContainer
##
@ -315,7 +315,7 @@ class LTTextContainer(LTExpandableContainer, LTText):
def get_text(self):
    """Concatenate the text of every contained ``LTText`` object.

    Children are visited in iteration order; children that are not
    ``LTText`` instances contribute nothing.
    """
    pieces = []
    for child in self:
        if isinstance(child, LTText):
            pieces.append(child.get_text())
    return ''.join(pieces)
## LTTextLine
##
@ -363,7 +363,7 @@ class LTTextLineHorizontal(LTTextLine):
abs(obj.height-self.height) < d and
(abs(obj.x0-self.x0) < d or
abs(obj.x1-self.x1) < d)) ]
class LTTextLineVertical(LTTextLine):
def __init__(self, word_margin):
@ -379,7 +379,7 @@ class LTTextLineVertical(LTTextLine):
self._y0 = obj.y0
LTTextLine.add(self, obj)
return
def find_neighbors(self, plane, ratio):
d = ratio*self.width
objs = plane.find((self.x0-d, self.y0, self.x1+d, self.y1))
@ -387,8 +387,8 @@ class LTTextLineVertical(LTTextLine):
if (isinstance(obj, LTTextLineVertical) and
abs(obj.width-self.width) < d and
(abs(obj.y0-self.y0) < d or
abs(obj.y1-self.y1) < d)) ]
abs(obj.y1-self.y1) < d)) ]
## LTTextBox
##
@ -408,7 +408,7 @@ class LTTextBox(LTTextContainer):
self.index, bbox2str(self.bbox), self.get_text()))
class LTTextBoxHorizontal(LTTextBox):
def analyze(self, laparams):
LTTextBox.analyze(self, laparams)
self._objs = csort(self._objs, key=lambda obj: -obj.y1)
@ -438,7 +438,7 @@ class LTTextGroup(LTTextContainer):
return
class LTTextGroupLRTB(LTTextGroup):
def analyze(self, laparams):
LTTextGroup.analyze(self, laparams)
# reorder the objects from top-left to bottom-right.
@ -448,7 +448,7 @@ class LTTextGroupLRTB(LTTextGroup):
return
class LTTextGroupTBRL(LTTextGroup):
def analyze(self, laparams):
LTTextGroup.analyze(self, laparams)
# reorder the objects from top-right to bottom-left.
@ -466,14 +466,14 @@ class LTLayoutContainer(LTContainer):
LTContainer.__init__(self, bbox)
self.groups = None
return
def get_textlines(self, laparams, objs):
obj0 = None
line = None
for obj1 in objs:
if obj0 is not None:
k = 0
if (obj0.is_compatible(obj1) and obj0.is_voverlap(obj1) and
if (obj0.is_compatible(obj1) and obj0.is_voverlap(obj1) and
min(obj0.height, obj1.height) * laparams.line_overlap < obj0.voverlap(obj1) and
obj0.hdistance(obj1) < max(obj0.width, obj1.width) * laparams.char_margin):
# obj0 and obj1 are horizontally aligned:
@ -488,7 +488,7 @@ class LTLayoutContainer(LTContainer):
# (char_margin)
k |= 1
if (laparams.detect_vertical and
obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and
obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and
min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1) and
obj0.vdistance(obj1) < max(obj0.height, obj1.height) * laparams.char_margin):
# obj0 and obj1 are vertically aligned:
@ -565,9 +565,9 @@ class LTLayoutContainer(LTContainer):
assert boxes
def dist(obj1, obj2):
"""A distance function between two TextBoxes.
Consider the bounding rectangle for obj1 and obj2.
Return its area less the areas of obj1 and obj2,
Return its area less the areas of obj1 and obj2,
shown as 'www' below. This value may be negative.
+------+..........+ (x1,y1)
| obj1 |wwwwwwwwww:
@ -621,7 +621,7 @@ class LTLayoutContainer(LTContainer):
plane.add(group)
assert len(plane) == 1
return list(plane)
def analyze(self, laparams):
# textobjs is a list of LTChar objects, i.e.
# it has all the individual characters in the page.
@ -668,7 +668,7 @@ class LTFigure(LTLayoutContainer):
def analyze(self, laparams):
    """Analyze the figure's contents only when ``laparams.all_texts`` is set.

    When the flag is false the figure is left untouched; otherwise the work
    is delegated to ``LTLayoutContainer.analyze``.  Returns ``None``.
    """
    if not laparams.all_texts:
        return
    LTLayoutContainer.analyze(self, laparams)
## LTPage

View File

@ -74,8 +74,8 @@ class PDFTextDevice(PDFDevice):
seq, matrix, textstate.linematrix, font, fontsize,
scaling, charspace, wordspace, rise, dxscale)
return
def render_string_horizontal(self, seq, matrix, (x,y),
def render_string_horizontal(self, seq, matrix, (x,y),
font, fontsize, scaling, charspace, wordspace, rise, dxscale):
needcharspace = False
for obj in seq:
@ -93,7 +93,7 @@ class PDFTextDevice(PDFDevice):
needcharspace = True
return (x, y)
def render_string_vertical(self, seq, matrix, (x,y),
def render_string_vertical(self, seq, matrix, (x,y),
font, fontsize, scaling, charspace, wordspace, rise, dxscale):
needcharspace = False
for obj in seq:
@ -104,7 +104,7 @@ class PDFTextDevice(PDFDevice):
for cid in font.decode(obj):
if needcharspace:
y += charspace
y += self.render_char(translate_matrix(matrix, (x,y)),
y += self.render_char(translate_matrix(matrix, (x,y)),
font, fontsize, scaling, rise, cid)
if cid == 32 and wordspace:
y += wordspace

View File

@ -260,7 +260,7 @@ class PDFDocument(object):
doc = PDFDocument(parser)
doc.initialize(password)
obj = doc.getobj(objid)
"""
debug = 0
@ -425,7 +425,7 @@ class PDFDocument(object):
raise PDFSyntaxError('Invalid object spec: offset=%r' % pos)
(_,obj) = self._parser.nextobject()
return obj
# can raise PDFObjectNotFound
def getobj(self, objid):
assert objid != 0

View File

@ -102,7 +102,7 @@ class Type1FontHeaderParser(PSStackParser):
except KeyError:
pass
return self._cid2unicode
def do_keyword(self, pos, token):
if token is self.KEYWORD_PUT:
((_,key),(_,value)) = self.pop(2)
@ -111,7 +111,7 @@ class Type1FontHeaderParser(PSStackParser):
self.add_results((key, literal_name(value)))
return
## CFFFont
## (Format specified in Adobe Technical Note: #5176
## "The Compact Font Format Specification")

View File

@ -125,7 +125,7 @@ class PDFGraphicState(object):
class PDFResourceManager(object):
"""Repository of shared resources.
ResourceManager facilitates reuse of shared resources
such as fonts and images so that large objects are not
allocated multiple times.
@ -725,7 +725,7 @@ class PDFPageInterpreter(object):
interpreter = self.dup()
bbox = list_value(xobj['BBox'])
matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
# According to PDF reference 1.7 section 4.9.1, XObjects in
# According to PDF reference 1.7 section 4.9.1, XObjects in
# earlier PDFs (prior to v1.2) use the page's Resources entry
# instead of having their own Resources entry.
resources = dict_value(xobj.get('Resources')) or self.resources.copy()

View File

@ -40,7 +40,7 @@ class PDFPage(object):
def __init__(self, doc, pageid, attrs):
"""Initialize a page object.
doc: a PDFDocument object.
pageid: any Python object that can uniquely identify the page.
attrs: a dictionary of page attributes.

View File

@ -35,7 +35,7 @@ class PDFParser(PSStackParser):
parser.set_document(doc)
parser.seek(offset)
parser.nextobject()
"""
def __init__(self, fp):
@ -57,10 +57,10 @@ class PDFParser(PSStackParser):
KEYWORD_STARTXREF = KWD('startxref')
def do_keyword(self, pos, token):
"""Handles PDF-related keywords."""
if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
self.add_results(*self.pop(1))
elif token is self.KEYWORD_ENDOBJ:
self.add_results(*self.pop(4))
@ -125,7 +125,7 @@ class PDFParser(PSStackParser):
else:
# others
self.push((pos, token))
return

View File

@ -68,7 +68,7 @@ def resolve1(x, default=None):
def resolve_all(x, default=None):
"""Recursively resolves the given object and all the internals.
Make sure there is no indirect reference within the nested object.
This procedure might be slow.
"""
@ -180,13 +180,13 @@ class PDFStream(PDFObject):
def __contains__(self, name):
    """Support ``name in stream`` by testing membership in ``self.attrs``."""
    attrs = self.attrs
    return name in attrs
def __getitem__(self, name):
    """Support ``stream[name]`` by indexing ``self.attrs``.

    A missing ``name`` raises whatever ``self.attrs[name]`` raises.
    """
    attrs = self.attrs
    return attrs[name]
def get(self, name, default=None):
    """Return ``self.attrs.get(name, default)``.

    ``default`` (``None`` unless given) is returned when ``name`` is absent.
    """
    lookup = self.attrs.get
    return lookup(name, default)
def get_any(self, names, default=None):
for name in names:
if name in self.attrs:

View File

@ -32,7 +32,7 @@ class PSObject(object):
class PSLiteral(PSObject):
"""A class that represents a PostScript literal.
Postscript literals are used as identifiers, such as
variable names, property names and dictionary keys.
Literals are case sensitive and denoted by a preceding
@ -55,11 +55,11 @@ class PSLiteral(PSObject):
class PSKeyword(PSObject):
"""A class that represents a PostScript keyword.
PostScript keywords are a dozen of predefined words.
Commands and directives in PostScript are expressed by keywords.
They are also used to denote the content boundaries.
Note: Do not create an instance of PSKeyword directly.
Always use PSKeywordTable.intern().
"""
@ -80,7 +80,7 @@ class PSSymbolTable(object):
The identity of interned objects can be checked with the "is" operator.
"""
def __init__(self, klass):
self.dict = {}
self.klass = klass
@ -357,7 +357,7 @@ class PSBaseParser(object):
pass
self._parse1 = self._parse_main
return j
def _parse_float(self, s, i):
m = END_NUMBER.search(s, i)
if not m:
@ -493,17 +493,17 @@ class PSStackParser(PSBaseParser):
def push(self, *objs):
    """Append every positional argument, in order, to ``self.curstack``."""
    stack = self.curstack
    stack.extend(objs)
def pop(self, n):
    """Remove the last ``n`` objects from ``self.curstack`` and return them.

    The objects come back as a list in stack order (oldest first).  When
    the stack holds fewer than ``n`` objects, everything is removed and
    returned.  A count of zero or less removes nothing and returns an empty
    list.
    """
    # Guard first: ``curstack[-0:]`` is the *whole* list, so without this
    # check a request for zero objects would empty the stack.
    if n <= 0:
        return []
    objs = self.curstack[-n:]
    # Delete in place so other references to the stack list stay valid.
    del self.curstack[-n:]
    return objs
def popall(self):
    """Return the whole current stack and replace it with a new empty list.

    The returned object is the former ``self.curstack`` list itself, not a
    copy.
    """
    drained, self.curstack = self.curstack, []
    return drained
def add_results(self, *objs):
if 2 <= self.debug:
print >>sys.stderr, 'add_results: %r' % (objs,)
@ -516,7 +516,7 @@ class PSStackParser(PSBaseParser):
if 2 <= self.debug:
print >>sys.stderr, 'start_type: pos=%r, type=%r' % (pos, type)
return
def end_type(self, type):
if self.curtype != type:
raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))

View File

@ -1060,7 +1060,7 @@ class RijndaelEncryptor(object):
>>> RijndaelEncryptor(key, 128).encrypt(plaintext).encode('hex')
'd8f532538289ef7d06b506a4fd5be9c9'
"""
def __init__(self, key, keybits=256):
assert len(key) == KEYLENGTH(keybits)
(self.rk, self.nrounds) = rijndaelSetupEncrypt(key, keybits)

View File

@ -255,7 +255,7 @@ class Plane(object):
for obj in objs:
self.add(obj)
return
# add(obj): place an object.
def add(self, obj):
for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):

View File

@ -7,9 +7,9 @@ setup(
version=__version__,
description='PDF parser and analyzer',
long_description='''PDFMiner is a tool for extracting information from PDF documents.
Unlike other PDF-related tools, it focuses entirely on getting
Unlike other PDF-related tools, it focuses entirely on getting
and analyzing text data. PDFMiner allows to obtain
the exact location of texts in a page, as well as
the exact location of texts in a page, as well as
other information such as fonts or lines.
It includes a PDF converter that can transform PDF files
into other text formats (such as HTML). It has an extensible

View File

@ -50,7 +50,7 @@ class CMapConverter(object):
assert values[0] == 'CID'
encs = values
continue
def put(dmap, code, cid, force=False):
for b in code[:-1]:
b = ord(b)
@ -64,7 +64,7 @@ class CMapConverter(object):
if force or ((b not in dmap) or dmap[b] == cid):
dmap[b] = cid
return
def add(unimap, enc, code):
try:
codec = self.enc2codec[enc]
@ -78,20 +78,20 @@ class CMapConverter(object):
except UnicodeError:
pass
return
def pick(unimap):
    """Return the key of ``unimap`` that has the largest value.

    ``unimap`` maps single characters to numbers.  When several characters
    share the largest value, the one with the smallest code point wins.
    Raises ``IndexError`` when ``unimap`` is empty.
    """
    # Rank by (value, -code point), best first.  Indexing the (char, value)
    # pair avoids a tuple-parameter lambda, and sorted() avoids sorting the
    # result of items() in place, so this runs on Python 2 and Python 3.
    ranked = sorted(unimap.items(),
                    key=lambda item: (item[1], -ord(item[0])),
                    reverse=True)
    (best, _) = ranked[0]
    return best
cid = int(values[0])
unimap_h = {}
unimap_v = {}
for (enc,value) in zip(encs, values):
if enc == 'CID': continue
if value == '*': continue
# hcodes, vcodes: encoded bytes for each writing mode.
hcodes = []
vcodes = []
@ -121,7 +121,7 @@ class CMapConverter(object):
for code in hcodes:
put(hmap, code, cid)
put(vmap, code, cid)
# Determine the "most popular" candidate.
if unimap_h:
self.cid2unichr_h[cid] = pick(unimap_h)
@ -137,7 +137,7 @@ class CMapConverter(object):
)
fp.write(pickle.dumps(data))
return
def dump_unicodemap(self, fp):
data = dict(
CID2UNICHR_H=self.cid2unichr_h,
@ -151,7 +151,7 @@ def main(argv):
import getopt
import gzip
import os.path
def usage():
print 'usage: %s [-c enc=codec] output_dir regname [cid2code.txt ...]' % argv[0]
return 100

View File

@ -25,7 +25,7 @@ def dumpxml(out, obj, codec=None):
if obj is None:
out.write('<null />')
return
if isinstance(obj, dict):
out.write('<dict size="%d">\n' % len(obj))
for (k,v) in obj.iteritems():
@ -179,7 +179,7 @@ def extractembedded(outfp, fname, objids, pagenos, password='',
out.write(fileobj.get_data())
out.close()
return
fp = file(fname, 'rb')
parser = PDFParser(fp)
doc = PDFDocument(parser)

View File

@ -14,7 +14,7 @@ This is an in-house mapping table for some Latin-1 characters
LATIN2ASCII = {
#0x00a0: '',
#0x00a7: '',
# iso-8859-1
0x00c0: 'A`',
0x00c1: "A'",

View File

@ -159,7 +159,7 @@ class WebApp(object):
def convert(self):
self.form = cgi.FieldStorage(fp=self.infp, environ=self.environ)
if (self.method != 'POST' or
if (self.method != 'POST' or
'c' not in self.form or
'f' not in self.form):
self.response_200()