PEP8: Remove trailing whitespace

pull/1/head
Matthew Duggan 2013-11-07 16:14:53 +09:00
parent 9ff6aa0463
commit c1da8b835c
22 changed files with 96 additions and 96 deletions

View File

@ -16,13 +16,13 @@ def ascii85decode(data):
letters, using 85 different types of characters (as 256**4 < 85**5). letters, using 85 different types of characters (as 256**4 < 85**5).
When the length of the original bytes is not a multiple of 4, a special When the length of the original bytes is not a multiple of 4, a special
rule is used for round up. rule is used for round up.
The Adobe's ASCII85 implementation is slightly different from The Adobe's ASCII85 implementation is slightly different from
its original in handling the last characters. its original in handling the last characters.
The sample string is taken from: The sample string is taken from:
http://en.wikipedia.org/w/index.php?title=Ascii85 http://en.wikipedia.org/w/index.php?title=Ascii85
>>> ascii85decode('9jqo^BlbD-BleB1DJ+*+F(f,q') >>> ascii85decode('9jqo^BlbD-BleB1DJ+*+F(f,q')
'Man is distinguished' 'Man is distinguished'
>>> ascii85decode('E,9)oF*2M7/c~>') >>> ascii85decode('E,9)oF*2M7/c~>')
@ -60,7 +60,7 @@ def asciihexdecode(data):
EOD. Any other characters will cause an error. If the filter encounters EOD. Any other characters will cause an error. If the filter encounters
the EOD marker after reading an odd number of hexadecimal digits, it the EOD marker after reading an odd number of hexadecimal digits, it
will behave as if a 0 followed the last digit. will behave as if a 0 followed the last digit.
>>> asciihexdecode('61 62 2e6364 65') >>> asciihexdecode('61 62 2e6364 65')
'ab.cde' 'ab.cde'
>>> asciihexdecode('61 62 2e6364 657>') >>> asciihexdecode('61 62 2e6364 657>')

View File

@ -308,7 +308,7 @@ class CCITTG4Parser(BitParser):
BitParser.add(UNCOMPRESSED, 'T1000' , '0000000010') BitParser.add(UNCOMPRESSED, 'T1000' , '0000000010')
BitParser.add(UNCOMPRESSED, 'T00000' , '00000000011') BitParser.add(UNCOMPRESSED, 'T00000' , '00000000011')
BitParser.add(UNCOMPRESSED, 'T10000' , '00000000010') BitParser.add(UNCOMPRESSED, 'T10000' , '00000000010')
class EOFB(Exception): pass class EOFB(Exception): pass
class InvalidData(Exception): pass class InvalidData(Exception): pass
class ByteSkip(Exception): pass class ByteSkip(Exception): pass
@ -386,7 +386,7 @@ class CCITTG4Parser(BitParser):
def _parse_uncompressed(self, bits): def _parse_uncompressed(self, bits):
if not bits: raise self.InvalidData if not bits: raise self.InvalidData
if bits.startswith('T'): if bits.startswith('T'):
self._accept = self._parse_mode self._accept = self._parse_mode
self._color = int(bits[1]) self._color = int(bits[1])
self._do_uncompressed(bits[2:]) self._do_uncompressed(bits[2:])
return self.MODE return self.MODE
@ -418,14 +418,14 @@ class CCITTG4Parser(BitParser):
def output_line(self, y, bits): def output_line(self, y, bits):
print y, ''.join( str(b) for b in bits ) print y, ''.join( str(b) for b in bits )
return return
def _reset_line(self): def _reset_line(self):
self._refline = self._curline self._refline = self._curline
self._curline = array.array('b', [1]*self.width) self._curline = array.array('b', [1]*self.width)
self._curpos = -1 self._curpos = -1
self._color = 1 self._color = 1
return return
def _flush_line(self): def _flush_line(self):
if self.width <= self._curpos: if self.width <= self._curpos:
self.output_line(self._y, self._curline) self.output_line(self._y, self._curline)
@ -460,7 +460,7 @@ class CCITTG4Parser(BitParser):
self._curpos = x1 self._curpos = x1
self._color = 1-self._color self._color = 1-self._color
return return
def _do_pass(self): def _do_pass(self):
#print '* pass: curpos=%r, color=%r' % (self._curpos, self._color) #print '* pass: curpos=%r, color=%r' % (self._curpos, self._color)
#print ' refline:', self._get_refline(self._curpos+1) #print ' refline:', self._get_refline(self._curpos+1)
@ -487,7 +487,7 @@ class CCITTG4Parser(BitParser):
self._curline[x] = self._color self._curline[x] = self._color
self._curpos = x1 self._curpos = x1
return return
def _do_horizontal(self, n1, n2): def _do_horizontal(self, n1, n2):
#print '* horizontal(%d,%d): curpos=%r, color=%r' % (n1, n2, self._curpos, self._color) #print '* horizontal(%d,%d): curpos=%r, color=%r' % (n1, n2, self._curpos, self._color)
if self._curpos < 0: if self._curpos < 0:
@ -503,7 +503,7 @@ class CCITTG4Parser(BitParser):
x += 1 x += 1
self._curpos = x self._curpos = x
return return
def _do_uncompressed(self, bits): def _do_uncompressed(self, bits):
#print '* uncompressed(%r): curpos=%r' % (bits, self._curpos) #print '* uncompressed(%r): curpos=%r' % (bits, self._curpos)
for c in bits: for c in bits:
@ -672,16 +672,16 @@ class TestCCITTG4Parser(unittest.TestCase):
## CCITTFaxDecoder ## CCITTFaxDecoder
## ##
class CCITTFaxDecoder(CCITTG4Parser): class CCITTFaxDecoder(CCITTG4Parser):
def __init__(self, width, bytealign=False, reversed=False): def __init__(self, width, bytealign=False, reversed=False):
CCITTG4Parser.__init__(self, width, bytealign=bytealign) CCITTG4Parser.__init__(self, width, bytealign=bytealign)
self.reversed = reversed self.reversed = reversed
self._buf = '' self._buf = ''
return return
def close(self): def close(self):
return self._buf return self._buf
def output_line(self, y, bits): def output_line(self, y, bits):
bytes = array.array('B', [0]*((len(bits)+7)/8)) bytes = array.array('B', [0]*((len(bits)+7)/8))
if self.reversed: if self.reversed:
@ -704,8 +704,8 @@ def ccittfaxdecode(data, params):
raise ValueError(K) raise ValueError(K)
parser.feedbytes(data) parser.feedbytes(data)
return parser.close() return parser.close()
# test # test
def main(argv): def main(argv):
import pygame import pygame

View File

@ -81,7 +81,7 @@ class CMap(object):
else: else:
self.dump(out=out, code2cid=v, code=c) self.dump(out=out, code2cid=v, code=c)
return return
## IdentityCMap ## IdentityCMap
## ##
@ -100,8 +100,8 @@ class IdentityCMap(object):
return struct.unpack('>%dH' % n, code) return struct.unpack('>%dH' % n, code)
else: else:
return () return ()
## UnicodeMap ## UnicodeMap
## ##
@ -162,7 +162,7 @@ class FileCMap(CMap):
## FileUnicodeMap ## FileUnicodeMap
## ##
class FileUnicodeMap(UnicodeMap): class FileUnicodeMap(UnicodeMap):
def __init__(self): def __init__(self):
UnicodeMap.__init__(self) UnicodeMap.__init__(self)
self.attrs = {} self.attrs = {}
@ -205,12 +205,12 @@ class PyCMap(CMap):
def is_vertical(self): def is_vertical(self):
return self._is_vertical return self._is_vertical
## PyUnicodeMap ## PyUnicodeMap
## ##
class PyUnicodeMap(UnicodeMap): class PyUnicodeMap(UnicodeMap):
def __init__(self, name, module, vertical): def __init__(self, name, module, vertical):
if vertical: if vertical:
cid2unichr = module.CID2UNICHR_V cid2unichr = module.CID2UNICHR_V
@ -231,7 +231,7 @@ class CMapDB(object):
debug = 0 debug = 0
_cmap_cache = {} _cmap_cache = {}
_umap_cache = {} _umap_cache = {}
class CMapNotFound(CMapError): pass class CMapNotFound(CMapError): pass
@classmethod @classmethod

View File

@ -119,7 +119,7 @@ class PDFPageAggregator(PDFLayoutAnalyzer):
PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams) PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams)
self.result = None self.result = None
return return
def receive_layout(self, ltpage): def receive_layout(self, ltpage):
self.result = ltpage self.result = ltpage
return return
@ -137,7 +137,7 @@ class PDFConverter(PDFLayoutAnalyzer):
self.outfp = outfp self.outfp = outfp
self.codec = codec self.codec = codec
return return
## TextConverter ## TextConverter
## ##
@ -179,7 +179,7 @@ class TextConverter(PDFConverter):
if self.imagewriter is None: return if self.imagewriter is None: return
PDFConverter.render_image(self, name, stream) PDFConverter.render_image(self, name, stream)
return return
def paint_path(self, gstate, stroke, fill, evenodd, path): def paint_path(self, gstate, stroke, fill, evenodd, path):
return return
@ -197,13 +197,13 @@ class HTMLConverter(PDFConverter):
'curve': 'black', 'curve': 'black',
'page': 'gray', 'page': 'gray',
} }
TEXT_COLORS = { TEXT_COLORS = {
'textbox': 'blue', 'textbox': 'blue',
'char': 'black', 'char': 'black',
} }
def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None, def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
scale=1, fontscale=1.0, layoutmode='normal', showpageno=True, scale=1, fontscale=1.0, layoutmode='normal', showpageno=True,
pagemargin=50, imagewriter=None, pagemargin=50, imagewriter=None,
rect_colors={'curve':'black', 'page':'gray'}, rect_colors={'curve':'black', 'page':'gray'},
@ -295,7 +295,7 @@ class HTMLConverter(PDFConverter):
self._font = self._fontstack.pop() self._font = self._fontstack.pop()
self.write('</div>') self.write('</div>')
return return
def put_text(self, text, fontname, fontsize): def put_text(self, text, fontname, fontsize):
font = (fontname, fontsize) font = (fontname, fontsize)
if font != self._font: if font != self._font:
@ -399,7 +399,7 @@ class XMLConverter(PDFConverter):
def write_footer(self): def write_footer(self):
self.outfp.write('</pages>\n') self.outfp.write('</pages>\n')
return return
def write_text(self, text): def write_text(self, text):
self.outfp.write(enc(text, self.codec)) self.outfp.write(enc(text, self.codec))
return return

View File

@ -8,7 +8,7 @@ written with a proportional font.
The following data were extracted from the AFM files: The following data were extracted from the AFM files:
http://www.ctan.org/tex-archive/fonts/adobe/afm/ http://www.ctan.org/tex-archive/fonts/adobe/afm/
""" """
### BEGIN Verbatim copy of the license part ### BEGIN Verbatim copy of the license part

View File

@ -70,7 +70,7 @@ class ImageWriter(object):
(width, height) = image.srcsize (width, height) = image.srcsize
if len(filters) == 1 and filters[0] in LITERALS_DCT_DECODE: if len(filters) == 1 and filters[0] in LITERALS_DCT_DECODE:
ext = '.jpg' ext = '.jpg'
elif (image.bits == 1 or elif (image.bits == 1 or
image.bits == 8 and image.colorspace in (LITERAL_DEVICE_RGB, LITERAL_DEVICE_GRAY)): image.bits == 8 and image.colorspace in (LITERAL_DEVICE_RGB, LITERAL_DEVICE_GRAY)):
ext = '.%dx%d.bmp' % (width, height) ext = '.%dx%d.bmp' % (width, height)
else: else:
@ -84,7 +84,7 @@ class ImageWriter(object):
from PIL import Image from PIL import Image
from PIL import ImageChops from PIL import ImageChops
ifp = cStringIO.StringIO(raw_data) ifp = cStringIO.StringIO(raw_data)
i = Image.open(ifp) i = Image.open(ifp)
i = ImageChops.invert(i) i = ImageChops.invert(i)
i = i.convert('RGB') i = i.convert('RGB')
i.save(fp, 'JPEG') i.save(fp, 'JPEG')

View File

@ -94,7 +94,7 @@ class LTComponent(LTItem):
def is_empty(self): def is_empty(self):
return self.width <= 0 or self.height <= 0 return self.width <= 0 or self.height <= 0
def is_hoverlap(self, obj): def is_hoverlap(self, obj):
assert isinstance(obj, LTComponent) assert isinstance(obj, LTComponent)
return obj.x0 <= self.x1 and self.x0 <= obj.x1 return obj.x0 <= self.x1 and self.x0 <= obj.x1
@ -247,7 +247,7 @@ class LTChar(LTComponent, LTText):
def __repr__(self): def __repr__(self):
return ('<%s %s matrix=%s font=%r adv=%s text=%r>' % return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
(self.__class__.__name__, bbox2str(self.bbox), (self.__class__.__name__, bbox2str(self.bbox),
matrix2str(self.matrix), self.fontname, self.adv, matrix2str(self.matrix), self.fontname, self.adv,
self.get_text())) self.get_text()))
@ -258,7 +258,7 @@ class LTChar(LTComponent, LTText):
"""Returns True if two characters can coexist in the same line.""" """Returns True if two characters can coexist in the same line."""
return True return True
## LTContainer ## LTContainer
## ##
class LTContainer(LTComponent): class LTContainer(LTComponent):
@ -287,7 +287,7 @@ class LTContainer(LTComponent):
for obj in self._objs: for obj in self._objs:
obj.analyze(laparams) obj.analyze(laparams)
return return
## LTExpandableContainer ## LTExpandableContainer
## ##
@ -315,7 +315,7 @@ class LTTextContainer(LTExpandableContainer, LTText):
def get_text(self): def get_text(self):
return ''.join( obj.get_text() for obj in self if isinstance(obj, LTText) ) return ''.join( obj.get_text() for obj in self if isinstance(obj, LTText) )
## LTTextLine ## LTTextLine
## ##
@ -363,7 +363,7 @@ class LTTextLineHorizontal(LTTextLine):
abs(obj.height-self.height) < d and abs(obj.height-self.height) < d and
(abs(obj.x0-self.x0) < d or (abs(obj.x0-self.x0) < d or
abs(obj.x1-self.x1) < d)) ] abs(obj.x1-self.x1) < d)) ]
class LTTextLineVertical(LTTextLine): class LTTextLineVertical(LTTextLine):
def __init__(self, word_margin): def __init__(self, word_margin):
@ -379,7 +379,7 @@ class LTTextLineVertical(LTTextLine):
self._y0 = obj.y0 self._y0 = obj.y0
LTTextLine.add(self, obj) LTTextLine.add(self, obj)
return return
def find_neighbors(self, plane, ratio): def find_neighbors(self, plane, ratio):
d = ratio*self.width d = ratio*self.width
objs = plane.find((self.x0-d, self.y0, self.x1+d, self.y1)) objs = plane.find((self.x0-d, self.y0, self.x1+d, self.y1))
@ -387,8 +387,8 @@ class LTTextLineVertical(LTTextLine):
if (isinstance(obj, LTTextLineVertical) and if (isinstance(obj, LTTextLineVertical) and
abs(obj.width-self.width) < d and abs(obj.width-self.width) < d and
(abs(obj.y0-self.y0) < d or (abs(obj.y0-self.y0) < d or
abs(obj.y1-self.y1) < d)) ] abs(obj.y1-self.y1) < d)) ]
## LTTextBox ## LTTextBox
## ##
@ -408,7 +408,7 @@ class LTTextBox(LTTextContainer):
self.index, bbox2str(self.bbox), self.get_text())) self.index, bbox2str(self.bbox), self.get_text()))
class LTTextBoxHorizontal(LTTextBox): class LTTextBoxHorizontal(LTTextBox):
def analyze(self, laparams): def analyze(self, laparams):
LTTextBox.analyze(self, laparams) LTTextBox.analyze(self, laparams)
self._objs = csort(self._objs, key=lambda obj: -obj.y1) self._objs = csort(self._objs, key=lambda obj: -obj.y1)
@ -438,7 +438,7 @@ class LTTextGroup(LTTextContainer):
return return
class LTTextGroupLRTB(LTTextGroup): class LTTextGroupLRTB(LTTextGroup):
def analyze(self, laparams): def analyze(self, laparams):
LTTextGroup.analyze(self, laparams) LTTextGroup.analyze(self, laparams)
# reorder the objects from top-left to bottom-right. # reorder the objects from top-left to bottom-right.
@ -448,7 +448,7 @@ class LTTextGroupLRTB(LTTextGroup):
return return
class LTTextGroupTBRL(LTTextGroup): class LTTextGroupTBRL(LTTextGroup):
def analyze(self, laparams): def analyze(self, laparams):
LTTextGroup.analyze(self, laparams) LTTextGroup.analyze(self, laparams)
# reorder the objects from top-right to bottom-left. # reorder the objects from top-right to bottom-left.
@ -466,14 +466,14 @@ class LTLayoutContainer(LTContainer):
LTContainer.__init__(self, bbox) LTContainer.__init__(self, bbox)
self.groups = None self.groups = None
return return
def get_textlines(self, laparams, objs): def get_textlines(self, laparams, objs):
obj0 = None obj0 = None
line = None line = None
for obj1 in objs: for obj1 in objs:
if obj0 is not None: if obj0 is not None:
k = 0 k = 0
if (obj0.is_compatible(obj1) and obj0.is_voverlap(obj1) and if (obj0.is_compatible(obj1) and obj0.is_voverlap(obj1) and
min(obj0.height, obj1.height) * laparams.line_overlap < obj0.voverlap(obj1) and min(obj0.height, obj1.height) * laparams.line_overlap < obj0.voverlap(obj1) and
obj0.hdistance(obj1) < max(obj0.width, obj1.width) * laparams.char_margin): obj0.hdistance(obj1) < max(obj0.width, obj1.width) * laparams.char_margin):
# obj0 and obj1 is horizontally aligned: # obj0 and obj1 is horizontally aligned:
@ -488,7 +488,7 @@ class LTLayoutContainer(LTContainer):
# (char_margin) # (char_margin)
k |= 1 k |= 1
if (laparams.detect_vertical and if (laparams.detect_vertical and
obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and
min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1) and min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1) and
obj0.vdistance(obj1) < max(obj0.height, obj1.height) * laparams.char_margin): obj0.vdistance(obj1) < max(obj0.height, obj1.height) * laparams.char_margin):
# obj0 and obj1 is vertically aligned: # obj0 and obj1 is vertically aligned:
@ -565,9 +565,9 @@ class LTLayoutContainer(LTContainer):
assert boxes assert boxes
def dist(obj1, obj2): def dist(obj1, obj2):
"""A distance function between two TextBoxes. """A distance function between two TextBoxes.
Consider the bounding rectangle for obj1 and obj2. Consider the bounding rectangle for obj1 and obj2.
Return its area less the areas of obj1 and obj2, Return its area less the areas of obj1 and obj2,
shown as 'www' below. This value may be negative. shown as 'www' below. This value may be negative.
+------+..........+ (x1,y1) +------+..........+ (x1,y1)
| obj1 |wwwwwwwwww: | obj1 |wwwwwwwwww:
@ -621,7 +621,7 @@ class LTLayoutContainer(LTContainer):
plane.add(group) plane.add(group)
assert len(plane) == 1 assert len(plane) == 1
return list(plane) return list(plane)
def analyze(self, laparams): def analyze(self, laparams):
# textobjs is a list of LTChar objects, i.e. # textobjs is a list of LTChar objects, i.e.
# it has all the individual characters in the page. # it has all the individual characters in the page.
@ -668,7 +668,7 @@ class LTFigure(LTLayoutContainer):
def analyze(self, laparams): def analyze(self, laparams):
if not laparams.all_texts: return if not laparams.all_texts: return
LTLayoutContainer.analyze(self, laparams) LTLayoutContainer.analyze(self, laparams)
return return
## LTPage ## LTPage

View File

@ -74,8 +74,8 @@ class PDFTextDevice(PDFDevice):
seq, matrix, textstate.linematrix, font, fontsize, seq, matrix, textstate.linematrix, font, fontsize,
scaling, charspace, wordspace, rise, dxscale) scaling, charspace, wordspace, rise, dxscale)
return return
def render_string_horizontal(self, seq, matrix, (x,y), def render_string_horizontal(self, seq, matrix, (x,y),
font, fontsize, scaling, charspace, wordspace, rise, dxscale): font, fontsize, scaling, charspace, wordspace, rise, dxscale):
needcharspace = False needcharspace = False
for obj in seq: for obj in seq:
@ -93,7 +93,7 @@ class PDFTextDevice(PDFDevice):
needcharspace = True needcharspace = True
return (x, y) return (x, y)
def render_string_vertical(self, seq, matrix, (x,y), def render_string_vertical(self, seq, matrix, (x,y),
font, fontsize, scaling, charspace, wordspace, rise, dxscale): font, fontsize, scaling, charspace, wordspace, rise, dxscale):
needcharspace = False needcharspace = False
for obj in seq: for obj in seq:
@ -104,7 +104,7 @@ class PDFTextDevice(PDFDevice):
for cid in font.decode(obj): for cid in font.decode(obj):
if needcharspace: if needcharspace:
y += charspace y += charspace
y += self.render_char(translate_matrix(matrix, (x,y)), y += self.render_char(translate_matrix(matrix, (x,y)),
font, fontsize, scaling, rise, cid) font, fontsize, scaling, rise, cid)
if cid == 32 and wordspace: if cid == 32 and wordspace:
y += wordspace y += wordspace

View File

@ -260,7 +260,7 @@ class PDFDocument(object):
doc = PDFDocument(parser) doc = PDFDocument(parser)
doc.initialize(password) doc.initialize(password)
obj = doc.getobj(objid) obj = doc.getobj(objid)
""" """
debug = 0 debug = 0
@ -425,7 +425,7 @@ class PDFDocument(object):
raise PDFSyntaxError('Invalid object spec: offset=%r' % pos) raise PDFSyntaxError('Invalid object spec: offset=%r' % pos)
(_,obj) = self._parser.nextobject() (_,obj) = self._parser.nextobject()
return obj return obj
# can raise PDFObjectNotFound # can raise PDFObjectNotFound
def getobj(self, objid): def getobj(self, objid):
assert objid != 0 assert objid != 0

View File

@ -102,7 +102,7 @@ class Type1FontHeaderParser(PSStackParser):
except KeyError: except KeyError:
pass pass
return self._cid2unicode return self._cid2unicode
def do_keyword(self, pos, token): def do_keyword(self, pos, token):
if token is self.KEYWORD_PUT: if token is self.KEYWORD_PUT:
((_,key),(_,value)) = self.pop(2) ((_,key),(_,value)) = self.pop(2)
@ -111,7 +111,7 @@ class Type1FontHeaderParser(PSStackParser):
self.add_results((key, literal_name(value))) self.add_results((key, literal_name(value)))
return return
## CFFFont ## CFFFont
## (Format specified in Adobe Technical Note: #5176 ## (Format specified in Adobe Technical Note: #5176
## "The Compact Font Format Specification") ## "The Compact Font Format Specification")

View File

@ -125,7 +125,7 @@ class PDFGraphicState(object):
class PDFResourceManager(object): class PDFResourceManager(object):
"""Repository of shared resources. """Repository of shared resources.
ResourceManager facilitates reuse of shared resources ResourceManager facilitates reuse of shared resources
such as fonts and images so that large objects are not such as fonts and images so that large objects are not
allocated multiple times. allocated multiple times.
@ -725,7 +725,7 @@ class PDFPageInterpreter(object):
interpreter = self.dup() interpreter = self.dup()
bbox = list_value(xobj['BBox']) bbox = list_value(xobj['BBox'])
matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY)) matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
# According to PDF reference 1.7 section 4.9.1, XObjects in # According to PDF reference 1.7 section 4.9.1, XObjects in
# earlier PDFs (prior to v1.2) use the page's Resources entry # earlier PDFs (prior to v1.2) use the page's Resources entry
# instead of having their own Resources entry. # instead of having their own Resources entry.
resources = dict_value(xobj.get('Resources')) or self.resources.copy() resources = dict_value(xobj.get('Resources')) or self.resources.copy()

View File

@ -40,7 +40,7 @@ class PDFPage(object):
def __init__(self, doc, pageid, attrs): def __init__(self, doc, pageid, attrs):
"""Initialize a page object. """Initialize a page object.
doc: a PDFDocument object. doc: a PDFDocument object.
pageid: any Python object that can uniquely identify the page. pageid: any Python object that can uniquely identify the page.
attrs: a dictionary of page attributes. attrs: a dictionary of page attributes.

View File

@ -35,7 +35,7 @@ class PDFParser(PSStackParser):
parser.set_document(doc) parser.set_document(doc)
parser.seek(offset) parser.seek(offset)
parser.nextobject() parser.nextobject()
""" """
def __init__(self, fp): def __init__(self, fp):
@ -57,10 +57,10 @@ class PDFParser(PSStackParser):
KEYWORD_STARTXREF = KWD('startxref') KEYWORD_STARTXREF = KWD('startxref')
def do_keyword(self, pos, token): def do_keyword(self, pos, token):
"""Handles PDF-related keywords.""" """Handles PDF-related keywords."""
if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
self.add_results(*self.pop(1)) self.add_results(*self.pop(1))
elif token is self.KEYWORD_ENDOBJ: elif token is self.KEYWORD_ENDOBJ:
self.add_results(*self.pop(4)) self.add_results(*self.pop(4))
@ -125,7 +125,7 @@ class PDFParser(PSStackParser):
else: else:
# others # others
self.push((pos, token)) self.push((pos, token))
return return

View File

@ -68,7 +68,7 @@ def resolve1(x, default=None):
def resolve_all(x, default=None): def resolve_all(x, default=None):
"""Recursively resolves the given object and all the internals. """Recursively resolves the given object and all the internals.
Make sure there is no indirect reference within the nested object. Make sure there is no indirect reference within the nested object.
This procedure might be slow. This procedure might be slow.
""" """
@ -180,13 +180,13 @@ class PDFStream(PDFObject):
def __contains__(self, name): def __contains__(self, name):
return name in self.attrs return name in self.attrs
def __getitem__(self, name): def __getitem__(self, name):
return self.attrs[name] return self.attrs[name]
def get(self, name, default=None): def get(self, name, default=None):
return self.attrs.get(name, default) return self.attrs.get(name, default)
def get_any(self, names, default=None): def get_any(self, names, default=None):
for name in names: for name in names:
if name in self.attrs: if name in self.attrs:

View File

@ -32,7 +32,7 @@ class PSObject(object):
class PSLiteral(PSObject): class PSLiteral(PSObject):
"""A class that represents a PostScript literal. """A class that represents a PostScript literal.
Postscript literals are used as identifiers, such as Postscript literals are used as identifiers, such as
variable names, property names and dictionary keys. variable names, property names and dictionary keys.
Literals are case sensitive and denoted by a preceding Literals are case sensitive and denoted by a preceding
@ -55,11 +55,11 @@ class PSLiteral(PSObject):
class PSKeyword(PSObject): class PSKeyword(PSObject):
"""A class that represents a PostScript keyword. """A class that represents a PostScript keyword.
PostScript keywords are a dozen of predefined words. PostScript keywords are a dozen of predefined words.
Commands and directives in PostScript are expressed by keywords. Commands and directives in PostScript are expressed by keywords.
They are also used to denote the content boundaries. They are also used to denote the content boundaries.
Note: Do not create an instance of PSKeyword directly. Note: Do not create an instance of PSKeyword directly.
Always use PSKeywordTable.intern(). Always use PSKeywordTable.intern().
""" """
@ -80,7 +80,7 @@ class PSSymbolTable(object):
Interned objects can be checked its identity with "is" operator. Interned objects can be checked its identity with "is" operator.
""" """
def __init__(self, klass): def __init__(self, klass):
self.dict = {} self.dict = {}
self.klass = klass self.klass = klass
@ -357,7 +357,7 @@ class PSBaseParser(object):
pass pass
self._parse1 = self._parse_main self._parse1 = self._parse_main
return j return j
def _parse_float(self, s, i): def _parse_float(self, s, i):
m = END_NUMBER.search(s, i) m = END_NUMBER.search(s, i)
if not m: if not m:
@ -493,17 +493,17 @@ class PSStackParser(PSBaseParser):
def push(self, *objs): def push(self, *objs):
self.curstack.extend(objs) self.curstack.extend(objs)
return return
def pop(self, n): def pop(self, n):
objs = self.curstack[-n:] objs = self.curstack[-n:]
self.curstack[-n:] = [] self.curstack[-n:] = []
return objs return objs
def popall(self): def popall(self):
objs = self.curstack objs = self.curstack
self.curstack = [] self.curstack = []
return objs return objs
def add_results(self, *objs): def add_results(self, *objs):
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'add_results: %r' % (objs,) print >>sys.stderr, 'add_results: %r' % (objs,)
@ -516,7 +516,7 @@ class PSStackParser(PSBaseParser):
if 2 <= self.debug: if 2 <= self.debug:
print >>sys.stderr, 'start_type: pos=%r, type=%r' % (pos, type) print >>sys.stderr, 'start_type: pos=%r, type=%r' % (pos, type)
return return
def end_type(self, type): def end_type(self, type):
if self.curtype != type: if self.curtype != type:
raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type)) raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))

View File

@ -1060,7 +1060,7 @@ class RijndaelEncryptor(object):
>>> RijndaelEncryptor(key, 128).encrypt(plaintext).encode('hex') >>> RijndaelEncryptor(key, 128).encrypt(plaintext).encode('hex')
'd8f532538289ef7d06b506a4fd5be9c9' 'd8f532538289ef7d06b506a4fd5be9c9'
""" """
def __init__(self, key, keybits=256): def __init__(self, key, keybits=256):
assert len(key) == KEYLENGTH(keybits) assert len(key) == KEYLENGTH(keybits)
(self.rk, self.nrounds) = rijndaelSetupEncrypt(key, keybits) (self.rk, self.nrounds) = rijndaelSetupEncrypt(key, keybits)

View File

@ -255,7 +255,7 @@ class Plane(object):
for obj in objs: for obj in objs:
self.add(obj) self.add(obj)
return return
# add(obj): place an object. # add(obj): place an object.
def add(self, obj): def add(self, obj):
for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)): for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):

View File

@ -7,9 +7,9 @@ setup(
version=__version__, version=__version__,
description='PDF parser and analyzer', description='PDF parser and analyzer',
long_description='''PDFMiner is a tool for extracting information from PDF documents. long_description='''PDFMiner is a tool for extracting information from PDF documents.
Unlike other PDF-related tools, it focuses entirely on getting Unlike other PDF-related tools, it focuses entirely on getting
and analyzing text data. PDFMiner allows to obtain and analyzing text data. PDFMiner allows to obtain
the exact location of texts in a page, as well as the exact location of texts in a page, as well as
other information such as fonts or lines. other information such as fonts or lines.
It includes a PDF converter that can transform PDF files It includes a PDF converter that can transform PDF files
into other text formats (such as HTML). It has an extensible into other text formats (such as HTML). It has an extensible

View File

@ -50,7 +50,7 @@ class CMapConverter(object):
assert values[0] == 'CID' assert values[0] == 'CID'
encs = values encs = values
continue continue
def put(dmap, code, cid, force=False): def put(dmap, code, cid, force=False):
for b in code[:-1]: for b in code[:-1]:
b = ord(b) b = ord(b)
@ -64,7 +64,7 @@ class CMapConverter(object):
if force or ((b not in dmap) or dmap[b] == cid): if force or ((b not in dmap) or dmap[b] == cid):
dmap[b] = cid dmap[b] = cid
return return
def add(unimap, enc, code): def add(unimap, enc, code):
try: try:
codec = self.enc2codec[enc] codec = self.enc2codec[enc]
@ -78,20 +78,20 @@ class CMapConverter(object):
except UnicodeError: except UnicodeError:
pass pass
return return
def pick(unimap): def pick(unimap):
chars = unimap.items() chars = unimap.items()
chars.sort(key=(lambda (c,n):(n,-ord(c))), reverse=True) chars.sort(key=(lambda (c,n):(n,-ord(c))), reverse=True)
(c,_) = chars[0] (c,_) = chars[0]
return c return c
cid = int(values[0]) cid = int(values[0])
unimap_h = {} unimap_h = {}
unimap_v = {} unimap_v = {}
for (enc,value) in zip(encs, values): for (enc,value) in zip(encs, values):
if enc == 'CID': continue if enc == 'CID': continue
if value == '*': continue if value == '*': continue
# hcodes, vcodes: encoded bytes for each writing mode. # hcodes, vcodes: encoded bytes for each writing mode.
hcodes = [] hcodes = []
vcodes = [] vcodes = []
@ -121,7 +121,7 @@ class CMapConverter(object):
for code in hcodes: for code in hcodes:
put(hmap, code, cid) put(hmap, code, cid)
put(vmap, code, cid) put(vmap, code, cid)
# Determine the "most popular" candidate. # Determine the "most popular" candidate.
if unimap_h: if unimap_h:
self.cid2unichr_h[cid] = pick(unimap_h) self.cid2unichr_h[cid] = pick(unimap_h)
@ -137,7 +137,7 @@ class CMapConverter(object):
) )
fp.write(pickle.dumps(data)) fp.write(pickle.dumps(data))
return return
def dump_unicodemap(self, fp): def dump_unicodemap(self, fp):
data = dict( data = dict(
CID2UNICHR_H=self.cid2unichr_h, CID2UNICHR_H=self.cid2unichr_h,
@ -151,7 +151,7 @@ def main(argv):
import getopt import getopt
import gzip import gzip
import os.path import os.path
def usage(): def usage():
print 'usage: %s [-c enc=codec] output_dir regname [cid2code.txt ...]' % argv[0] print 'usage: %s [-c enc=codec] output_dir regname [cid2code.txt ...]' % argv[0]
return 100 return 100

View File

@ -25,7 +25,7 @@ def dumpxml(out, obj, codec=None):
if obj is None: if obj is None:
out.write('<null />') out.write('<null />')
return return
if isinstance(obj, dict): if isinstance(obj, dict):
out.write('<dict size="%d">\n' % len(obj)) out.write('<dict size="%d">\n' % len(obj))
for (k,v) in obj.iteritems(): for (k,v) in obj.iteritems():
@ -179,7 +179,7 @@ def extractembedded(outfp, fname, objids, pagenos, password='',
out.write(fileobj.get_data()) out.write(fileobj.get_data())
out.close() out.close()
return return
fp = file(fname, 'rb') fp = file(fname, 'rb')
parser = PDFParser(fp) parser = PDFParser(fp)
doc = PDFDocument(parser) doc = PDFDocument(parser)

View File

@ -14,7 +14,7 @@ This is an in-house mapping table for some Latin-1 characters
LATIN2ASCII = { LATIN2ASCII = {
#0x00a0: '', #0x00a0: '',
#0x00a7: '', #0x00a7: '',
# iso-8859-1 # iso-8859-1
0x00c0: 'A`', 0x00c0: 'A`',
0x00c1: "A'", 0x00c1: "A'",

View File

@ -159,7 +159,7 @@ class WebApp(object):
def convert(self): def convert(self):
self.form = cgi.FieldStorage(fp=self.infp, environ=self.environ) self.form = cgi.FieldStorage(fp=self.infp, environ=self.environ)
if (self.method != 'POST' or if (self.method != 'POST' or
'c' not in self.form or 'c' not in self.form or
'f' not in self.form): 'f' not in self.form):
self.response_200() self.response_200()