PEP8: Remove trailing whitespace
parent
9ff6aa0463
commit
c1da8b835c
|
@ -16,13 +16,13 @@ def ascii85decode(data):
|
||||||
letters, using 85 different types of characters (as 256**4 < 85**5).
|
letters, using 85 different types of characters (as 256**4 < 85**5).
|
||||||
When the length of the original bytes is not a multiple of 4, a special
|
When the length of the original bytes is not a multiple of 4, a special
|
||||||
rule is used for round up.
|
rule is used for round up.
|
||||||
|
|
||||||
The Adobe's ASCII85 implementation is slightly different from
|
The Adobe's ASCII85 implementation is slightly different from
|
||||||
its original in handling the last characters.
|
its original in handling the last characters.
|
||||||
|
|
||||||
The sample string is taken from:
|
The sample string is taken from:
|
||||||
http://en.wikipedia.org/w/index.php?title=Ascii85
|
http://en.wikipedia.org/w/index.php?title=Ascii85
|
||||||
|
|
||||||
>>> ascii85decode('9jqo^BlbD-BleB1DJ+*+F(f,q')
|
>>> ascii85decode('9jqo^BlbD-BleB1DJ+*+F(f,q')
|
||||||
'Man is distinguished'
|
'Man is distinguished'
|
||||||
>>> ascii85decode('E,9)oF*2M7/c~>')
|
>>> ascii85decode('E,9)oF*2M7/c~>')
|
||||||
|
@ -60,7 +60,7 @@ def asciihexdecode(data):
|
||||||
EOD. Any other characters will cause an error. If the filter encounters
|
EOD. Any other characters will cause an error. If the filter encounters
|
||||||
the EOD marker after reading an odd number of hexadecimal digits, it
|
the EOD marker after reading an odd number of hexadecimal digits, it
|
||||||
will behave as if a 0 followed the last digit.
|
will behave as if a 0 followed the last digit.
|
||||||
|
|
||||||
>>> asciihexdecode('61 62 2e6364 65')
|
>>> asciihexdecode('61 62 2e6364 65')
|
||||||
'ab.cde'
|
'ab.cde'
|
||||||
>>> asciihexdecode('61 62 2e6364 657>')
|
>>> asciihexdecode('61 62 2e6364 657>')
|
||||||
|
|
|
@ -308,7 +308,7 @@ class CCITTG4Parser(BitParser):
|
||||||
BitParser.add(UNCOMPRESSED, 'T1000' , '0000000010')
|
BitParser.add(UNCOMPRESSED, 'T1000' , '0000000010')
|
||||||
BitParser.add(UNCOMPRESSED, 'T00000' , '00000000011')
|
BitParser.add(UNCOMPRESSED, 'T00000' , '00000000011')
|
||||||
BitParser.add(UNCOMPRESSED, 'T10000' , '00000000010')
|
BitParser.add(UNCOMPRESSED, 'T10000' , '00000000010')
|
||||||
|
|
||||||
class EOFB(Exception): pass
|
class EOFB(Exception): pass
|
||||||
class InvalidData(Exception): pass
|
class InvalidData(Exception): pass
|
||||||
class ByteSkip(Exception): pass
|
class ByteSkip(Exception): pass
|
||||||
|
@ -386,7 +386,7 @@ class CCITTG4Parser(BitParser):
|
||||||
def _parse_uncompressed(self, bits):
|
def _parse_uncompressed(self, bits):
|
||||||
if not bits: raise self.InvalidData
|
if not bits: raise self.InvalidData
|
||||||
if bits.startswith('T'):
|
if bits.startswith('T'):
|
||||||
self._accept = self._parse_mode
|
self._accept = self._parse_mode
|
||||||
self._color = int(bits[1])
|
self._color = int(bits[1])
|
||||||
self._do_uncompressed(bits[2:])
|
self._do_uncompressed(bits[2:])
|
||||||
return self.MODE
|
return self.MODE
|
||||||
|
@ -418,14 +418,14 @@ class CCITTG4Parser(BitParser):
|
||||||
def output_line(self, y, bits):
|
def output_line(self, y, bits):
|
||||||
print y, ''.join( str(b) for b in bits )
|
print y, ''.join( str(b) for b in bits )
|
||||||
return
|
return
|
||||||
|
|
||||||
def _reset_line(self):
|
def _reset_line(self):
|
||||||
self._refline = self._curline
|
self._refline = self._curline
|
||||||
self._curline = array.array('b', [1]*self.width)
|
self._curline = array.array('b', [1]*self.width)
|
||||||
self._curpos = -1
|
self._curpos = -1
|
||||||
self._color = 1
|
self._color = 1
|
||||||
return
|
return
|
||||||
|
|
||||||
def _flush_line(self):
|
def _flush_line(self):
|
||||||
if self.width <= self._curpos:
|
if self.width <= self._curpos:
|
||||||
self.output_line(self._y, self._curline)
|
self.output_line(self._y, self._curline)
|
||||||
|
@ -460,7 +460,7 @@ class CCITTG4Parser(BitParser):
|
||||||
self._curpos = x1
|
self._curpos = x1
|
||||||
self._color = 1-self._color
|
self._color = 1-self._color
|
||||||
return
|
return
|
||||||
|
|
||||||
def _do_pass(self):
|
def _do_pass(self):
|
||||||
#print '* pass: curpos=%r, color=%r' % (self._curpos, self._color)
|
#print '* pass: curpos=%r, color=%r' % (self._curpos, self._color)
|
||||||
#print ' refline:', self._get_refline(self._curpos+1)
|
#print ' refline:', self._get_refline(self._curpos+1)
|
||||||
|
@ -487,7 +487,7 @@ class CCITTG4Parser(BitParser):
|
||||||
self._curline[x] = self._color
|
self._curline[x] = self._color
|
||||||
self._curpos = x1
|
self._curpos = x1
|
||||||
return
|
return
|
||||||
|
|
||||||
def _do_horizontal(self, n1, n2):
|
def _do_horizontal(self, n1, n2):
|
||||||
#print '* horizontal(%d,%d): curpos=%r, color=%r' % (n1, n2, self._curpos, self._color)
|
#print '* horizontal(%d,%d): curpos=%r, color=%r' % (n1, n2, self._curpos, self._color)
|
||||||
if self._curpos < 0:
|
if self._curpos < 0:
|
||||||
|
@ -503,7 +503,7 @@ class CCITTG4Parser(BitParser):
|
||||||
x += 1
|
x += 1
|
||||||
self._curpos = x
|
self._curpos = x
|
||||||
return
|
return
|
||||||
|
|
||||||
def _do_uncompressed(self, bits):
|
def _do_uncompressed(self, bits):
|
||||||
#print '* uncompressed(%r): curpos=%r' % (bits, self._curpos)
|
#print '* uncompressed(%r): curpos=%r' % (bits, self._curpos)
|
||||||
for c in bits:
|
for c in bits:
|
||||||
|
@ -672,16 +672,16 @@ class TestCCITTG4Parser(unittest.TestCase):
|
||||||
## CCITTFaxDecoder
|
## CCITTFaxDecoder
|
||||||
##
|
##
|
||||||
class CCITTFaxDecoder(CCITTG4Parser):
|
class CCITTFaxDecoder(CCITTG4Parser):
|
||||||
|
|
||||||
def __init__(self, width, bytealign=False, reversed=False):
|
def __init__(self, width, bytealign=False, reversed=False):
|
||||||
CCITTG4Parser.__init__(self, width, bytealign=bytealign)
|
CCITTG4Parser.__init__(self, width, bytealign=bytealign)
|
||||||
self.reversed = reversed
|
self.reversed = reversed
|
||||||
self._buf = ''
|
self._buf = ''
|
||||||
return
|
return
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
return self._buf
|
return self._buf
|
||||||
|
|
||||||
def output_line(self, y, bits):
|
def output_line(self, y, bits):
|
||||||
bytes = array.array('B', [0]*((len(bits)+7)/8))
|
bytes = array.array('B', [0]*((len(bits)+7)/8))
|
||||||
if self.reversed:
|
if self.reversed:
|
||||||
|
@ -704,8 +704,8 @@ def ccittfaxdecode(data, params):
|
||||||
raise ValueError(K)
|
raise ValueError(K)
|
||||||
parser.feedbytes(data)
|
parser.feedbytes(data)
|
||||||
return parser.close()
|
return parser.close()
|
||||||
|
|
||||||
|
|
||||||
# test
|
# test
|
||||||
def main(argv):
|
def main(argv):
|
||||||
import pygame
|
import pygame
|
||||||
|
|
|
@ -81,7 +81,7 @@ class CMap(object):
|
||||||
else:
|
else:
|
||||||
self.dump(out=out, code2cid=v, code=c)
|
self.dump(out=out, code2cid=v, code=c)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
## IdentityCMap
|
## IdentityCMap
|
||||||
##
|
##
|
||||||
|
@ -100,8 +100,8 @@ class IdentityCMap(object):
|
||||||
return struct.unpack('>%dH' % n, code)
|
return struct.unpack('>%dH' % n, code)
|
||||||
else:
|
else:
|
||||||
return ()
|
return ()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## UnicodeMap
|
## UnicodeMap
|
||||||
##
|
##
|
||||||
|
@ -162,7 +162,7 @@ class FileCMap(CMap):
|
||||||
## FileUnicodeMap
|
## FileUnicodeMap
|
||||||
##
|
##
|
||||||
class FileUnicodeMap(UnicodeMap):
|
class FileUnicodeMap(UnicodeMap):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
UnicodeMap.__init__(self)
|
UnicodeMap.__init__(self)
|
||||||
self.attrs = {}
|
self.attrs = {}
|
||||||
|
@ -205,12 +205,12 @@ class PyCMap(CMap):
|
||||||
|
|
||||||
def is_vertical(self):
|
def is_vertical(self):
|
||||||
return self._is_vertical
|
return self._is_vertical
|
||||||
|
|
||||||
|
|
||||||
## PyUnicodeMap
|
## PyUnicodeMap
|
||||||
##
|
##
|
||||||
class PyUnicodeMap(UnicodeMap):
|
class PyUnicodeMap(UnicodeMap):
|
||||||
|
|
||||||
def __init__(self, name, module, vertical):
|
def __init__(self, name, module, vertical):
|
||||||
if vertical:
|
if vertical:
|
||||||
cid2unichr = module.CID2UNICHR_V
|
cid2unichr = module.CID2UNICHR_V
|
||||||
|
@ -231,7 +231,7 @@ class CMapDB(object):
|
||||||
debug = 0
|
debug = 0
|
||||||
_cmap_cache = {}
|
_cmap_cache = {}
|
||||||
_umap_cache = {}
|
_umap_cache = {}
|
||||||
|
|
||||||
class CMapNotFound(CMapError): pass
|
class CMapNotFound(CMapError): pass
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
@ -119,7 +119,7 @@ class PDFPageAggregator(PDFLayoutAnalyzer):
|
||||||
PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams)
|
PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams)
|
||||||
self.result = None
|
self.result = None
|
||||||
return
|
return
|
||||||
|
|
||||||
def receive_layout(self, ltpage):
|
def receive_layout(self, ltpage):
|
||||||
self.result = ltpage
|
self.result = ltpage
|
||||||
return
|
return
|
||||||
|
@ -137,7 +137,7 @@ class PDFConverter(PDFLayoutAnalyzer):
|
||||||
self.outfp = outfp
|
self.outfp = outfp
|
||||||
self.codec = codec
|
self.codec = codec
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
## TextConverter
|
## TextConverter
|
||||||
##
|
##
|
||||||
|
@ -179,7 +179,7 @@ class TextConverter(PDFConverter):
|
||||||
if self.imagewriter is None: return
|
if self.imagewriter is None: return
|
||||||
PDFConverter.render_image(self, name, stream)
|
PDFConverter.render_image(self, name, stream)
|
||||||
return
|
return
|
||||||
|
|
||||||
def paint_path(self, gstate, stroke, fill, evenodd, path):
|
def paint_path(self, gstate, stroke, fill, evenodd, path):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -197,13 +197,13 @@ class HTMLConverter(PDFConverter):
|
||||||
'curve': 'black',
|
'curve': 'black',
|
||||||
'page': 'gray',
|
'page': 'gray',
|
||||||
}
|
}
|
||||||
|
|
||||||
TEXT_COLORS = {
|
TEXT_COLORS = {
|
||||||
'textbox': 'blue',
|
'textbox': 'blue',
|
||||||
'char': 'black',
|
'char': 'black',
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
|
def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
|
||||||
scale=1, fontscale=1.0, layoutmode='normal', showpageno=True,
|
scale=1, fontscale=1.0, layoutmode='normal', showpageno=True,
|
||||||
pagemargin=50, imagewriter=None,
|
pagemargin=50, imagewriter=None,
|
||||||
rect_colors={'curve':'black', 'page':'gray'},
|
rect_colors={'curve':'black', 'page':'gray'},
|
||||||
|
@ -295,7 +295,7 @@ class HTMLConverter(PDFConverter):
|
||||||
self._font = self._fontstack.pop()
|
self._font = self._fontstack.pop()
|
||||||
self.write('</div>')
|
self.write('</div>')
|
||||||
return
|
return
|
||||||
|
|
||||||
def put_text(self, text, fontname, fontsize):
|
def put_text(self, text, fontname, fontsize):
|
||||||
font = (fontname, fontsize)
|
font = (fontname, fontsize)
|
||||||
if font != self._font:
|
if font != self._font:
|
||||||
|
@ -399,7 +399,7 @@ class XMLConverter(PDFConverter):
|
||||||
def write_footer(self):
|
def write_footer(self):
|
||||||
self.outfp.write('</pages>\n')
|
self.outfp.write('</pages>\n')
|
||||||
return
|
return
|
||||||
|
|
||||||
def write_text(self, text):
|
def write_text(self, text):
|
||||||
self.outfp.write(enc(text, self.codec))
|
self.outfp.write(enc(text, self.codec))
|
||||||
return
|
return
|
||||||
|
|
|
@ -8,7 +8,7 @@ written with a proportional font.
|
||||||
The following data were extracted from the AFM files:
|
The following data were extracted from the AFM files:
|
||||||
|
|
||||||
http://www.ctan.org/tex-archive/fonts/adobe/afm/
|
http://www.ctan.org/tex-archive/fonts/adobe/afm/
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
### BEGIN Verbatim copy of the license part
|
### BEGIN Verbatim copy of the license part
|
||||||
|
|
|
@ -70,7 +70,7 @@ class ImageWriter(object):
|
||||||
(width, height) = image.srcsize
|
(width, height) = image.srcsize
|
||||||
if len(filters) == 1 and filters[0] in LITERALS_DCT_DECODE:
|
if len(filters) == 1 and filters[0] in LITERALS_DCT_DECODE:
|
||||||
ext = '.jpg'
|
ext = '.jpg'
|
||||||
elif (image.bits == 1 or
|
elif (image.bits == 1 or
|
||||||
image.bits == 8 and image.colorspace in (LITERAL_DEVICE_RGB, LITERAL_DEVICE_GRAY)):
|
image.bits == 8 and image.colorspace in (LITERAL_DEVICE_RGB, LITERAL_DEVICE_GRAY)):
|
||||||
ext = '.%dx%d.bmp' % (width, height)
|
ext = '.%dx%d.bmp' % (width, height)
|
||||||
else:
|
else:
|
||||||
|
@ -84,7 +84,7 @@ class ImageWriter(object):
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from PIL import ImageChops
|
from PIL import ImageChops
|
||||||
ifp = cStringIO.StringIO(raw_data)
|
ifp = cStringIO.StringIO(raw_data)
|
||||||
i = Image.open(ifp)
|
i = Image.open(ifp)
|
||||||
i = ImageChops.invert(i)
|
i = ImageChops.invert(i)
|
||||||
i = i.convert('RGB')
|
i = i.convert('RGB')
|
||||||
i.save(fp, 'JPEG')
|
i.save(fp, 'JPEG')
|
||||||
|
|
|
@ -94,7 +94,7 @@ class LTComponent(LTItem):
|
||||||
|
|
||||||
def is_empty(self):
|
def is_empty(self):
|
||||||
return self.width <= 0 or self.height <= 0
|
return self.width <= 0 or self.height <= 0
|
||||||
|
|
||||||
def is_hoverlap(self, obj):
|
def is_hoverlap(self, obj):
|
||||||
assert isinstance(obj, LTComponent)
|
assert isinstance(obj, LTComponent)
|
||||||
return obj.x0 <= self.x1 and self.x0 <= obj.x1
|
return obj.x0 <= self.x1 and self.x0 <= obj.x1
|
||||||
|
@ -247,7 +247,7 @@ class LTChar(LTComponent, LTText):
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
|
return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
|
||||||
(self.__class__.__name__, bbox2str(self.bbox),
|
(self.__class__.__name__, bbox2str(self.bbox),
|
||||||
matrix2str(self.matrix), self.fontname, self.adv,
|
matrix2str(self.matrix), self.fontname, self.adv,
|
||||||
self.get_text()))
|
self.get_text()))
|
||||||
|
|
||||||
|
@ -258,7 +258,7 @@ class LTChar(LTComponent, LTText):
|
||||||
"""Returns True if two characters can coexist in the same line."""
|
"""Returns True if two characters can coexist in the same line."""
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
## LTContainer
|
## LTContainer
|
||||||
##
|
##
|
||||||
class LTContainer(LTComponent):
|
class LTContainer(LTComponent):
|
||||||
|
@ -287,7 +287,7 @@ class LTContainer(LTComponent):
|
||||||
for obj in self._objs:
|
for obj in self._objs:
|
||||||
obj.analyze(laparams)
|
obj.analyze(laparams)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
## LTExpandableContainer
|
## LTExpandableContainer
|
||||||
##
|
##
|
||||||
|
@ -315,7 +315,7 @@ class LTTextContainer(LTExpandableContainer, LTText):
|
||||||
|
|
||||||
def get_text(self):
|
def get_text(self):
|
||||||
return ''.join( obj.get_text() for obj in self if isinstance(obj, LTText) )
|
return ''.join( obj.get_text() for obj in self if isinstance(obj, LTText) )
|
||||||
|
|
||||||
|
|
||||||
## LTTextLine
|
## LTTextLine
|
||||||
##
|
##
|
||||||
|
@ -363,7 +363,7 @@ class LTTextLineHorizontal(LTTextLine):
|
||||||
abs(obj.height-self.height) < d and
|
abs(obj.height-self.height) < d and
|
||||||
(abs(obj.x0-self.x0) < d or
|
(abs(obj.x0-self.x0) < d or
|
||||||
abs(obj.x1-self.x1) < d)) ]
|
abs(obj.x1-self.x1) < d)) ]
|
||||||
|
|
||||||
class LTTextLineVertical(LTTextLine):
|
class LTTextLineVertical(LTTextLine):
|
||||||
|
|
||||||
def __init__(self, word_margin):
|
def __init__(self, word_margin):
|
||||||
|
@ -379,7 +379,7 @@ class LTTextLineVertical(LTTextLine):
|
||||||
self._y0 = obj.y0
|
self._y0 = obj.y0
|
||||||
LTTextLine.add(self, obj)
|
LTTextLine.add(self, obj)
|
||||||
return
|
return
|
||||||
|
|
||||||
def find_neighbors(self, plane, ratio):
|
def find_neighbors(self, plane, ratio):
|
||||||
d = ratio*self.width
|
d = ratio*self.width
|
||||||
objs = plane.find((self.x0-d, self.y0, self.x1+d, self.y1))
|
objs = plane.find((self.x0-d, self.y0, self.x1+d, self.y1))
|
||||||
|
@ -387,8 +387,8 @@ class LTTextLineVertical(LTTextLine):
|
||||||
if (isinstance(obj, LTTextLineVertical) and
|
if (isinstance(obj, LTTextLineVertical) and
|
||||||
abs(obj.width-self.width) < d and
|
abs(obj.width-self.width) < d and
|
||||||
(abs(obj.y0-self.y0) < d or
|
(abs(obj.y0-self.y0) < d or
|
||||||
abs(obj.y1-self.y1) < d)) ]
|
abs(obj.y1-self.y1) < d)) ]
|
||||||
|
|
||||||
|
|
||||||
## LTTextBox
|
## LTTextBox
|
||||||
##
|
##
|
||||||
|
@ -408,7 +408,7 @@ class LTTextBox(LTTextContainer):
|
||||||
self.index, bbox2str(self.bbox), self.get_text()))
|
self.index, bbox2str(self.bbox), self.get_text()))
|
||||||
|
|
||||||
class LTTextBoxHorizontal(LTTextBox):
|
class LTTextBoxHorizontal(LTTextBox):
|
||||||
|
|
||||||
def analyze(self, laparams):
|
def analyze(self, laparams):
|
||||||
LTTextBox.analyze(self, laparams)
|
LTTextBox.analyze(self, laparams)
|
||||||
self._objs = csort(self._objs, key=lambda obj: -obj.y1)
|
self._objs = csort(self._objs, key=lambda obj: -obj.y1)
|
||||||
|
@ -438,7 +438,7 @@ class LTTextGroup(LTTextContainer):
|
||||||
return
|
return
|
||||||
|
|
||||||
class LTTextGroupLRTB(LTTextGroup):
|
class LTTextGroupLRTB(LTTextGroup):
|
||||||
|
|
||||||
def analyze(self, laparams):
|
def analyze(self, laparams):
|
||||||
LTTextGroup.analyze(self, laparams)
|
LTTextGroup.analyze(self, laparams)
|
||||||
# reorder the objects from top-left to bottom-right.
|
# reorder the objects from top-left to bottom-right.
|
||||||
|
@ -448,7 +448,7 @@ class LTTextGroupLRTB(LTTextGroup):
|
||||||
return
|
return
|
||||||
|
|
||||||
class LTTextGroupTBRL(LTTextGroup):
|
class LTTextGroupTBRL(LTTextGroup):
|
||||||
|
|
||||||
def analyze(self, laparams):
|
def analyze(self, laparams):
|
||||||
LTTextGroup.analyze(self, laparams)
|
LTTextGroup.analyze(self, laparams)
|
||||||
# reorder the objects from top-right to bottom-left.
|
# reorder the objects from top-right to bottom-left.
|
||||||
|
@ -466,14 +466,14 @@ class LTLayoutContainer(LTContainer):
|
||||||
LTContainer.__init__(self, bbox)
|
LTContainer.__init__(self, bbox)
|
||||||
self.groups = None
|
self.groups = None
|
||||||
return
|
return
|
||||||
|
|
||||||
def get_textlines(self, laparams, objs):
|
def get_textlines(self, laparams, objs):
|
||||||
obj0 = None
|
obj0 = None
|
||||||
line = None
|
line = None
|
||||||
for obj1 in objs:
|
for obj1 in objs:
|
||||||
if obj0 is not None:
|
if obj0 is not None:
|
||||||
k = 0
|
k = 0
|
||||||
if (obj0.is_compatible(obj1) and obj0.is_voverlap(obj1) and
|
if (obj0.is_compatible(obj1) and obj0.is_voverlap(obj1) and
|
||||||
min(obj0.height, obj1.height) * laparams.line_overlap < obj0.voverlap(obj1) and
|
min(obj0.height, obj1.height) * laparams.line_overlap < obj0.voverlap(obj1) and
|
||||||
obj0.hdistance(obj1) < max(obj0.width, obj1.width) * laparams.char_margin):
|
obj0.hdistance(obj1) < max(obj0.width, obj1.width) * laparams.char_margin):
|
||||||
# obj0 and obj1 is horizontally aligned:
|
# obj0 and obj1 is horizontally aligned:
|
||||||
|
@ -488,7 +488,7 @@ class LTLayoutContainer(LTContainer):
|
||||||
# (char_margin)
|
# (char_margin)
|
||||||
k |= 1
|
k |= 1
|
||||||
if (laparams.detect_vertical and
|
if (laparams.detect_vertical and
|
||||||
obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and
|
obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and
|
||||||
min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1) and
|
min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1) and
|
||||||
obj0.vdistance(obj1) < max(obj0.height, obj1.height) * laparams.char_margin):
|
obj0.vdistance(obj1) < max(obj0.height, obj1.height) * laparams.char_margin):
|
||||||
# obj0 and obj1 is vertically aligned:
|
# obj0 and obj1 is vertically aligned:
|
||||||
|
@ -565,9 +565,9 @@ class LTLayoutContainer(LTContainer):
|
||||||
assert boxes
|
assert boxes
|
||||||
def dist(obj1, obj2):
|
def dist(obj1, obj2):
|
||||||
"""A distance function between two TextBoxes.
|
"""A distance function between two TextBoxes.
|
||||||
|
|
||||||
Consider the bounding rectangle for obj1 and obj2.
|
Consider the bounding rectangle for obj1 and obj2.
|
||||||
Return its area less the areas of obj1 and obj2,
|
Return its area less the areas of obj1 and obj2,
|
||||||
shown as 'www' below. This value may be negative.
|
shown as 'www' below. This value may be negative.
|
||||||
+------+..........+ (x1,y1)
|
+------+..........+ (x1,y1)
|
||||||
| obj1 |wwwwwwwwww:
|
| obj1 |wwwwwwwwww:
|
||||||
|
@ -621,7 +621,7 @@ class LTLayoutContainer(LTContainer):
|
||||||
plane.add(group)
|
plane.add(group)
|
||||||
assert len(plane) == 1
|
assert len(plane) == 1
|
||||||
return list(plane)
|
return list(plane)
|
||||||
|
|
||||||
def analyze(self, laparams):
|
def analyze(self, laparams):
|
||||||
# textobjs is a list of LTChar objects, i.e.
|
# textobjs is a list of LTChar objects, i.e.
|
||||||
# it has all the individual characters in the page.
|
# it has all the individual characters in the page.
|
||||||
|
@ -668,7 +668,7 @@ class LTFigure(LTLayoutContainer):
|
||||||
def analyze(self, laparams):
|
def analyze(self, laparams):
|
||||||
if not laparams.all_texts: return
|
if not laparams.all_texts: return
|
||||||
LTLayoutContainer.analyze(self, laparams)
|
LTLayoutContainer.analyze(self, laparams)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
## LTPage
|
## LTPage
|
||||||
|
|
|
@ -74,8 +74,8 @@ class PDFTextDevice(PDFDevice):
|
||||||
seq, matrix, textstate.linematrix, font, fontsize,
|
seq, matrix, textstate.linematrix, font, fontsize,
|
||||||
scaling, charspace, wordspace, rise, dxscale)
|
scaling, charspace, wordspace, rise, dxscale)
|
||||||
return
|
return
|
||||||
|
|
||||||
def render_string_horizontal(self, seq, matrix, (x,y),
|
def render_string_horizontal(self, seq, matrix, (x,y),
|
||||||
font, fontsize, scaling, charspace, wordspace, rise, dxscale):
|
font, fontsize, scaling, charspace, wordspace, rise, dxscale):
|
||||||
needcharspace = False
|
needcharspace = False
|
||||||
for obj in seq:
|
for obj in seq:
|
||||||
|
@ -93,7 +93,7 @@ class PDFTextDevice(PDFDevice):
|
||||||
needcharspace = True
|
needcharspace = True
|
||||||
return (x, y)
|
return (x, y)
|
||||||
|
|
||||||
def render_string_vertical(self, seq, matrix, (x,y),
|
def render_string_vertical(self, seq, matrix, (x,y),
|
||||||
font, fontsize, scaling, charspace, wordspace, rise, dxscale):
|
font, fontsize, scaling, charspace, wordspace, rise, dxscale):
|
||||||
needcharspace = False
|
needcharspace = False
|
||||||
for obj in seq:
|
for obj in seq:
|
||||||
|
@ -104,7 +104,7 @@ class PDFTextDevice(PDFDevice):
|
||||||
for cid in font.decode(obj):
|
for cid in font.decode(obj):
|
||||||
if needcharspace:
|
if needcharspace:
|
||||||
y += charspace
|
y += charspace
|
||||||
y += self.render_char(translate_matrix(matrix, (x,y)),
|
y += self.render_char(translate_matrix(matrix, (x,y)),
|
||||||
font, fontsize, scaling, rise, cid)
|
font, fontsize, scaling, rise, cid)
|
||||||
if cid == 32 and wordspace:
|
if cid == 32 and wordspace:
|
||||||
y += wordspace
|
y += wordspace
|
||||||
|
|
|
@ -260,7 +260,7 @@ class PDFDocument(object):
|
||||||
doc = PDFDocument(parser)
|
doc = PDFDocument(parser)
|
||||||
doc.initialize(password)
|
doc.initialize(password)
|
||||||
obj = doc.getobj(objid)
|
obj = doc.getobj(objid)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
debug = 0
|
debug = 0
|
||||||
|
@ -425,7 +425,7 @@ class PDFDocument(object):
|
||||||
raise PDFSyntaxError('Invalid object spec: offset=%r' % pos)
|
raise PDFSyntaxError('Invalid object spec: offset=%r' % pos)
|
||||||
(_,obj) = self._parser.nextobject()
|
(_,obj) = self._parser.nextobject()
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
# can raise PDFObjectNotFound
|
# can raise PDFObjectNotFound
|
||||||
def getobj(self, objid):
|
def getobj(self, objid):
|
||||||
assert objid != 0
|
assert objid != 0
|
||||||
|
|
|
@ -102,7 +102,7 @@ class Type1FontHeaderParser(PSStackParser):
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
return self._cid2unicode
|
return self._cid2unicode
|
||||||
|
|
||||||
def do_keyword(self, pos, token):
|
def do_keyword(self, pos, token):
|
||||||
if token is self.KEYWORD_PUT:
|
if token is self.KEYWORD_PUT:
|
||||||
((_,key),(_,value)) = self.pop(2)
|
((_,key),(_,value)) = self.pop(2)
|
||||||
|
@ -111,7 +111,7 @@ class Type1FontHeaderParser(PSStackParser):
|
||||||
self.add_results((key, literal_name(value)))
|
self.add_results((key, literal_name(value)))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
## CFFFont
|
## CFFFont
|
||||||
## (Format specified in Adobe Technical Note: #5176
|
## (Format specified in Adobe Technical Note: #5176
|
||||||
## "The Compact Font Format Specification")
|
## "The Compact Font Format Specification")
|
||||||
|
|
|
@ -125,7 +125,7 @@ class PDFGraphicState(object):
|
||||||
class PDFResourceManager(object):
|
class PDFResourceManager(object):
|
||||||
|
|
||||||
"""Repository of shared resources.
|
"""Repository of shared resources.
|
||||||
|
|
||||||
ResourceManager facilitates reuse of shared resources
|
ResourceManager facilitates reuse of shared resources
|
||||||
such as fonts and images so that large objects are not
|
such as fonts and images so that large objects are not
|
||||||
allocated multiple times.
|
allocated multiple times.
|
||||||
|
@ -725,7 +725,7 @@ class PDFPageInterpreter(object):
|
||||||
interpreter = self.dup()
|
interpreter = self.dup()
|
||||||
bbox = list_value(xobj['BBox'])
|
bbox = list_value(xobj['BBox'])
|
||||||
matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
|
matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
|
||||||
# According to PDF reference 1.7 section 4.9.1, XObjects in
|
# According to PDF reference 1.7 section 4.9.1, XObjects in
|
||||||
# earlier PDFs (prior to v1.2) use the page's Resources entry
|
# earlier PDFs (prior to v1.2) use the page's Resources entry
|
||||||
# instead of having their own Resources entry.
|
# instead of having their own Resources entry.
|
||||||
resources = dict_value(xobj.get('Resources')) or self.resources.copy()
|
resources = dict_value(xobj.get('Resources')) or self.resources.copy()
|
||||||
|
|
|
@ -40,7 +40,7 @@ class PDFPage(object):
|
||||||
|
|
||||||
def __init__(self, doc, pageid, attrs):
|
def __init__(self, doc, pageid, attrs):
|
||||||
"""Initialize a page object.
|
"""Initialize a page object.
|
||||||
|
|
||||||
doc: a PDFDocument object.
|
doc: a PDFDocument object.
|
||||||
pageid: any Python object that can uniquely identify the page.
|
pageid: any Python object that can uniquely identify the page.
|
||||||
attrs: a dictionary of page attributes.
|
attrs: a dictionary of page attributes.
|
||||||
|
|
|
@ -35,7 +35,7 @@ class PDFParser(PSStackParser):
|
||||||
parser.set_document(doc)
|
parser.set_document(doc)
|
||||||
parser.seek(offset)
|
parser.seek(offset)
|
||||||
parser.nextobject()
|
parser.nextobject()
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, fp):
|
def __init__(self, fp):
|
||||||
|
@ -57,10 +57,10 @@ class PDFParser(PSStackParser):
|
||||||
KEYWORD_STARTXREF = KWD('startxref')
|
KEYWORD_STARTXREF = KWD('startxref')
|
||||||
def do_keyword(self, pos, token):
|
def do_keyword(self, pos, token):
|
||||||
"""Handles PDF-related keywords."""
|
"""Handles PDF-related keywords."""
|
||||||
|
|
||||||
if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
|
if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
|
||||||
self.add_results(*self.pop(1))
|
self.add_results(*self.pop(1))
|
||||||
|
|
||||||
elif token is self.KEYWORD_ENDOBJ:
|
elif token is self.KEYWORD_ENDOBJ:
|
||||||
self.add_results(*self.pop(4))
|
self.add_results(*self.pop(4))
|
||||||
|
|
||||||
|
@ -125,7 +125,7 @@ class PDFParser(PSStackParser):
|
||||||
else:
|
else:
|
||||||
# others
|
# others
|
||||||
self.push((pos, token))
|
self.push((pos, token))
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -68,7 +68,7 @@ def resolve1(x, default=None):
|
||||||
|
|
||||||
def resolve_all(x, default=None):
|
def resolve_all(x, default=None):
|
||||||
"""Recursively resolves the given object and all the internals.
|
"""Recursively resolves the given object and all the internals.
|
||||||
|
|
||||||
Make sure there is no indirect reference within the nested object.
|
Make sure there is no indirect reference within the nested object.
|
||||||
This procedure might be slow.
|
This procedure might be slow.
|
||||||
"""
|
"""
|
||||||
|
@ -180,13 +180,13 @@ class PDFStream(PDFObject):
|
||||||
|
|
||||||
def __contains__(self, name):
|
def __contains__(self, name):
|
||||||
return name in self.attrs
|
return name in self.attrs
|
||||||
|
|
||||||
def __getitem__(self, name):
|
def __getitem__(self, name):
|
||||||
return self.attrs[name]
|
return self.attrs[name]
|
||||||
|
|
||||||
def get(self, name, default=None):
|
def get(self, name, default=None):
|
||||||
return self.attrs.get(name, default)
|
return self.attrs.get(name, default)
|
||||||
|
|
||||||
def get_any(self, names, default=None):
|
def get_any(self, names, default=None):
|
||||||
for name in names:
|
for name in names:
|
||||||
if name in self.attrs:
|
if name in self.attrs:
|
||||||
|
|
|
@ -32,7 +32,7 @@ class PSObject(object):
|
||||||
class PSLiteral(PSObject):
|
class PSLiteral(PSObject):
|
||||||
|
|
||||||
"""A class that represents a PostScript literal.
|
"""A class that represents a PostScript literal.
|
||||||
|
|
||||||
Postscript literals are used as identifiers, such as
|
Postscript literals are used as identifiers, such as
|
||||||
variable names, property names and dictionary keys.
|
variable names, property names and dictionary keys.
|
||||||
Literals are case sensitive and denoted by a preceding
|
Literals are case sensitive and denoted by a preceding
|
||||||
|
@ -55,11 +55,11 @@ class PSLiteral(PSObject):
|
||||||
class PSKeyword(PSObject):
|
class PSKeyword(PSObject):
|
||||||
|
|
||||||
"""A class that represents a PostScript keyword.
|
"""A class that represents a PostScript keyword.
|
||||||
|
|
||||||
PostScript keywords are a dozen of predefined words.
|
PostScript keywords are a dozen of predefined words.
|
||||||
Commands and directives in PostScript are expressed by keywords.
|
Commands and directives in PostScript are expressed by keywords.
|
||||||
They are also used to denote the content boundaries.
|
They are also used to denote the content boundaries.
|
||||||
|
|
||||||
Note: Do not create an instance of PSKeyword directly.
|
Note: Do not create an instance of PSKeyword directly.
|
||||||
Always use PSKeywordTable.intern().
|
Always use PSKeywordTable.intern().
|
||||||
"""
|
"""
|
||||||
|
@ -80,7 +80,7 @@ class PSSymbolTable(object):
|
||||||
|
|
||||||
Interned objects can be checked its identity with "is" operator.
|
Interned objects can be checked its identity with "is" operator.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, klass):
|
def __init__(self, klass):
|
||||||
self.dict = {}
|
self.dict = {}
|
||||||
self.klass = klass
|
self.klass = klass
|
||||||
|
@ -357,7 +357,7 @@ class PSBaseParser(object):
|
||||||
pass
|
pass
|
||||||
self._parse1 = self._parse_main
|
self._parse1 = self._parse_main
|
||||||
return j
|
return j
|
||||||
|
|
||||||
def _parse_float(self, s, i):
|
def _parse_float(self, s, i):
|
||||||
m = END_NUMBER.search(s, i)
|
m = END_NUMBER.search(s, i)
|
||||||
if not m:
|
if not m:
|
||||||
|
@ -493,17 +493,17 @@ class PSStackParser(PSBaseParser):
|
||||||
def push(self, *objs):
|
def push(self, *objs):
|
||||||
self.curstack.extend(objs)
|
self.curstack.extend(objs)
|
||||||
return
|
return
|
||||||
|
|
||||||
def pop(self, n):
|
def pop(self, n):
|
||||||
objs = self.curstack[-n:]
|
objs = self.curstack[-n:]
|
||||||
self.curstack[-n:] = []
|
self.curstack[-n:] = []
|
||||||
return objs
|
return objs
|
||||||
|
|
||||||
def popall(self):
|
def popall(self):
|
||||||
objs = self.curstack
|
objs = self.curstack
|
||||||
self.curstack = []
|
self.curstack = []
|
||||||
return objs
|
return objs
|
||||||
|
|
||||||
def add_results(self, *objs):
|
def add_results(self, *objs):
|
||||||
if 2 <= self.debug:
|
if 2 <= self.debug:
|
||||||
print >>sys.stderr, 'add_results: %r' % (objs,)
|
print >>sys.stderr, 'add_results: %r' % (objs,)
|
||||||
|
@ -516,7 +516,7 @@ class PSStackParser(PSBaseParser):
|
||||||
if 2 <= self.debug:
|
if 2 <= self.debug:
|
||||||
print >>sys.stderr, 'start_type: pos=%r, type=%r' % (pos, type)
|
print >>sys.stderr, 'start_type: pos=%r, type=%r' % (pos, type)
|
||||||
return
|
return
|
||||||
|
|
||||||
def end_type(self, type):
|
def end_type(self, type):
|
||||||
if self.curtype != type:
|
if self.curtype != type:
|
||||||
raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
|
raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
|
||||||
|
|
|
@ -1060,7 +1060,7 @@ class RijndaelEncryptor(object):
|
||||||
>>> RijndaelEncryptor(key, 128).encrypt(plaintext).encode('hex')
|
>>> RijndaelEncryptor(key, 128).encrypt(plaintext).encode('hex')
|
||||||
'd8f532538289ef7d06b506a4fd5be9c9'
|
'd8f532538289ef7d06b506a4fd5be9c9'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, key, keybits=256):
|
def __init__(self, key, keybits=256):
|
||||||
assert len(key) == KEYLENGTH(keybits)
|
assert len(key) == KEYLENGTH(keybits)
|
||||||
(self.rk, self.nrounds) = rijndaelSetupEncrypt(key, keybits)
|
(self.rk, self.nrounds) = rijndaelSetupEncrypt(key, keybits)
|
||||||
|
|
|
@ -255,7 +255,7 @@ class Plane(object):
|
||||||
for obj in objs:
|
for obj in objs:
|
||||||
self.add(obj)
|
self.add(obj)
|
||||||
return
|
return
|
||||||
|
|
||||||
# add(obj): place an object.
|
# add(obj): place an object.
|
||||||
def add(self, obj):
|
def add(self, obj):
|
||||||
for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
|
for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
|
||||||
|
|
4
setup.py
4
setup.py
|
@ -7,9 +7,9 @@ setup(
|
||||||
version=__version__,
|
version=__version__,
|
||||||
description='PDF parser and analyzer',
|
description='PDF parser and analyzer',
|
||||||
long_description='''PDFMiner is a tool for extracting information from PDF documents.
|
long_description='''PDFMiner is a tool for extracting information from PDF documents.
|
||||||
Unlike other PDF-related tools, it focuses entirely on getting
|
Unlike other PDF-related tools, it focuses entirely on getting
|
||||||
and analyzing text data. PDFMiner allows to obtain
|
and analyzing text data. PDFMiner allows to obtain
|
||||||
the exact location of texts in a page, as well as
|
the exact location of texts in a page, as well as
|
||||||
other information such as fonts or lines.
|
other information such as fonts or lines.
|
||||||
It includes a PDF converter that can transform PDF files
|
It includes a PDF converter that can transform PDF files
|
||||||
into other text formats (such as HTML). It has an extensible
|
into other text formats (such as HTML). It has an extensible
|
||||||
|
|
|
@ -50,7 +50,7 @@ class CMapConverter(object):
|
||||||
assert values[0] == 'CID'
|
assert values[0] == 'CID'
|
||||||
encs = values
|
encs = values
|
||||||
continue
|
continue
|
||||||
|
|
||||||
def put(dmap, code, cid, force=False):
|
def put(dmap, code, cid, force=False):
|
||||||
for b in code[:-1]:
|
for b in code[:-1]:
|
||||||
b = ord(b)
|
b = ord(b)
|
||||||
|
@ -64,7 +64,7 @@ class CMapConverter(object):
|
||||||
if force or ((b not in dmap) or dmap[b] == cid):
|
if force or ((b not in dmap) or dmap[b] == cid):
|
||||||
dmap[b] = cid
|
dmap[b] = cid
|
||||||
return
|
return
|
||||||
|
|
||||||
def add(unimap, enc, code):
|
def add(unimap, enc, code):
|
||||||
try:
|
try:
|
||||||
codec = self.enc2codec[enc]
|
codec = self.enc2codec[enc]
|
||||||
|
@ -78,20 +78,20 @@ class CMapConverter(object):
|
||||||
except UnicodeError:
|
except UnicodeError:
|
||||||
pass
|
pass
|
||||||
return
|
return
|
||||||
|
|
||||||
def pick(unimap):
|
def pick(unimap):
|
||||||
chars = unimap.items()
|
chars = unimap.items()
|
||||||
chars.sort(key=(lambda (c,n):(n,-ord(c))), reverse=True)
|
chars.sort(key=(lambda (c,n):(n,-ord(c))), reverse=True)
|
||||||
(c,_) = chars[0]
|
(c,_) = chars[0]
|
||||||
return c
|
return c
|
||||||
|
|
||||||
cid = int(values[0])
|
cid = int(values[0])
|
||||||
unimap_h = {}
|
unimap_h = {}
|
||||||
unimap_v = {}
|
unimap_v = {}
|
||||||
for (enc,value) in zip(encs, values):
|
for (enc,value) in zip(encs, values):
|
||||||
if enc == 'CID': continue
|
if enc == 'CID': continue
|
||||||
if value == '*': continue
|
if value == '*': continue
|
||||||
|
|
||||||
# hcodes, vcodes: encoded bytes for each writing mode.
|
# hcodes, vcodes: encoded bytes for each writing mode.
|
||||||
hcodes = []
|
hcodes = []
|
||||||
vcodes = []
|
vcodes = []
|
||||||
|
@ -121,7 +121,7 @@ class CMapConverter(object):
|
||||||
for code in hcodes:
|
for code in hcodes:
|
||||||
put(hmap, code, cid)
|
put(hmap, code, cid)
|
||||||
put(vmap, code, cid)
|
put(vmap, code, cid)
|
||||||
|
|
||||||
# Determine the "most popular" candidate.
|
# Determine the "most popular" candidate.
|
||||||
if unimap_h:
|
if unimap_h:
|
||||||
self.cid2unichr_h[cid] = pick(unimap_h)
|
self.cid2unichr_h[cid] = pick(unimap_h)
|
||||||
|
@ -137,7 +137,7 @@ class CMapConverter(object):
|
||||||
)
|
)
|
||||||
fp.write(pickle.dumps(data))
|
fp.write(pickle.dumps(data))
|
||||||
return
|
return
|
||||||
|
|
||||||
def dump_unicodemap(self, fp):
|
def dump_unicodemap(self, fp):
|
||||||
data = dict(
|
data = dict(
|
||||||
CID2UNICHR_H=self.cid2unichr_h,
|
CID2UNICHR_H=self.cid2unichr_h,
|
||||||
|
@ -151,7 +151,7 @@ def main(argv):
|
||||||
import getopt
|
import getopt
|
||||||
import gzip
|
import gzip
|
||||||
import os.path
|
import os.path
|
||||||
|
|
||||||
def usage():
|
def usage():
|
||||||
print 'usage: %s [-c enc=codec] output_dir regname [cid2code.txt ...]' % argv[0]
|
print 'usage: %s [-c enc=codec] output_dir regname [cid2code.txt ...]' % argv[0]
|
||||||
return 100
|
return 100
|
||||||
|
|
|
@ -25,7 +25,7 @@ def dumpxml(out, obj, codec=None):
|
||||||
if obj is None:
|
if obj is None:
|
||||||
out.write('<null />')
|
out.write('<null />')
|
||||||
return
|
return
|
||||||
|
|
||||||
if isinstance(obj, dict):
|
if isinstance(obj, dict):
|
||||||
out.write('<dict size="%d">\n' % len(obj))
|
out.write('<dict size="%d">\n' % len(obj))
|
||||||
for (k,v) in obj.iteritems():
|
for (k,v) in obj.iteritems():
|
||||||
|
@ -179,7 +179,7 @@ def extractembedded(outfp, fname, objids, pagenos, password='',
|
||||||
out.write(fileobj.get_data())
|
out.write(fileobj.get_data())
|
||||||
out.close()
|
out.close()
|
||||||
return
|
return
|
||||||
|
|
||||||
fp = file(fname, 'rb')
|
fp = file(fname, 'rb')
|
||||||
parser = PDFParser(fp)
|
parser = PDFParser(fp)
|
||||||
doc = PDFDocument(parser)
|
doc = PDFDocument(parser)
|
||||||
|
|
|
@ -14,7 +14,7 @@ This is an in-house mapping table for some Latin-1 characters
|
||||||
LATIN2ASCII = {
|
LATIN2ASCII = {
|
||||||
#0x00a0: '',
|
#0x00a0: '',
|
||||||
#0x00a7: '',
|
#0x00a7: '',
|
||||||
|
|
||||||
# iso-8859-1
|
# iso-8859-1
|
||||||
0x00c0: 'A`',
|
0x00c0: 'A`',
|
||||||
0x00c1: "A'",
|
0x00c1: "A'",
|
||||||
|
|
|
@ -159,7 +159,7 @@ class WebApp(object):
|
||||||
|
|
||||||
def convert(self):
|
def convert(self):
|
||||||
self.form = cgi.FieldStorage(fp=self.infp, environ=self.environ)
|
self.form = cgi.FieldStorage(fp=self.infp, environ=self.environ)
|
||||||
if (self.method != 'POST' or
|
if (self.method != 'POST' or
|
||||||
'c' not in self.form or
|
'c' not in self.form or
|
||||||
'f' not in self.form):
|
'f' not in self.form):
|
||||||
self.response_200()
|
self.response_200()
|
||||||
|
|
Loading…
Reference in New Issue