boxes_flow patch by Daniel Gerber

pull/1/head
Yusuke Shinyama 2010-12-26 17:26:39 +09:00
parent 3da3adad9b
commit a24c452ba2
2 changed files with 12 additions and 5 deletions

View File

@@ -28,12 +28,14 @@ class LAParams(object):
char_margin=2.0,
line_margin=0.5,
word_margin=0.1,
boxes_flow=0,
all_texts=False):
self.writing_mode = writing_mode
self.line_overlap = line_overlap
self.char_margin = char_margin
self.line_margin = line_margin
self.word_margin = word_margin
self.boxes_flow = boxes_flow
self.all_texts = all_texts
return
@@ -399,14 +401,18 @@ class LTTextGroupLRTB(LTTextGroup):
def analyze(self, laparams):
# reorder the objects from top-left to bottom-right.
self._objs = csort(self._objs, key=lambda obj: obj.x0+obj.x1-(obj.y0+obj.y1))
self._objs = csort(self._objs, key=lambda obj:
(1-laparams.boxes_flow)*(obj.x0+obj.x1) -
(1+laparams.boxes_flow)*(obj.y0+obj.y1))
return LTTextGroup.analyze(self, laparams)
class LTTextGroupTBRL(LTTextGroup):
def analyze(self, laparams):
# reorder the objects from top-right to bottom-left.
self._objs = csort(self._objs, key=lambda obj: -(obj.x0+obj.x1)-(obj.y0+obj.y1))
self._objs = csort(self._objs, key=lambda obj:
-(1+laparams.boxes_flow)*(obj.x0+obj.x1)
-(1-laparams.boxes_flow)*(obj.y0+obj.y1))
return LTTextGroup.analyze(self, laparams)

View File

@@ -12,11 +12,11 @@ def main(argv):
import getopt
def usage():
print ('usage: %s [-d] [-p pagenos] [-m maxpages] [-P password] [-o output] '
'[-n] [-A] [-M char_margin] [-L line_margin] [-W word_margin] [-Y layout_mode] '
'[-O output_dir] [-t text|html|xml|tag] [-c codec] [-s scale] file ...' % argv[0])
'[-n] [-A] [-M char_margin] [-L line_margin] [-W word_margin] [-F boxes_flow]'
'[-Y layout_mode] [-O output_dir] [-t text|html|xml|tag] [-c codec] [-s scale] file ...' % argv[0])
return 100
try:
(opts, args) = getopt.getopt(argv[1:], 'dp:m:P:o:nAM:L:W:Y:O:t:c:s:')
(opts, args) = getopt.getopt(argv[1:], 'dp:m:P:o:nAM:L:W:F:Y:O:t:c:s:')
except getopt.GetoptError:
return usage()
if not args: return usage()
@@ -47,6 +47,7 @@ def main(argv):
elif k == '-M': laparams.char_margin = float(v)
elif k == '-L': laparams.line_margin = float(v)
elif k == '-W': laparams.word_margin = float(v)
elif k == '-F': laparams.boxes_flow = float(v)
elif k == '-Y': layoutmode = v
elif k == '-O': outdir = v
elif k == '-t': outtype = v