fix aggressive vertical writing detection (which ruins layout)

pull/1/head
Yusuke Shinyama 2011-02-02 23:09:34 +09:00
parent 109aedeb43
commit cbd58121e3
2 changed files with 7 additions and 5 deletions

View File

@@ -23,19 +23,19 @@ def csort(objs, key):
class LAParams(object): class LAParams(object):
def __init__(self, def __init__(self,
writing_mode='lr-tb',
line_overlap=0.5, line_overlap=0.5,
char_margin=2.0, char_margin=2.0,
line_margin=0.5, line_margin=0.5,
word_margin=0.1, word_margin=0.1,
boxes_flow=0, boxes_flow=0,
detect_vertical=False,
all_texts=False): all_texts=False):
self.writing_mode = writing_mode
self.line_overlap = line_overlap self.line_overlap = line_overlap
self.char_margin = char_margin self.char_margin = char_margin
self.line_margin = line_margin self.line_margin = line_margin
self.word_margin = word_margin self.word_margin = word_margin
self.boxes_flow = boxes_flow self.boxes_flow = boxes_flow
self.detect_vertical = detect_vertical
self.all_texts = all_texts self.all_texts = all_texts
return return
@@ -480,7 +480,8 @@ class LTLayoutContainer(LTContainer):
# |<--->| # |<--->|
# (char_margin) # (char_margin)
k |= 1 k |= 1
if (obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and if (laparams.detect_vertical and
obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and
min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1) and min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1) and
obj0.vdistance(obj1) < max(obj0.height, obj1.height) * laparams.char_margin): obj0.vdistance(obj1) < max(obj0.height, obj1.height) * laparams.char_margin):
# obj0 and obj1 are vertically aligned: # obj0 and obj1 are vertically aligned:

View File

@@ -12,11 +12,11 @@ def main(argv):
import getopt import getopt
def usage(): def usage():
print ('usage: %s [-d] [-p pagenos] [-m maxpages] [-P password] [-o output] ' print ('usage: %s [-d] [-p pagenos] [-m maxpages] [-P password] [-o output] '
'[-n] [-A] [-M char_margin] [-L line_margin] [-W word_margin] [-F boxes_flow] ' '[-n] [-A] [-V] [-M char_margin] [-L line_margin] [-W word_margin] [-F boxes_flow] '
'[-Y layout_mode] [-O output_dir] [-t text|html|xml|tag] [-c codec] [-s scale] file ...' % argv[0]) '[-Y layout_mode] [-O output_dir] [-t text|html|xml|tag] [-c codec] [-s scale] file ...' % argv[0])
return 100 return 100
try: try:
(opts, args) = getopt.getopt(argv[1:], 'dp:m:P:o:nAM:L:W:F:Y:O:t:c:s:') (opts, args) = getopt.getopt(argv[1:], 'dp:m:P:o:nAVM:L:W:F:Y:O:t:c:s:')
except getopt.GetoptError: except getopt.GetoptError:
return usage() return usage()
if not args: return usage() if not args: return usage()
@@ -44,6 +44,7 @@ def main(argv):
elif k == '-o': outfile = v elif k == '-o': outfile = v
elif k == '-n': laparams = None elif k == '-n': laparams = None
elif k == '-A': laparams.all_texts = True elif k == '-A': laparams.all_texts = True
elif k == '-V': laparams.detect_vertical = True
elif k == '-M': laparams.char_margin = float(v) elif k == '-M': laparams.char_margin = float(v)
elif k == '-L': laparams.line_margin = float(v) elif k == '-L': laparams.line_margin = float(v)
elif k == '-W': laparams.word_margin = float(v) elif k == '-W': laparams.word_margin = float(v)