fix aggressive vertical writing detection (which ruins layout)
parent
109aedeb43
commit
cbd58121e3
|
@ -23,19 +23,19 @@ def csort(objs, key):
|
||||||
class LAParams(object):
|
class LAParams(object):
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
writing_mode='lr-tb',
|
|
||||||
line_overlap=0.5,
|
line_overlap=0.5,
|
||||||
char_margin=2.0,
|
char_margin=2.0,
|
||||||
line_margin=0.5,
|
line_margin=0.5,
|
||||||
word_margin=0.1,
|
word_margin=0.1,
|
||||||
boxes_flow=0,
|
boxes_flow=0,
|
||||||
|
detect_vertical=False,
|
||||||
all_texts=False):
|
all_texts=False):
|
||||||
self.writing_mode = writing_mode
|
|
||||||
self.line_overlap = line_overlap
|
self.line_overlap = line_overlap
|
||||||
self.char_margin = char_margin
|
self.char_margin = char_margin
|
||||||
self.line_margin = line_margin
|
self.line_margin = line_margin
|
||||||
self.word_margin = word_margin
|
self.word_margin = word_margin
|
||||||
self.boxes_flow = boxes_flow
|
self.boxes_flow = boxes_flow
|
||||||
|
self.detect_vertical = detect_vertical
|
||||||
self.all_texts = all_texts
|
self.all_texts = all_texts
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -480,7 +480,8 @@ class LTLayoutContainer(LTContainer):
|
||||||
# |<--->|
|
# |<--->|
|
||||||
# (char_margin)
|
# (char_margin)
|
||||||
k |= 1
|
k |= 1
|
||||||
if (obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and
|
if (laparams.detect_vertical and
|
||||||
|
obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and
|
||||||
min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1) and
|
min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1) and
|
||||||
obj0.vdistance(obj1) < max(obj0.height, obj1.height) * laparams.char_margin):
|
obj0.vdistance(obj1) < max(obj0.height, obj1.height) * laparams.char_margin):
|
||||||
# obj0 and obj1 are vertically aligned:
|
# obj0 and obj1 are vertically aligned:
|
||||||
|
|
|
@ -12,11 +12,11 @@ def main(argv):
|
||||||
import getopt
|
import getopt
|
||||||
def usage():
|
def usage():
|
||||||
print ('usage: %s [-d] [-p pagenos] [-m maxpages] [-P password] [-o output] '
|
print ('usage: %s [-d] [-p pagenos] [-m maxpages] [-P password] [-o output] '
|
||||||
'[-n] [-A] [-M char_margin] [-L line_margin] [-W word_margin] [-F boxes_flow] '
|
'[-n] [-A] [-V] [-M char_margin] [-L line_margin] [-W word_margin] [-F boxes_flow] '
|
||||||
'[-Y layout_mode] [-O output_dir] [-t text|html|xml|tag] [-c codec] [-s scale] file ...' % argv[0])
|
'[-Y layout_mode] [-O output_dir] [-t text|html|xml|tag] [-c codec] [-s scale] file ...' % argv[0])
|
||||||
return 100
|
return 100
|
||||||
try:
|
try:
|
||||||
(opts, args) = getopt.getopt(argv[1:], 'dp:m:P:o:nAM:L:W:F:Y:O:t:c:s:')
|
(opts, args) = getopt.getopt(argv[1:], 'dp:m:P:o:nAVM:L:W:F:Y:O:t:c:s:')
|
||||||
except getopt.GetoptError:
|
except getopt.GetoptError:
|
||||||
return usage()
|
return usage()
|
||||||
if not args: return usage()
|
if not args: return usage()
|
||||||
|
@ -44,6 +44,7 @@ def main(argv):
|
||||||
elif k == '-o': outfile = v
|
elif k == '-o': outfile = v
|
||||||
elif k == '-n': laparams = None
|
elif k == '-n': laparams = None
|
||||||
elif k == '-A': laparams.all_texts = True
|
elif k == '-A': laparams.all_texts = True
|
||||||
|
elif k == '-V': laparams.detect_vertical = True
|
||||||
elif k == '-M': laparams.char_margin = float(v)
|
elif k == '-M': laparams.char_margin = float(v)
|
||||||
elif k == '-L': laparams.line_margin = float(v)
|
elif k == '-L': laparams.line_margin = float(v)
|
||||||
elif k == '-W': laparams.word_margin = float(v)
|
elif k == '-W': laparams.word_margin = float(v)
|
||||||
|
|
Loading…
Reference in New Issue