boxes_flow patch by Daniel Gerber
parent
3da3adad9b
commit
a24c452ba2
|
@ -28,12 +28,14 @@ class LAParams(object):
|
|||
char_margin=2.0,
|
||||
line_margin=0.5,
|
||||
word_margin=0.1,
|
||||
boxes_flow=0,
|
||||
all_texts=False):
|
||||
self.writing_mode = writing_mode
|
||||
self.line_overlap = line_overlap
|
||||
self.char_margin = char_margin
|
||||
self.line_margin = line_margin
|
||||
self.word_margin = word_margin
|
||||
self.boxes_flow = boxes_flow
|
||||
self.all_texts = all_texts
|
||||
return
|
||||
|
||||
|
@ -399,14 +401,18 @@ class LTTextGroupLRTB(LTTextGroup):
|
|||
|
||||
def analyze(self, laparams):
    """Reorder the group's objects from top-left to bottom-right.

    The sort key combines each object's horizontal extent (x0+x1) and
    vertical extent (y0+y1), weighted by laparams.boxes_flow:

        (1 - boxes_flow) * (x0 + x1) - (1 + boxes_flow) * (y0 + y1)

    With boxes_flow == 0 this reduces to the unweighted key
    x0+x1-(y0+y1).

    Returns the result of LTTextGroup.analyze on the reordered group.
    """
    boxes_flow = laparams.boxes_flow
    # Sort exactly once, using the weighted key only; an additional pass
    # with the unweighted key would be redundant work and could influence
    # how ties in the weighted key are ordered.
    self._objs = csort(self._objs, key=lambda obj:
                       (1-boxes_flow)*(obj.x0+obj.x1) -
                       (1+boxes_flow)*(obj.y0+obj.y1))
    return LTTextGroup.analyze(self, laparams)
|
||||
|
||||
class LTTextGroupTBRL(LTTextGroup):
    """Text group whose objects are ordered from top-right to bottom-left."""

    def analyze(self, laparams):
        """Reorder the group's objects from top-right to bottom-left.

        The sort key combines each object's horizontal extent (x0+x1) and
        vertical extent (y0+y1), weighted by laparams.boxes_flow:

            -(1 + boxes_flow) * (x0 + x1) - (1 - boxes_flow) * (y0 + y1)

        With boxes_flow == 0 this reduces to the unweighted key
        -(x0+x1)-(y0+y1).

        Returns the result of LTTextGroup.analyze on the reordered group.
        """
        boxes_flow = laparams.boxes_flow
        # Sort exactly once, using the weighted key only; an additional
        # pass with the unweighted key would be redundant work and could
        # influence how ties in the weighted key are ordered.
        self._objs = csort(self._objs, key=lambda obj:
                           -(1+boxes_flow)*(obj.x0+obj.x1)
                           -(1-boxes_flow)*(obj.y0+obj.y1))
        return LTTextGroup.analyze(self, laparams)
|
||||
|
||||
|
||||
|
|
|
@ -12,11 +12,11 @@ def main(argv):
|
|||
import getopt
|
||||
def usage():
    """Print the command-line synopsis and return 100.

    The program name is taken from argv[0] of the enclosing main(argv).
    The synopsis lists every option letter accepted by the getopt spec,
    including -F boxes_flow.
    """
    # Adjacent string literals are concatenated verbatim, so every piece
    # except the last must end with a space to keep the options separated.
    print('usage: %s [-d] [-p pagenos] [-m maxpages] [-P password] [-o output] '
          '[-n] [-A] [-M char_margin] [-L line_margin] [-W word_margin] [-F boxes_flow] '
          '[-Y layout_mode] [-O output_dir] [-t text|html|xml|tag] [-c codec] [-s scale] file ...' % argv[0])
    return 100
|
||||
try:
|
||||
(opts, args) = getopt.getopt(argv[1:], 'dp:m:P:o:nAM:L:W:Y:O:t:c:s:')
|
||||
(opts, args) = getopt.getopt(argv[1:], 'dp:m:P:o:nAM:L:W:F:Y:O:t:c:s:')
|
||||
except getopt.GetoptError:
|
||||
return usage()
|
||||
if not args: return usage()
|
||||
|
@ -47,6 +47,7 @@ def main(argv):
|
|||
elif k == '-M': laparams.char_margin = float(v)
|
||||
elif k == '-L': laparams.line_margin = float(v)
|
||||
elif k == '-W': laparams.word_margin = float(v)
|
||||
elif k == '-F': laparams.boxes_flow = float(v)
|
||||
elif k == '-Y': layoutmode = v
|
||||
elif k == '-O': outdir = v
|
||||
elif k == '-t': outtype = v
|
||||
|
|
Loading…
Reference in New Issue