Fix not being able to pass boxes flow as None to pdf2txt (#479)

* Fix not being able to pass boxes flow as None to pdf2txt

* Changes from code review

* Update CHANGELOG.md

Co-authored-by: Pieter Marsman <pietermarsman@gmail.com>
pull/522/head
Jake Stockwin 2020-10-10 14:17:04 +01:00 committed by GitHub
parent f03657e5c4
commit ef4787d8ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 4 deletions

View File

@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [Unreleased] ## [Unreleased]
### Added ### Added
- Option to disable boxes flow layout analysis when using pdf2txt ([#479](https://github.com/pdfminer/pdfminer.six/pull/479))
- Support for `pathlib.PurePath` in `open_filename` ([#491](https://github.com/pdfminer/pdfminer.six/issues/491)) - Support for `pathlib.PurePath` in `open_filename` ([#491](https://github.com/pdfminer/pdfminer.six/issues/491))
### Fixed ### Fixed

View File

@ -16,6 +16,15 @@ OUTPUT_TYPES = ((".htm", "html"),
(".tag", "tag")) (".tag", "tag"))
def float_or_disabled(x):
    """Parse a command-line value that is either a float or ``disabled``.

    Written to be used as an argparse ``type=`` callable.

    :param x: the raw argument string.
    :return: ``x`` unchanged when it equals ``disabled`` (ignoring case and
        surrounding whitespace), otherwise ``x`` converted to ``float``.
    :raises argparse.ArgumentTypeError: if ``x`` is neither ``disabled`` nor
        parseable as a float.
    """
    if x.lower().strip() == "disabled":
        return x
    try:
        # The parsed number has to be returned explicitly; falling off the
        # end of the function would hand ``None`` back to argparse.
        return float(x)
    except ValueError:
        raise argparse.ArgumentTypeError("invalid float value: {}".format(x))
def extract_text(files=[], outfile='-', def extract_text(files=[], outfile='-',
no_laparams=False, all_texts=None, detect_vertical=None, no_laparams=False, all_texts=None, detect_vertical=None,
word_margin=None, char_margin=None, line_margin=None, word_margin=None, char_margin=None, line_margin=None,
@ -120,14 +129,14 @@ def maketheparser():
"be part of the same paragraph. The margin is specified " "be part of the same paragraph. The margin is specified "
"relative to the height of a line.") "relative to the height of a line.")
la_params.add_argument( la_params.add_argument(
"--boxes-flow", "-F", type=float, default=0.5, "--boxes-flow", "-F", type=float_or_disabled, default=0.5,
help="Specifies how much a horizontal and vertical position of a " help="Specifies how much a horizontal and vertical position of a "
"text matters when determining the order of lines. The value " "text matters when determining the order of lines. The value "
"should be within the range of -1.0 (only horizontal position " "should be within the range of -1.0 (only horizontal position "
"matters) to +1.0 (only vertical position matters). You can also " "matters) to +1.0 (only vertical position matters). You can also "
"pass `None` to disable advanced layout analysis, and instead " "pass `disabled` to disable advanced layout analysis, and "
"return text based on the position of the bottom left corner of " "instead return text based on the position of the bottom left "
"the text box.") "corner of the text box.")
la_params.add_argument( la_params.add_argument(
"--all-texts", "-A", default=False, action="store_true", "--all-texts", "-A", default=False, action="store_true",
help="If layout analysis should be performed on text in figures.") help="If layout analysis should be performed on text in figures.")