From 78f06225b6e42aa5878e51521c8f2a7b97efa7ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Haso=C5=88?= Date: Mon, 9 Dec 2019 22:04:05 +0100 Subject: [PATCH] Removed duplicated and therefore unused code from pdf2txt.py (#341) --- tools/pdf2txt.py | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py index d370a13..fc50593 100755 --- a/tools/pdf2txt.py +++ b/tools/pdf2txt.py @@ -10,6 +10,11 @@ from pdfminer.image import ImageWriter logging.basicConfig() +OUTPUT_TYPES = ((".htm", "html"), + (".html", "html"), + (".xml", "xml"), + (".tag", "tag")) + def extract_text(files=[], outfile='-', no_laparams=False, all_texts=None, detect_vertical=None, # LAParams @@ -39,15 +44,8 @@ def extract_text(files=[], outfile='-', else: laparams = None - imagewriter = None - if output_dir: - imagewriter = ImageWriter(output_dir) - if output_type == "text" and outfile != "-": - for override, alttype in ( (".htm", "html"), - (".html", "html"), - (".xml", "xml"), - (".tag", "tag") ): + for override, alttype in OUTPUT_TYPES: if outfile.endswith(override): output_type = alttype @@ -58,7 +56,6 @@ def extract_text(files=[], outfile='-', else: outfp = open(outfile, "wb") - for fname in files: with open(fname, "rb") as fp: pdfminer.high_level.extract_text_to_fp(fp, **locals()) @@ -145,33 +142,19 @@ def main(args=None): if A.pagenos: A.page_numbers = set([int(x)-1 for x in A.pagenos.split(",")]) - imagewriter = None - if A.output_dir: - imagewriter = ImageWriter(A.output_dir) - if six.PY2 and sys.stdin.encoding: A.password = A.password.decode(sys.stdin.encoding) if A.output_type == "text" and A.outfile != "-": - for override, alttype in ( (".htm", "html"), - (".html", "html"), - (".xml", "xml" ), - (".tag", "tag" ) ): + for override, alttype in OUTPUT_TYPES: if A.outfile.endswith(override): A.output_type = alttype - if A.outfile == "-": - outfp = sys.stdout - if outfp.encoding is not None: - # Why ignore outfp.encoding? :-/ stupid cathal? - A.codec = 'utf-8' - else: - outfp = open(A.outfile, "wb") - ## Test Code outfp = extract_text(**vars(A)) outfp.close() return 0 -if __name__ == '__main__': sys.exit(main()) +if __name__ == '__main__': + sys.exit(main())