Removed duplicated and therefore unused code from pdf2txt.py (#341)

2019-12-09 22:04:05 +01:00 · 2019-12-09 22:04:05 +01:00 · 78f06225b6
parent 452f0b4ad0
commit 78f06225b6
1 changed file with 9 additions and 26 deletions
--- a/tools/pdf2txt.py
+++ b/tools/pdf2txt.py
@@ -10,6 +10,11 @@ from pdfminer.image import ImageWriter
 logging.basicConfig()
 # Pairs of (output-filename suffix, output type). When the output type is
 # still "text" and the output file is not "-", a filename ending in one of
 # these suffixes switches the output type to the paired value.
 OUTPUT_TYPES = ((".htm", "html"),
                (".html", "html"),
                (".xml", "xml"),
                (".tag", "tag"))
 def extract_text(files=[], outfile='-',
            no_laparams=False, all_texts=None, detect_vertical=None, # LAParams
@@ -39,15 +44,8 @@ def extract_text(files=[], outfile='-',
    else:
        laparams = None
    imagewriter = None
    if output_dir:
        imagewriter = ImageWriter(output_dir)
    if output_type == "text" and outfile != "-":
-        for override, alttype in (  (".htm", "html"),
+        for override, alttype in OUTPUT_TYPES:
                                    (".html", "html"),
                                    (".xml", "xml"),
                                    (".tag", "tag") ):
            if outfile.endswith(override):
                output_type = alttype
@@ -58,7 +56,6 @@ def extract_text(files=[], outfile='-',
    else:
        outfp = open(outfile, "wb")
    for fname in files:
        with open(fname, "rb") as fp:
            pdfminer.high_level.extract_text_to_fp(fp, **locals())
@@ -145,33 +142,19 @@ def main(args=None):
    if A.pagenos:
        A.page_numbers = set([int(x)-1 for x in A.pagenos.split(",")])
    imagewriter = None
    if A.output_dir:
        imagewriter = ImageWriter(A.output_dir)
    if six.PY2 and sys.stdin.encoding:
        A.password = A.password.decode(sys.stdin.encoding)
    if A.output_type == "text" and A.outfile != "-":
-        for override, alttype in (  (".htm",  "html"),
+        for override, alttype in OUTPUT_TYPES:
                                    (".html", "html"),
                                    (".xml",  "xml" ),
                                    (".tag",  "tag" ) ):
            if A.outfile.endswith(override):
                A.output_type = alttype
    if A.outfile == "-":
        outfp = sys.stdout
        if outfp.encoding is not None:
            # Why ignore outfp.encoding? :-/ stupid cathal?
            A.codec = 'utf-8'
    else:
        outfp = open(A.outfile, "wb")
    ## Test Code
    outfp = extract_text(**vars(A))
    outfp.close()
    return 0
-if __name__ == '__main__': sys.exit(main())
+if __name__ == '__main__':
    sys.exit(main())