From 6cbee25b3ed83aedd0581c2ca54b775c6cff22b0 Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Sat, 25 Jun 2022 23:11:10 +0200 Subject: [PATCH] Deprecate usage of `if __name__ == "__main__"` in scripts that are not documented. Also deprecate usage of scripts that are only there for testing purposes. (#756) * Deprecate usage of `if __name__ == "__main__"` in scripts that are not documented. Also deprecate usage of scripts that are only there for testing purposes. * Add CHANGELOG.md * Cleanup CHANGELOG.md * Cleanup CHANGELOG.md * Undo deleting conv_glyphlist.py and conv_afm.py and add a deprecation warning instead --- CHANGELOG.md | 22 ++++++++------- pdfminer/cmapdb.py | 9 +++++++ pdfminer/fontmetrics.py | 42 +++++++++++++++++++++++++++++ pdfminer/glyphlist.py | 26 ++++++++++++++++++ pdfminer/pdffont.py | 13 +++++++-- tests/test_highlevel_extracttext.py | 4 --- tools/conv_afm.py | 8 ++++++ tools/conv_cmap.py | 5 ++-- tools/conv_glyphlist.py | 7 +++++ tools/pdfdiff.py | 8 ++++++ tools/pdfstats.py | 19 ++++++++----- tools/prof.py | 9 +++++++ 12 files changed, 149 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e23fcf0..3b0c3ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Sphinx errors during building of documentation ([#760](https://github.com/pdfminer/pdfminer.six/pull/760)) +### Deprecated + +- Usage of `if __name__ == "__main__"` where it was only intended for testing purposes ([#756](https://github.com/pdfminer/pdfminer.six/pull/756)) + ## [20220524] ### Fixed @@ -86,7 +90,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
- Using `io.TextIOBase` as the file to write to ([#616](https://github.com/pdfminer/pdfminer.six/pull/616)) - Parsing \r\n after the escape character in a literal string ([#616](https://github.com/pdfminer/pdfminer.six/pull/616)) -## Removed +### Removed - Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522)) - Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525)) - Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523)) @@ -152,12 +156,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed - Group text lines if they are centered ([#384](https://github.com/pdfminer/pdfminer.six/pull/384)) -## [20200124] - 2020-01-24 +## [20200124] ### Security - Removed samples/issue-00152-embedded-pdf.pdf because it contains a possible security thread; a javascript enabled object ([#364](https://github.com/pdfminer/pdfminer.six/pull/364)) -## [20200121] - 2020-01-21 +## [20200121] ### Fixed - Interpret two's complement integer as unsigned integer ([#352](https://github.com/pdfminer/pdfminer.six/pull/352)) @@ -168,20 +172,20 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
### Removed - The command-line utility latin2ascii.py ([#360](https://github.com/pdfminer/pdfminer.six/pull/360)) -## [20200104] - 2019-01-04 +## [20200104] -## Removed +### Removed - Support for Python 2 ([#346](https://github.com/pdfminer/pdfminer.six/pull/346)) ### Changed - Enforce pep8 coding style by adding flake8 to CI ([#345](https://github.com/pdfminer/pdfminer.six/pull/345)) -## [20191110] - 2019-11-10 +## [20191110] ### Fixed - Wrong order of text box grouping introduced by PR #315 ([#335](https://github.com/pdfminer/pdfminer.six/pull/335)) -## [20191107] - 2019-11-07 +## [20191107] ### Deprecated - The argument `_py2_no_more_posargs` because Python2 is removed on January @@ -208,7 +212,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Removed - Files for external applications such as django, cgi and pyinstaller ([#320](https://github.com/pdfminer/pdfminer.six/pull/320)) -## [20191020] - 2019-10-20 +## [20191020] ### Deprecated - Support for Python 2 is dropped at January 1st, 2020 ([#307](https://github.com/pdfminer/pdfminer.six/pull/307)) @@ -230,7 +234,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed - All dependencies are managed in `setup.py` ([#306](https://github.com/pdfminer/pdfminer.six/pull/306) and [#219](https://github.com/pdfminer/pdfminer.six/pull/219)) -## [20181108] - 2018-11-08 +## [20181108] ### Changed - Speedup layout analysis ([#141](https://github.com/pdfminer/pdfminer.six/pull/141)) diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 704a9d3..01306ed 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -477,6 +477,15 @@ class CMapParser(PSStackParser[PSKeyword]): def main(argv: List[str]) -> None: + from warnings import warn + + warn( + "The function main() from cmapdb.py will be removed in 2023. It was probably " + "introduced for testing purposes a long time ago, and no longer relevant. 
" + "Feel free to create a GitHub issue if you disagree.", + DeprecationWarning, + ) + args = argv[1:] for fname in args: fp = open(fname, "rb") diff --git a/pdfminer/fontmetrics.py b/pdfminer/fontmetrics.py index 4fdf28b..2ed0f02 100644 --- a/pdfminer/fontmetrics.py +++ b/pdfminer/fontmetrics.py @@ -27,6 +27,48 @@ The following data were extracted from the AFM files: ### END Verbatim copy of the license part # flake8: noqa +from typing import Dict + + +def convert_font_metrics(path: str) -> None: + """Convert an AFM file to a mapping of font metrics. + + See below for the output. + """ + fonts = {} + with open(path, "r") as fileinput: + for line in fileinput.readlines(): + f = line.strip().split(" ") + if not f: + continue + k = f[0] + if k == "FontName": + fontname = f[1] + props = {"FontName": fontname, "Flags": 0} + chars: Dict[int, int] = {} + fonts[fontname] = (props, chars) + elif k == "C": + cid = int(f[1]) + if 0 <= cid and cid <= 255: + width = int(f[4]) + chars[cid] = width + elif k in ("CapHeight", "XHeight", "ItalicAngle", "Ascender", "Descender"): + k = {"Ascender": "Ascent", "Descender": "Descent"}.get(k, k) + props[k] = float(f[1]) + elif k in ("FontName", "FamilyName", "Weight"): + k = {"FamilyName": "FontFamily", "Weight": "FontWeight"}.get(k, k) + props[k] = f[1] + elif k == "IsFixedPitch": + if f[1].lower() == "true": + props["Flags"] = 64 + elif k == "FontBBox": + props[k] = tuple(map(float, f[1:5])) + print("# -*- python -*-") + print("FONT_METRICS = {") + for (fontname, (props, chars)) in fonts.items(): + print(" {!r}: {!r},".format(fontname, (props, chars))) + print("}") + FONT_METRICS = { "Courier": ( diff --git a/pdfminer/glyphlist.py b/pdfminer/glyphlist.py index 46c3235..9d4eb90 100644 --- a/pdfminer/glyphlist.py +++ b/pdfminer/glyphlist.py @@ -51,6 +51,32 @@ The following data was taken by # (1) glyph name # (2) Unicode scalar value + +def convert_glyphlist(path: str) -> None: + """Convert a glyph list into a python representation. 
+ + See output below. + """ + state = 0 + with open(path, "r") as fileinput: + for line in fileinput.readlines(): + line = line.strip() + if not line or line.startswith("#"): + if state == 1: + state = 2 + print("}\n") + print(line) + continue + if state == 0: + print("\nglyphname2unicode = {") + state = 1 + (name, x) = line.split(";") + codes = x.split(" ") + print( + " {!r}: u'{}',".format(name, "".join("\\u%s" % code for code in codes)) + ) + + glyphname2unicode = { "A": "\u0041", "AE": "\u00C6", diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 0b3e00a..0c33793 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -19,12 +19,12 @@ from typing import ( from . import settings from .cmapdb import CMap -from .cmapdb import IdentityUnicodeMap from .cmapdb import CMapBase from .cmapdb import CMapDB from .cmapdb import CMapParser -from .cmapdb import UnicodeMap from .cmapdb import FileUnicodeMap +from .cmapdb import IdentityUnicodeMap +from .cmapdb import UnicodeMap from .encodingdb import EncodingDB from .encodingdb import name2unicode from .fontmetrics import FONT_METRICS @@ -1187,6 +1187,15 @@ class PDFCIDFont(PDFFont): def main(argv: List[str]) -> None: + from warnings import warn + + warn( + "The function main() from pdffont.py will be removed in 2023. It was probably " + "introduced for testing purposes a long time ago, and no longer relevant. 
" + "Feel free to create a GitHub issue if you disagree.", + DeprecationWarning, + ) + for fname in argv[1:]: fp = open(fname, "rb") font = CFFFont(fname, fp) diff --git a/tests/test_highlevel_extracttext.py b/tests/test_highlevel_extracttext.py index b7733c0..842459d 100644 --- a/tests/test_highlevel_extracttext.py +++ b/tests/test_highlevel_extracttext.py @@ -168,7 +168,3 @@ class TestExtractPages(unittest.TestCase): elements = [element for element in page if isinstance(element, LTTextContainer)] self.assertEqual(len(elements), 1) self.assertEqual(elements[0].get_text(), "Text1\nText2\nText3\n") - - -if __name__ == "__main__": - unittest.main() diff --git a/tools/conv_afm.py b/tools/conv_afm.py index cb91baa..f666ee1 100755 --- a/tools/conv_afm.py +++ b/tools/conv_afm.py @@ -2,6 +2,7 @@ import sys import fileinput +from warnings import warn def main(argv): @@ -41,4 +42,11 @@ def main(argv): if __name__ == "__main__": + warn( + "The file conf_afm.py will be removed in 2023. Its functionality is" + "moved to pdfminer/font_metrics.py. 
Feel free to create a GitHub " + "issue if you disagree.", + DeprecationWarning, + ) + sys.exit(main(sys.argv)) # type: ignore[no-untyped-call] diff --git a/tools/conv_cmap.py b/tools/conv_cmap.py index e39c17e..e265ee4 100755 --- a/tools/conv_cmap.py +++ b/tools/conv_cmap.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -import sys -import pickle as pickle import codecs +import pickle as pickle +import sys class CMapConverter: @@ -19,6 +19,7 @@ class CMapConverter: def get_maps(self, enc): if enc.endswith("-H"): + (hmapenc, vmapenc) = (enc, None) elif enc == "H": (hmapenc, vmapenc) = ("H", "V") diff --git a/tools/conv_glyphlist.py b/tools/conv_glyphlist.py index 7a1183f..a572059 100755 --- a/tools/conv_glyphlist.py +++ b/tools/conv_glyphlist.py @@ -2,6 +2,7 @@ import sys import fileinput +from warnings import warn def main(argv): @@ -23,4 +24,10 @@ def main(argv): if __name__ == "__main__": + warn( + "The file conv_glyphlist.py will be removed in 2023. Its functionality " + "is moved to pdfminer/glyphlist.py. Feel free to create a GitHub issue " + "if you disagree.", + DeprecationWarning, + ) sys.exit(main(sys.argv)) # type: ignore[no-untyped-call] diff --git a/tools/pdfdiff.py b/tools/pdfdiff.py index 43156e8..57ae4ef 100644 --- a/tools/pdfdiff.py +++ b/tools/pdfdiff.py @@ -7,10 +7,18 @@ import io import logging import sys from typing import Any, Iterable, List, Optional +from warnings import warn import pdfminer.settings from pdfminer import high_level, layout +warn( + "The file pdfdiff.py will be removed in 2023. It was probably introduced for " + "testing purposes a long time ago, and no longer relevant. 
Feel free to create a " + "GitHub issue if you disagree.", + DeprecationWarning, +) + pdfminer.settings.STRICT = False diff --git a/tools/pdfstats.py b/tools/pdfstats.py index 1b57b80..4eae67f 100755 --- a/tools/pdfstats.py +++ b/tools/pdfstats.py @@ -4,18 +4,25 @@ # print some stats to stdout # Usage: pdfstats.py -import sys -import os import collections +import os +import sys from typing import Any, Counter, Iterator, List +from warnings import warn -from pdfminer.pdfparser import PDFParser -from pdfminer.pdfdocument import PDFDocument, PDFTextExtractionNotAllowed -from pdfminer.pdfpage import PDFPage -from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter from pdfminer.converter import PDFPageAggregator from pdfminer.layout import LAParams, LTContainer +from pdfminer.pdfdocument import PDFDocument, PDFTextExtractionNotAllowed +from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter +from pdfminer.pdfpage import PDFPage +from pdfminer.pdfparser import PDFParser +warn( + "The file pdfstats.py will be removed in 2023. It was probably introduced for " + "testing purposes a long time ago, and no longer relevant. Feel free to create a " + "GitHub issue if you disagree.", + DeprecationWarning, +) _, SCRIPT = os.path.split(__file__) diff --git a/tools/prof.py b/tools/prof.py index 0477fd9..b725e71 100644 --- a/tools/prof.py +++ b/tools/prof.py @@ -2,6 +2,15 @@ import sys from typing import List +from warnings import warn + +warn( + "The file prof.py will be removed in 2023. It was probably introduced for " + "testing purposes a long time ago, and no longer relevant. Feel free to create a " + "GitHub issue if you disagree.", + DeprecationWarning, +) + def prof_main(argv: List[str]) -> int: import hotshot.stats # type: ignore[import]