diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..e7f2a84 --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +max-line-length = 88 +extend-ignore = + # See https://github.com/PyCQA/pycodestyle/issues/373 + E203, diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 78e110e..1159de8 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,22 +1,17 @@ **Pull request** -Thanks for improving pdfminer.six! Please include the following information to -help us discuss and merge this PR: - -- A description of why this PR is needed. What does it fix? What does it - improve? -- A summary of the things that this PR changes. -- Reference the issues that this PR fixes (use the fixes #(issue nr) syntax). - If this PR does not fix any issue, create the issue first and mention that - you are willing to work on it. +Please remove this paragraph and replace it with a description of your PR. +Also include links to the issues that it fixes. **How Has This Been Tested?** -Please describe the tests that you ran to verify your changes. Provide -instructions so we can reproduce. Include an example pdf if you have one. +Please replace this paragraph with a description of how this PR has been +tested. Include the necessary instructions and files such that others can +reproduce it. **Checklist** +- [ ] I have formatted my code with [black](https://github.com/psf/black). 
- [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added docstrings to newly created methods and classes diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index ba87249..41452b6 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -15,6 +15,15 @@ env: jobs: + check-code-formatting: + name: Check code formatting + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + - name: Check code formatting + uses: psf/black@stable + check-coding-style: name: Check coding style runs-on: ubuntu-latest diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dfaa8dd..6455696 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -31,7 +31,7 @@ Any contribution is appreciated! You might want to: * Include unit tests when possible. In case of bugs, this will help to prevent the same mistake in the future. In case of features, this will show that your code works correctly. * Code should work for Python 3.6+. -* Code should conform to PEP8 coding style. +* Code should be formatted with [black](https://github.com/psf/black). * New features should be well documented using docstrings. * Check spelling and grammar. * Don't forget to update the [CHANGELOG.md](CHANGELOG.md#[Unreleased]) @@ -68,3 +68,9 @@ Any contribution is appreciated! You might want to: ```sh nox -e py36 ``` + +4. After changing the code, run the black formatter. + + ```sh + black . 
+ ``` diff --git a/docs/source/conf.py b/docs/source/conf.py index ccb6ec1..14e27ad 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -16,14 +16,13 @@ from typing import List import pdfminer -sys.path.insert(0, os.path.join( - os.path.abspath(os.path.dirname(__file__)), '../../')) +sys.path.insert(0, os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../")) # -- Project information ----------------------------------------------------- -project = 'pdfminer.six' -copyright = '2019, Yusuke Shinyama, Philippe Guglielmetti & Pieter Marsman' -author = 'Yusuke Shinyama, Philippe Guglielmetti & Pieter Marsman' +project = "pdfminer.six" +copyright = "2019, Yusuke Shinyama, Philippe Guglielmetti & Pieter Marsman" +author = "Yusuke Shinyama, Philippe Guglielmetti & Pieter Marsman" # The full version, including alpha/beta/rc tags release = pdfminer.__version__ @@ -35,16 +34,16 @@ release = pdfminer.__version__ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinxarg.ext', - 'sphinx.ext.autodoc', - 'sphinx.ext.doctest', + "sphinxarg.ext", + "sphinx.ext.autodoc", + "sphinx.ext.doctest", ] # Root rst file -master_doc = 'index' +master_doc = "index" # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -57,9 +56,9 @@ exclude_patterns: List[str] = [] # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +html_theme = "alabaster" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] +html_static_path = ["_static"] diff --git a/noxfile.py b/noxfile.py index 03a38f4..a0cffe2 100644 --- a/noxfile.py +++ b/noxfile.py @@ -6,53 +6,30 @@ PYTHON_ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"] @nox.session def lint(session): - session.install('flake8') - session.run( - 'flake8', - 'pdfminer/', - 'tools/', - 'tests/', - '--count', - '--statistics' - ) + session.install("flake8") + session.run("flake8", "pdfminer/", "tools/", "tests/", "--count", "--statistics") @nox.session def types(session): - session.install('mypy') + session.install("mypy") session.run( - 'mypy', - '--install-types', - '--non-interactive', - '--show-error-codes', - '.' + "mypy", "--install-types", "--non-interactive", "--show-error-codes", "." ) @nox.session(python=PYTHON_ALL_VERSIONS) def tests(session): session.install("-e", ".[dev]") - session.run('pytest') + session.run("pytest") @nox.session def docs(session): session.install("-e", ".[docs]") session.run( - 'python', - '-m', - 'sphinx', - '-b', - 'html', - 'docs/source', - 'docs/build/html' + "python", "-m", "sphinx", "-b", "html", "docs/source", "docs/build/html" ) session.run( - 'python', - '-m', - 'sphinx', - '-b', - 'doctest', - 'docs/source', - 'docs/build/doctest' + "python", "-m", "sphinx", "-b", "doctest", "docs/source", "docs/build/doctest" ) diff --git a/pdfminer/__init__.py b/pdfminer/__init__.py index 4025c15..5df9646 100644 --- a/pdfminer/__init__.py +++ b/pdfminer/__init__.py @@ -1,4 +1,4 @@ -__version__ = '20211012' +__version__ = "20211012" -if __name__ == '__main__': +if __name__ == "__main__": print(__version__) diff --git a/pdfminer/_saslprep.py b/pdfminer/_saslprep.py index 32c68cb..1f24318 100644 --- a/pdfminer/_saslprep.py +++ b/pdfminer/_saslprep.py @@ -18,7 +18,7 @@ """An implementation of RFC4013 SASLprep.""" -__all__ = ['saslprep'] +__all__ = ["saslprep"] import stringprep from typing import Callable, Tuple @@ -37,7 +37,8 @@ _PROHIBITED: Tuple[Callable[[str], bool], 
...] = ( stringprep.in_table_c6, stringprep.in_table_c7, stringprep.in_table_c8, - stringprep.in_table_c9) + stringprep.in_table_c9, +) def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str: @@ -63,12 +64,12 @@ def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str: in_table_c12 = stringprep.in_table_c12 in_table_b1 = stringprep.in_table_b1 data = "".join( - ["\u0020" if in_table_c12(elt) else elt - for elt in data if not in_table_b1(elt)]) + ["\u0020" if in_table_c12(elt) else elt for elt in data if not in_table_b1(elt)] + ) # RFC3454 section 2, step 2 - Normalize # RFC4013 section 2.2 normalization - data = unicodedata.ucd_3_2_0.normalize('NFKC', data) + data = unicodedata.ucd_3_2_0.normalize("NFKC", data) in_table_d1 = stringprep.in_table_d1 if in_table_d1(data[0]): @@ -89,7 +90,6 @@ def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str: # RFC3454 section 2, step 3 and 4 - Prohibit and check bidi for char in data: if any(in_table(char) for in_table in prohibited): - raise ValueError( - "SASLprep: failed prohibited character check") + raise ValueError("SASLprep: failed prohibited character check") return data diff --git a/pdfminer/arcfour.py b/pdfminer/arcfour.py index 88e7e2d..a767667 100644 --- a/pdfminer/arcfour.py +++ b/pdfminer/arcfour.py @@ -9,7 +9,6 @@ from typing import Sequence class Arcfour: - def __init__(self, key: Sequence[int]) -> None: # because Py3 range is not indexable s = [i for i in range(256)] @@ -24,12 +23,12 @@ class Arcfour: def process(self, data: bytes) -> bytes: (i, j) = (self.i, self.j) s = self.s - r = b'' + r = b"" for c in iter(data): - i = (i+1) % 256 - j = (j+s[i]) % 256 + i = (i + 1) % 256 + j = (j + s[i]) % 256 (s[i], s[j]) = (s[j], s[i]) - k = s[(s[i]+s[j]) % 256] + k = s[(s[i] + s[j]) % 256] r += bytes((c ^ k,)) (self.i, self.j) = (i, j) return r diff --git a/pdfminer/ascii85.py b/pdfminer/ascii85.py index 7c7c757..dbe3d2a 100644 --- a/pdfminer/ascii85.py +++ 
b/pdfminer/ascii85.py @@ -21,30 +21,30 @@ def ascii85decode(data: bytes) -> bytes: """ n = b = 0 - out = b'' + out = b"" for i in iter(data): c = bytes((i,)) - if b'!' <= c and c <= b'u': + if b"!" <= c and c <= b"u": n += 1 - b = b*85+(ord(c)-33) + b = b * 85 + (ord(c) - 33) if n == 5: - out += struct.pack('>L', b) + out += struct.pack(">L", b) n = b = 0 - elif c == b'z': + elif c == b"z": assert n == 0, str(n) - out += b'\0\0\0\0' - elif c == b'~': + out += b"\0\0\0\0" + elif c == b"~": if n: - for _ in range(5-n): - b = b*85+84 - out += struct.pack('>L', b)[:n-1] + for _ in range(5 - n): + b = b * 85 + 84 + out += struct.pack(">L", b)[: n - 1] break return out # asciihexdecode(data) -hex_re = re.compile(br'([a-f\d]{2})', re.IGNORECASE) -trail_re = re.compile(br'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE) +hex_re = re.compile(rb"([a-f\d]{2})", re.IGNORECASE) +trail_re = re.compile(rb"^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$", re.IGNORECASE) def asciihexdecode(data: bytes) -> bytes: @@ -57,15 +57,16 @@ def asciihexdecode(data: bytes) -> bytes: the EOD marker after reading an odd number of hexadecimal digits, it will behave as if a 0 followed the last digit. 
""" + def decode(x: bytes) -> bytes: i = int(x, 16) return bytes((i,)) - out = b'' + out = b"" for x in hex_re.findall(data): out += decode(x) m = trail_re.search(data) if m: - out += decode(m.group(1)+b'0') + out += decode(m.group(1) + b"0") return out diff --git a/pdfminer/ccitt.py b/pdfminer/ccitt.py index f151ede..e64bf49 100644 --- a/pdfminer/ccitt.py +++ b/pdfminer/ccitt.py @@ -12,8 +12,18 @@ import array -from typing import (Any, Callable, Dict, Iterator, List, MutableSequence, - Optional, Sequence, Union, cast) +from typing import ( + Any, + Callable, + Dict, + Iterator, + List, + MutableSequence, + Optional, + Sequence, + Union, + cast, +) def get_bytes(data: bytes) -> Iterator[int]: @@ -46,7 +56,7 @@ class BitParser: if p[b] is None: p[b] = [None, None] p = p[b] - if bits[i] == '1': + if bits[i] == "1": b = 1 else: b = 0 @@ -74,252 +84,252 @@ class BitParser: class CCITTG4Parser(BitParser): MODE = [None, None] - BitParser.add(MODE, 0, '1') - BitParser.add(MODE, +1, '011') - BitParser.add(MODE, -1, '010') - BitParser.add(MODE, 'h', '001') - BitParser.add(MODE, 'p', '0001') - BitParser.add(MODE, +2, '000011') - BitParser.add(MODE, -2, '000010') - BitParser.add(MODE, +3, '0000011') - BitParser.add(MODE, -3, '0000010') - BitParser.add(MODE, 'u', '0000001111') - BitParser.add(MODE, 'x1', '0000001000') - BitParser.add(MODE, 'x2', '0000001001') - BitParser.add(MODE, 'x3', '0000001010') - BitParser.add(MODE, 'x4', '0000001011') - BitParser.add(MODE, 'x5', '0000001100') - BitParser.add(MODE, 'x6', '0000001101') - BitParser.add(MODE, 'x7', '0000001110') - BitParser.add(MODE, 'e', '000000000001000000000001') + BitParser.add(MODE, 0, "1") + BitParser.add(MODE, +1, "011") + BitParser.add(MODE, -1, "010") + BitParser.add(MODE, "h", "001") + BitParser.add(MODE, "p", "0001") + BitParser.add(MODE, +2, "000011") + BitParser.add(MODE, -2, "000010") + BitParser.add(MODE, +3, "0000011") + BitParser.add(MODE, -3, "0000010") + BitParser.add(MODE, "u", "0000001111") + 
BitParser.add(MODE, "x1", "0000001000") + BitParser.add(MODE, "x2", "0000001001") + BitParser.add(MODE, "x3", "0000001010") + BitParser.add(MODE, "x4", "0000001011") + BitParser.add(MODE, "x5", "0000001100") + BitParser.add(MODE, "x6", "0000001101") + BitParser.add(MODE, "x7", "0000001110") + BitParser.add(MODE, "e", "000000000001000000000001") WHITE = [None, None] - BitParser.add(WHITE, 0, '00110101') - BitParser.add(WHITE, 1, '000111') - BitParser.add(WHITE, 2, '0111') - BitParser.add(WHITE, 3, '1000') - BitParser.add(WHITE, 4, '1011') - BitParser.add(WHITE, 5, '1100') - BitParser.add(WHITE, 6, '1110') - BitParser.add(WHITE, 7, '1111') - BitParser.add(WHITE, 8, '10011') - BitParser.add(WHITE, 9, '10100') - BitParser.add(WHITE, 10, '00111') - BitParser.add(WHITE, 11, '01000') - BitParser.add(WHITE, 12, '001000') - BitParser.add(WHITE, 13, '000011') - BitParser.add(WHITE, 14, '110100') - BitParser.add(WHITE, 15, '110101') - BitParser.add(WHITE, 16, '101010') - BitParser.add(WHITE, 17, '101011') - BitParser.add(WHITE, 18, '0100111') - BitParser.add(WHITE, 19, '0001100') - BitParser.add(WHITE, 20, '0001000') - BitParser.add(WHITE, 21, '0010111') - BitParser.add(WHITE, 22, '0000011') - BitParser.add(WHITE, 23, '0000100') - BitParser.add(WHITE, 24, '0101000') - BitParser.add(WHITE, 25, '0101011') - BitParser.add(WHITE, 26, '0010011') - BitParser.add(WHITE, 27, '0100100') - BitParser.add(WHITE, 28, '0011000') - BitParser.add(WHITE, 29, '00000010') - BitParser.add(WHITE, 30, '00000011') - BitParser.add(WHITE, 31, '00011010') - BitParser.add(WHITE, 32, '00011011') - BitParser.add(WHITE, 33, '00010010') - BitParser.add(WHITE, 34, '00010011') - BitParser.add(WHITE, 35, '00010100') - BitParser.add(WHITE, 36, '00010101') - BitParser.add(WHITE, 37, '00010110') - BitParser.add(WHITE, 38, '00010111') - BitParser.add(WHITE, 39, '00101000') - BitParser.add(WHITE, 40, '00101001') - BitParser.add(WHITE, 41, '00101010') - BitParser.add(WHITE, 42, '00101011') - BitParser.add(WHITE, 
43, '00101100') - BitParser.add(WHITE, 44, '00101101') - BitParser.add(WHITE, 45, '00000100') - BitParser.add(WHITE, 46, '00000101') - BitParser.add(WHITE, 47, '00001010') - BitParser.add(WHITE, 48, '00001011') - BitParser.add(WHITE, 49, '01010010') - BitParser.add(WHITE, 50, '01010011') - BitParser.add(WHITE, 51, '01010100') - BitParser.add(WHITE, 52, '01010101') - BitParser.add(WHITE, 53, '00100100') - BitParser.add(WHITE, 54, '00100101') - BitParser.add(WHITE, 55, '01011000') - BitParser.add(WHITE, 56, '01011001') - BitParser.add(WHITE, 57, '01011010') - BitParser.add(WHITE, 58, '01011011') - BitParser.add(WHITE, 59, '01001010') - BitParser.add(WHITE, 60, '01001011') - BitParser.add(WHITE, 61, '00110010') - BitParser.add(WHITE, 62, '00110011') - BitParser.add(WHITE, 63, '00110100') - BitParser.add(WHITE, 64, '11011') - BitParser.add(WHITE, 128, '10010') - BitParser.add(WHITE, 192, '010111') - BitParser.add(WHITE, 256, '0110111') - BitParser.add(WHITE, 320, '00110110') - BitParser.add(WHITE, 384, '00110111') - BitParser.add(WHITE, 448, '01100100') - BitParser.add(WHITE, 512, '01100101') - BitParser.add(WHITE, 576, '01101000') - BitParser.add(WHITE, 640, '01100111') - BitParser.add(WHITE, 704, '011001100') - BitParser.add(WHITE, 768, '011001101') - BitParser.add(WHITE, 832, '011010010') - BitParser.add(WHITE, 896, '011010011') - BitParser.add(WHITE, 960, '011010100') - BitParser.add(WHITE, 1024, '011010101') - BitParser.add(WHITE, 1088, '011010110') - BitParser.add(WHITE, 1152, '011010111') - BitParser.add(WHITE, 1216, '011011000') - BitParser.add(WHITE, 1280, '011011001') - BitParser.add(WHITE, 1344, '011011010') - BitParser.add(WHITE, 1408, '011011011') - BitParser.add(WHITE, 1472, '010011000') - BitParser.add(WHITE, 1536, '010011001') - BitParser.add(WHITE, 1600, '010011010') - BitParser.add(WHITE, 1664, '011000') - BitParser.add(WHITE, 1728, '010011011') - BitParser.add(WHITE, 1792, '00000001000') - BitParser.add(WHITE, 1856, '00000001100') - 
BitParser.add(WHITE, 1920, '00000001101') - BitParser.add(WHITE, 1984, '000000010010') - BitParser.add(WHITE, 2048, '000000010011') - BitParser.add(WHITE, 2112, '000000010100') - BitParser.add(WHITE, 2176, '000000010101') - BitParser.add(WHITE, 2240, '000000010110') - BitParser.add(WHITE, 2304, '000000010111') - BitParser.add(WHITE, 2368, '000000011100') - BitParser.add(WHITE, 2432, '000000011101') - BitParser.add(WHITE, 2496, '000000011110') - BitParser.add(WHITE, 2560, '000000011111') + BitParser.add(WHITE, 0, "00110101") + BitParser.add(WHITE, 1, "000111") + BitParser.add(WHITE, 2, "0111") + BitParser.add(WHITE, 3, "1000") + BitParser.add(WHITE, 4, "1011") + BitParser.add(WHITE, 5, "1100") + BitParser.add(WHITE, 6, "1110") + BitParser.add(WHITE, 7, "1111") + BitParser.add(WHITE, 8, "10011") + BitParser.add(WHITE, 9, "10100") + BitParser.add(WHITE, 10, "00111") + BitParser.add(WHITE, 11, "01000") + BitParser.add(WHITE, 12, "001000") + BitParser.add(WHITE, 13, "000011") + BitParser.add(WHITE, 14, "110100") + BitParser.add(WHITE, 15, "110101") + BitParser.add(WHITE, 16, "101010") + BitParser.add(WHITE, 17, "101011") + BitParser.add(WHITE, 18, "0100111") + BitParser.add(WHITE, 19, "0001100") + BitParser.add(WHITE, 20, "0001000") + BitParser.add(WHITE, 21, "0010111") + BitParser.add(WHITE, 22, "0000011") + BitParser.add(WHITE, 23, "0000100") + BitParser.add(WHITE, 24, "0101000") + BitParser.add(WHITE, 25, "0101011") + BitParser.add(WHITE, 26, "0010011") + BitParser.add(WHITE, 27, "0100100") + BitParser.add(WHITE, 28, "0011000") + BitParser.add(WHITE, 29, "00000010") + BitParser.add(WHITE, 30, "00000011") + BitParser.add(WHITE, 31, "00011010") + BitParser.add(WHITE, 32, "00011011") + BitParser.add(WHITE, 33, "00010010") + BitParser.add(WHITE, 34, "00010011") + BitParser.add(WHITE, 35, "00010100") + BitParser.add(WHITE, 36, "00010101") + BitParser.add(WHITE, 37, "00010110") + BitParser.add(WHITE, 38, "00010111") + BitParser.add(WHITE, 39, "00101000") + 
BitParser.add(WHITE, 40, "00101001") + BitParser.add(WHITE, 41, "00101010") + BitParser.add(WHITE, 42, "00101011") + BitParser.add(WHITE, 43, "00101100") + BitParser.add(WHITE, 44, "00101101") + BitParser.add(WHITE, 45, "00000100") + BitParser.add(WHITE, 46, "00000101") + BitParser.add(WHITE, 47, "00001010") + BitParser.add(WHITE, 48, "00001011") + BitParser.add(WHITE, 49, "01010010") + BitParser.add(WHITE, 50, "01010011") + BitParser.add(WHITE, 51, "01010100") + BitParser.add(WHITE, 52, "01010101") + BitParser.add(WHITE, 53, "00100100") + BitParser.add(WHITE, 54, "00100101") + BitParser.add(WHITE, 55, "01011000") + BitParser.add(WHITE, 56, "01011001") + BitParser.add(WHITE, 57, "01011010") + BitParser.add(WHITE, 58, "01011011") + BitParser.add(WHITE, 59, "01001010") + BitParser.add(WHITE, 60, "01001011") + BitParser.add(WHITE, 61, "00110010") + BitParser.add(WHITE, 62, "00110011") + BitParser.add(WHITE, 63, "00110100") + BitParser.add(WHITE, 64, "11011") + BitParser.add(WHITE, 128, "10010") + BitParser.add(WHITE, 192, "010111") + BitParser.add(WHITE, 256, "0110111") + BitParser.add(WHITE, 320, "00110110") + BitParser.add(WHITE, 384, "00110111") + BitParser.add(WHITE, 448, "01100100") + BitParser.add(WHITE, 512, "01100101") + BitParser.add(WHITE, 576, "01101000") + BitParser.add(WHITE, 640, "01100111") + BitParser.add(WHITE, 704, "011001100") + BitParser.add(WHITE, 768, "011001101") + BitParser.add(WHITE, 832, "011010010") + BitParser.add(WHITE, 896, "011010011") + BitParser.add(WHITE, 960, "011010100") + BitParser.add(WHITE, 1024, "011010101") + BitParser.add(WHITE, 1088, "011010110") + BitParser.add(WHITE, 1152, "011010111") + BitParser.add(WHITE, 1216, "011011000") + BitParser.add(WHITE, 1280, "011011001") + BitParser.add(WHITE, 1344, "011011010") + BitParser.add(WHITE, 1408, "011011011") + BitParser.add(WHITE, 1472, "010011000") + BitParser.add(WHITE, 1536, "010011001") + BitParser.add(WHITE, 1600, "010011010") + BitParser.add(WHITE, 1664, "011000") + 
BitParser.add(WHITE, 1728, "010011011") + BitParser.add(WHITE, 1792, "00000001000") + BitParser.add(WHITE, 1856, "00000001100") + BitParser.add(WHITE, 1920, "00000001101") + BitParser.add(WHITE, 1984, "000000010010") + BitParser.add(WHITE, 2048, "000000010011") + BitParser.add(WHITE, 2112, "000000010100") + BitParser.add(WHITE, 2176, "000000010101") + BitParser.add(WHITE, 2240, "000000010110") + BitParser.add(WHITE, 2304, "000000010111") + BitParser.add(WHITE, 2368, "000000011100") + BitParser.add(WHITE, 2432, "000000011101") + BitParser.add(WHITE, 2496, "000000011110") + BitParser.add(WHITE, 2560, "000000011111") BLACK = [None, None] - BitParser.add(BLACK, 0, '0000110111') - BitParser.add(BLACK, 1, '010') - BitParser.add(BLACK, 2, '11') - BitParser.add(BLACK, 3, '10') - BitParser.add(BLACK, 4, '011') - BitParser.add(BLACK, 5, '0011') - BitParser.add(BLACK, 6, '0010') - BitParser.add(BLACK, 7, '00011') - BitParser.add(BLACK, 8, '000101') - BitParser.add(BLACK, 9, '000100') - BitParser.add(BLACK, 10, '0000100') - BitParser.add(BLACK, 11, '0000101') - BitParser.add(BLACK, 12, '0000111') - BitParser.add(BLACK, 13, '00000100') - BitParser.add(BLACK, 14, '00000111') - BitParser.add(BLACK, 15, '000011000') - BitParser.add(BLACK, 16, '0000010111') - BitParser.add(BLACK, 17, '0000011000') - BitParser.add(BLACK, 18, '0000001000') - BitParser.add(BLACK, 19, '00001100111') - BitParser.add(BLACK, 20, '00001101000') - BitParser.add(BLACK, 21, '00001101100') - BitParser.add(BLACK, 22, '00000110111') - BitParser.add(BLACK, 23, '00000101000') - BitParser.add(BLACK, 24, '00000010111') - BitParser.add(BLACK, 25, '00000011000') - BitParser.add(BLACK, 26, '000011001010') - BitParser.add(BLACK, 27, '000011001011') - BitParser.add(BLACK, 28, '000011001100') - BitParser.add(BLACK, 29, '000011001101') - BitParser.add(BLACK, 30, '000001101000') - BitParser.add(BLACK, 31, '000001101001') - BitParser.add(BLACK, 32, '000001101010') - BitParser.add(BLACK, 33, '000001101011') - 
BitParser.add(BLACK, 34, '000011010010') - BitParser.add(BLACK, 35, '000011010011') - BitParser.add(BLACK, 36, '000011010100') - BitParser.add(BLACK, 37, '000011010101') - BitParser.add(BLACK, 38, '000011010110') - BitParser.add(BLACK, 39, '000011010111') - BitParser.add(BLACK, 40, '000001101100') - BitParser.add(BLACK, 41, '000001101101') - BitParser.add(BLACK, 42, '000011011010') - BitParser.add(BLACK, 43, '000011011011') - BitParser.add(BLACK, 44, '000001010100') - BitParser.add(BLACK, 45, '000001010101') - BitParser.add(BLACK, 46, '000001010110') - BitParser.add(BLACK, 47, '000001010111') - BitParser.add(BLACK, 48, '000001100100') - BitParser.add(BLACK, 49, '000001100101') - BitParser.add(BLACK, 50, '000001010010') - BitParser.add(BLACK, 51, '000001010011') - BitParser.add(BLACK, 52, '000000100100') - BitParser.add(BLACK, 53, '000000110111') - BitParser.add(BLACK, 54, '000000111000') - BitParser.add(BLACK, 55, '000000100111') - BitParser.add(BLACK, 56, '000000101000') - BitParser.add(BLACK, 57, '000001011000') - BitParser.add(BLACK, 58, '000001011001') - BitParser.add(BLACK, 59, '000000101011') - BitParser.add(BLACK, 60, '000000101100') - BitParser.add(BLACK, 61, '000001011010') - BitParser.add(BLACK, 62, '000001100110') - BitParser.add(BLACK, 63, '000001100111') - BitParser.add(BLACK, 64, '0000001111') - BitParser.add(BLACK, 128, '000011001000') - BitParser.add(BLACK, 192, '000011001001') - BitParser.add(BLACK, 256, '000001011011') - BitParser.add(BLACK, 320, '000000110011') - BitParser.add(BLACK, 384, '000000110100') - BitParser.add(BLACK, 448, '000000110101') - BitParser.add(BLACK, 512, '0000001101100') - BitParser.add(BLACK, 576, '0000001101101') - BitParser.add(BLACK, 640, '0000001001010') - BitParser.add(BLACK, 704, '0000001001011') - BitParser.add(BLACK, 768, '0000001001100') - BitParser.add(BLACK, 832, '0000001001101') - BitParser.add(BLACK, 896, '0000001110010') - BitParser.add(BLACK, 960, '0000001110011') - BitParser.add(BLACK, 1024, '0000001110100') 
- BitParser.add(BLACK, 1088, '0000001110101') - BitParser.add(BLACK, 1152, '0000001110110') - BitParser.add(BLACK, 1216, '0000001110111') - BitParser.add(BLACK, 1280, '0000001010010') - BitParser.add(BLACK, 1344, '0000001010011') - BitParser.add(BLACK, 1408, '0000001010100') - BitParser.add(BLACK, 1472, '0000001010101') - BitParser.add(BLACK, 1536, '0000001011010') - BitParser.add(BLACK, 1600, '0000001011011') - BitParser.add(BLACK, 1664, '0000001100100') - BitParser.add(BLACK, 1728, '0000001100101') - BitParser.add(BLACK, 1792, '00000001000') - BitParser.add(BLACK, 1856, '00000001100') - BitParser.add(BLACK, 1920, '00000001101') - BitParser.add(BLACK, 1984, '000000010010') - BitParser.add(BLACK, 2048, '000000010011') - BitParser.add(BLACK, 2112, '000000010100') - BitParser.add(BLACK, 2176, '000000010101') - BitParser.add(BLACK, 2240, '000000010110') - BitParser.add(BLACK, 2304, '000000010111') - BitParser.add(BLACK, 2368, '000000011100') - BitParser.add(BLACK, 2432, '000000011101') - BitParser.add(BLACK, 2496, '000000011110') - BitParser.add(BLACK, 2560, '000000011111') + BitParser.add(BLACK, 0, "0000110111") + BitParser.add(BLACK, 1, "010") + BitParser.add(BLACK, 2, "11") + BitParser.add(BLACK, 3, "10") + BitParser.add(BLACK, 4, "011") + BitParser.add(BLACK, 5, "0011") + BitParser.add(BLACK, 6, "0010") + BitParser.add(BLACK, 7, "00011") + BitParser.add(BLACK, 8, "000101") + BitParser.add(BLACK, 9, "000100") + BitParser.add(BLACK, 10, "0000100") + BitParser.add(BLACK, 11, "0000101") + BitParser.add(BLACK, 12, "0000111") + BitParser.add(BLACK, 13, "00000100") + BitParser.add(BLACK, 14, "00000111") + BitParser.add(BLACK, 15, "000011000") + BitParser.add(BLACK, 16, "0000010111") + BitParser.add(BLACK, 17, "0000011000") + BitParser.add(BLACK, 18, "0000001000") + BitParser.add(BLACK, 19, "00001100111") + BitParser.add(BLACK, 20, "00001101000") + BitParser.add(BLACK, 21, "00001101100") + BitParser.add(BLACK, 22, "00000110111") + BitParser.add(BLACK, 23, "00000101000") + 
BitParser.add(BLACK, 24, "00000010111") + BitParser.add(BLACK, 25, "00000011000") + BitParser.add(BLACK, 26, "000011001010") + BitParser.add(BLACK, 27, "000011001011") + BitParser.add(BLACK, 28, "000011001100") + BitParser.add(BLACK, 29, "000011001101") + BitParser.add(BLACK, 30, "000001101000") + BitParser.add(BLACK, 31, "000001101001") + BitParser.add(BLACK, 32, "000001101010") + BitParser.add(BLACK, 33, "000001101011") + BitParser.add(BLACK, 34, "000011010010") + BitParser.add(BLACK, 35, "000011010011") + BitParser.add(BLACK, 36, "000011010100") + BitParser.add(BLACK, 37, "000011010101") + BitParser.add(BLACK, 38, "000011010110") + BitParser.add(BLACK, 39, "000011010111") + BitParser.add(BLACK, 40, "000001101100") + BitParser.add(BLACK, 41, "000001101101") + BitParser.add(BLACK, 42, "000011011010") + BitParser.add(BLACK, 43, "000011011011") + BitParser.add(BLACK, 44, "000001010100") + BitParser.add(BLACK, 45, "000001010101") + BitParser.add(BLACK, 46, "000001010110") + BitParser.add(BLACK, 47, "000001010111") + BitParser.add(BLACK, 48, "000001100100") + BitParser.add(BLACK, 49, "000001100101") + BitParser.add(BLACK, 50, "000001010010") + BitParser.add(BLACK, 51, "000001010011") + BitParser.add(BLACK, 52, "000000100100") + BitParser.add(BLACK, 53, "000000110111") + BitParser.add(BLACK, 54, "000000111000") + BitParser.add(BLACK, 55, "000000100111") + BitParser.add(BLACK, 56, "000000101000") + BitParser.add(BLACK, 57, "000001011000") + BitParser.add(BLACK, 58, "000001011001") + BitParser.add(BLACK, 59, "000000101011") + BitParser.add(BLACK, 60, "000000101100") + BitParser.add(BLACK, 61, "000001011010") + BitParser.add(BLACK, 62, "000001100110") + BitParser.add(BLACK, 63, "000001100111") + BitParser.add(BLACK, 64, "0000001111") + BitParser.add(BLACK, 128, "000011001000") + BitParser.add(BLACK, 192, "000011001001") + BitParser.add(BLACK, 256, "000001011011") + BitParser.add(BLACK, 320, "000000110011") + BitParser.add(BLACK, 384, "000000110100") + BitParser.add(BLACK, 
448, "000000110101") + BitParser.add(BLACK, 512, "0000001101100") + BitParser.add(BLACK, 576, "0000001101101") + BitParser.add(BLACK, 640, "0000001001010") + BitParser.add(BLACK, 704, "0000001001011") + BitParser.add(BLACK, 768, "0000001001100") + BitParser.add(BLACK, 832, "0000001001101") + BitParser.add(BLACK, 896, "0000001110010") + BitParser.add(BLACK, 960, "0000001110011") + BitParser.add(BLACK, 1024, "0000001110100") + BitParser.add(BLACK, 1088, "0000001110101") + BitParser.add(BLACK, 1152, "0000001110110") + BitParser.add(BLACK, 1216, "0000001110111") + BitParser.add(BLACK, 1280, "0000001010010") + BitParser.add(BLACK, 1344, "0000001010011") + BitParser.add(BLACK, 1408, "0000001010100") + BitParser.add(BLACK, 1472, "0000001010101") + BitParser.add(BLACK, 1536, "0000001011010") + BitParser.add(BLACK, 1600, "0000001011011") + BitParser.add(BLACK, 1664, "0000001100100") + BitParser.add(BLACK, 1728, "0000001100101") + BitParser.add(BLACK, 1792, "00000001000") + BitParser.add(BLACK, 1856, "00000001100") + BitParser.add(BLACK, 1920, "00000001101") + BitParser.add(BLACK, 1984, "000000010010") + BitParser.add(BLACK, 2048, "000000010011") + BitParser.add(BLACK, 2112, "000000010100") + BitParser.add(BLACK, 2176, "000000010101") + BitParser.add(BLACK, 2240, "000000010110") + BitParser.add(BLACK, 2304, "000000010111") + BitParser.add(BLACK, 2368, "000000011100") + BitParser.add(BLACK, 2432, "000000011101") + BitParser.add(BLACK, 2496, "000000011110") + BitParser.add(BLACK, 2560, "000000011111") UNCOMPRESSED = [None, None] - BitParser.add(UNCOMPRESSED, '1', '1') - BitParser.add(UNCOMPRESSED, '01', '01') - BitParser.add(UNCOMPRESSED, '001', '001') - BitParser.add(UNCOMPRESSED, '0001', '0001') - BitParser.add(UNCOMPRESSED, '00001', '00001') - BitParser.add(UNCOMPRESSED, '00000', '000001') - BitParser.add(UNCOMPRESSED, 'T00', '00000011') - BitParser.add(UNCOMPRESSED, 'T10', '00000010') - BitParser.add(UNCOMPRESSED, 'T000', '000000011') - BitParser.add(UNCOMPRESSED, 'T100', 
'000000010') - BitParser.add(UNCOMPRESSED, 'T0000', '0000000011') - BitParser.add(UNCOMPRESSED, 'T1000', '0000000010') - BitParser.add(UNCOMPRESSED, 'T00000', '00000000011') - BitParser.add(UNCOMPRESSED, 'T10000', '00000000010') + BitParser.add(UNCOMPRESSED, "1", "1") + BitParser.add(UNCOMPRESSED, "01", "01") + BitParser.add(UNCOMPRESSED, "001", "001") + BitParser.add(UNCOMPRESSED, "0001", "0001") + BitParser.add(UNCOMPRESSED, "00001", "00001") + BitParser.add(UNCOMPRESSED, "00000", "000001") + BitParser.add(UNCOMPRESSED, "T00", "00000011") + BitParser.add(UNCOMPRESSED, "T10", "00000010") + BitParser.add(UNCOMPRESSED, "T000", "000000011") + BitParser.add(UNCOMPRESSED, "T100", "000000010") + BitParser.add(UNCOMPRESSED, "T0000", "0000000011") + BitParser.add(UNCOMPRESSED, "T1000", "0000000010") + BitParser.add(UNCOMPRESSED, "T00000", "00000000011") + BitParser.add(UNCOMPRESSED, "T10000", "00000000010") class EOFB(Exception): pass @@ -352,21 +362,21 @@ class CCITTG4Parser(BitParser): return def _parse_mode(self, mode: object) -> BitParserState: - if mode == 'p': + if mode == "p": self._do_pass() self._flush_line() return self.MODE - elif mode == 'h': + elif mode == "h": self._n1 = 0 self._accept = self._parse_horiz1 if self._color: return self.WHITE else: return self.BLACK - elif mode == 'u': + elif mode == "u": self._accept = self._parse_uncompressed return self.UNCOMPRESSED - elif mode == 'e': + elif mode == "e": raise self.EOFB elif isinstance(mode, int): self._do_vertical(mode) @@ -381,7 +391,7 @@ class CCITTG4Parser(BitParser): self._n1 += n if n < 64: self._n2 = 0 - self._color = 1-self._color + self._color = 1 - self._color self._accept = self._parse_horiz2 if self._color: return self.WHITE @@ -393,7 +403,7 @@ class CCITTG4Parser(BitParser): raise self.InvalidData self._n2 += n if n < 64: - self._color = 1-self._color + self._color = 1 - self._color self._accept = self._parse_mode self._do_horizontal(self._n1, self._n2) self._flush_line() @@ -406,7 +416,7 @@ 
class CCITTG4Parser(BitParser): def _parse_uncompressed(self, bits: Optional[str]) -> BitParserState: if not bits: raise self.InvalidData - if bits.startswith('T'): + if bits.startswith("T"): self._accept = self._parse_mode self._color = int(bits[1]) self._do_uncompressed(bits[2:]) @@ -416,33 +426,37 @@ class CCITTG4Parser(BitParser): return self.UNCOMPRESSED def _get_bits(self) -> str: - return ''.join(str(b) for b in self._curline[:self._curpos]) + return "".join(str(b) for b in self._curline[: self._curpos]) def _get_refline(self, i: int) -> str: if i < 0: - return '[]'+''.join(str(b) for b in self._refline) + return "[]" + "".join(str(b) for b in self._refline) elif len(self._refline) <= i: - return ''.join(str(b) for b in self._refline)+'[]' + return "".join(str(b) for b in self._refline) + "[]" else: - return (''.join(str(b) for b in self._refline[:i]) + - '['+str(self._refline[i])+']' + - ''.join(str(b) for b in self._refline[i+1:])) + return ( + "".join(str(b) for b in self._refline[:i]) + + "[" + + str(self._refline[i]) + + "]" + + "".join(str(b) for b in self._refline[i + 1 :]) + ) def reset(self) -> None: self._y = 0 - self._curline = array.array('b', [1]*self.width) + self._curline = array.array("b", [1] * self.width) self._reset_line() self._accept = self._parse_mode self._state = self.MODE return def output_line(self, y: int, bits: Sequence[int]) -> None: - print(y, ''.join(str(b) for b in bits)) + print(y, "".join(str(b) for b in bits)) return def _reset_line(self) -> None: self._refline = self._curline - self._curline = array.array('b', [1]*self.width) + self._curline = array.array("b", [1] * self.width) self._curpos = -1 self._color = 1 return @@ -457,15 +471,17 @@ class CCITTG4Parser(BitParser): return def _do_vertical(self, dx: int) -> None: - x1 = self._curpos+1 + x1 = self._curpos + 1 while 1: if x1 == 0: - if (self._color == 1 and self._refline[x1] != self._color): + if self._color == 1 and self._refline[x1] != self._color: break elif x1 == 
len(self._refline): break - elif (self._refline[x1-1] == self._color and - self._refline[x1] != self._color): + elif ( + self._refline[x1 - 1] == self._color + and self._refline[x1] != self._color + ): break x1 += 1 x1 += dx @@ -478,29 +494,33 @@ class CCITTG4Parser(BitParser): for x in range(x0, x1): self._curline[x] = self._color self._curpos = x1 - self._color = 1-self._color + self._color = 1 - self._color return def _do_pass(self) -> None: - x1 = self._curpos+1 + x1 = self._curpos + 1 while 1: if x1 == 0: - if (self._color == 1 and self._refline[x1] != self._color): + if self._color == 1 and self._refline[x1] != self._color: break elif x1 == len(self._refline): break - elif (self._refline[x1-1] == self._color and - self._refline[x1] != self._color): + elif ( + self._refline[x1 - 1] == self._color + and self._refline[x1] != self._color + ): break x1 += 1 while 1: if x1 == 0: - if (self._color == 0 and self._refline[x1] == self._color): + if self._color == 0 and self._refline[x1] == self._color: break elif x1 == len(self._refline): break - elif (self._refline[x1-1] != self._color and - self._refline[x1] == self._color): + elif ( + self._refline[x1 - 1] != self._color + and self._refline[x1] == self._color + ): break x1 += 1 for x in range(self._curpos, x1): @@ -520,7 +540,7 @@ class CCITTG4Parser(BitParser): for _ in range(n2): if len(self._curline) <= x: break - self._curline[x] = 1-self._color + self._curline[x] = 1 - self._color x += 1 self._curpos = x return @@ -534,34 +554,34 @@ class CCITTG4Parser(BitParser): class CCITTFaxDecoder(CCITTG4Parser): - - def __init__(self, width: int, bytealign: bool = False, - reversed: bool = False) -> None: + def __init__( + self, width: int, bytealign: bool = False, reversed: bool = False + ) -> None: CCITTG4Parser.__init__(self, width, bytealign=bytealign) self.reversed = reversed - self._buf = b'' + self._buf = b"" return def close(self) -> bytes: return self._buf def output_line(self, y: int, bits: Sequence[int]) -> 
None: - arr = array.array('B', [0]*((len(bits)+7)//8)) + arr = array.array("B", [0] * ((len(bits) + 7) // 8)) if self.reversed: - bits = [1-b for b in bits] + bits = [1 - b for b in bits] for (i, b) in enumerate(bits): if b: - arr[i//8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8] + arr[i // 8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8] self._buf += arr.tobytes() return def ccittfaxdecode(data: bytes, params: Dict[str, object]) -> bytes: - K = params.get('K') + K = params.get("K") if K == -1: - cols = cast(int, params.get('Columns')) - bytealign = cast(bool, params.get('EncodedByteAlign')) - reversed = cast(bool, params.get('BlackIs1')) + cols = cast(int, params.get("Columns")) + bytealign = cast(bool, params.get("EncodedByteAlign")) + reversed = cast(bool, params.get("BlackIs1")) parser = CCITTFaxDecoder(cols, bytealign=bytealign, reversed=reversed) else: raise ValueError(K) @@ -573,12 +593,14 @@ def ccittfaxdecode(data: bytes, params: Dict[str, object]) -> bytes: def main(argv: List[str]) -> None: if not argv[1:]: import unittest + unittest.main() return class Parser(CCITTG4Parser): def __init__(self, width: int, bytealign: bool = False) -> None: import pygame # type: ignore[import] + CCITTG4Parser.__init__(self, width, bytealign=bytealign) self.img = pygame.Surface((self.width, 1000)) return @@ -593,11 +615,13 @@ def main(argv: List[str]) -> None: def close(self) -> None: import pygame - pygame.image.save(self.img, 'out.bmp') + + pygame.image.save(self.img, "out.bmp") return + for path in argv[1:]: - fp = open(path, 'rb') - (_, _, k, w, h, _) = path.split('.') + fp = open(path, "rb") + (_, _, k, w, h, _) = path.split(".") parser = Parser(int(w)) parser.feedbytes(fp.read()) parser.close() diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 451aab0..b84d86b 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -16,8 +16,20 @@ import os.path import pickle as pickle import struct import sys -from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List, - 
MutableMapping, Optional, TextIO, Tuple, Union, cast) +from typing import ( + Any, + BinaryIO, + Dict, + Iterable, + Iterator, + List, + MutableMapping, + Optional, + TextIO, + Tuple, + Union, + cast, +) from .encodingdb import name2unicode from .psparser import KWD @@ -45,7 +57,7 @@ class CMapBase: self.attrs: MutableMapping[str, object] = kwargs.copy() def is_vertical(self) -> bool: - return self.attrs.get('WMode', 0) != 0 + return self.attrs.get("WMode", 0) != 0 def set_attr(self, k: str, v: object) -> None: self.attrs[k] = v @@ -53,8 +65,7 @@ class CMapBase: def add_code2cid(self, code: str, cid: int) -> None: pass - def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int] - ) -> None: + def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]) -> None: pass def use_cmap(self, cmap: "CMapBase") -> None: @@ -65,13 +76,12 @@ class CMapBase: class CMap(CMapBase): - def __init__(self, **kwargs: Union[str, int]) -> None: CMapBase.__init__(self, **kwargs) self.code2cid: Dict[int, object] = {} def __repr__(self) -> str: - return '' % self.attrs.get('CMapName') + return "" % self.attrs.get("CMapName") def use_cmap(self, cmap: CMapBase) -> None: assert isinstance(cmap, CMap), str(type(cmap)) @@ -84,10 +94,11 @@ class CMap(CMapBase): copy(d, v) else: dst[k] = v + copy(self.code2cid, cmap.code2cid) def decode(self, code: bytes) -> Iterator[int]: - log.debug('decode: %r, %r', self, code) + log.debug("decode: %r, %r", self, code) d = self.code2cid for i in iter(code): if i in d: @@ -100,70 +111,70 @@ class CMap(CMapBase): else: d = self.code2cid - def dump(self, out: TextIO = sys.stdout, - code2cid: Optional[Dict[int, object]] = None, - code: Tuple[int, ...] = ()) -> None: + def dump( + self, + out: TextIO = sys.stdout, + code2cid: Optional[Dict[int, object]] = None, + code: Tuple[int, ...] 
= (), + ) -> None: if code2cid is None: code2cid = self.code2cid code = () for (k, v) in sorted(code2cid.items()): - c = code+(k,) + c = code + (k,) if isinstance(v, int): - out.write('code %r = cid %d\n' % (c, v)) + out.write("code %r = cid %d\n" % (c, v)) else: self.dump(out=out, code2cid=cast(Dict[int, object], v), code=c) class IdentityCMap(CMapBase): - def decode(self, code: bytes) -> Tuple[int, ...]: - n = len(code)//2 + n = len(code) // 2 if n: - return struct.unpack('>%dH' % n, code) + return struct.unpack(">%dH" % n, code) else: return () class IdentityCMapByte(IdentityCMap): - def decode(self, code: bytes) -> Tuple[int, ...]: n = len(code) if n: - return struct.unpack('>%dB' % n, code) + return struct.unpack(">%dB" % n, code) else: return () class UnicodeMap(CMapBase): - def __init__(self, **kwargs: Union[str, int]) -> None: CMapBase.__init__(self, **kwargs) self.cid2unichr: Dict[int, str] = {} def __repr__(self) -> str: - return '' % self.attrs.get('CMapName') + return "" % self.attrs.get("CMapName") def get_unichr(self, cid: int) -> str: - log.debug('get_unichr: %r, %r', self, cid) + log.debug("get_unichr: %r, %r", self, cid) return self.cid2unichr[cid] def dump(self, out: TextIO = sys.stdout) -> None: for (k, v) in sorted(self.cid2unichr.items()): - out.write('cid %d = unicode %r\n' % (k, v)) + out.write("cid %d = unicode %r\n" % (k, v)) class IdentityUnicodeMap(UnicodeMap): def get_unichr(self, cid: int) -> str: """Interpret character id as unicode codepoint""" - log.debug('get_unichr: %r, %r', self, cid) + log.debug("get_unichr: %r, %r", self, cid) return chr(cid) class FileCMap(CMap): - def add_code2cid(self, code: str, cid: int) -> None: - assert isinstance(code, str) and isinstance(cid, int),\ - str((type(code), type(cid))) + assert isinstance(code, str) and isinstance(cid, int), str( + (type(code), type(cid)) + ) d = self.code2cid for c in code[:-1]: ci = ord(c) @@ -178,9 +189,7 @@ class FileCMap(CMap): class FileUnicodeMap(UnicodeMap): - - def 
add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int] - ) -> None: + def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]) -> None: assert isinstance(cid, int), str(type(cid)) if isinstance(code, PSLiteral): # Interpret as an Adobe glyph name. @@ -188,7 +197,7 @@ class FileUnicodeMap(UnicodeMap): self.cid2unichr[cid] = name2unicode(code.name) elif isinstance(code, bytes): # Interpret as UTF-16BE. - self.cid2unichr[cid] = code.decode('UTF-16BE', 'ignore') + self.cid2unichr[cid] = code.decode("UTF-16BE", "ignore") elif isinstance(code, int): self.cid2unichr[cid] = chr(code) else: @@ -196,21 +205,19 @@ class FileUnicodeMap(UnicodeMap): class PyCMap(CMap): - def __init__(self, name: str, module: Any) -> None: super().__init__(CMapName=name) self.code2cid = module.CODE2CID if module.IS_VERTICAL: - self.attrs['WMode'] = 1 + self.attrs["WMode"] = 1 class PyUnicodeMap(UnicodeMap): - def __init__(self, name: str, module: Any, vertical: bool) -> None: super().__init__(CMapName=name) if vertical: self.cid2unichr = module.CID2UNICHR_V - self.attrs['WMode'] = 1 + self.attrs["WMode"] = 1 else: self.cid2unichr = module.CID2UNICHR_H @@ -226,10 +233,12 @@ class CMapDB: @classmethod def _load_data(cls, name: str) -> Any: name = name.replace("\0", "") - filename = '%s.pickle.gz' % name - log.debug('loading: %r', name) - cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'), - os.path.join(os.path.dirname(__file__), 'cmap'),) + filename = "%s.pickle.gz" % name + log.debug("loading: %r", name) + cmap_paths = ( + os.environ.get("CMAP_PATH", "/usr/share/pdfminer/"), + os.path.join(os.path.dirname(__file__), "cmap"), + ) for directory in cmap_paths: path = os.path.join(directory, filename) if os.path.exists(path): @@ -243,13 +252,13 @@ class CMapDB: @classmethod def get_cmap(cls, name: str) -> CMapBase: - if name == 'Identity-H': + if name == "Identity-H": return IdentityCMap(WMode=0) - elif name == 'Identity-V': + elif name == "Identity-V": return 
IdentityCMap(WMode=1) - elif name == 'OneByteIdentityH': + elif name == "OneByteIdentityH": return IdentityCMapByte(WMode=0) - elif name == 'OneByteIdentityV': + elif name == "OneByteIdentityV": return IdentityCMapByte(WMode=1) try: return cls._cmap_cache[name] @@ -265,14 +274,12 @@ class CMapDB: return cls._umap_cache[name][vertical] except KeyError: pass - data = cls._load_data('to-unicode-%s' % name) - cls._umap_cache[name] = [PyUnicodeMap(name, data, v) - for v in (False, True)] + data = cls._load_data("to-unicode-%s" % name) + cls._umap_cache[name] = [PyUnicodeMap(name, data, v) for v in (False, True)] return cls._umap_cache[name][vertical] class CMapParser(PSStackParser[PSKeyword]): - def __init__(self, cmap: CMapBase, fp: BinaryIO) -> None: PSStackParser.__init__(self, fp) self.cmap = cmap @@ -287,22 +294,22 @@ class CMapParser(PSStackParser[PSKeyword]): pass return - KEYWORD_BEGINCMAP = KWD(b'begincmap') - KEYWORD_ENDCMAP = KWD(b'endcmap') - KEYWORD_USECMAP = KWD(b'usecmap') - KEYWORD_DEF = KWD(b'def') - KEYWORD_BEGINCODESPACERANGE = KWD(b'begincodespacerange') - KEYWORD_ENDCODESPACERANGE = KWD(b'endcodespacerange') - KEYWORD_BEGINCIDRANGE = KWD(b'begincidrange') - KEYWORD_ENDCIDRANGE = KWD(b'endcidrange') - KEYWORD_BEGINCIDCHAR = KWD(b'begincidchar') - KEYWORD_ENDCIDCHAR = KWD(b'endcidchar') - KEYWORD_BEGINBFRANGE = KWD(b'beginbfrange') - KEYWORD_ENDBFRANGE = KWD(b'endbfrange') - KEYWORD_BEGINBFCHAR = KWD(b'beginbfchar') - KEYWORD_ENDBFCHAR = KWD(b'endbfchar') - KEYWORD_BEGINNOTDEFRANGE = KWD(b'beginnotdefrange') - KEYWORD_ENDNOTDEFRANGE = KWD(b'endnotdefrange') + KEYWORD_BEGINCMAP = KWD(b"begincmap") + KEYWORD_ENDCMAP = KWD(b"endcmap") + KEYWORD_USECMAP = KWD(b"usecmap") + KEYWORD_DEF = KWD(b"def") + KEYWORD_BEGINCODESPACERANGE = KWD(b"begincodespacerange") + KEYWORD_ENDCODESPACERANGE = KWD(b"endcodespacerange") + KEYWORD_BEGINCIDRANGE = KWD(b"begincidrange") + KEYWORD_ENDCIDRANGE = KWD(b"endcidrange") + KEYWORD_BEGINCIDCHAR = KWD(b"begincidchar") + 
KEYWORD_ENDCIDCHAR = KWD(b"endcidchar") + KEYWORD_BEGINBFRANGE = KWD(b"beginbfrange") + KEYWORD_ENDBFRANGE = KWD(b"endbfrange") + KEYWORD_BEGINBFCHAR = KWD(b"beginbfchar") + KEYWORD_ENDBFCHAR = KWD(b"endbfchar") + KEYWORD_BEGINNOTDEFRANGE = KWD(b"beginnotdefrange") + KEYWORD_ENDNOTDEFRANGE = KWD(b"endnotdefrange") def do_keyword(self, pos: int, token: PSKeyword) -> None: if token is self.KEYWORD_BEGINCMAP: @@ -346,8 +353,12 @@ class CMapParser(PSStackParser[PSKeyword]): if token is self.KEYWORD_ENDCIDRANGE: objs = [obj for (__, obj) in self.popall()] for (s, e, cid) in choplist(3, objs): - if (not isinstance(s, bytes) or not isinstance(e, bytes) or - not isinstance(cid, int) or len(s) != len(e)): + if ( + not isinstance(s, bytes) + or not isinstance(e, bytes) + or not isinstance(cid, int) + or len(s) != len(e) + ): continue sprefix = s[:-4] eprefix = e[:-4] @@ -358,9 +369,9 @@ class CMapParser(PSStackParser[PSKeyword]): s1 = nunpack(svar) e1 = nunpack(evar) vlen = len(svar) - for i in range(e1-s1+1): - x = sprefix+struct.pack('>L', s1+i)[-vlen:] - self.cmap.add_cid2unichr(cid+i, x) + for i in range(e1 - s1 + 1): + x = sprefix + struct.pack(">L", s1 + i)[-vlen:] + self.cmap.add_cid2unichr(cid + i, x) return if token is self.KEYWORD_BEGINCIDCHAR: @@ -379,23 +390,26 @@ class CMapParser(PSStackParser[PSKeyword]): if token is self.KEYWORD_ENDBFRANGE: objs = [obj for (__, obj) in self.popall()] for (s, e, code) in choplist(3, objs): - if (not isinstance(s, bytes) or not isinstance(e, bytes) or - len(s) != len(e)): + if ( + not isinstance(s, bytes) + or not isinstance(e, bytes) + or len(s) != len(e) + ): continue s1 = nunpack(s) e1 = nunpack(e) if isinstance(code, list): - for i in range(e1-s1+1): - self.cmap.add_cid2unichr(s1+i, code[i]) + for i in range(e1 - s1 + 1): + self.cmap.add_cid2unichr(s1 + i, code[i]) else: assert isinstance(code, bytes) var = code[-4:] base = nunpack(var) prefix = code[:-4] vlen = len(var) - for i in range(e1-s1+1): - x = 
prefix+struct.pack('>L', base+i)[-vlen:] - self.cmap.add_cid2unichr(s1+i, x) + for i in range(e1 - s1 + 1): + x = prefix + struct.pack(">L", base + i)[-vlen:] + self.cmap.add_cid2unichr(s1 + i, x) return if token is self.KEYWORD_BEGINBFCHAR: @@ -422,7 +436,7 @@ class CMapParser(PSStackParser[PSKeyword]): def main(argv: List[str]) -> None: args = argv[1:] for fname in args: - fp = open(fname, 'rb') + fp = open(fname, "rb") cmap = FileUnicodeMap() CMapParser(cmap, fp).run() fp.close() @@ -430,5 +444,5 @@ def main(argv: List[str]) -> None: return -if __name__ == '__main__': +if __name__ == "__main__": main(sys.argv) diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 14da61b..3516c78 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -1,8 +1,19 @@ import io import logging import re -from typing import (BinaryIO, Dict, Generic, List, Optional, Sequence, TextIO, - Tuple, TypeVar, Union, cast) +from typing import ( + BinaryIO, + Dict, + Generic, + List, + Optional, + Sequence, + TextIO, + Tuple, + TypeVar, + Union, + cast, +) from pdfminer.pdfcolor import PDFColorSpace from . 
import utils @@ -46,7 +57,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): self, rsrcmgr: PDFResourceManager, pageno: int = 1, - laparams: Optional[LAParams] = None + laparams: Optional[LAParams] = None, ) -> None: PDFTextDevice.__init__(self, rsrcmgr) self.pageno = pageno @@ -57,7 +68,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): (x0, y0, x1, y1) = page.mediabox (x0, y0) = apply_matrix_pt(ctm, (x0, y0)) (x1, y1) = apply_matrix_pt(ctm, (x1, y1)) - mediabox = (0, 0, abs(x0-x1), abs(y0-y1)) + mediabox = (0, 0, abs(x0 - x1), abs(y0 - y1)) self.cur_item = LTPage(self.pageno, mediabox) def end_page(self, page: PDFPage) -> None: @@ -80,9 +91,11 @@ class PDFLayoutAnalyzer(PDFTextDevice): def render_image(self, name: str, stream: PDFStream) -> None: assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item)) - item = LTImage(name, stream, - (self.cur_item.x0, self.cur_item.y0, - self.cur_item.x1, self.cur_item.y1)) + item = LTImage( + name, + stream, + (self.cur_item.x0, self.cur_item.y0, self.cur_item.x1, self.cur_item.y1), + ) self.cur_item.add(item) def paint_path( @@ -91,15 +104,15 @@ class PDFLayoutAnalyzer(PDFTextDevice): stroke: bool, fill: bool, evenodd: bool, - path: Sequence[PathSegment] + path: Sequence[PathSegment], ) -> None: """Paint paths described in section 4.4 of the PDF reference manual""" - shape = ''.join(x[0] for x in path) + shape = "".join(x[0] for x in path) - if shape.count('m') > 1: + if shape.count("m") > 1: # recurse if there are multiple m's in this shape - for m in re.finditer(r'm[^m]+', shape): - subpath = path[m.start(0):m.end(0)] + for m in re.finditer(r"m[^m]+", shape): + subpath = path[m.start(0) : m.end(0)] self.paint_path(gstate, stroke, fill, evenodd, subpath) else: @@ -110,38 +123,68 @@ class PDFLayoutAnalyzer(PDFTextDevice): # And, per Section 4.4's Table 4.9, all other path commands place # their point-position in their final two arguments. (Any preceding # arguments represent control points on Bézier curves.) 
- raw_pts = [cast(Point, p[-2:] if p[0] != 'h' else path[0][-2:]) - for p in path] + raw_pts = [ + cast(Point, p[-2:] if p[0] != "h" else path[0][-2:]) for p in path + ] pts = [apply_matrix_pt(self.ctm, pt) for pt in raw_pts] - if shape in {'mlh', 'ml'}: + if shape in {"mlh", "ml"}: # single line segment # # Note: 'ml', in conditional above, is a frequent anomaly # that we want to support. - line = LTLine(gstate.linewidth, pts[0], pts[1], stroke, - fill, evenodd, gstate.scolor, gstate.ncolor) + line = LTLine( + gstate.linewidth, + pts[0], + pts[1], + stroke, + fill, + evenodd, + gstate.scolor, + gstate.ncolor, + ) self.cur_item.add(line) - elif shape in {'mlllh', 'mllll'}: + elif shape in {"mlllh", "mllll"}: (x0, y0), (x1, y1), (x2, y2), (x3, y3), _ = pts - is_closed_loop = (pts[0] == pts[4]) - has_square_coordinates = \ - (x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) \ - or (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0) + is_closed_loop = pts[0] == pts[4] + has_square_coordinates = ( + x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0 + ) or (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0) if is_closed_loop and has_square_coordinates: - rect = LTRect(gstate.linewidth, (*pts[0], *pts[2]), stroke, - fill, evenodd, gstate.scolor, gstate.ncolor) + rect = LTRect( + gstate.linewidth, + (*pts[0], *pts[2]), + stroke, + fill, + evenodd, + gstate.scolor, + gstate.ncolor, + ) self.cur_item.add(rect) else: - curve = LTCurve(gstate.linewidth, pts, stroke, fill, - evenodd, gstate.scolor, gstate.ncolor) + curve = LTCurve( + gstate.linewidth, + pts, + stroke, + fill, + evenodd, + gstate.scolor, + gstate.ncolor, + ) self.cur_item.add(curve) else: - curve = LTCurve(gstate.linewidth, pts, stroke, fill, evenodd, - gstate.scolor, gstate.ncolor) + curve = LTCurve( + gstate.linewidth, + pts, + stroke, + fill, + evenodd, + gstate.scolor, + gstate.ncolor, + ) self.cur_item.add(curve) def render_char( @@ -153,7 +196,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): rise: float, cid: int, 
ncs: PDFColorSpace, - graphicstate: PDFGraphicState + graphicstate: PDFGraphicState, ) -> float: try: text = font.to_unichr(cid) @@ -162,14 +205,24 @@ class PDFLayoutAnalyzer(PDFTextDevice): text = self.handle_undefined_char(font, cid) textwidth = font.char_width(cid) textdisp = font.char_disp(cid) - item = LTChar(matrix, font, fontsize, scaling, rise, text, textwidth, - textdisp, ncs, graphicstate) + item = LTChar( + matrix, + font, + fontsize, + scaling, + rise, + text, + textwidth, + textdisp, + ncs, + graphicstate, + ) self.cur_item.add(item) return item.adv def handle_undefined_char(self, font: PDFFont, cid: int) -> str: - log.debug('undefined: %r, %r', font, cid) - return '(cid:%d)' % cid + log.debug("undefined: %r, %r", font, cid) + return "(cid:%d)" % cid def receive_layout(self, ltpage: LTPage) -> None: pass @@ -180,10 +233,9 @@ class PDFPageAggregator(PDFLayoutAnalyzer): self, rsrcmgr: PDFResourceManager, pageno: int = 1, - laparams: Optional[LAParams] = None + laparams: Optional[LAParams] = None, ) -> None: - PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, - laparams=laparams) + PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams) self.result: Optional[LTPage] = None def receive_layout(self, ltpage: LTPage) -> None: @@ -195,7 +247,7 @@ class PDFPageAggregator(PDFLayoutAnalyzer): # Some PDFConverter children support only binary I/O -IOType = TypeVar('IOType', TextIO, BinaryIO, AnyIO) +IOType = TypeVar("IOType", TextIO, BinaryIO, AnyIO) class PDFConverter(PDFLayoutAnalyzer, Generic[IOType]): @@ -203,12 +255,11 @@ class PDFConverter(PDFLayoutAnalyzer, Generic[IOType]): self, rsrcmgr: PDFResourceManager, outfp: IOType, - codec: str = 'utf-8', + codec: str = "utf-8", pageno: int = 1, - laparams: Optional[LAParams] = None + laparams: Optional[LAParams] = None, ) -> None: - PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, - laparams=laparams) + PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams) 
self.outfp: IOType = outfp self.codec = codec self.outfp_binary = self._is_binary_stream(self.outfp) @@ -216,9 +267,9 @@ class PDFConverter(PDFLayoutAnalyzer, Generic[IOType]): @staticmethod def _is_binary_stream(outfp: AnyIO) -> bool: """Test if an stream is binary or not""" - if 'b' in getattr(outfp, 'mode', ''): + if "b" in getattr(outfp, "mode", ""): return True - elif hasattr(outfp, 'mode'): + elif hasattr(outfp, "mode"): # output stream has a mode, but it does not contain 'b' return False elif isinstance(outfp, io.BytesIO): @@ -236,19 +287,18 @@ class TextConverter(PDFConverter[AnyIO]): self, rsrcmgr: PDFResourceManager, outfp: AnyIO, - codec: str = 'utf-8', + codec: str = "utf-8", pageno: int = 1, laparams: Optional[LAParams] = None, showpageno: bool = False, - imagewriter: Optional[ImageWriter] = None + imagewriter: Optional[ImageWriter] = None, ) -> None: - super().__init__(rsrcmgr, outfp, codec=codec, pageno=pageno, - laparams=laparams) + super().__init__(rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams) self.showpageno = showpageno self.imagewriter = imagewriter def write_text(self, text: str) -> None: - text = utils.compatible_encode_method(text, self.codec, 'ignore') + text = utils.compatible_encode_method(text, self.codec, "ignore") if self.outfp_binary: cast(BinaryIO, self.outfp).write(text.encode()) else: @@ -262,14 +312,15 @@ class TextConverter(PDFConverter[AnyIO]): elif isinstance(item, LTText): self.write_text(item.get_text()) if isinstance(item, LTTextBox): - self.write_text('\n') + self.write_text("\n") elif isinstance(item, LTImage): if self.imagewriter is not None: self.imagewriter.export_image(item) + if self.showpageno: - self.write_text('Page %s\n' % ltpage.pageid) + self.write_text("Page %s\n" % ltpage.pageid) render(ltpage) - self.write_text('\f') + self.write_text("\f") # Some dummy functions to save memory/CPU when all that is wanted # is text. 
This stops all the image and drawing output from being @@ -286,54 +337,55 @@ class TextConverter(PDFConverter[AnyIO]): stroke: bool, fill: bool, evenodd: bool, - path: Sequence[PathSegment] + path: Sequence[PathSegment], ) -> None: return class HTMLConverter(PDFConverter[AnyIO]): RECT_COLORS = { - 'figure': 'yellow', - 'textline': 'magenta', - 'textbox': 'cyan', - 'textgroup': 'red', - 'curve': 'black', - 'page': 'gray', + "figure": "yellow", + "textline": "magenta", + "textbox": "cyan", + "textgroup": "red", + "curve": "black", + "page": "gray", } TEXT_COLORS = { - 'textbox': 'blue', - 'char': 'black', + "textbox": "blue", + "char": "black", } def __init__( self, rsrcmgr: PDFResourceManager, outfp: AnyIO, - codec: str = 'utf-8', + codec: str = "utf-8", pageno: int = 1, laparams: Optional[LAParams] = None, scale: float = 1, fontscale: float = 1.0, - layoutmode: str = 'normal', + layoutmode: str = "normal", showpageno: bool = True, pagemargin: int = 50, imagewriter: Optional[ImageWriter] = None, debug: int = 0, rect_colors: Optional[Dict[str, str]] = None, - text_colors: Optional[Dict[str, str]] = None + text_colors: Optional[Dict[str, str]] = None, ) -> None: - PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno, - laparams=laparams) + PDFConverter.__init__( + self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams + ) # write() assumes a codec for binary I/O, or no codec for text I/O. 
if self.outfp_binary == (not self.codec): raise ValueError("Codec is required for a binary I/O output") if text_colors is None: - text_colors = {'char': 'black'} + text_colors = {"char": "black"} if rect_colors is None: - rect_colors = {'curve': 'black', 'page': 'gray'} + rect_colors = {"curve": "black", "page": "gray"} self.scale = scale self.fontscale = fontscale @@ -360,23 +412,27 @@ class HTMLConverter(PDFConverter[AnyIO]): return def write_header(self) -> None: - self.write('\n') + self.write("\n") if self.codec: - s = '\n' % self.codec + ) else: s = '\n' self.write(s) - self.write('\n') + self.write("\n") return def write_footer(self) -> None: - page_links = ['{}'.format(i, i) - for i in range(1, self.pageno)] - s = '
Page: %s
\n' % \ - ', '.join(page_links) + page_links = [ + '{}'.format(i, i) for i in range(1, self.pageno) + ] + s = '
Page: %s
\n' % ", ".join( + page_links + ) self.write(s) - self.write('\n') + self.write("\n") return def write_text(self, text: str) -> None: @@ -384,71 +440,67 @@ class HTMLConverter(PDFConverter[AnyIO]): return def place_rect( - self, - color: str, - borderwidth: int, - x: float, - y: float, - w: float, - h: float + self, color: str, borderwidth: int, x: float, y: float, w: float, h: float ) -> None: color2 = self.rect_colors.get(color) if color2 is not None: - s = '\n' % \ - (color2, borderwidth, x * self.scale, - (self._yoffset - y) * self.scale, w * self.scale, - h * self.scale) - self.write( - s) + s = ( + '\n' + % ( + color2, + borderwidth, + x * self.scale, + (self._yoffset - y) * self.scale, + w * self.scale, + h * self.scale, + ) + ) + self.write(s) return - def place_border( - self, - color: str, - borderwidth: int, - item: LTComponent - ) -> None: - self.place_rect(color, borderwidth, item.x0, item.y1, item.width, - item.height) + def place_border(self, color: str, borderwidth: int, item: LTComponent) -> None: + self.place_rect(color, borderwidth, item.x0, item.y1, item.width, item.height) return def place_image( - self, - item: LTImage, - borderwidth: int, - x: float, - y: float, - w: float, - h: float + self, item: LTImage, borderwidth: int, x: float, y: float, w: float, h: float ) -> None: if self.imagewriter is not None: name = self.imagewriter.export_image(item) - s = '\n' % \ - (enc(name), borderwidth, x * self.scale, - (self._yoffset - y) * self.scale, w * self.scale, - h * self.scale) + s = ( + '\n' + % ( + enc(name), + borderwidth, + x * self.scale, + (self._yoffset - y) * self.scale, + w * self.scale, + h * self.scale, + ) + ) self.write(s) return def place_text( - self, - color: str, - text: str, - x: float, - y: float, - size: float + self, color: str, text: str, x: float, y: float, size: float ) -> None: color2 = self.text_colors.get(color) if color2 is not None: - s = '' % \ - (color2, x * self.scale, (self._yoffset - y) * self.scale, - size * 
self.scale * self.fontscale) + s = ( + '' + % ( + color2, + x * self.scale, + (self._yoffset - y) * self.scale, + size * self.scale * self.fontscale, + ) + ) self.write(s) self.write_text(text) - self.write('\n') + self.write("\n") return def begin_div( @@ -459,47 +511,57 @@ class HTMLConverter(PDFConverter[AnyIO]): y: float, w: float, h: float, - writing_mode: str = 'False' + writing_mode: str = "False", ) -> None: self._fontstack.append(self._font) self._font = None - s = '
' % \ - (color, borderwidth, writing_mode, x * self.scale, - (self._yoffset - y) * self.scale, w * self.scale, h * self.scale) + s = ( + '
' + % ( + color, + borderwidth, + writing_mode, + x * self.scale, + (self._yoffset - y) * self.scale, + w * self.scale, + h * self.scale, + ) + ) self.write(s) return def end_div(self, color: str) -> None: if self._font is not None: - self.write('') + self.write("") self._font = self._fontstack.pop() - self.write('
') + self.write("
") return def put_text(self, text: str, fontname: str, fontsize: float) -> None: font = (fontname, fontsize) if font != self._font: if self._font is not None: - self.write('') + self.write("") # Remove subset tag from fontname, see PDF Reference 5.5.3 - fontname_without_subset_tag = fontname.split('+')[-1] - self.write('' % - (fontname_without_subset_tag, - fontsize * self.scale * self.fontscale)) + fontname_without_subset_tag = fontname.split("+")[-1] + self.write( + '' + % (fontname_without_subset_tag, fontsize * self.scale * self.fontscale) + ) self._font = font self.write_text(text) return def put_newline(self) -> None: - self.write('
') + self.write("
") return def receive_layout(self, ltpage: LTPage) -> None: def show_group(item: Union[LTTextGroup, TextGroupElement]) -> None: if isinstance(item, LTTextGroup): - self.place_border('textgroup', 1, item) + self.place_border("textgroup", 1, item) for child in item: show_group(child) return @@ -508,63 +570,74 @@ class HTMLConverter(PDFConverter[AnyIO]): child: LTItem if isinstance(item, LTPage): self._yoffset += item.y1 - self.place_border('page', 1, item) + self.place_border("page", 1, item) if self.showpageno: - self.write('
' % - ((self._yoffset-item.y1)*self.scale)) - self.write('Page {}
\n' - .format(item.pageid, item.pageid)) + self.write( + '
' + % ((self._yoffset - item.y1) * self.scale) + ) + self.write( + 'Page {}
\n'.format( + item.pageid, item.pageid + ) + ) for child in item: render(child) if item.groups is not None: for group in item.groups: show_group(group) elif isinstance(item, LTCurve): - self.place_border('curve', 1, item) + self.place_border("curve", 1, item) elif isinstance(item, LTFigure): - self.begin_div('figure', 1, item.x0, item.y1, item.width, - item.height) + self.begin_div("figure", 1, item.x0, item.y1, item.width, item.height) for child in item: render(child) - self.end_div('figure') + self.end_div("figure") elif isinstance(item, LTImage): - self.place_image(item, 1, item.x0, item.y1, item.width, - item.height) + self.place_image(item, 1, item.x0, item.y1, item.width, item.height) else: - if self.layoutmode == 'exact': + if self.layoutmode == "exact": if isinstance(item, LTTextLine): - self.place_border('textline', 1, item) + self.place_border("textline", 1, item) for child in item: render(child) elif isinstance(item, LTTextBox): - self.place_border('textbox', 1, item) - self.place_text('textbox', str(item.index+1), item.x0, - item.y1, 20) + self.place_border("textbox", 1, item) + self.place_text( + "textbox", str(item.index + 1), item.x0, item.y1, 20 + ) for child in item: render(child) elif isinstance(item, LTChar): - self.place_border('char', 1, item) - self.place_text('char', item.get_text(), item.x0, - item.y1, item.size) + self.place_border("char", 1, item) + self.place_text( + "char", item.get_text(), item.x0, item.y1, item.size + ) else: if isinstance(item, LTTextLine): for child in item: render(child) - if self.layoutmode != 'loose': + if self.layoutmode != "loose": self.put_newline() elif isinstance(item, LTTextBox): - self.begin_div('textbox', 1, item.x0, item.y1, - item.width, item.height, - item.get_writing_mode()) + self.begin_div( + "textbox", + 1, + item.x0, + item.y1, + item.width, + item.height, + item.get_writing_mode(), + ) for child in item: render(child) - self.end_div('textbox') + self.end_div("textbox") elif isinstance(item, 
LTChar): - self.put_text(item.get_text(), item.fontname, - item.size) + self.put_text(item.get_text(), item.fontname, item.size) elif isinstance(item, LTText): self.write_text(item.get_text()) return + render(ltpage) self._yoffset += self.pagemargin return @@ -576,20 +649,21 @@ class HTMLConverter(PDFConverter[AnyIO]): class XMLConverter(PDFConverter[AnyIO]): - CONTROL = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]') + CONTROL = re.compile("[\x00-\x08\x0b-\x0c\x0e-\x1f]") def __init__( self, rsrcmgr: PDFResourceManager, outfp: AnyIO, - codec: str = 'utf-8', + codec: str = "utf-8", pageno: int = 1, laparams: Optional[LAParams] = None, imagewriter: Optional[ImageWriter] = None, - stripcontrol: bool = False + stripcontrol: bool = False, ) -> None: - PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno, - laparams=laparams) + PDFConverter.__init__( + self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams + ) # write() assumes a codec for binary I/O, or no codec for text I/O. 
if self.outfp_binary == (not self.codec): @@ -612,100 +686,125 @@ class XMLConverter(PDFConverter[AnyIO]): self.write('\n' % self.codec) else: self.write('\n') - self.write('\n') + self.write("\n") return def write_footer(self) -> None: - self.write('\n') + self.write("\n") return def write_text(self, text: str) -> None: if self.stripcontrol: - text = self.CONTROL.sub('', text) + text = self.CONTROL.sub("", text) self.write(enc(text)) return def receive_layout(self, ltpage: LTPage) -> None: def show_group(item: LTItem) -> None: if isinstance(item, LTTextBox): - self.write('\n' % - (item.index, bbox2str(item.bbox))) + self.write( + '\n' + % (item.index, bbox2str(item.bbox)) + ) elif isinstance(item, LTTextGroup): self.write('\n' % bbox2str(item.bbox)) for child in item: show_group(child) - self.write('\n') + self.write("\n") return def render(item: LTItem) -> None: child: LTItem if isinstance(item, LTPage): - s = '\n' % \ - (item.pageid, bbox2str(item.bbox), item.rotate) + s = '\n' % ( + item.pageid, + bbox2str(item.bbox), + item.rotate, + ) self.write(s) for child in item: render(child) if item.groups is not None: - self.write('\n') + self.write("\n") for group in item.groups: show_group(group) - self.write('\n') - self.write('\n') + self.write("\n") + self.write("\n") elif isinstance(item, LTLine): - s = '\n' % \ - (item.linewidth, bbox2str(item.bbox)) + s = '\n' % ( + item.linewidth, + bbox2str(item.bbox), + ) self.write(s) elif isinstance(item, LTRect): - s = '\n' % \ - (item.linewidth, bbox2str(item.bbox)) + s = '\n' % ( + item.linewidth, + bbox2str(item.bbox), + ) self.write(s) elif isinstance(item, LTCurve): - s = '\n' % \ - (item.linewidth, bbox2str(item.bbox), item.get_pts()) + s = '\n' % ( + item.linewidth, + bbox2str(item.bbox), + item.get_pts(), + ) self.write(s) elif isinstance(item, LTFigure): - s = '
\n' % \ - (item.name, bbox2str(item.bbox)) + s = '
\n' % (item.name, bbox2str(item.bbox)) self.write(s) for child in item: render(child) - self.write('
\n') + self.write("
\n") elif isinstance(item, LTTextLine): self.write('\n' % bbox2str(item.bbox)) for child in item: render(child) - self.write('\n') + self.write("\n") elif isinstance(item, LTTextBox): - wmode = '' + wmode = "" if isinstance(item, LTTextBoxVertical): wmode = ' wmode="vertical"' - s = '\n' %\ - (item.index, bbox2str(item.bbox), wmode) + s = '\n' % ( + item.index, + bbox2str(item.bbox), + wmode, + ) self.write(s) for child in item: render(child) - self.write('\n') + self.write("\n") elif isinstance(item, LTChar): - s = '' % \ - (enc(item.fontname), bbox2str(item.bbox), - item.ncs.name, item.graphicstate.ncolor, item.size) + s = ( + '' + % ( + enc(item.fontname), + bbox2str(item.bbox), + item.ncs.name, + item.graphicstate.ncolor, + item.size, + ) + ) self.write(s) self.write_text(item.get_text()) - self.write('\n') + self.write("\n") elif isinstance(item, LTText): - self.write('%s\n' % item.get_text()) + self.write("%s\n" % item.get_text()) elif isinstance(item, LTImage): if self.imagewriter is not None: name = self.imagewriter.export_image(item) - self.write('\n' % - (enc(name), item.width, item.height)) + self.write( + '\n' + % (enc(name), item.width, item.height) + ) else: - self.write('\n' % - (item.width, item.height)) + self.write( + '\n' % (item.width, item.height) + ) else: - assert False, str(('Unhandled', item)) + assert False, str(("Unhandled", item)) return + render(ltpage) return diff --git a/pdfminer/data_structures.py b/pdfminer/data_structures.py index a372dfb..6e3f985 100644 --- a/pdfminer/data_structures.py +++ b/pdfminer/data_structures.py @@ -11,18 +11,19 @@ class NumberTree: See Section 3.8.6 of the PDF Reference. 
""" + def __init__(self, obj: Any): self._obj = dict_value(obj) self.nums: Optional[Iterable[Any]] = None self.kids: Optional[Iterable[Any]] = None self.limits: Optional[Iterable[Any]] = None - if 'Nums' in self._obj: - self.nums = list_value(self._obj['Nums']) - if 'Kids' in self._obj: - self.kids = list_value(self._obj['Kids']) - if 'Limits' in self._obj: - self.limits = list_value(self._obj['Limits']) + if "Nums" in self._obj: + self.nums = list_value(self._obj["Nums"]) + if "Kids" in self._obj: + self.kids = list_value(self._obj["Kids"]) + if "Limits" in self._obj: + self.limits = list_value(self._obj["Limits"]) def _parse(self) -> List[Tuple[int, Any]]: items = [] @@ -44,7 +45,7 @@ class NumberTree: if settings.STRICT: if not all(a[0] <= b[0] for a, b in zip(values, values[1:])): - raise PDFSyntaxError('Number tree elements are out of order') + raise PDFSyntaxError("Number tree elements are out of order") else: values.sort(key=lambda t: t[0]) diff --git a/pdfminer/encodingdb.py b/pdfminer/encodingdb.py index 3db476f..b8ec731 100644 --- a/pdfminer/encodingdb.py +++ b/pdfminer/encodingdb.py @@ -6,7 +6,7 @@ from .glyphlist import glyphname2unicode from .latin_enc import ENCODING from .psparser import PSLiteral -HEXADECIMAL = re.compile(r'[0-9a-fA-F]+') +HEXADECIMAL = re.compile(r"[0-9a-fA-F]+") log = logging.getLogger(__name__) @@ -25,39 +25,41 @@ def name2unicode(name: str) -> str: :returns unicode character if name resembles something, otherwise a KeyError """ - name = name.split('.')[0] - components = name.split('_') + name = name.split(".")[0] + components = name.split("_") if len(components) > 1: - return ''.join(map(name2unicode, components)) + return "".join(map(name2unicode, components)) else: if name in glyphname2unicode: return glyphname2unicode[name] - elif name.startswith('uni'): - name_without_uni = name.strip('uni') + elif name.startswith("uni"): + name_without_uni = name.strip("uni") - if HEXADECIMAL.match(name_without_uni) and \ - 
len(name_without_uni) % 4 == 0: - unicode_digits = [int(name_without_uni[i:i + 4], base=16) - for i in range(0, len(name_without_uni), 4)] + if HEXADECIMAL.match(name_without_uni) and len(name_without_uni) % 4 == 0: + unicode_digits = [ + int(name_without_uni[i : i + 4], base=16) + for i in range(0, len(name_without_uni), 4) + ] for digit in unicode_digits: raise_key_error_for_invalid_unicode(digit) characters = map(chr, unicode_digits) - return ''.join(characters) + return "".join(characters) - elif name.startswith('u'): - name_without_u = name.strip('u') + elif name.startswith("u"): + name_without_u = name.strip("u") - if HEXADECIMAL.match(name_without_u) and \ - 4 <= len(name_without_u) <= 6: + if HEXADECIMAL.match(name_without_u) and 4 <= len(name_without_u) <= 6: unicode_digit = int(name_without_u, base=16) raise_key_error_for_invalid_unicode(unicode_digit) return chr(unicode_digit) - raise KeyError('Could not convert unicode name "%s" to character because ' - 'it does not match specification' % name) + raise KeyError( + 'Could not convert unicode name "%s" to character because ' + "it does not match specification" % name + ) def raise_key_error_for_invalid_unicode(unicode_digit: int) -> None: @@ -67,8 +69,10 @@ def raise_key_error_for_invalid_unicode(unicode_digit: int) -> None: :raises KeyError if unicode digit is invalid """ if 55295 < unicode_digit < 57344: - raise KeyError('Unicode digit %d is invalid because ' - 'it is in the range D800 through DFFF' % unicode_digit) + raise KeyError( + "Unicode digit %d is invalid because " + "it is in the range D800 through DFFF" % unicode_digit + ) class EncodingDB: @@ -89,17 +93,15 @@ class EncodingDB: pdf2unicode[pdf] = c encodings = { - 'StandardEncoding': std2unicode, - 'MacRomanEncoding': mac2unicode, - 'WinAnsiEncoding': win2unicode, - 'PDFDocEncoding': pdf2unicode, + "StandardEncoding": std2unicode, + "MacRomanEncoding": mac2unicode, + "WinAnsiEncoding": win2unicode, + "PDFDocEncoding": pdf2unicode, } 
@classmethod def get_encoding( - cls, - name: str, - diff: Optional[Iterable[object]] = None + cls, name: str, diff: Optional[Iterable[object]] = None ) -> Dict[int, str]: cid2unicode = cls.encodings.get(name, cls.std2unicode) if diff: diff --git a/pdfminer/fontmetrics.py b/pdfminer/fontmetrics.py index 2a1e36a..4fdf28b 100644 --- a/pdfminer/fontmetrics.py +++ b/pdfminer/fontmetrics.py @@ -29,18 +29,4379 @@ The following data were extracted from the AFM files: # flake8: noqa FONT_METRICS = { - 'Courier': ({'FontName': 'Courier', 'Descent': -194.0, 'FontBBox': (-6.0, -249.0, 639.0, 803.0), 'FontWeight': 'Medium', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': 0.0, 'Ascent': 627.0}, {' ': 600, '!': 600, '"': 600, '#': 600, '$': 600, '%': 600, '&': 600, "'": 600, '(': 600, ')': 600, '*': 600, '+': 600, ',': 600, '-': 600, '.': 600, '/': 600, '0': 600, '1': 600, '2': 600, '3': 600, '4': 600, '5': 600, '6': 600, '7': 600, '8': 600, '9': 600, ':': 600, ';': 600, '<': 600, '=': 600, '>': 600, '?': 600, '@': 600, 'A': 600, 'B': 600, 'C': 600, 'D': 600, 'E': 600, 'F': 600, 'G': 600, 'H': 600, 'I': 600, 'J': 600, 'K': 600, 'L': 600, 'M': 600, 'N': 600, 'O': 600, 'P': 600, 'Q': 600, 'R': 600, 'S': 600, 'T': 600, 'U': 600, 'V': 600, 'W': 600, 'X': 600, 'Y': 600, 'Z': 600, '[': 600, '\\': 600, ']': 600, '^': 600, '_': 600, '`': 600, 'a': 600, 'b': 600, 'c': 600, 'd': 600, 'e': 600, 'f': 600, 'g': 600, 'h': 600, 'i': 600, 'j': 600, 'k': 600, 'l': 600, 'm': 600, 'n': 600, 'o': 600, 'p': 600, 'q': 600, 'r': 600, 's': 600, 't': 600, 'u': 600, 'v': 600, 'w': 600, 'x': 600, 'y': 600, 'z': 600, '{': 600, '|': 600, '}': 600, '~': 600, '\xa1': 600, '\xa2': 600, '\xa3': 600, '\xa4': 600, '\xa5': 600, '\xa6': 600, '\xa7': 600, '\xa8': 600, '\xa9': 600, '\xaa': 600, '\xab': 600, '\xac': 600, '\xae': 600, '\xaf': 600, '\xb0': 600, '\xb1': 600, '\xb2': 600, '\xb3': 600, '\xb4': 600, '\xb5': 600, '\xb6': 600, '\xb7': 600, '\xb8': 600, '\xb9': 600, 
'\xba': 600, '\xbb': 600, '\xbc': 600, '\xbd': 600, '\xbe': 600, '\xbf': 600, '\xc0': 600, '\xc1': 600, '\xc2': 600, '\xc3': 600, '\xc4': 600, '\xc5': 600, '\xc6': 600, '\xc7': 600, '\xc8': 600, '\xc9': 600, '\xca': 600, '\xcb': 600, '\xcc': 600, '\xcd': 600, '\xce': 600, '\xcf': 600, '\xd0': 600, '\xd1': 600, '\xd2': 600, '\xd3': 600, '\xd4': 600, '\xd5': 600, '\xd6': 600, '\xd7': 600, '\xd8': 600, '\xd9': 600, '\xda': 600, '\xdb': 600, '\xdc': 600, '\xdd': 600, '\xde': 600, '\xdf': 600, '\xe0': 600, '\xe1': 600, '\xe2': 600, '\xe3': 600, '\xe4': 600, '\xe5': 600, '\xe6': 600, '\xe7': 600, '\xe8': 600, '\xe9': 600, '\xea': 600, '\xeb': 600, '\xec': 600, '\xed': 600, '\xee': 600, '\xef': 600, '\xf0': 600, '\xf1': 600, '\xf2': 600, '\xf3': 600, '\xf4': 600, '\xf5': 600, '\xf6': 600, '\xf7': 600, '\xf8': 600, '\xf9': 600, '\xfa': 600, '\xfb': 600, '\xfc': 600, '\xfd': 600, '\xfe': 600, '\xff': 600, '\u0100': 600, '\u0101': 600, '\u0102': 600, '\u0103': 600, '\u0104': 600, '\u0105': 600, '\u0106': 600, '\u0107': 600, '\u010c': 600, '\u010d': 600, '\u010e': 600, '\u010f': 600, '\u0110': 600, '\u0111': 600, '\u0112': 600, '\u0113': 600, '\u0116': 600, '\u0117': 600, '\u0118': 600, '\u0119': 600, '\u011a': 600, '\u011b': 600, '\u011e': 600, '\u011f': 600, '\u0122': 600, '\u0123': 600, '\u012a': 600, '\u012b': 600, '\u012e': 600, '\u012f': 600, '\u0130': 600, '\u0131': 600, '\u0136': 600, '\u0137': 600, '\u0139': 600, '\u013a': 600, '\u013b': 600, '\u013c': 600, '\u013d': 600, '\u013e': 600, '\u0141': 600, '\u0142': 600, '\u0143': 600, '\u0144': 600, '\u0145': 600, '\u0146': 600, '\u0147': 600, '\u0148': 600, '\u014c': 600, '\u014d': 600, '\u0150': 600, '\u0151': 600, '\u0152': 600, '\u0153': 600, '\u0154': 600, '\u0155': 600, '\u0156': 600, '\u0157': 600, '\u0158': 600, '\u0159': 600, '\u015a': 600, '\u015b': 600, '\u015e': 600, '\u015f': 600, '\u0160': 600, '\u0161': 600, '\u0162': 600, '\u0163': 600, '\u0164': 600, '\u0165': 600, '\u016a': 600, '\u016b': 600, '\u016e': 
600, '\u016f': 600, '\u0170': 600, '\u0171': 600, '\u0172': 600, '\u0173': 600, '\u0178': 600, '\u0179': 600, '\u017a': 600, '\u017b': 600, '\u017c': 600, '\u017d': 600, '\u017e': 600, '\u0192': 600, '\u0218': 600, '\u0219': 600, '\u02c6': 600, '\u02c7': 600, '\u02d8': 600, '\u02d9': 600, '\u02da': 600, '\u02db': 600, '\u02dc': 600, '\u02dd': 600, '\u2013': 600, '\u2014': 600, '\u2018': 600, '\u2019': 600, '\u201a': 600, '\u201c': 600, '\u201d': 600, '\u201e': 600, '\u2020': 600, '\u2021': 600, '\u2022': 600, '\u2026': 600, '\u2030': 600, '\u2039': 600, '\u203a': 600, '\u2044': 600, '\u2122': 600, '\u2202': 600, '\u2206': 600, '\u2211': 600, '\u2212': 600, '\u221a': 600, '\u2260': 600, '\u2264': 600, '\u2265': 600, '\u25ca': 600, '\uf6c3': 600, '\ufb01': 600, '\ufb02': 600}), - 'Courier-Bold': ({'FontName': 'Courier-Bold', 'Descent': -194.0, 'FontBBox': (-88.0, -249.0, 697.0, 811.0), 'FontWeight': 'Bold', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': 0.0, 'Ascent': 627.0}, {' ': 600, '!': 600, '"': 600, '#': 600, '$': 600, '%': 600, '&': 600, "'": 600, '(': 600, ')': 600, '*': 600, '+': 600, ',': 600, '-': 600, '.': 600, '/': 600, '0': 600, '1': 600, '2': 600, '3': 600, '4': 600, '5': 600, '6': 600, '7': 600, '8': 600, '9': 600, ':': 600, ';': 600, '<': 600, '=': 600, '>': 600, '?': 600, '@': 600, 'A': 600, 'B': 600, 'C': 600, 'D': 600, 'E': 600, 'F': 600, 'G': 600, 'H': 600, 'I': 600, 'J': 600, 'K': 600, 'L': 600, 'M': 600, 'N': 600, 'O': 600, 'P': 600, 'Q': 600, 'R': 600, 'S': 600, 'T': 600, 'U': 600, 'V': 600, 'W': 600, 'X': 600, 'Y': 600, 'Z': 600, '[': 600, '\\': 600, ']': 600, '^': 600, '_': 600, '`': 600, 'a': 600, 'b': 600, 'c': 600, 'd': 600, 'e': 600, 'f': 600, 'g': 600, 'h': 600, 'i': 600, 'j': 600, 'k': 600, 'l': 600, 'm': 600, 'n': 600, 'o': 600, 'p': 600, 'q': 600, 'r': 600, 's': 600, 't': 600, 'u': 600, 'v': 600, 'w': 600, 'x': 600, 'y': 600, 'z': 600, '{': 600, '|': 600, '}': 600, '~': 600, '\xa1': 600, 
'\xa2': 600, '\xa3': 600, '\xa4': 600, '\xa5': 600, '\xa6': 600, '\xa7': 600, '\xa8': 600, '\xa9': 600, '\xaa': 600, '\xab': 600, '\xac': 600, '\xae': 600, '\xaf': 600, '\xb0': 600, '\xb1': 600, '\xb2': 600, '\xb3': 600, '\xb4': 600, '\xb5': 600, '\xb6': 600, '\xb7': 600, '\xb8': 600, '\xb9': 600, '\xba': 600, '\xbb': 600, '\xbc': 600, '\xbd': 600, '\xbe': 600, '\xbf': 600, '\xc0': 600, '\xc1': 600, '\xc2': 600, '\xc3': 600, '\xc4': 600, '\xc5': 600, '\xc6': 600, '\xc7': 600, '\xc8': 600, '\xc9': 600, '\xca': 600, '\xcb': 600, '\xcc': 600, '\xcd': 600, '\xce': 600, '\xcf': 600, '\xd0': 600, '\xd1': 600, '\xd2': 600, '\xd3': 600, '\xd4': 600, '\xd5': 600, '\xd6': 600, '\xd7': 600, '\xd8': 600, '\xd9': 600, '\xda': 600, '\xdb': 600, '\xdc': 600, '\xdd': 600, '\xde': 600, '\xdf': 600, '\xe0': 600, '\xe1': 600, '\xe2': 600, '\xe3': 600, '\xe4': 600, '\xe5': 600, '\xe6': 600, '\xe7': 600, '\xe8': 600, '\xe9': 600, '\xea': 600, '\xeb': 600, '\xec': 600, '\xed': 600, '\xee': 600, '\xef': 600, '\xf0': 600, '\xf1': 600, '\xf2': 600, '\xf3': 600, '\xf4': 600, '\xf5': 600, '\xf6': 600, '\xf7': 600, '\xf8': 600, '\xf9': 600, '\xfa': 600, '\xfb': 600, '\xfc': 600, '\xfd': 600, '\xfe': 600, '\xff': 600, '\u0100': 600, '\u0101': 600, '\u0102': 600, '\u0103': 600, '\u0104': 600, '\u0105': 600, '\u0106': 600, '\u0107': 600, '\u010c': 600, '\u010d': 600, '\u010e': 600, '\u010f': 600, '\u0110': 600, '\u0111': 600, '\u0112': 600, '\u0113': 600, '\u0116': 600, '\u0117': 600, '\u0118': 600, '\u0119': 600, '\u011a': 600, '\u011b': 600, '\u011e': 600, '\u011f': 600, '\u0122': 600, '\u0123': 600, '\u012a': 600, '\u012b': 600, '\u012e': 600, '\u012f': 600, '\u0130': 600, '\u0131': 600, '\u0136': 600, '\u0137': 600, '\u0139': 600, '\u013a': 600, '\u013b': 600, '\u013c': 600, '\u013d': 600, '\u013e': 600, '\u0141': 600, '\u0142': 600, '\u0143': 600, '\u0144': 600, '\u0145': 600, '\u0146': 600, '\u0147': 600, '\u0148': 600, '\u014c': 600, '\u014d': 600, '\u0150': 600, '\u0151': 600, '\u0152': 
600, '\u0153': 600, '\u0154': 600, '\u0155': 600, '\u0156': 600, '\u0157': 600, '\u0158': 600, '\u0159': 600, '\u015a': 600, '\u015b': 600, '\u015e': 600, '\u015f': 600, '\u0160': 600, '\u0161': 600, '\u0162': 600, '\u0163': 600, '\u0164': 600, '\u0165': 600, '\u016a': 600, '\u016b': 600, '\u016e': 600, '\u016f': 600, '\u0170': 600, '\u0171': 600, '\u0172': 600, '\u0173': 600, '\u0178': 600, '\u0179': 600, '\u017a': 600, '\u017b': 600, '\u017c': 600, '\u017d': 600, '\u017e': 600, '\u0192': 600, '\u0218': 600, '\u0219': 600, '\u02c6': 600, '\u02c7': 600, '\u02d8': 600, '\u02d9': 600, '\u02da': 600, '\u02db': 600, '\u02dc': 600, '\u02dd': 600, '\u2013': 600, '\u2014': 600, '\u2018': 600, '\u2019': 600, '\u201a': 600, '\u201c': 600, '\u201d': 600, '\u201e': 600, '\u2020': 600, '\u2021': 600, '\u2022': 600, '\u2026': 600, '\u2030': 600, '\u2039': 600, '\u203a': 600, '\u2044': 600, '\u2122': 600, '\u2202': 600, '\u2206': 600, '\u2211': 600, '\u2212': 600, '\u221a': 600, '\u2260': 600, '\u2264': 600, '\u2265': 600, '\u25ca': 600, '\uf6c3': 600, '\ufb01': 600, '\ufb02': 600}), - 'Courier-BoldOblique': ({'FontName': 'Courier-BoldOblique', 'Descent': -194.0, 'FontBBox': (-49.0, -249.0, 758.0, 811.0), 'FontWeight': 'Bold', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': -11.0, 'Ascent': 627.0}, {' ': 600, '!': 600, '"': 600, '#': 600, '$': 600, '%': 600, '&': 600, "'": 600, '(': 600, ')': 600, '*': 600, '+': 600, ',': 600, '-': 600, '.': 600, '/': 600, '0': 600, '1': 600, '2': 600, '3': 600, '4': 600, '5': 600, '6': 600, '7': 600, '8': 600, '9': 600, ':': 600, ';': 600, '<': 600, '=': 600, '>': 600, '?': 600, '@': 600, 'A': 600, 'B': 600, 'C': 600, 'D': 600, 'E': 600, 'F': 600, 'G': 600, 'H': 600, 'I': 600, 'J': 600, 'K': 600, 'L': 600, 'M': 600, 'N': 600, 'O': 600, 'P': 600, 'Q': 600, 'R': 600, 'S': 600, 'T': 600, 'U': 600, 'V': 600, 'W': 600, 'X': 600, 'Y': 600, 'Z': 600, '[': 600, '\\': 600, ']': 600, '^': 600, '_': 600, '`': 
600, 'a': 600, 'b': 600, 'c': 600, 'd': 600, 'e': 600, 'f': 600, 'g': 600, 'h': 600, 'i': 600, 'j': 600, 'k': 600, 'l': 600, 'm': 600, 'n': 600, 'o': 600, 'p': 600, 'q': 600, 'r': 600, 's': 600, 't': 600, 'u': 600, 'v': 600, 'w': 600, 'x': 600, 'y': 600, 'z': 600, '{': 600, '|': 600, '}': 600, '~': 600, '\xa1': 600, '\xa2': 600, '\xa3': 600, '\xa4': 600, '\xa5': 600, '\xa6': 600, '\xa7': 600, '\xa8': 600, '\xa9': 600, '\xaa': 600, '\xab': 600, '\xac': 600, '\xae': 600, '\xaf': 600, '\xb0': 600, '\xb1': 600, '\xb2': 600, '\xb3': 600, '\xb4': 600, '\xb5': 600, '\xb6': 600, '\xb7': 600, '\xb8': 600, '\xb9': 600, '\xba': 600, '\xbb': 600, '\xbc': 600, '\xbd': 600, '\xbe': 600, '\xbf': 600, '\xc0': 600, '\xc1': 600, '\xc2': 600, '\xc3': 600, '\xc4': 600, '\xc5': 600, '\xc6': 600, '\xc7': 600, '\xc8': 600, '\xc9': 600, '\xca': 600, '\xcb': 600, '\xcc': 600, '\xcd': 600, '\xce': 600, '\xcf': 600, '\xd0': 600, '\xd1': 600, '\xd2': 600, '\xd3': 600, '\xd4': 600, '\xd5': 600, '\xd6': 600, '\xd7': 600, '\xd8': 600, '\xd9': 600, '\xda': 600, '\xdb': 600, '\xdc': 600, '\xdd': 600, '\xde': 600, '\xdf': 600, '\xe0': 600, '\xe1': 600, '\xe2': 600, '\xe3': 600, '\xe4': 600, '\xe5': 600, '\xe6': 600, '\xe7': 600, '\xe8': 600, '\xe9': 600, '\xea': 600, '\xeb': 600, '\xec': 600, '\xed': 600, '\xee': 600, '\xef': 600, '\xf0': 600, '\xf1': 600, '\xf2': 600, '\xf3': 600, '\xf4': 600, '\xf5': 600, '\xf6': 600, '\xf7': 600, '\xf8': 600, '\xf9': 600, '\xfa': 600, '\xfb': 600, '\xfc': 600, '\xfd': 600, '\xfe': 600, '\xff': 600, '\u0100': 600, '\u0101': 600, '\u0102': 600, '\u0103': 600, '\u0104': 600, '\u0105': 600, '\u0106': 600, '\u0107': 600, '\u010c': 600, '\u010d': 600, '\u010e': 600, '\u010f': 600, '\u0110': 600, '\u0111': 600, '\u0112': 600, '\u0113': 600, '\u0116': 600, '\u0117': 600, '\u0118': 600, '\u0119': 600, '\u011a': 600, '\u011b': 600, '\u011e': 600, '\u011f': 600, '\u0122': 600, '\u0123': 600, '\u012a': 600, '\u012b': 600, '\u012e': 600, '\u012f': 600, '\u0130': 600, 
'\u0131': 600, '\u0136': 600, '\u0137': 600, '\u0139': 600, '\u013a': 600, '\u013b': 600, '\u013c': 600, '\u013d': 600, '\u013e': 600, '\u0141': 600, '\u0142': 600, '\u0143': 600, '\u0144': 600, '\u0145': 600, '\u0146': 600, '\u0147': 600, '\u0148': 600, '\u014c': 600, '\u014d': 600, '\u0150': 600, '\u0151': 600, '\u0152': 600, '\u0153': 600, '\u0154': 600, '\u0155': 600, '\u0156': 600, '\u0157': 600, '\u0158': 600, '\u0159': 600, '\u015a': 600, '\u015b': 600, '\u015e': 600, '\u015f': 600, '\u0160': 600, '\u0161': 600, '\u0162': 600, '\u0163': 600, '\u0164': 600, '\u0165': 600, '\u016a': 600, '\u016b': 600, '\u016e': 600, '\u016f': 600, '\u0170': 600, '\u0171': 600, '\u0172': 600, '\u0173': 600, '\u0178': 600, '\u0179': 600, '\u017a': 600, '\u017b': 600, '\u017c': 600, '\u017d': 600, '\u017e': 600, '\u0192': 600, '\u0218': 600, '\u0219': 600, '\u02c6': 600, '\u02c7': 600, '\u02d8': 600, '\u02d9': 600, '\u02da': 600, '\u02db': 600, '\u02dc': 600, '\u02dd': 600, '\u2013': 600, '\u2014': 600, '\u2018': 600, '\u2019': 600, '\u201a': 600, '\u201c': 600, '\u201d': 600, '\u201e': 600, '\u2020': 600, '\u2021': 600, '\u2022': 600, '\u2026': 600, '\u2030': 600, '\u2039': 600, '\u203a': 600, '\u2044': 600, '\u2122': 600, '\u2202': 600, '\u2206': 600, '\u2211': 600, '\u2212': 600, '\u221a': 600, '\u2260': 600, '\u2264': 600, '\u2265': 600, '\u25ca': 600, '\uf6c3': 600, '\ufb01': 600, '\ufb02': 600}), - 'Courier-Oblique': ({'FontName': 'Courier-Oblique', 'Descent': -194.0, 'FontBBox': (-49.0, -249.0, 749.0, 803.0), 'FontWeight': 'Medium', 'CapHeight': 572.0, 'FontFamily': 'Courier', 'Flags': 64, 'XHeight': 434.0, 'ItalicAngle': -11.0, 'Ascent': 627.0}, {' ': 600, '!': 600, '"': 600, '#': 600, '$': 600, '%': 600, '&': 600, "'": 600, '(': 600, ')': 600, '*': 600, '+': 600, ',': 600, '-': 600, '.': 600, '/': 600, '0': 600, '1': 600, '2': 600, '3': 600, '4': 600, '5': 600, '6': 600, '7': 600, '8': 600, '9': 600, ':': 600, ';': 600, '<': 600, '=': 600, '>': 600, '?': 600, '@': 600, 
'A': 600, 'B': 600, 'C': 600, 'D': 600, 'E': 600, 'F': 600, 'G': 600, 'H': 600, 'I': 600, 'J': 600, 'K': 600, 'L': 600, 'M': 600, 'N': 600, 'O': 600, 'P': 600, 'Q': 600, 'R': 600, 'S': 600, 'T': 600, 'U': 600, 'V': 600, 'W': 600, 'X': 600, 'Y': 600, 'Z': 600, '[': 600, '\\': 600, ']': 600, '^': 600, '_': 600, '`': 600, 'a': 600, 'b': 600, 'c': 600, 'd': 600, 'e': 600, 'f': 600, 'g': 600, 'h': 600, 'i': 600, 'j': 600, 'k': 600, 'l': 600, 'm': 600, 'n': 600, 'o': 600, 'p': 600, 'q': 600, 'r': 600, 's': 600, 't': 600, 'u': 600, 'v': 600, 'w': 600, 'x': 600, 'y': 600, 'z': 600, '{': 600, '|': 600, '}': 600, '~': 600, '\xa1': 600, '\xa2': 600, '\xa3': 600, '\xa4': 600, '\xa5': 600, '\xa6': 600, '\xa7': 600, '\xa8': 600, '\xa9': 600, '\xaa': 600, '\xab': 600, '\xac': 600, '\xae': 600, '\xaf': 600, '\xb0': 600, '\xb1': 600, '\xb2': 600, '\xb3': 600, '\xb4': 600, '\xb5': 600, '\xb6': 600, '\xb7': 600, '\xb8': 600, '\xb9': 600, '\xba': 600, '\xbb': 600, '\xbc': 600, '\xbd': 600, '\xbe': 600, '\xbf': 600, '\xc0': 600, '\xc1': 600, '\xc2': 600, '\xc3': 600, '\xc4': 600, '\xc5': 600, '\xc6': 600, '\xc7': 600, '\xc8': 600, '\xc9': 600, '\xca': 600, '\xcb': 600, '\xcc': 600, '\xcd': 600, '\xce': 600, '\xcf': 600, '\xd0': 600, '\xd1': 600, '\xd2': 600, '\xd3': 600, '\xd4': 600, '\xd5': 600, '\xd6': 600, '\xd7': 600, '\xd8': 600, '\xd9': 600, '\xda': 600, '\xdb': 600, '\xdc': 600, '\xdd': 600, '\xde': 600, '\xdf': 600, '\xe0': 600, '\xe1': 600, '\xe2': 600, '\xe3': 600, '\xe4': 600, '\xe5': 600, '\xe6': 600, '\xe7': 600, '\xe8': 600, '\xe9': 600, '\xea': 600, '\xeb': 600, '\xec': 600, '\xed': 600, '\xee': 600, '\xef': 600, '\xf0': 600, '\xf1': 600, '\xf2': 600, '\xf3': 600, '\xf4': 600, '\xf5': 600, '\xf6': 600, '\xf7': 600, '\xf8': 600, '\xf9': 600, '\xfa': 600, '\xfb': 600, '\xfc': 600, '\xfd': 600, '\xfe': 600, '\xff': 600, '\u0100': 600, '\u0101': 600, '\u0102': 600, '\u0103': 600, '\u0104': 600, '\u0105': 600, '\u0106': 600, '\u0107': 600, '\u010c': 600, '\u010d': 600, 
'\u010e': 600, '\u010f': 600, '\u0110': 600, '\u0111': 600, '\u0112': 600, '\u0113': 600, '\u0116': 600, '\u0117': 600, '\u0118': 600, '\u0119': 600, '\u011a': 600, '\u011b': 600, '\u011e': 600, '\u011f': 600, '\u0122': 600, '\u0123': 600, '\u012a': 600, '\u012b': 600, '\u012e': 600, '\u012f': 600, '\u0130': 600, '\u0131': 600, '\u0136': 600, '\u0137': 600, '\u0139': 600, '\u013a': 600, '\u013b': 600, '\u013c': 600, '\u013d': 600, '\u013e': 600, '\u0141': 600, '\u0142': 600, '\u0143': 600, '\u0144': 600, '\u0145': 600, '\u0146': 600, '\u0147': 600, '\u0148': 600, '\u014c': 600, '\u014d': 600, '\u0150': 600, '\u0151': 600, '\u0152': 600, '\u0153': 600, '\u0154': 600, '\u0155': 600, '\u0156': 600, '\u0157': 600, '\u0158': 600, '\u0159': 600, '\u015a': 600, '\u015b': 600, '\u015e': 600, '\u015f': 600, '\u0160': 600, '\u0161': 600, '\u0162': 600, '\u0163': 600, '\u0164': 600, '\u0165': 600, '\u016a': 600, '\u016b': 600, '\u016e': 600, '\u016f': 600, '\u0170': 600, '\u0171': 600, '\u0172': 600, '\u0173': 600, '\u0178': 600, '\u0179': 600, '\u017a': 600, '\u017b': 600, '\u017c': 600, '\u017d': 600, '\u017e': 600, '\u0192': 600, '\u0218': 600, '\u0219': 600, '\u02c6': 600, '\u02c7': 600, '\u02d8': 600, '\u02d9': 600, '\u02da': 600, '\u02db': 600, '\u02dc': 600, '\u02dd': 600, '\u2013': 600, '\u2014': 600, '\u2018': 600, '\u2019': 600, '\u201a': 600, '\u201c': 600, '\u201d': 600, '\u201e': 600, '\u2020': 600, '\u2021': 600, '\u2022': 600, '\u2026': 600, '\u2030': 600, '\u2039': 600, '\u203a': 600, '\u2044': 600, '\u2122': 600, '\u2202': 600, '\u2206': 600, '\u2211': 600, '\u2212': 600, '\u221a': 600, '\u2260': 600, '\u2264': 600, '\u2265': 600, '\u25ca': 600, '\uf6c3': 600, '\ufb01': 600, '\ufb02': 600}), - 'Helvetica': ({'FontName': 'Helvetica', 'Descent': -207.0, 'FontBBox': (-166.0, -225.0, 1000.0, 931.0), 'FontWeight': 'Medium', 'CapHeight': 718.0, 'FontFamily': 'Helvetica', 'Flags': 0, 'XHeight': 523.0, 'ItalicAngle': 0.0, 'Ascent': 718.0}, {' ': 278, '!': 278, '"': 
355, '#': 556, '$': 556, '%': 889, '&': 667, "'": 191, '(': 333, ')': 333, '*': 389, '+': 584, ',': 278, '-': 333, '.': 278, '/': 278, '0': 556, '1': 556, '2': 556, '3': 556, '4': 556, '5': 556, '6': 556, '7': 556, '8': 556, '9': 556, ':': 278, ';': 278, '<': 584, '=': 584, '>': 584, '?': 556, '@': 1015, 'A': 667, 'B': 667, 'C': 722, 'D': 722, 'E': 667, 'F': 611, 'G': 778, 'H': 722, 'I': 278, 'J': 500, 'K': 667, 'L': 556, 'M': 833, 'N': 722, 'O': 778, 'P': 667, 'Q': 778, 'R': 722, 'S': 667, 'T': 611, 'U': 722, 'V': 667, 'W': 944, 'X': 667, 'Y': 667, 'Z': 611, '[': 278, '\\': 278, ']': 278, '^': 469, '_': 556, '`': 333, 'a': 556, 'b': 556, 'c': 500, 'd': 556, 'e': 556, 'f': 278, 'g': 556, 'h': 556, 'i': 222, 'j': 222, 'k': 500, 'l': 222, 'm': 833, 'n': 556, 'o': 556, 'p': 556, 'q': 556, 'r': 333, 's': 500, 't': 278, 'u': 556, 'v': 500, 'w': 722, 'x': 500, 'y': 500, 'z': 500, '{': 334, '|': 260, '}': 334, '~': 584, '\xa1': 333, '\xa2': 556, '\xa3': 556, '\xa4': 556, '\xa5': 556, '\xa6': 260, '\xa7': 556, '\xa8': 333, '\xa9': 737, '\xaa': 370, '\xab': 556, '\xac': 584, '\xae': 737, '\xaf': 333, '\xb0': 400, '\xb1': 584, '\xb2': 333, '\xb3': 333, '\xb4': 333, '\xb5': 556, '\xb6': 537, '\xb7': 278, '\xb8': 333, '\xb9': 333, '\xba': 365, '\xbb': 556, '\xbc': 834, '\xbd': 834, '\xbe': 834, '\xbf': 611, '\xc0': 667, '\xc1': 667, '\xc2': 667, '\xc3': 667, '\xc4': 667, '\xc5': 667, '\xc6': 1000, '\xc7': 722, '\xc8': 667, '\xc9': 667, '\xca': 667, '\xcb': 667, '\xcc': 278, '\xcd': 278, '\xce': 278, '\xcf': 278, '\xd0': 722, '\xd1': 722, '\xd2': 778, '\xd3': 778, '\xd4': 778, '\xd5': 778, '\xd6': 778, '\xd7': 584, '\xd8': 778, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 667, '\xde': 667, '\xdf': 611, '\xe0': 556, '\xe1': 556, '\xe2': 556, '\xe3': 556, '\xe4': 556, '\xe5': 556, '\xe6': 889, '\xe7': 500, '\xe8': 556, '\xe9': 556, '\xea': 556, '\xeb': 556, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 556, '\xf1': 556, '\xf2': 556, '\xf3': 556, 
'\xf4': 556, '\xf5': 556, '\xf6': 556, '\xf7': 584, '\xf8': 611, '\xf9': 556, '\xfa': 556, '\xfb': 556, '\xfc': 556, '\xfd': 500, '\xfe': 556, '\xff': 500, '\u0100': 667, '\u0101': 556, '\u0102': 667, '\u0103': 556, '\u0104': 667, '\u0105': 556, '\u0106': 722, '\u0107': 500, '\u010c': 722, '\u010d': 500, '\u010e': 722, '\u010f': 643, '\u0110': 722, '\u0111': 556, '\u0112': 667, '\u0113': 556, '\u0116': 667, '\u0117': 556, '\u0118': 667, '\u0119': 556, '\u011a': 667, '\u011b': 556, '\u011e': 778, '\u011f': 556, '\u0122': 778, '\u0123': 556, '\u012a': 278, '\u012b': 278, '\u012e': 278, '\u012f': 222, '\u0130': 278, '\u0131': 278, '\u0136': 667, '\u0137': 500, '\u0139': 556, '\u013a': 222, '\u013b': 556, '\u013c': 222, '\u013d': 556, '\u013e': 299, '\u0141': 556, '\u0142': 222, '\u0143': 722, '\u0144': 556, '\u0145': 722, '\u0146': 556, '\u0147': 722, '\u0148': 556, '\u014c': 778, '\u014d': 556, '\u0150': 778, '\u0151': 556, '\u0152': 1000, '\u0153': 944, '\u0154': 722, '\u0155': 333, '\u0156': 722, '\u0157': 333, '\u0158': 722, '\u0159': 333, '\u015a': 667, '\u015b': 500, '\u015e': 667, '\u015f': 500, '\u0160': 667, '\u0161': 500, '\u0162': 611, '\u0163': 278, '\u0164': 611, '\u0165': 317, '\u016a': 722, '\u016b': 556, '\u016e': 722, '\u016f': 556, '\u0170': 722, '\u0171': 556, '\u0172': 722, '\u0173': 556, '\u0178': 667, '\u0179': 611, '\u017a': 500, '\u017b': 611, '\u017c': 500, '\u017d': 611, '\u017e': 500, '\u0192': 556, '\u0218': 667, '\u0219': 500, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 556, '\u2014': 1000, '\u2018': 222, '\u2019': 222, '\u201a': 222, '\u201c': 333, '\u201d': 333, '\u201e': 333, '\u2020': 556, '\u2021': 556, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 1000, '\u2202': 476, '\u2206': 612, '\u2211': 600, '\u2212': 584, '\u221a': 453, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 471, 
'\uf6c3': 250, '\ufb01': 500, '\ufb02': 500}), - 'Helvetica-Bold': ({'FontName': 'Helvetica-Bold', 'Descent': -207.0, 'FontBBox': (-170.0, -228.0, 1003.0, 962.0), 'FontWeight': 'Bold', 'CapHeight': 718.0, 'FontFamily': 'Helvetica', 'Flags': 0, 'XHeight': 532.0, 'ItalicAngle': 0.0, 'Ascent': 718.0}, {' ': 278, '!': 333, '"': 474, '#': 556, '$': 556, '%': 889, '&': 722, "'": 238, '(': 333, ')': 333, '*': 389, '+': 584, ',': 278, '-': 333, '.': 278, '/': 278, '0': 556, '1': 556, '2': 556, '3': 556, '4': 556, '5': 556, '6': 556, '7': 556, '8': 556, '9': 556, ':': 333, ';': 333, '<': 584, '=': 584, '>': 584, '?': 611, '@': 975, 'A': 722, 'B': 722, 'C': 722, 'D': 722, 'E': 667, 'F': 611, 'G': 778, 'H': 722, 'I': 278, 'J': 556, 'K': 722, 'L': 611, 'M': 833, 'N': 722, 'O': 778, 'P': 667, 'Q': 778, 'R': 722, 'S': 667, 'T': 611, 'U': 722, 'V': 667, 'W': 944, 'X': 667, 'Y': 667, 'Z': 611, '[': 333, '\\': 278, ']': 333, '^': 584, '_': 556, '`': 333, 'a': 556, 'b': 611, 'c': 556, 'd': 611, 'e': 556, 'f': 333, 'g': 611, 'h': 611, 'i': 278, 'j': 278, 'k': 556, 'l': 278, 'm': 889, 'n': 611, 'o': 611, 'p': 611, 'q': 611, 'r': 389, 's': 556, 't': 333, 'u': 611, 'v': 556, 'w': 778, 'x': 556, 'y': 556, 'z': 500, '{': 389, '|': 280, '}': 389, '~': 584, '\xa1': 333, '\xa2': 556, '\xa3': 556, '\xa4': 556, '\xa5': 556, '\xa6': 280, '\xa7': 556, '\xa8': 333, '\xa9': 737, '\xaa': 370, '\xab': 556, '\xac': 584, '\xae': 737, '\xaf': 333, '\xb0': 400, '\xb1': 584, '\xb2': 333, '\xb3': 333, '\xb4': 333, '\xb5': 611, '\xb6': 556, '\xb7': 278, '\xb8': 333, '\xb9': 333, '\xba': 365, '\xbb': 556, '\xbc': 834, '\xbd': 834, '\xbe': 834, '\xbf': 611, '\xc0': 722, '\xc1': 722, '\xc2': 722, '\xc3': 722, '\xc4': 722, '\xc5': 722, '\xc6': 1000, '\xc7': 722, '\xc8': 667, '\xc9': 667, '\xca': 667, '\xcb': 667, '\xcc': 278, '\xcd': 278, '\xce': 278, '\xcf': 278, '\xd0': 722, '\xd1': 722, '\xd2': 778, '\xd3': 778, '\xd4': 778, '\xd5': 778, '\xd6': 778, '\xd7': 584, '\xd8': 778, '\xd9': 722, '\xda': 722, 
'\xdb': 722, '\xdc': 722, '\xdd': 667, '\xde': 667, '\xdf': 611, '\xe0': 556, '\xe1': 556, '\xe2': 556, '\xe3': 556, '\xe4': 556, '\xe5': 556, '\xe6': 889, '\xe7': 556, '\xe8': 556, '\xe9': 556, '\xea': 556, '\xeb': 556, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 611, '\xf1': 611, '\xf2': 611, '\xf3': 611, '\xf4': 611, '\xf5': 611, '\xf6': 611, '\xf7': 584, '\xf8': 611, '\xf9': 611, '\xfa': 611, '\xfb': 611, '\xfc': 611, '\xfd': 556, '\xfe': 611, '\xff': 556, '\u0100': 722, '\u0101': 556, '\u0102': 722, '\u0103': 556, '\u0104': 722, '\u0105': 556, '\u0106': 722, '\u0107': 556, '\u010c': 722, '\u010d': 556, '\u010e': 722, '\u010f': 743, '\u0110': 722, '\u0111': 611, '\u0112': 667, '\u0113': 556, '\u0116': 667, '\u0117': 556, '\u0118': 667, '\u0119': 556, '\u011a': 667, '\u011b': 556, '\u011e': 778, '\u011f': 611, '\u0122': 778, '\u0123': 611, '\u012a': 278, '\u012b': 278, '\u012e': 278, '\u012f': 278, '\u0130': 278, '\u0131': 278, '\u0136': 722, '\u0137': 556, '\u0139': 611, '\u013a': 278, '\u013b': 611, '\u013c': 278, '\u013d': 611, '\u013e': 400, '\u0141': 611, '\u0142': 278, '\u0143': 722, '\u0144': 611, '\u0145': 722, '\u0146': 611, '\u0147': 722, '\u0148': 611, '\u014c': 778, '\u014d': 611, '\u0150': 778, '\u0151': 611, '\u0152': 1000, '\u0153': 944, '\u0154': 722, '\u0155': 389, '\u0156': 722, '\u0157': 389, '\u0158': 722, '\u0159': 389, '\u015a': 667, '\u015b': 556, '\u015e': 667, '\u015f': 556, '\u0160': 667, '\u0161': 556, '\u0162': 611, '\u0163': 333, '\u0164': 611, '\u0165': 389, '\u016a': 722, '\u016b': 611, '\u016e': 722, '\u016f': 611, '\u0170': 722, '\u0171': 611, '\u0172': 722, '\u0173': 611, '\u0178': 667, '\u0179': 611, '\u017a': 500, '\u017b': 611, '\u017c': 500, '\u017d': 611, '\u017e': 500, '\u0192': 556, '\u0218': 667, '\u0219': 556, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 556, '\u2014': 1000, '\u2018': 278, '\u2019': 278, '\u201a': 278, 
'\u201c': 500, '\u201d': 500, '\u201e': 500, '\u2020': 556, '\u2021': 556, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 1000, '\u2202': 494, '\u2206': 612, '\u2211': 600, '\u2212': 584, '\u221a': 549, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 494, '\uf6c3': 250, '\ufb01': 611, '\ufb02': 611}), - 'Helvetica-BoldOblique': ({'FontName': 'Helvetica-BoldOblique', 'Descent': -207.0, 'FontBBox': (-175.0, -228.0, 1114.0, 962.0), 'FontWeight': 'Bold', 'CapHeight': 718.0, 'FontFamily': 'Helvetica', 'Flags': 0, 'XHeight': 532.0, 'ItalicAngle': -12.0, 'Ascent': 718.0}, {' ': 278, '!': 333, '"': 474, '#': 556, '$': 556, '%': 889, '&': 722, "'": 238, '(': 333, ')': 333, '*': 389, '+': 584, ',': 278, '-': 333, '.': 278, '/': 278, '0': 556, '1': 556, '2': 556, '3': 556, '4': 556, '5': 556, '6': 556, '7': 556, '8': 556, '9': 556, ':': 333, ';': 333, '<': 584, '=': 584, '>': 584, '?': 611, '@': 975, 'A': 722, 'B': 722, 'C': 722, 'D': 722, 'E': 667, 'F': 611, 'G': 778, 'H': 722, 'I': 278, 'J': 556, 'K': 722, 'L': 611, 'M': 833, 'N': 722, 'O': 778, 'P': 667, 'Q': 778, 'R': 722, 'S': 667, 'T': 611, 'U': 722, 'V': 667, 'W': 944, 'X': 667, 'Y': 667, 'Z': 611, '[': 333, '\\': 278, ']': 333, '^': 584, '_': 556, '`': 333, 'a': 556, 'b': 611, 'c': 556, 'd': 611, 'e': 556, 'f': 333, 'g': 611, 'h': 611, 'i': 278, 'j': 278, 'k': 556, 'l': 278, 'm': 889, 'n': 611, 'o': 611, 'p': 611, 'q': 611, 'r': 389, 's': 556, 't': 333, 'u': 611, 'v': 556, 'w': 778, 'x': 556, 'y': 556, 'z': 500, '{': 389, '|': 280, '}': 389, '~': 584, '\xa1': 333, '\xa2': 556, '\xa3': 556, '\xa4': 556, '\xa5': 556, '\xa6': 280, '\xa7': 556, '\xa8': 333, '\xa9': 737, '\xaa': 370, '\xab': 556, '\xac': 584, '\xae': 737, '\xaf': 333, '\xb0': 400, '\xb1': 584, '\xb2': 333, '\xb3': 333, '\xb4': 333, '\xb5': 611, '\xb6': 556, '\xb7': 278, '\xb8': 333, '\xb9': 333, '\xba': 365, '\xbb': 556, '\xbc': 834, '\xbd': 834, '\xbe': 834, '\xbf': 611, '\xc0': 722, '\xc1': 
722, '\xc2': 722, '\xc3': 722, '\xc4': 722, '\xc5': 722, '\xc6': 1000, '\xc7': 722, '\xc8': 667, '\xc9': 667, '\xca': 667, '\xcb': 667, '\xcc': 278, '\xcd': 278, '\xce': 278, '\xcf': 278, '\xd0': 722, '\xd1': 722, '\xd2': 778, '\xd3': 778, '\xd4': 778, '\xd5': 778, '\xd6': 778, '\xd7': 584, '\xd8': 778, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 667, '\xde': 667, '\xdf': 611, '\xe0': 556, '\xe1': 556, '\xe2': 556, '\xe3': 556, '\xe4': 556, '\xe5': 556, '\xe6': 889, '\xe7': 556, '\xe8': 556, '\xe9': 556, '\xea': 556, '\xeb': 556, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 611, '\xf1': 611, '\xf2': 611, '\xf3': 611, '\xf4': 611, '\xf5': 611, '\xf6': 611, '\xf7': 584, '\xf8': 611, '\xf9': 611, '\xfa': 611, '\xfb': 611, '\xfc': 611, '\xfd': 556, '\xfe': 611, '\xff': 556, '\u0100': 722, '\u0101': 556, '\u0102': 722, '\u0103': 556, '\u0104': 722, '\u0105': 556, '\u0106': 722, '\u0107': 556, '\u010c': 722, '\u010d': 556, '\u010e': 722, '\u010f': 743, '\u0110': 722, '\u0111': 611, '\u0112': 667, '\u0113': 556, '\u0116': 667, '\u0117': 556, '\u0118': 667, '\u0119': 556, '\u011a': 667, '\u011b': 556, '\u011e': 778, '\u011f': 611, '\u0122': 778, '\u0123': 611, '\u012a': 278, '\u012b': 278, '\u012e': 278, '\u012f': 278, '\u0130': 278, '\u0131': 278, '\u0136': 722, '\u0137': 556, '\u0139': 611, '\u013a': 278, '\u013b': 611, '\u013c': 278, '\u013d': 611, '\u013e': 400, '\u0141': 611, '\u0142': 278, '\u0143': 722, '\u0144': 611, '\u0145': 722, '\u0146': 611, '\u0147': 722, '\u0148': 611, '\u014c': 778, '\u014d': 611, '\u0150': 778, '\u0151': 611, '\u0152': 1000, '\u0153': 944, '\u0154': 722, '\u0155': 389, '\u0156': 722, '\u0157': 389, '\u0158': 722, '\u0159': 389, '\u015a': 667, '\u015b': 556, '\u015e': 667, '\u015f': 556, '\u0160': 667, '\u0161': 556, '\u0162': 611, '\u0163': 333, '\u0164': 611, '\u0165': 389, '\u016a': 722, '\u016b': 611, '\u016e': 722, '\u016f': 611, '\u0170': 722, '\u0171': 611, '\u0172': 722, '\u0173': 611, '\u0178': 667, 
'\u0179': 611, '\u017a': 500, '\u017b': 611, '\u017c': 500, '\u017d': 611, '\u017e': 500, '\u0192': 556, '\u0218': 667, '\u0219': 556, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 556, '\u2014': 1000, '\u2018': 278, '\u2019': 278, '\u201a': 278, '\u201c': 500, '\u201d': 500, '\u201e': 500, '\u2020': 556, '\u2021': 556, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 1000, '\u2202': 494, '\u2206': 612, '\u2211': 600, '\u2212': 584, '\u221a': 549, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 494, '\uf6c3': 250, '\ufb01': 611, '\ufb02': 611}), - 'Helvetica-Oblique': ({'FontName': 'Helvetica-Oblique', 'Descent': -207.0, 'FontBBox': (-171.0, -225.0, 1116.0, 931.0), 'FontWeight': 'Medium', 'CapHeight': 718.0, 'FontFamily': 'Helvetica', 'Flags': 0, 'XHeight': 523.0, 'ItalicAngle': -12.0, 'Ascent': 718.0}, {' ': 278, '!': 278, '"': 355, '#': 556, '$': 556, '%': 889, '&': 667, "'": 191, '(': 333, ')': 333, '*': 389, '+': 584, ',': 278, '-': 333, '.': 278, '/': 278, '0': 556, '1': 556, '2': 556, '3': 556, '4': 556, '5': 556, '6': 556, '7': 556, '8': 556, '9': 556, ':': 278, ';': 278, '<': 584, '=': 584, '>': 584, '?': 556, '@': 1015, 'A': 667, 'B': 667, 'C': 722, 'D': 722, 'E': 667, 'F': 611, 'G': 778, 'H': 722, 'I': 278, 'J': 500, 'K': 667, 'L': 556, 'M': 833, 'N': 722, 'O': 778, 'P': 667, 'Q': 778, 'R': 722, 'S': 667, 'T': 611, 'U': 722, 'V': 667, 'W': 944, 'X': 667, 'Y': 667, 'Z': 611, '[': 278, '\\': 278, ']': 278, '^': 469, '_': 556, '`': 333, 'a': 556, 'b': 556, 'c': 500, 'd': 556, 'e': 556, 'f': 278, 'g': 556, 'h': 556, 'i': 222, 'j': 222, 'k': 500, 'l': 222, 'm': 833, 'n': 556, 'o': 556, 'p': 556, 'q': 556, 'r': 333, 's': 500, 't': 278, 'u': 556, 'v': 500, 'w': 722, 'x': 500, 'y': 500, 'z': 500, '{': 334, '|': 260, '}': 334, '~': 584, '\xa1': 333, '\xa2': 556, '\xa3': 556, '\xa4': 556, '\xa5': 556, '\xa6': 260, '\xa7': 
556, '\xa8': 333, '\xa9': 737, '\xaa': 370, '\xab': 556, '\xac': 584, '\xae': 737, '\xaf': 333, '\xb0': 400, '\xb1': 584, '\xb2': 333, '\xb3': 333, '\xb4': 333, '\xb5': 556, '\xb6': 537, '\xb7': 278, '\xb8': 333, '\xb9': 333, '\xba': 365, '\xbb': 556, '\xbc': 834, '\xbd': 834, '\xbe': 834, '\xbf': 611, '\xc0': 667, '\xc1': 667, '\xc2': 667, '\xc3': 667, '\xc4': 667, '\xc5': 667, '\xc6': 1000, '\xc7': 722, '\xc8': 667, '\xc9': 667, '\xca': 667, '\xcb': 667, '\xcc': 278, '\xcd': 278, '\xce': 278, '\xcf': 278, '\xd0': 722, '\xd1': 722, '\xd2': 778, '\xd3': 778, '\xd4': 778, '\xd5': 778, '\xd6': 778, '\xd7': 584, '\xd8': 778, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 667, '\xde': 667, '\xdf': 611, '\xe0': 556, '\xe1': 556, '\xe2': 556, '\xe3': 556, '\xe4': 556, '\xe5': 556, '\xe6': 889, '\xe7': 500, '\xe8': 556, '\xe9': 556, '\xea': 556, '\xeb': 556, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 556, '\xf1': 556, '\xf2': 556, '\xf3': 556, '\xf4': 556, '\xf5': 556, '\xf6': 556, '\xf7': 584, '\xf8': 611, '\xf9': 556, '\xfa': 556, '\xfb': 556, '\xfc': 556, '\xfd': 500, '\xfe': 556, '\xff': 500, '\u0100': 667, '\u0101': 556, '\u0102': 667, '\u0103': 556, '\u0104': 667, '\u0105': 556, '\u0106': 722, '\u0107': 500, '\u010c': 722, '\u010d': 500, '\u010e': 722, '\u010f': 643, '\u0110': 722, '\u0111': 556, '\u0112': 667, '\u0113': 556, '\u0116': 667, '\u0117': 556, '\u0118': 667, '\u0119': 556, '\u011a': 667, '\u011b': 556, '\u011e': 778, '\u011f': 556, '\u0122': 778, '\u0123': 556, '\u012a': 278, '\u012b': 278, '\u012e': 278, '\u012f': 222, '\u0130': 278, '\u0131': 278, '\u0136': 667, '\u0137': 500, '\u0139': 556, '\u013a': 222, '\u013b': 556, '\u013c': 222, '\u013d': 556, '\u013e': 299, '\u0141': 556, '\u0142': 222, '\u0143': 722, '\u0144': 556, '\u0145': 722, '\u0146': 556, '\u0147': 722, '\u0148': 556, '\u014c': 778, '\u014d': 556, '\u0150': 778, '\u0151': 556, '\u0152': 1000, '\u0153': 944, '\u0154': 722, '\u0155': 333, '\u0156': 722, 
'\u0157': 333, '\u0158': 722, '\u0159': 333, '\u015a': 667, '\u015b': 500, '\u015e': 667, '\u015f': 500, '\u0160': 667, '\u0161': 500, '\u0162': 611, '\u0163': 278, '\u0164': 611, '\u0165': 317, '\u016a': 722, '\u016b': 556, '\u016e': 722, '\u016f': 556, '\u0170': 722, '\u0171': 556, '\u0172': 722, '\u0173': 556, '\u0178': 667, '\u0179': 611, '\u017a': 500, '\u017b': 611, '\u017c': 500, '\u017d': 611, '\u017e': 500, '\u0192': 556, '\u0218': 667, '\u0219': 500, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 556, '\u2014': 1000, '\u2018': 222, '\u2019': 222, '\u201a': 222, '\u201c': 333, '\u201d': 333, '\u201e': 333, '\u2020': 556, '\u2021': 556, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 1000, '\u2202': 476, '\u2206': 612, '\u2211': 600, '\u2212': 584, '\u221a': 453, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 471, '\uf6c3': 250, '\ufb01': 500, '\ufb02': 500}), - 'Symbol': ({'FontName': 'Symbol', 'FontBBox': (-180.0, -293.0, 1090.0, 1010.0), 'FontWeight': 'Medium', 'FontFamily': 'Symbol', 'Flags': 0, 'ItalicAngle': 0.0}, {' ': 250, '!': 333, '#': 500, '%': 833, '&': 778, '(': 333, ')': 333, '+': 549, ',': 250, '.': 250, '/': 278, '0': 500, '1': 500, '2': 500, '3': 500, '4': 500, '5': 500, '6': 500, '7': 500, '8': 500, '9': 500, ':': 278, ';': 278, '<': 549, '=': 549, '>': 549, '?': 444, '[': 333, ']': 333, '_': 500, '{': 480, '|': 200, '}': 480, '\xac': 713, '\xb0': 400, '\xb1': 549, '\xb5': 576, '\xd7': 549, '\xf7': 549, '\u0192': 500, '\u0391': 722, '\u0392': 667, '\u0393': 603, '\u0395': 611, '\u0396': 611, '\u0397': 722, '\u0398': 741, '\u0399': 333, '\u039a': 722, '\u039b': 686, '\u039c': 889, '\u039d': 722, '\u039e': 645, '\u039f': 722, '\u03a0': 768, '\u03a1': 556, '\u03a3': 592, '\u03a4': 611, '\u03a5': 690, '\u03a6': 763, '\u03a7': 722, '\u03a8': 795, '\u03b1': 631, '\u03b2': 549, '\u03b3': 411, '\u03b4': 
494, '\u03b5': 439, '\u03b6': 494, '\u03b7': 603, '\u03b8': 521, '\u03b9': 329, '\u03ba': 549, '\u03bb': 549, '\u03bd': 521, '\u03be': 493, '\u03bf': 549, '\u03c0': 549, '\u03c1': 549, '\u03c2': 439, '\u03c3': 603, '\u03c4': 439, '\u03c5': 576, '\u03c6': 521, '\u03c7': 549, '\u03c8': 686, '\u03c9': 686, '\u03d1': 631, '\u03d2': 620, '\u03d5': 603, '\u03d6': 713, '\u2022': 460, '\u2026': 1000, '\u2032': 247, '\u2033': 411, '\u2044': 167, '\u20ac': 750, '\u2111': 686, '\u2118': 987, '\u211c': 795, '\u2126': 768, '\u2135': 823, '\u2190': 987, '\u2191': 603, '\u2192': 987, '\u2193': 603, '\u2194': 1042, '\u21b5': 658, '\u21d0': 987, '\u21d1': 603, '\u21d2': 987, '\u21d3': 603, '\u21d4': 1042, '\u2200': 713, '\u2202': 494, '\u2203': 549, '\u2205': 823, '\u2206': 612, '\u2207': 713, '\u2208': 713, '\u2209': 713, '\u220b': 439, '\u220f': 823, '\u2211': 713, '\u2212': 549, '\u2217': 500, '\u221a': 549, '\u221d': 713, '\u221e': 713, '\u2220': 768, '\u2227': 603, '\u2228': 603, '\u2229': 768, '\u222a': 768, '\u222b': 274, '\u2234': 863, '\u223c': 549, '\u2245': 549, '\u2248': 549, '\u2260': 549, '\u2261': 549, '\u2264': 549, '\u2265': 549, '\u2282': 713, '\u2283': 713, '\u2284': 713, '\u2286': 713, '\u2287': 713, '\u2295': 768, '\u2297': 768, '\u22a5': 658, '\u22c5': 250, '\u2320': 686, '\u2321': 686, '\u2329': 329, '\u232a': 329, '\u25ca': 494, '\u2660': 753, '\u2663': 753, '\u2665': 753, '\u2666': 753, '\uf6d9': 790, '\uf6da': 790, '\uf6db': 890, '\uf8e5': 500, '\uf8e6': 603, '\uf8e7': 1000, '\uf8e8': 790, '\uf8e9': 790, '\uf8ea': 786, '\uf8eb': 384, '\uf8ec': 384, '\uf8ed': 384, '\uf8ee': 384, '\uf8ef': 384, '\uf8f0': 384, '\uf8f1': 494, '\uf8f2': 494, '\uf8f3': 494, '\uf8f4': 494, '\uf8f5': 686, '\uf8f6': 384, '\uf8f7': 384, '\uf8f8': 384, '\uf8f9': 384, '\uf8fa': 384, '\uf8fb': 384, '\uf8fc': 494, '\uf8fd': 494, '\uf8fe': 494, '\uf8ff': 790}), - 'Times-Bold': ({'FontName': 'Times-Bold', 'Descent': -217.0, 'FontBBox': (-168.0, -218.0, 1000.0, 935.0), 'FontWeight': 
'Bold', 'CapHeight': 676.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 461.0, 'ItalicAngle': 0.0, 'Ascent': 683.0}, {' ': 250, '!': 333, '"': 555, '#': 500, '$': 500, '%': 1000, '&': 833, "'": 278, '(': 333, ')': 333, '*': 500, '+': 570, ',': 250, '-': 333, '.': 250, '/': 278, '0': 500, '1': 500, '2': 500, '3': 500, '4': 500, '5': 500, '6': 500, '7': 500, '8': 500, '9': 500, ':': 333, ';': 333, '<': 570, '=': 570, '>': 570, '?': 500, '@': 930, 'A': 722, 'B': 667, 'C': 722, 'D': 722, 'E': 667, 'F': 611, 'G': 778, 'H': 778, 'I': 389, 'J': 500, 'K': 778, 'L': 667, 'M': 944, 'N': 722, 'O': 778, 'P': 611, 'Q': 778, 'R': 722, 'S': 556, 'T': 667, 'U': 722, 'V': 722, 'W': 1000, 'X': 722, 'Y': 722, 'Z': 667, '[': 333, '\\': 278, ']': 333, '^': 581, '_': 500, '`': 333, 'a': 500, 'b': 556, 'c': 444, 'd': 556, 'e': 444, 'f': 333, 'g': 500, 'h': 556, 'i': 278, 'j': 333, 'k': 556, 'l': 278, 'm': 833, 'n': 556, 'o': 500, 'p': 556, 'q': 556, 'r': 444, 's': 389, 't': 333, 'u': 556, 'v': 500, 'w': 722, 'x': 500, 'y': 500, 'z': 444, '{': 394, '|': 220, '}': 394, '~': 520, '\xa1': 333, '\xa2': 500, '\xa3': 500, '\xa4': 500, '\xa5': 500, '\xa6': 220, '\xa7': 500, '\xa8': 333, '\xa9': 747, '\xaa': 300, '\xab': 500, '\xac': 570, '\xae': 747, '\xaf': 333, '\xb0': 400, '\xb1': 570, '\xb2': 300, '\xb3': 300, '\xb4': 333, '\xb5': 556, '\xb6': 540, '\xb7': 250, '\xb8': 333, '\xb9': 300, '\xba': 330, '\xbb': 500, '\xbc': 750, '\xbd': 750, '\xbe': 750, '\xbf': 500, '\xc0': 722, '\xc1': 722, '\xc2': 722, '\xc3': 722, '\xc4': 722, '\xc5': 722, '\xc6': 1000, '\xc7': 722, '\xc8': 667, '\xc9': 667, '\xca': 667, '\xcb': 667, '\xcc': 389, '\xcd': 389, '\xce': 389, '\xcf': 389, '\xd0': 722, '\xd1': 722, '\xd2': 778, '\xd3': 778, '\xd4': 778, '\xd5': 778, '\xd6': 778, '\xd7': 570, '\xd8': 778, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 722, '\xde': 611, '\xdf': 556, '\xe0': 500, '\xe1': 500, '\xe2': 500, '\xe3': 500, '\xe4': 500, '\xe5': 500, '\xe6': 722, '\xe7': 444, '\xe8': 444, 
'\xe9': 444, '\xea': 444, '\xeb': 444, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 500, '\xf1': 556, '\xf2': 500, '\xf3': 500, '\xf4': 500, '\xf5': 500, '\xf6': 500, '\xf7': 570, '\xf8': 500, '\xf9': 556, '\xfa': 556, '\xfb': 556, '\xfc': 556, '\xfd': 500, '\xfe': 556, '\xff': 500, '\u0100': 722, '\u0101': 500, '\u0102': 722, '\u0103': 500, '\u0104': 722, '\u0105': 500, '\u0106': 722, '\u0107': 444, '\u010c': 722, '\u010d': 444, '\u010e': 722, '\u010f': 672, '\u0110': 722, '\u0111': 556, '\u0112': 667, '\u0113': 444, '\u0116': 667, '\u0117': 444, '\u0118': 667, '\u0119': 444, '\u011a': 667, '\u011b': 444, '\u011e': 778, '\u011f': 500, '\u0122': 778, '\u0123': 500, '\u012a': 389, '\u012b': 278, '\u012e': 389, '\u012f': 278, '\u0130': 389, '\u0131': 278, '\u0136': 778, '\u0137': 556, '\u0139': 667, '\u013a': 278, '\u013b': 667, '\u013c': 278, '\u013d': 667, '\u013e': 394, '\u0141': 667, '\u0142': 278, '\u0143': 722, '\u0144': 556, '\u0145': 722, '\u0146': 556, '\u0147': 722, '\u0148': 556, '\u014c': 778, '\u014d': 500, '\u0150': 778, '\u0151': 500, '\u0152': 1000, '\u0153': 722, '\u0154': 722, '\u0155': 444, '\u0156': 722, '\u0157': 444, '\u0158': 722, '\u0159': 444, '\u015a': 556, '\u015b': 389, '\u015e': 556, '\u015f': 389, '\u0160': 556, '\u0161': 389, '\u0162': 667, '\u0163': 333, '\u0164': 667, '\u0165': 416, '\u016a': 722, '\u016b': 556, '\u016e': 722, '\u016f': 556, '\u0170': 722, '\u0171': 556, '\u0172': 722, '\u0173': 556, '\u0178': 722, '\u0179': 667, '\u017a': 444, '\u017b': 667, '\u017c': 444, '\u017d': 667, '\u017e': 444, '\u0192': 500, '\u0218': 556, '\u0219': 389, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 500, '\u2014': 1000, '\u2018': 333, '\u2019': 333, '\u201a': 333, '\u201c': 500, '\u201d': 500, '\u201e': 500, '\u2020': 500, '\u2021': 500, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 1000, 
'\u2202': 494, '\u2206': 612, '\u2211': 600, '\u2212': 570, '\u221a': 549, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 494, '\uf6c3': 250, '\ufb01': 556, '\ufb02': 556}), - 'Times-BoldItalic': ({'FontName': 'Times-BoldItalic', 'Descent': -217.0, 'FontBBox': (-200.0, -218.0, 996.0, 921.0), 'FontWeight': 'Bold', 'CapHeight': 669.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 462.0, 'ItalicAngle': -15.0, 'Ascent': 683.0}, {' ': 250, '!': 389, '"': 555, '#': 500, '$': 500, '%': 833, '&': 778, "'": 278, '(': 333, ')': 333, '*': 500, '+': 570, ',': 250, '-': 333, '.': 250, '/': 278, '0': 500, '1': 500, '2': 500, '3': 500, '4': 500, '5': 500, '6': 500, '7': 500, '8': 500, '9': 500, ':': 333, ';': 333, '<': 570, '=': 570, '>': 570, '?': 500, '@': 832, 'A': 667, 'B': 667, 'C': 667, 'D': 722, 'E': 667, 'F': 667, 'G': 722, 'H': 778, 'I': 389, 'J': 500, 'K': 667, 'L': 611, 'M': 889, 'N': 722, 'O': 722, 'P': 611, 'Q': 722, 'R': 667, 'S': 556, 'T': 611, 'U': 722, 'V': 667, 'W': 889, 'X': 667, 'Y': 611, 'Z': 611, '[': 333, '\\': 278, ']': 333, '^': 570, '_': 500, '`': 333, 'a': 500, 'b': 500, 'c': 444, 'd': 500, 'e': 444, 'f': 333, 'g': 500, 'h': 556, 'i': 278, 'j': 278, 'k': 500, 'l': 278, 'm': 778, 'n': 556, 'o': 500, 'p': 500, 'q': 500, 'r': 389, 's': 389, 't': 278, 'u': 556, 'v': 444, 'w': 667, 'x': 500, 'y': 444, 'z': 389, '{': 348, '|': 220, '}': 348, '~': 570, '\xa1': 389, '\xa2': 500, '\xa3': 500, '\xa4': 500, '\xa5': 500, '\xa6': 220, '\xa7': 500, '\xa8': 333, '\xa9': 747, '\xaa': 266, '\xab': 500, '\xac': 606, '\xae': 747, '\xaf': 333, '\xb0': 400, '\xb1': 570, '\xb2': 300, '\xb3': 300, '\xb4': 333, '\xb5': 576, '\xb6': 500, '\xb7': 250, '\xb8': 333, '\xb9': 300, '\xba': 300, '\xbb': 500, '\xbc': 750, '\xbd': 750, '\xbe': 750, '\xbf': 500, '\xc0': 667, '\xc1': 667, '\xc2': 667, '\xc3': 667, '\xc4': 667, '\xc5': 667, '\xc6': 944, '\xc7': 667, '\xc8': 667, '\xc9': 667, '\xca': 667, '\xcb': 667, '\xcc': 389, '\xcd': 389, '\xce': 389, '\xcf': 389, '\xd0': 722, 
'\xd1': 722, '\xd2': 722, '\xd3': 722, '\xd4': 722, '\xd5': 722, '\xd6': 722, '\xd7': 570, '\xd8': 722, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 611, '\xde': 611, '\xdf': 500, '\xe0': 500, '\xe1': 500, '\xe2': 500, '\xe3': 500, '\xe4': 500, '\xe5': 500, '\xe6': 722, '\xe7': 444, '\xe8': 444, '\xe9': 444, '\xea': 444, '\xeb': 444, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 500, '\xf1': 556, '\xf2': 500, '\xf3': 500, '\xf4': 500, '\xf5': 500, '\xf6': 500, '\xf7': 570, '\xf8': 500, '\xf9': 556, '\xfa': 556, '\xfb': 556, '\xfc': 556, '\xfd': 444, '\xfe': 500, '\xff': 444, '\u0100': 667, '\u0101': 500, '\u0102': 667, '\u0103': 500, '\u0104': 667, '\u0105': 500, '\u0106': 667, '\u0107': 444, '\u010c': 667, '\u010d': 444, '\u010e': 722, '\u010f': 608, '\u0110': 722, '\u0111': 500, '\u0112': 667, '\u0113': 444, '\u0116': 667, '\u0117': 444, '\u0118': 667, '\u0119': 444, '\u011a': 667, '\u011b': 444, '\u011e': 722, '\u011f': 500, '\u0122': 722, '\u0123': 500, '\u012a': 389, '\u012b': 278, '\u012e': 389, '\u012f': 278, '\u0130': 389, '\u0131': 278, '\u0136': 667, '\u0137': 500, '\u0139': 611, '\u013a': 278, '\u013b': 611, '\u013c': 278, '\u013d': 611, '\u013e': 382, '\u0141': 611, '\u0142': 278, '\u0143': 722, '\u0144': 556, '\u0145': 722, '\u0146': 556, '\u0147': 722, '\u0148': 556, '\u014c': 722, '\u014d': 500, '\u0150': 722, '\u0151': 500, '\u0152': 944, '\u0153': 722, '\u0154': 667, '\u0155': 389, '\u0156': 667, '\u0157': 389, '\u0158': 667, '\u0159': 389, '\u015a': 556, '\u015b': 389, '\u015e': 556, '\u015f': 389, '\u0160': 556, '\u0161': 389, '\u0162': 611, '\u0163': 278, '\u0164': 611, '\u0165': 366, '\u016a': 722, '\u016b': 556, '\u016e': 722, '\u016f': 556, '\u0170': 722, '\u0171': 556, '\u0172': 722, '\u0173': 556, '\u0178': 611, '\u0179': 611, '\u017a': 389, '\u017b': 611, '\u017c': 389, '\u017d': 611, '\u017e': 389, '\u0192': 500, '\u0218': 556, '\u0219': 389, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, 
'\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 500, '\u2014': 1000, '\u2018': 333, '\u2019': 333, '\u201a': 333, '\u201c': 500, '\u201d': 500, '\u201e': 500, '\u2020': 500, '\u2021': 500, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 1000, '\u2202': 494, '\u2206': 612, '\u2211': 600, '\u2212': 606, '\u221a': 549, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 494, '\uf6c3': 250, '\ufb01': 556, '\ufb02': 556}), - 'Times-Italic': ({'FontName': 'Times-Italic', 'Descent': -217.0, 'FontBBox': (-169.0, -217.0, 1010.0, 883.0), 'FontWeight': 'Medium', 'CapHeight': 653.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 441.0, 'ItalicAngle': -15.5, 'Ascent': 683.0}, {' ': 250, '!': 333, '"': 420, '#': 500, '$': 500, '%': 833, '&': 778, "'": 214, '(': 333, ')': 333, '*': 500, '+': 675, ',': 250, '-': 333, '.': 250, '/': 278, '0': 500, '1': 500, '2': 500, '3': 500, '4': 500, '5': 500, '6': 500, '7': 500, '8': 500, '9': 500, ':': 333, ';': 333, '<': 675, '=': 675, '>': 675, '?': 500, '@': 920, 'A': 611, 'B': 611, 'C': 667, 'D': 722, 'E': 611, 'F': 611, 'G': 722, 'H': 722, 'I': 333, 'J': 444, 'K': 667, 'L': 556, 'M': 833, 'N': 667, 'O': 722, 'P': 611, 'Q': 722, 'R': 611, 'S': 500, 'T': 556, 'U': 722, 'V': 611, 'W': 833, 'X': 611, 'Y': 556, 'Z': 556, '[': 389, '\\': 278, ']': 389, '^': 422, '_': 500, '`': 333, 'a': 500, 'b': 500, 'c': 444, 'd': 500, 'e': 444, 'f': 278, 'g': 500, 'h': 500, 'i': 278, 'j': 278, 'k': 444, 'l': 278, 'm': 722, 'n': 500, 'o': 500, 'p': 500, 'q': 500, 'r': 389, 's': 389, 't': 278, 'u': 500, 'v': 444, 'w': 667, 'x': 444, 'y': 444, 'z': 389, '{': 400, '|': 275, '}': 400, '~': 541, '\xa1': 389, '\xa2': 500, '\xa3': 500, '\xa4': 500, '\xa5': 500, '\xa6': 275, '\xa7': 500, '\xa8': 333, '\xa9': 760, '\xaa': 276, '\xab': 500, '\xac': 675, '\xae': 760, '\xaf': 333, '\xb0': 400, '\xb1': 675, '\xb2': 300, '\xb3': 300, '\xb4': 333, '\xb5': 500, '\xb6': 523, '\xb7': 250, '\xb8': 
333, '\xb9': 300, '\xba': 310, '\xbb': 500, '\xbc': 750, '\xbd': 750, '\xbe': 750, '\xbf': 500, '\xc0': 611, '\xc1': 611, '\xc2': 611, '\xc3': 611, '\xc4': 611, '\xc5': 611, '\xc6': 889, '\xc7': 667, '\xc8': 611, '\xc9': 611, '\xca': 611, '\xcb': 611, '\xcc': 333, '\xcd': 333, '\xce': 333, '\xcf': 333, '\xd0': 722, '\xd1': 667, '\xd2': 722, '\xd3': 722, '\xd4': 722, '\xd5': 722, '\xd6': 722, '\xd7': 675, '\xd8': 722, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 556, '\xde': 611, '\xdf': 500, '\xe0': 500, '\xe1': 500, '\xe2': 500, '\xe3': 500, '\xe4': 500, '\xe5': 500, '\xe6': 667, '\xe7': 444, '\xe8': 444, '\xe9': 444, '\xea': 444, '\xeb': 444, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 500, '\xf1': 500, '\xf2': 500, '\xf3': 500, '\xf4': 500, '\xf5': 500, '\xf6': 500, '\xf7': 675, '\xf8': 500, '\xf9': 500, '\xfa': 500, '\xfb': 500, '\xfc': 500, '\xfd': 444, '\xfe': 500, '\xff': 444, '\u0100': 611, '\u0101': 500, '\u0102': 611, '\u0103': 500, '\u0104': 611, '\u0105': 500, '\u0106': 667, '\u0107': 444, '\u010c': 667, '\u010d': 444, '\u010e': 722, '\u010f': 544, '\u0110': 722, '\u0111': 500, '\u0112': 611, '\u0113': 444, '\u0116': 611, '\u0117': 444, '\u0118': 611, '\u0119': 444, '\u011a': 611, '\u011b': 444, '\u011e': 722, '\u011f': 500, '\u0122': 722, '\u0123': 500, '\u012a': 333, '\u012b': 278, '\u012e': 333, '\u012f': 278, '\u0130': 333, '\u0131': 278, '\u0136': 667, '\u0137': 444, '\u0139': 556, '\u013a': 278, '\u013b': 556, '\u013c': 278, '\u013d': 611, '\u013e': 300, '\u0141': 556, '\u0142': 278, '\u0143': 667, '\u0144': 500, '\u0145': 667, '\u0146': 500, '\u0147': 667, '\u0148': 500, '\u014c': 722, '\u014d': 500, '\u0150': 722, '\u0151': 500, '\u0152': 944, '\u0153': 667, '\u0154': 611, '\u0155': 389, '\u0156': 611, '\u0157': 389, '\u0158': 611, '\u0159': 389, '\u015a': 500, '\u015b': 389, '\u015e': 500, '\u015f': 389, '\u0160': 500, '\u0161': 389, '\u0162': 556, '\u0163': 278, '\u0164': 556, '\u0165': 300, '\u016a': 722, 
'\u016b': 500, '\u016e': 722, '\u016f': 500, '\u0170': 722, '\u0171': 500, '\u0172': 722, '\u0173': 500, '\u0178': 556, '\u0179': 556, '\u017a': 389, '\u017b': 556, '\u017c': 389, '\u017d': 556, '\u017e': 389, '\u0192': 500, '\u0218': 500, '\u0219': 389, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 500, '\u2014': 889, '\u2018': 333, '\u2019': 333, '\u201a': 333, '\u201c': 556, '\u201d': 556, '\u201e': 556, '\u2020': 500, '\u2021': 500, '\u2022': 350, '\u2026': 889, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 980, '\u2202': 476, '\u2206': 612, '\u2211': 600, '\u2212': 675, '\u221a': 453, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 471, '\uf6c3': 250, '\ufb01': 500, '\ufb02': 500}), - 'Times-Roman': ({'FontName': 'Times-Roman', 'Descent': -217.0, 'FontBBox': (-168.0, -218.0, 1000.0, 898.0), 'FontWeight': 'Roman', 'CapHeight': 662.0, 'FontFamily': 'Times', 'Flags': 0, 'XHeight': 450.0, 'ItalicAngle': 0.0, 'Ascent': 683.0}, {' ': 250, '!': 333, '"': 408, '#': 500, '$': 500, '%': 833, '&': 778, "'": 180, '(': 333, ')': 333, '*': 500, '+': 564, ',': 250, '-': 333, '.': 250, '/': 278, '0': 500, '1': 500, '2': 500, '3': 500, '4': 500, '5': 500, '6': 500, '7': 500, '8': 500, '9': 500, ':': 278, ';': 278, '<': 564, '=': 564, '>': 564, '?': 444, '@': 921, 'A': 722, 'B': 667, 'C': 667, 'D': 722, 'E': 611, 'F': 556, 'G': 722, 'H': 722, 'I': 333, 'J': 389, 'K': 722, 'L': 611, 'M': 889, 'N': 722, 'O': 722, 'P': 556, 'Q': 722, 'R': 667, 'S': 556, 'T': 611, 'U': 722, 'V': 722, 'W': 944, 'X': 722, 'Y': 722, 'Z': 611, '[': 333, '\\': 278, ']': 333, '^': 469, '_': 500, '`': 333, 'a': 444, 'b': 500, 'c': 444, 'd': 500, 'e': 444, 'f': 333, 'g': 500, 'h': 500, 'i': 278, 'j': 278, 'k': 500, 'l': 278, 'm': 778, 'n': 500, 'o': 500, 'p': 500, 'q': 500, 'r': 333, 's': 389, 't': 278, 'u': 500, 'v': 500, 'w': 722, 'x': 500, 'y': 500, 'z': 444, '{': 480, '|': 200, '}': 480, 
'~': 541, '\xa1': 333, '\xa2': 500, '\xa3': 500, '\xa4': 500, '\xa5': 500, '\xa6': 200, '\xa7': 500, '\xa8': 333, '\xa9': 760, '\xaa': 276, '\xab': 500, '\xac': 564, '\xae': 760, '\xaf': 333, '\xb0': 400, '\xb1': 564, '\xb2': 300, '\xb3': 300, '\xb4': 333, '\xb5': 500, '\xb6': 453, '\xb7': 250, '\xb8': 333, '\xb9': 300, '\xba': 310, '\xbb': 500, '\xbc': 750, '\xbd': 750, '\xbe': 750, '\xbf': 444, '\xc0': 722, '\xc1': 722, '\xc2': 722, '\xc3': 722, '\xc4': 722, '\xc5': 722, '\xc6': 889, '\xc7': 667, '\xc8': 611, '\xc9': 611, '\xca': 611, '\xcb': 611, '\xcc': 333, '\xcd': 333, '\xce': 333, '\xcf': 333, '\xd0': 722, '\xd1': 722, '\xd2': 722, '\xd3': 722, '\xd4': 722, '\xd5': 722, '\xd6': 722, '\xd7': 564, '\xd8': 722, '\xd9': 722, '\xda': 722, '\xdb': 722, '\xdc': 722, '\xdd': 722, '\xde': 556, '\xdf': 500, '\xe0': 444, '\xe1': 444, '\xe2': 444, '\xe3': 444, '\xe4': 444, '\xe5': 444, '\xe6': 667, '\xe7': 444, '\xe8': 444, '\xe9': 444, '\xea': 444, '\xeb': 444, '\xec': 278, '\xed': 278, '\xee': 278, '\xef': 278, '\xf0': 500, '\xf1': 500, '\xf2': 500, '\xf3': 500, '\xf4': 500, '\xf5': 500, '\xf6': 500, '\xf7': 564, '\xf8': 500, '\xf9': 500, '\xfa': 500, '\xfb': 500, '\xfc': 500, '\xfd': 500, '\xfe': 500, '\xff': 500, '\u0100': 722, '\u0101': 444, '\u0102': 722, '\u0103': 444, '\u0104': 722, '\u0105': 444, '\u0106': 667, '\u0107': 444, '\u010c': 667, '\u010d': 444, '\u010e': 722, '\u010f': 588, '\u0110': 722, '\u0111': 500, '\u0112': 611, '\u0113': 444, '\u0116': 611, '\u0117': 444, '\u0118': 611, '\u0119': 444, '\u011a': 611, '\u011b': 444, '\u011e': 722, '\u011f': 500, '\u0122': 722, '\u0123': 500, '\u012a': 333, '\u012b': 278, '\u012e': 333, '\u012f': 278, '\u0130': 333, '\u0131': 278, '\u0136': 722, '\u0137': 500, '\u0139': 611, '\u013a': 278, '\u013b': 611, '\u013c': 278, '\u013d': 611, '\u013e': 344, '\u0141': 611, '\u0142': 278, '\u0143': 722, '\u0144': 500, '\u0145': 722, '\u0146': 500, '\u0147': 722, '\u0148': 500, '\u014c': 722, '\u014d': 500, '\u0150': 722, 
'\u0151': 500, '\u0152': 889, '\u0153': 722, '\u0154': 667, '\u0155': 333, '\u0156': 667, '\u0157': 333, '\u0158': 667, '\u0159': 333, '\u015a': 556, '\u015b': 389, '\u015e': 556, '\u015f': 389, '\u0160': 556, '\u0161': 389, '\u0162': 611, '\u0163': 278, '\u0164': 611, '\u0165': 326, '\u016a': 722, '\u016b': 500, '\u016e': 722, '\u016f': 500, '\u0170': 722, '\u0171': 500, '\u0172': 722, '\u0173': 500, '\u0178': 722, '\u0179': 611, '\u017a': 444, '\u017b': 611, '\u017c': 444, '\u017d': 611, '\u017e': 444, '\u0192': 500, '\u0218': 556, '\u0219': 389, '\u02c6': 333, '\u02c7': 333, '\u02d8': 333, '\u02d9': 333, '\u02da': 333, '\u02db': 333, '\u02dc': 333, '\u02dd': 333, '\u2013': 500, '\u2014': 1000, '\u2018': 333, '\u2019': 333, '\u201a': 333, '\u201c': 444, '\u201d': 444, '\u201e': 444, '\u2020': 500, '\u2021': 500, '\u2022': 350, '\u2026': 1000, '\u2030': 1000, '\u2039': 333, '\u203a': 333, '\u2044': 167, '\u2122': 980, '\u2202': 476, '\u2206': 612, '\u2211': 600, '\u2212': 564, '\u221a': 453, '\u2260': 549, '\u2264': 549, '\u2265': 549, '\u25ca': 471, '\uf6c3': 250, '\ufb01': 556, '\ufb02': 556}), - 'ZapfDingbats': ({'FontName': 'ZapfDingbats', 'FontBBox': (-1.0, -143.0, 981.0, 820.0), 'FontWeight': 'Medium', 'FontFamily': 'ITC', 'Flags': 0, 'ItalicAngle': 0.0}, {'\x01': 974, '\x02': 961, '\x03': 980, '\x04': 719, '\x05': 789, '\x06': 494, '\x07': 552, '\x08': 537, '\t': 577, '\n': 692, '\x0b': 960, '\x0c': 939, '\r': 549, '\x0e': 855, '\x0f': 911, '\x10': 933, '\x11': 945, '\x12': 974, '\x13': 755, '\x14': 846, '\x15': 762, '\x16': 761, '\x17': 571, '\x18': 677, '\x19': 763, '\x1a': 760, '\x1b': 759, '\x1c': 754, '\x1d': 786, '\x1e': 788, '\x1f': 788, ' ': 790, '!': 793, '"': 794, '#': 816, '$': 823, '%': 789, '&': 841, "'": 823, '(': 833, ')': 816, '*': 831, '+': 923, ',': 744, '-': 723, '.': 749, '/': 790, '0': 792, '1': 695, '2': 776, '3': 768, '4': 792, '5': 759, '6': 707, '7': 708, '8': 682, '9': 701, ':': 826, ';': 815, '<': 789, '=': 789, '>': 707, '?': 
687, '@': 696, 'A': 689, 'B': 786, 'C': 787, 'D': 713, 'E': 791, 'F': 785, 'G': 791, 'H': 873, 'I': 761, 'J': 762, 'K': 759, 'L': 892, 'M': 892, 'N': 788, 'O': 784, 'Q': 438, 'R': 138, 'S': 277, 'T': 415, 'U': 509, 'V': 410, 'W': 234, 'X': 234, 'Y': 390, 'Z': 390, '[': 276, '\\': 276, ']': 317, '^': 317, '_': 334, '`': 334, 'a': 392, 'b': 392, 'c': 668, 'd': 668, 'e': 732, 'f': 544, 'g': 544, 'h': 910, 'i': 911, 'j': 667, 'k': 760, 'l': 760, 'm': 626, 'n': 694, 'o': 595, 'p': 776, 'u': 690, 'v': 791, 'w': 790, 'x': 788, 'y': 788, 'z': 788, '{': 788, '|': 788, '}': 788, '~': 788, '\x7f': 788, '\x80': 788, '\x81': 788, '\x82': 788, '\x83': 788, '\x84': 788, '\x85': 788, '\x86': 788, '\x87': 788, '\x88': 788, '\x89': 788, '\x8a': 788, '\x8b': 788, '\x8c': 788, '\x8d': 788, '\x8e': 788, '\x8f': 788, '\x90': 788, '\x91': 788, '\x92': 788, '\x93': 788, '\x94': 788, '\x95': 788, '\x96': 788, '\x97': 788, '\x98': 788, '\x99': 788, '\x9a': 788, '\x9b': 788, '\x9c': 788, '\x9d': 788, '\x9e': 788, '\x9f': 788, '\xa0': 894, '\xa1': 838, '\xa2': 924, '\xa3': 1016, '\xa4': 458, '\xa5': 924, '\xa6': 918, '\xa7': 927, '\xa8': 928, '\xa9': 928, '\xaa': 834, '\xab': 873, '\xac': 828, '\xad': 924, '\xae': 917, '\xaf': 930, '\xb0': 931, '\xb1': 463, '\xb2': 883, '\xb3': 836, '\xb4': 867, '\xb5': 696, '\xb6': 874, '\xb7': 760, '\xb8': 946, '\xb9': 865, '\xba': 967, '\xbb': 831, '\xbc': 873, '\xbd': 927, '\xbe': 970, '\xbf': 918, '\xc0': 748, '\xc1': 836, '\xc2': 771, '\xc3': 888, '\xc4': 748, '\xc5': 771, '\xc6': 888, '\xc7': 867, '\xc8': 696, '\xc9': 874, '\xca': 974, '\xcb': 762, '\xcc': 759, '\xcd': 509, '\xce': 410}), + "Courier": ( + { + "FontName": "Courier", + "Descent": -194.0, + "FontBBox": (-6.0, -249.0, 639.0, 803.0), + "FontWeight": "Medium", + "CapHeight": 572.0, + "FontFamily": "Courier", + "Flags": 64, + "XHeight": 434.0, + "ItalicAngle": 0.0, + "Ascent": 627.0, + }, + { + " ": 600, + "!": 600, + '"': 600, + "#": 600, + "$": 600, + "%": 600, + "&": 600, + "'": 600, + 
"(": 600, + ")": 600, + "*": 600, + "+": 600, + ",": 600, + "-": 600, + ".": 600, + "/": 600, + "0": 600, + "1": 600, + "2": 600, + "3": 600, + "4": 600, + "5": 600, + "6": 600, + "7": 600, + "8": 600, + "9": 600, + ":": 600, + ";": 600, + "<": 600, + "=": 600, + ">": 600, + "?": 600, + "@": 600, + "A": 600, + "B": 600, + "C": 600, + "D": 600, + "E": 600, + "F": 600, + "G": 600, + "H": 600, + "I": 600, + "J": 600, + "K": 600, + "L": 600, + "M": 600, + "N": 600, + "O": 600, + "P": 600, + "Q": 600, + "R": 600, + "S": 600, + "T": 600, + "U": 600, + "V": 600, + "W": 600, + "X": 600, + "Y": 600, + "Z": 600, + "[": 600, + "\\": 600, + "]": 600, + "^": 600, + "_": 600, + "`": 600, + "a": 600, + "b": 600, + "c": 600, + "d": 600, + "e": 600, + "f": 600, + "g": 600, + "h": 600, + "i": 600, + "j": 600, + "k": 600, + "l": 600, + "m": 600, + "n": 600, + "o": 600, + "p": 600, + "q": 600, + "r": 600, + "s": 600, + "t": 600, + "u": 600, + "v": 600, + "w": 600, + "x": 600, + "y": 600, + "z": 600, + "{": 600, + "|": 600, + "}": 600, + "~": 600, + "\xa1": 600, + "\xa2": 600, + "\xa3": 600, + "\xa4": 600, + "\xa5": 600, + "\xa6": 600, + "\xa7": 600, + "\xa8": 600, + "\xa9": 600, + "\xaa": 600, + "\xab": 600, + "\xac": 600, + "\xae": 600, + "\xaf": 600, + "\xb0": 600, + "\xb1": 600, + "\xb2": 600, + "\xb3": 600, + "\xb4": 600, + "\xb5": 600, + "\xb6": 600, + "\xb7": 600, + "\xb8": 600, + "\xb9": 600, + "\xba": 600, + "\xbb": 600, + "\xbc": 600, + "\xbd": 600, + "\xbe": 600, + "\xbf": 600, + "\xc0": 600, + "\xc1": 600, + "\xc2": 600, + "\xc3": 600, + "\xc4": 600, + "\xc5": 600, + "\xc6": 600, + "\xc7": 600, + "\xc8": 600, + "\xc9": 600, + "\xca": 600, + "\xcb": 600, + "\xcc": 600, + "\xcd": 600, + "\xce": 600, + "\xcf": 600, + "\xd0": 600, + "\xd1": 600, + "\xd2": 600, + "\xd3": 600, + "\xd4": 600, + "\xd5": 600, + "\xd6": 600, + "\xd7": 600, + "\xd8": 600, + "\xd9": 600, + "\xda": 600, + "\xdb": 600, + "\xdc": 600, + "\xdd": 600, + "\xde": 600, + "\xdf": 600, + "\xe0": 600, + "\xe1": 
600, + "\xe2": 600, + "\xe3": 600, + "\xe4": 600, + "\xe5": 600, + "\xe6": 600, + "\xe7": 600, + "\xe8": 600, + "\xe9": 600, + "\xea": 600, + "\xeb": 600, + "\xec": 600, + "\xed": 600, + "\xee": 600, + "\xef": 600, + "\xf0": 600, + "\xf1": 600, + "\xf2": 600, + "\xf3": 600, + "\xf4": 600, + "\xf5": 600, + "\xf6": 600, + "\xf7": 600, + "\xf8": 600, + "\xf9": 600, + "\xfa": 600, + "\xfb": 600, + "\xfc": 600, + "\xfd": 600, + "\xfe": 600, + "\xff": 600, + "\u0100": 600, + "\u0101": 600, + "\u0102": 600, + "\u0103": 600, + "\u0104": 600, + "\u0105": 600, + "\u0106": 600, + "\u0107": 600, + "\u010c": 600, + "\u010d": 600, + "\u010e": 600, + "\u010f": 600, + "\u0110": 600, + "\u0111": 600, + "\u0112": 600, + "\u0113": 600, + "\u0116": 600, + "\u0117": 600, + "\u0118": 600, + "\u0119": 600, + "\u011a": 600, + "\u011b": 600, + "\u011e": 600, + "\u011f": 600, + "\u0122": 600, + "\u0123": 600, + "\u012a": 600, + "\u012b": 600, + "\u012e": 600, + "\u012f": 600, + "\u0130": 600, + "\u0131": 600, + "\u0136": 600, + "\u0137": 600, + "\u0139": 600, + "\u013a": 600, + "\u013b": 600, + "\u013c": 600, + "\u013d": 600, + "\u013e": 600, + "\u0141": 600, + "\u0142": 600, + "\u0143": 600, + "\u0144": 600, + "\u0145": 600, + "\u0146": 600, + "\u0147": 600, + "\u0148": 600, + "\u014c": 600, + "\u014d": 600, + "\u0150": 600, + "\u0151": 600, + "\u0152": 600, + "\u0153": 600, + "\u0154": 600, + "\u0155": 600, + "\u0156": 600, + "\u0157": 600, + "\u0158": 600, + "\u0159": 600, + "\u015a": 600, + "\u015b": 600, + "\u015e": 600, + "\u015f": 600, + "\u0160": 600, + "\u0161": 600, + "\u0162": 600, + "\u0163": 600, + "\u0164": 600, + "\u0165": 600, + "\u016a": 600, + "\u016b": 600, + "\u016e": 600, + "\u016f": 600, + "\u0170": 600, + "\u0171": 600, + "\u0172": 600, + "\u0173": 600, + "\u0178": 600, + "\u0179": 600, + "\u017a": 600, + "\u017b": 600, + "\u017c": 600, + "\u017d": 600, + "\u017e": 600, + "\u0192": 600, + "\u0218": 600, + "\u0219": 600, + "\u02c6": 600, + "\u02c7": 600, + "\u02d8": 
600, + "\u02d9": 600, + "\u02da": 600, + "\u02db": 600, + "\u02dc": 600, + "\u02dd": 600, + "\u2013": 600, + "\u2014": 600, + "\u2018": 600, + "\u2019": 600, + "\u201a": 600, + "\u201c": 600, + "\u201d": 600, + "\u201e": 600, + "\u2020": 600, + "\u2021": 600, + "\u2022": 600, + "\u2026": 600, + "\u2030": 600, + "\u2039": 600, + "\u203a": 600, + "\u2044": 600, + "\u2122": 600, + "\u2202": 600, + "\u2206": 600, + "\u2211": 600, + "\u2212": 600, + "\u221a": 600, + "\u2260": 600, + "\u2264": 600, + "\u2265": 600, + "\u25ca": 600, + "\uf6c3": 600, + "\ufb01": 600, + "\ufb02": 600, + }, + ), + "Courier-Bold": ( + { + "FontName": "Courier-Bold", + "Descent": -194.0, + "FontBBox": (-88.0, -249.0, 697.0, 811.0), + "FontWeight": "Bold", + "CapHeight": 572.0, + "FontFamily": "Courier", + "Flags": 64, + "XHeight": 434.0, + "ItalicAngle": 0.0, + "Ascent": 627.0, + }, + { + " ": 600, + "!": 600, + '"': 600, + "#": 600, + "$": 600, + "%": 600, + "&": 600, + "'": 600, + "(": 600, + ")": 600, + "*": 600, + "+": 600, + ",": 600, + "-": 600, + ".": 600, + "/": 600, + "0": 600, + "1": 600, + "2": 600, + "3": 600, + "4": 600, + "5": 600, + "6": 600, + "7": 600, + "8": 600, + "9": 600, + ":": 600, + ";": 600, + "<": 600, + "=": 600, + ">": 600, + "?": 600, + "@": 600, + "A": 600, + "B": 600, + "C": 600, + "D": 600, + "E": 600, + "F": 600, + "G": 600, + "H": 600, + "I": 600, + "J": 600, + "K": 600, + "L": 600, + "M": 600, + "N": 600, + "O": 600, + "P": 600, + "Q": 600, + "R": 600, + "S": 600, + "T": 600, + "U": 600, + "V": 600, + "W": 600, + "X": 600, + "Y": 600, + "Z": 600, + "[": 600, + "\\": 600, + "]": 600, + "^": 600, + "_": 600, + "`": 600, + "a": 600, + "b": 600, + "c": 600, + "d": 600, + "e": 600, + "f": 600, + "g": 600, + "h": 600, + "i": 600, + "j": 600, + "k": 600, + "l": 600, + "m": 600, + "n": 600, + "o": 600, + "p": 600, + "q": 600, + "r": 600, + "s": 600, + "t": 600, + "u": 600, + "v": 600, + "w": 600, + "x": 600, + "y": 600, + "z": 600, + "{": 600, + "|": 600, + "}": 600, 
+ "~": 600, + "\xa1": 600, + "\xa2": 600, + "\xa3": 600, + "\xa4": 600, + "\xa5": 600, + "\xa6": 600, + "\xa7": 600, + "\xa8": 600, + "\xa9": 600, + "\xaa": 600, + "\xab": 600, + "\xac": 600, + "\xae": 600, + "\xaf": 600, + "\xb0": 600, + "\xb1": 600, + "\xb2": 600, + "\xb3": 600, + "\xb4": 600, + "\xb5": 600, + "\xb6": 600, + "\xb7": 600, + "\xb8": 600, + "\xb9": 600, + "\xba": 600, + "\xbb": 600, + "\xbc": 600, + "\xbd": 600, + "\xbe": 600, + "\xbf": 600, + "\xc0": 600, + "\xc1": 600, + "\xc2": 600, + "\xc3": 600, + "\xc4": 600, + "\xc5": 600, + "\xc6": 600, + "\xc7": 600, + "\xc8": 600, + "\xc9": 600, + "\xca": 600, + "\xcb": 600, + "\xcc": 600, + "\xcd": 600, + "\xce": 600, + "\xcf": 600, + "\xd0": 600, + "\xd1": 600, + "\xd2": 600, + "\xd3": 600, + "\xd4": 600, + "\xd5": 600, + "\xd6": 600, + "\xd7": 600, + "\xd8": 600, + "\xd9": 600, + "\xda": 600, + "\xdb": 600, + "\xdc": 600, + "\xdd": 600, + "\xde": 600, + "\xdf": 600, + "\xe0": 600, + "\xe1": 600, + "\xe2": 600, + "\xe3": 600, + "\xe4": 600, + "\xe5": 600, + "\xe6": 600, + "\xe7": 600, + "\xe8": 600, + "\xe9": 600, + "\xea": 600, + "\xeb": 600, + "\xec": 600, + "\xed": 600, + "\xee": 600, + "\xef": 600, + "\xf0": 600, + "\xf1": 600, + "\xf2": 600, + "\xf3": 600, + "\xf4": 600, + "\xf5": 600, + "\xf6": 600, + "\xf7": 600, + "\xf8": 600, + "\xf9": 600, + "\xfa": 600, + "\xfb": 600, + "\xfc": 600, + "\xfd": 600, + "\xfe": 600, + "\xff": 600, + "\u0100": 600, + "\u0101": 600, + "\u0102": 600, + "\u0103": 600, + "\u0104": 600, + "\u0105": 600, + "\u0106": 600, + "\u0107": 600, + "\u010c": 600, + "\u010d": 600, + "\u010e": 600, + "\u010f": 600, + "\u0110": 600, + "\u0111": 600, + "\u0112": 600, + "\u0113": 600, + "\u0116": 600, + "\u0117": 600, + "\u0118": 600, + "\u0119": 600, + "\u011a": 600, + "\u011b": 600, + "\u011e": 600, + "\u011f": 600, + "\u0122": 600, + "\u0123": 600, + "\u012a": 600, + "\u012b": 600, + "\u012e": 600, + "\u012f": 600, + "\u0130": 600, + "\u0131": 600, + "\u0136": 600, + "\u0137": 600, 
+ "\u0139": 600, + "\u013a": 600, + "\u013b": 600, + "\u013c": 600, + "\u013d": 600, + "\u013e": 600, + "\u0141": 600, + "\u0142": 600, + "\u0143": 600, + "\u0144": 600, + "\u0145": 600, + "\u0146": 600, + "\u0147": 600, + "\u0148": 600, + "\u014c": 600, + "\u014d": 600, + "\u0150": 600, + "\u0151": 600, + "\u0152": 600, + "\u0153": 600, + "\u0154": 600, + "\u0155": 600, + "\u0156": 600, + "\u0157": 600, + "\u0158": 600, + "\u0159": 600, + "\u015a": 600, + "\u015b": 600, + "\u015e": 600, + "\u015f": 600, + "\u0160": 600, + "\u0161": 600, + "\u0162": 600, + "\u0163": 600, + "\u0164": 600, + "\u0165": 600, + "\u016a": 600, + "\u016b": 600, + "\u016e": 600, + "\u016f": 600, + "\u0170": 600, + "\u0171": 600, + "\u0172": 600, + "\u0173": 600, + "\u0178": 600, + "\u0179": 600, + "\u017a": 600, + "\u017b": 600, + "\u017c": 600, + "\u017d": 600, + "\u017e": 600, + "\u0192": 600, + "\u0218": 600, + "\u0219": 600, + "\u02c6": 600, + "\u02c7": 600, + "\u02d8": 600, + "\u02d9": 600, + "\u02da": 600, + "\u02db": 600, + "\u02dc": 600, + "\u02dd": 600, + "\u2013": 600, + "\u2014": 600, + "\u2018": 600, + "\u2019": 600, + "\u201a": 600, + "\u201c": 600, + "\u201d": 600, + "\u201e": 600, + "\u2020": 600, + "\u2021": 600, + "\u2022": 600, + "\u2026": 600, + "\u2030": 600, + "\u2039": 600, + "\u203a": 600, + "\u2044": 600, + "\u2122": 600, + "\u2202": 600, + "\u2206": 600, + "\u2211": 600, + "\u2212": 600, + "\u221a": 600, + "\u2260": 600, + "\u2264": 600, + "\u2265": 600, + "\u25ca": 600, + "\uf6c3": 600, + "\ufb01": 600, + "\ufb02": 600, + }, + ), + "Courier-BoldOblique": ( + { + "FontName": "Courier-BoldOblique", + "Descent": -194.0, + "FontBBox": (-49.0, -249.0, 758.0, 811.0), + "FontWeight": "Bold", + "CapHeight": 572.0, + "FontFamily": "Courier", + "Flags": 64, + "XHeight": 434.0, + "ItalicAngle": -11.0, + "Ascent": 627.0, + }, + { + " ": 600, + "!": 600, + '"': 600, + "#": 600, + "$": 600, + "%": 600, + "&": 600, + "'": 600, + "(": 600, + ")": 600, + "*": 600, + "+": 600, + 
",": 600, + "-": 600, + ".": 600, + "/": 600, + "0": 600, + "1": 600, + "2": 600, + "3": 600, + "4": 600, + "5": 600, + "6": 600, + "7": 600, + "8": 600, + "9": 600, + ":": 600, + ";": 600, + "<": 600, + "=": 600, + ">": 600, + "?": 600, + "@": 600, + "A": 600, + "B": 600, + "C": 600, + "D": 600, + "E": 600, + "F": 600, + "G": 600, + "H": 600, + "I": 600, + "J": 600, + "K": 600, + "L": 600, + "M": 600, + "N": 600, + "O": 600, + "P": 600, + "Q": 600, + "R": 600, + "S": 600, + "T": 600, + "U": 600, + "V": 600, + "W": 600, + "X": 600, + "Y": 600, + "Z": 600, + "[": 600, + "\\": 600, + "]": 600, + "^": 600, + "_": 600, + "`": 600, + "a": 600, + "b": 600, + "c": 600, + "d": 600, + "e": 600, + "f": 600, + "g": 600, + "h": 600, + "i": 600, + "j": 600, + "k": 600, + "l": 600, + "m": 600, + "n": 600, + "o": 600, + "p": 600, + "q": 600, + "r": 600, + "s": 600, + "t": 600, + "u": 600, + "v": 600, + "w": 600, + "x": 600, + "y": 600, + "z": 600, + "{": 600, + "|": 600, + "}": 600, + "~": 600, + "\xa1": 600, + "\xa2": 600, + "\xa3": 600, + "\xa4": 600, + "\xa5": 600, + "\xa6": 600, + "\xa7": 600, + "\xa8": 600, + "\xa9": 600, + "\xaa": 600, + "\xab": 600, + "\xac": 600, + "\xae": 600, + "\xaf": 600, + "\xb0": 600, + "\xb1": 600, + "\xb2": 600, + "\xb3": 600, + "\xb4": 600, + "\xb5": 600, + "\xb6": 600, + "\xb7": 600, + "\xb8": 600, + "\xb9": 600, + "\xba": 600, + "\xbb": 600, + "\xbc": 600, + "\xbd": 600, + "\xbe": 600, + "\xbf": 600, + "\xc0": 600, + "\xc1": 600, + "\xc2": 600, + "\xc3": 600, + "\xc4": 600, + "\xc5": 600, + "\xc6": 600, + "\xc7": 600, + "\xc8": 600, + "\xc9": 600, + "\xca": 600, + "\xcb": 600, + "\xcc": 600, + "\xcd": 600, + "\xce": 600, + "\xcf": 600, + "\xd0": 600, + "\xd1": 600, + "\xd2": 600, + "\xd3": 600, + "\xd4": 600, + "\xd5": 600, + "\xd6": 600, + "\xd7": 600, + "\xd8": 600, + "\xd9": 600, + "\xda": 600, + "\xdb": 600, + "\xdc": 600, + "\xdd": 600, + "\xde": 600, + "\xdf": 600, + "\xe0": 600, + "\xe1": 600, + "\xe2": 600, + "\xe3": 600, + "\xe4": 600, 
+ "\xe5": 600, + "\xe6": 600, + "\xe7": 600, + "\xe8": 600, + "\xe9": 600, + "\xea": 600, + "\xeb": 600, + "\xec": 600, + "\xed": 600, + "\xee": 600, + "\xef": 600, + "\xf0": 600, + "\xf1": 600, + "\xf2": 600, + "\xf3": 600, + "\xf4": 600, + "\xf5": 600, + "\xf6": 600, + "\xf7": 600, + "\xf8": 600, + "\xf9": 600, + "\xfa": 600, + "\xfb": 600, + "\xfc": 600, + "\xfd": 600, + "\xfe": 600, + "\xff": 600, + "\u0100": 600, + "\u0101": 600, + "\u0102": 600, + "\u0103": 600, + "\u0104": 600, + "\u0105": 600, + "\u0106": 600, + "\u0107": 600, + "\u010c": 600, + "\u010d": 600, + "\u010e": 600, + "\u010f": 600, + "\u0110": 600, + "\u0111": 600, + "\u0112": 600, + "\u0113": 600, + "\u0116": 600, + "\u0117": 600, + "\u0118": 600, + "\u0119": 600, + "\u011a": 600, + "\u011b": 600, + "\u011e": 600, + "\u011f": 600, + "\u0122": 600, + "\u0123": 600, + "\u012a": 600, + "\u012b": 600, + "\u012e": 600, + "\u012f": 600, + "\u0130": 600, + "\u0131": 600, + "\u0136": 600, + "\u0137": 600, + "\u0139": 600, + "\u013a": 600, + "\u013b": 600, + "\u013c": 600, + "\u013d": 600, + "\u013e": 600, + "\u0141": 600, + "\u0142": 600, + "\u0143": 600, + "\u0144": 600, + "\u0145": 600, + "\u0146": 600, + "\u0147": 600, + "\u0148": 600, + "\u014c": 600, + "\u014d": 600, + "\u0150": 600, + "\u0151": 600, + "\u0152": 600, + "\u0153": 600, + "\u0154": 600, + "\u0155": 600, + "\u0156": 600, + "\u0157": 600, + "\u0158": 600, + "\u0159": 600, + "\u015a": 600, + "\u015b": 600, + "\u015e": 600, + "\u015f": 600, + "\u0160": 600, + "\u0161": 600, + "\u0162": 600, + "\u0163": 600, + "\u0164": 600, + "\u0165": 600, + "\u016a": 600, + "\u016b": 600, + "\u016e": 600, + "\u016f": 600, + "\u0170": 600, + "\u0171": 600, + "\u0172": 600, + "\u0173": 600, + "\u0178": 600, + "\u0179": 600, + "\u017a": 600, + "\u017b": 600, + "\u017c": 600, + "\u017d": 600, + "\u017e": 600, + "\u0192": 600, + "\u0218": 600, + "\u0219": 600, + "\u02c6": 600, + "\u02c7": 600, + "\u02d8": 600, + "\u02d9": 600, + "\u02da": 600, + "\u02db": 
600, + "\u02dc": 600, + "\u02dd": 600, + "\u2013": 600, + "\u2014": 600, + "\u2018": 600, + "\u2019": 600, + "\u201a": 600, + "\u201c": 600, + "\u201d": 600, + "\u201e": 600, + "\u2020": 600, + "\u2021": 600, + "\u2022": 600, + "\u2026": 600, + "\u2030": 600, + "\u2039": 600, + "\u203a": 600, + "\u2044": 600, + "\u2122": 600, + "\u2202": 600, + "\u2206": 600, + "\u2211": 600, + "\u2212": 600, + "\u221a": 600, + "\u2260": 600, + "\u2264": 600, + "\u2265": 600, + "\u25ca": 600, + "\uf6c3": 600, + "\ufb01": 600, + "\ufb02": 600, + }, + ), + "Courier-Oblique": ( + { + "FontName": "Courier-Oblique", + "Descent": -194.0, + "FontBBox": (-49.0, -249.0, 749.0, 803.0), + "FontWeight": "Medium", + "CapHeight": 572.0, + "FontFamily": "Courier", + "Flags": 64, + "XHeight": 434.0, + "ItalicAngle": -11.0, + "Ascent": 627.0, + }, + { + " ": 600, + "!": 600, + '"': 600, + "#": 600, + "$": 600, + "%": 600, + "&": 600, + "'": 600, + "(": 600, + ")": 600, + "*": 600, + "+": 600, + ",": 600, + "-": 600, + ".": 600, + "/": 600, + "0": 600, + "1": 600, + "2": 600, + "3": 600, + "4": 600, + "5": 600, + "6": 600, + "7": 600, + "8": 600, + "9": 600, + ":": 600, + ";": 600, + "<": 600, + "=": 600, + ">": 600, + "?": 600, + "@": 600, + "A": 600, + "B": 600, + "C": 600, + "D": 600, + "E": 600, + "F": 600, + "G": 600, + "H": 600, + "I": 600, + "J": 600, + "K": 600, + "L": 600, + "M": 600, + "N": 600, + "O": 600, + "P": 600, + "Q": 600, + "R": 600, + "S": 600, + "T": 600, + "U": 600, + "V": 600, + "W": 600, + "X": 600, + "Y": 600, + "Z": 600, + "[": 600, + "\\": 600, + "]": 600, + "^": 600, + "_": 600, + "`": 600, + "a": 600, + "b": 600, + "c": 600, + "d": 600, + "e": 600, + "f": 600, + "g": 600, + "h": 600, + "i": 600, + "j": 600, + "k": 600, + "l": 600, + "m": 600, + "n": 600, + "o": 600, + "p": 600, + "q": 600, + "r": 600, + "s": 600, + "t": 600, + "u": 600, + "v": 600, + "w": 600, + "x": 600, + "y": 600, + "z": 600, + "{": 600, + "|": 600, + "}": 600, + "~": 600, + "\xa1": 600, + "\xa2": 
600, + "\xa3": 600, + "\xa4": 600, + "\xa5": 600, + "\xa6": 600, + "\xa7": 600, + "\xa8": 600, + "\xa9": 600, + "\xaa": 600, + "\xab": 600, + "\xac": 600, + "\xae": 600, + "\xaf": 600, + "\xb0": 600, + "\xb1": 600, + "\xb2": 600, + "\xb3": 600, + "\xb4": 600, + "\xb5": 600, + "\xb6": 600, + "\xb7": 600, + "\xb8": 600, + "\xb9": 600, + "\xba": 600, + "\xbb": 600, + "\xbc": 600, + "\xbd": 600, + "\xbe": 600, + "\xbf": 600, + "\xc0": 600, + "\xc1": 600, + "\xc2": 600, + "\xc3": 600, + "\xc4": 600, + "\xc5": 600, + "\xc6": 600, + "\xc7": 600, + "\xc8": 600, + "\xc9": 600, + "\xca": 600, + "\xcb": 600, + "\xcc": 600, + "\xcd": 600, + "\xce": 600, + "\xcf": 600, + "\xd0": 600, + "\xd1": 600, + "\xd2": 600, + "\xd3": 600, + "\xd4": 600, + "\xd5": 600, + "\xd6": 600, + "\xd7": 600, + "\xd8": 600, + "\xd9": 600, + "\xda": 600, + "\xdb": 600, + "\xdc": 600, + "\xdd": 600, + "\xde": 600, + "\xdf": 600, + "\xe0": 600, + "\xe1": 600, + "\xe2": 600, + "\xe3": 600, + "\xe4": 600, + "\xe5": 600, + "\xe6": 600, + "\xe7": 600, + "\xe8": 600, + "\xe9": 600, + "\xea": 600, + "\xeb": 600, + "\xec": 600, + "\xed": 600, + "\xee": 600, + "\xef": 600, + "\xf0": 600, + "\xf1": 600, + "\xf2": 600, + "\xf3": 600, + "\xf4": 600, + "\xf5": 600, + "\xf6": 600, + "\xf7": 600, + "\xf8": 600, + "\xf9": 600, + "\xfa": 600, + "\xfb": 600, + "\xfc": 600, + "\xfd": 600, + "\xfe": 600, + "\xff": 600, + "\u0100": 600, + "\u0101": 600, + "\u0102": 600, + "\u0103": 600, + "\u0104": 600, + "\u0105": 600, + "\u0106": 600, + "\u0107": 600, + "\u010c": 600, + "\u010d": 600, + "\u010e": 600, + "\u010f": 600, + "\u0110": 600, + "\u0111": 600, + "\u0112": 600, + "\u0113": 600, + "\u0116": 600, + "\u0117": 600, + "\u0118": 600, + "\u0119": 600, + "\u011a": 600, + "\u011b": 600, + "\u011e": 600, + "\u011f": 600, + "\u0122": 600, + "\u0123": 600, + "\u012a": 600, + "\u012b": 600, + "\u012e": 600, + "\u012f": 600, + "\u0130": 600, + "\u0131": 600, + "\u0136": 600, + "\u0137": 600, + "\u0139": 600, + "\u013a": 600, + 
"\u013b": 600, + "\u013c": 600, + "\u013d": 600, + "\u013e": 600, + "\u0141": 600, + "\u0142": 600, + "\u0143": 600, + "\u0144": 600, + "\u0145": 600, + "\u0146": 600, + "\u0147": 600, + "\u0148": 600, + "\u014c": 600, + "\u014d": 600, + "\u0150": 600, + "\u0151": 600, + "\u0152": 600, + "\u0153": 600, + "\u0154": 600, + "\u0155": 600, + "\u0156": 600, + "\u0157": 600, + "\u0158": 600, + "\u0159": 600, + "\u015a": 600, + "\u015b": 600, + "\u015e": 600, + "\u015f": 600, + "\u0160": 600, + "\u0161": 600, + "\u0162": 600, + "\u0163": 600, + "\u0164": 600, + "\u0165": 600, + "\u016a": 600, + "\u016b": 600, + "\u016e": 600, + "\u016f": 600, + "\u0170": 600, + "\u0171": 600, + "\u0172": 600, + "\u0173": 600, + "\u0178": 600, + "\u0179": 600, + "\u017a": 600, + "\u017b": 600, + "\u017c": 600, + "\u017d": 600, + "\u017e": 600, + "\u0192": 600, + "\u0218": 600, + "\u0219": 600, + "\u02c6": 600, + "\u02c7": 600, + "\u02d8": 600, + "\u02d9": 600, + "\u02da": 600, + "\u02db": 600, + "\u02dc": 600, + "\u02dd": 600, + "\u2013": 600, + "\u2014": 600, + "\u2018": 600, + "\u2019": 600, + "\u201a": 600, + "\u201c": 600, + "\u201d": 600, + "\u201e": 600, + "\u2020": 600, + "\u2021": 600, + "\u2022": 600, + "\u2026": 600, + "\u2030": 600, + "\u2039": 600, + "\u203a": 600, + "\u2044": 600, + "\u2122": 600, + "\u2202": 600, + "\u2206": 600, + "\u2211": 600, + "\u2212": 600, + "\u221a": 600, + "\u2260": 600, + "\u2264": 600, + "\u2265": 600, + "\u25ca": 600, + "\uf6c3": 600, + "\ufb01": 600, + "\ufb02": 600, + }, + ), + "Helvetica": ( + { + "FontName": "Helvetica", + "Descent": -207.0, + "FontBBox": (-166.0, -225.0, 1000.0, 931.0), + "FontWeight": "Medium", + "CapHeight": 718.0, + "FontFamily": "Helvetica", + "Flags": 0, + "XHeight": 523.0, + "ItalicAngle": 0.0, + "Ascent": 718.0, + }, + { + " ": 278, + "!": 278, + '"': 355, + "#": 556, + "$": 556, + "%": 889, + "&": 667, + "'": 191, + "(": 333, + ")": 333, + "*": 389, + "+": 584, + ",": 278, + "-": 333, + ".": 278, + "/": 278, + "0": 
556, + "1": 556, + "2": 556, + "3": 556, + "4": 556, + "5": 556, + "6": 556, + "7": 556, + "8": 556, + "9": 556, + ":": 278, + ";": 278, + "<": 584, + "=": 584, + ">": 584, + "?": 556, + "@": 1015, + "A": 667, + "B": 667, + "C": 722, + "D": 722, + "E": 667, + "F": 611, + "G": 778, + "H": 722, + "I": 278, + "J": 500, + "K": 667, + "L": 556, + "M": 833, + "N": 722, + "O": 778, + "P": 667, + "Q": 778, + "R": 722, + "S": 667, + "T": 611, + "U": 722, + "V": 667, + "W": 944, + "X": 667, + "Y": 667, + "Z": 611, + "[": 278, + "\\": 278, + "]": 278, + "^": 469, + "_": 556, + "`": 333, + "a": 556, + "b": 556, + "c": 500, + "d": 556, + "e": 556, + "f": 278, + "g": 556, + "h": 556, + "i": 222, + "j": 222, + "k": 500, + "l": 222, + "m": 833, + "n": 556, + "o": 556, + "p": 556, + "q": 556, + "r": 333, + "s": 500, + "t": 278, + "u": 556, + "v": 500, + "w": 722, + "x": 500, + "y": 500, + "z": 500, + "{": 334, + "|": 260, + "}": 334, + "~": 584, + "\xa1": 333, + "\xa2": 556, + "\xa3": 556, + "\xa4": 556, + "\xa5": 556, + "\xa6": 260, + "\xa7": 556, + "\xa8": 333, + "\xa9": 737, + "\xaa": 370, + "\xab": 556, + "\xac": 584, + "\xae": 737, + "\xaf": 333, + "\xb0": 400, + "\xb1": 584, + "\xb2": 333, + "\xb3": 333, + "\xb4": 333, + "\xb5": 556, + "\xb6": 537, + "\xb7": 278, + "\xb8": 333, + "\xb9": 333, + "\xba": 365, + "\xbb": 556, + "\xbc": 834, + "\xbd": 834, + "\xbe": 834, + "\xbf": 611, + "\xc0": 667, + "\xc1": 667, + "\xc2": 667, + "\xc3": 667, + "\xc4": 667, + "\xc5": 667, + "\xc6": 1000, + "\xc7": 722, + "\xc8": 667, + "\xc9": 667, + "\xca": 667, + "\xcb": 667, + "\xcc": 278, + "\xcd": 278, + "\xce": 278, + "\xcf": 278, + "\xd0": 722, + "\xd1": 722, + "\xd2": 778, + "\xd3": 778, + "\xd4": 778, + "\xd5": 778, + "\xd6": 778, + "\xd7": 584, + "\xd8": 778, + "\xd9": 722, + "\xda": 722, + "\xdb": 722, + "\xdc": 722, + "\xdd": 667, + "\xde": 667, + "\xdf": 611, + "\xe0": 556, + "\xe1": 556, + "\xe2": 556, + "\xe3": 556, + "\xe4": 556, + "\xe5": 556, + "\xe6": 889, + "\xe7": 500, + 
"\xe8": 556, + "\xe9": 556, + "\xea": 556, + "\xeb": 556, + "\xec": 278, + "\xed": 278, + "\xee": 278, + "\xef": 278, + "\xf0": 556, + "\xf1": 556, + "\xf2": 556, + "\xf3": 556, + "\xf4": 556, + "\xf5": 556, + "\xf6": 556, + "\xf7": 584, + "\xf8": 611, + "\xf9": 556, + "\xfa": 556, + "\xfb": 556, + "\xfc": 556, + "\xfd": 500, + "\xfe": 556, + "\xff": 500, + "\u0100": 667, + "\u0101": 556, + "\u0102": 667, + "\u0103": 556, + "\u0104": 667, + "\u0105": 556, + "\u0106": 722, + "\u0107": 500, + "\u010c": 722, + "\u010d": 500, + "\u010e": 722, + "\u010f": 643, + "\u0110": 722, + "\u0111": 556, + "\u0112": 667, + "\u0113": 556, + "\u0116": 667, + "\u0117": 556, + "\u0118": 667, + "\u0119": 556, + "\u011a": 667, + "\u011b": 556, + "\u011e": 778, + "\u011f": 556, + "\u0122": 778, + "\u0123": 556, + "\u012a": 278, + "\u012b": 278, + "\u012e": 278, + "\u012f": 222, + "\u0130": 278, + "\u0131": 278, + "\u0136": 667, + "\u0137": 500, + "\u0139": 556, + "\u013a": 222, + "\u013b": 556, + "\u013c": 222, + "\u013d": 556, + "\u013e": 299, + "\u0141": 556, + "\u0142": 222, + "\u0143": 722, + "\u0144": 556, + "\u0145": 722, + "\u0146": 556, + "\u0147": 722, + "\u0148": 556, + "\u014c": 778, + "\u014d": 556, + "\u0150": 778, + "\u0151": 556, + "\u0152": 1000, + "\u0153": 944, + "\u0154": 722, + "\u0155": 333, + "\u0156": 722, + "\u0157": 333, + "\u0158": 722, + "\u0159": 333, + "\u015a": 667, + "\u015b": 500, + "\u015e": 667, + "\u015f": 500, + "\u0160": 667, + "\u0161": 500, + "\u0162": 611, + "\u0163": 278, + "\u0164": 611, + "\u0165": 317, + "\u016a": 722, + "\u016b": 556, + "\u016e": 722, + "\u016f": 556, + "\u0170": 722, + "\u0171": 556, + "\u0172": 722, + "\u0173": 556, + "\u0178": 667, + "\u0179": 611, + "\u017a": 500, + "\u017b": 611, + "\u017c": 500, + "\u017d": 611, + "\u017e": 500, + "\u0192": 556, + "\u0218": 667, + "\u0219": 500, + "\u02c6": 333, + "\u02c7": 333, + "\u02d8": 333, + "\u02d9": 333, + "\u02da": 333, + "\u02db": 333, + "\u02dc": 333, + "\u02dd": 333, + 
"\u2013": 556, + "\u2014": 1000, + "\u2018": 222, + "\u2019": 222, + "\u201a": 222, + "\u201c": 333, + "\u201d": 333, + "\u201e": 333, + "\u2020": 556, + "\u2021": 556, + "\u2022": 350, + "\u2026": 1000, + "\u2030": 1000, + "\u2039": 333, + "\u203a": 333, + "\u2044": 167, + "\u2122": 1000, + "\u2202": 476, + "\u2206": 612, + "\u2211": 600, + "\u2212": 584, + "\u221a": 453, + "\u2260": 549, + "\u2264": 549, + "\u2265": 549, + "\u25ca": 471, + "\uf6c3": 250, + "\ufb01": 500, + "\ufb02": 500, + }, + ), + "Helvetica-Bold": ( + { + "FontName": "Helvetica-Bold", + "Descent": -207.0, + "FontBBox": (-170.0, -228.0, 1003.0, 962.0), + "FontWeight": "Bold", + "CapHeight": 718.0, + "FontFamily": "Helvetica", + "Flags": 0, + "XHeight": 532.0, + "ItalicAngle": 0.0, + "Ascent": 718.0, + }, + { + " ": 278, + "!": 333, + '"': 474, + "#": 556, + "$": 556, + "%": 889, + "&": 722, + "'": 238, + "(": 333, + ")": 333, + "*": 389, + "+": 584, + ",": 278, + "-": 333, + ".": 278, + "/": 278, + "0": 556, + "1": 556, + "2": 556, + "3": 556, + "4": 556, + "5": 556, + "6": 556, + "7": 556, + "8": 556, + "9": 556, + ":": 333, + ";": 333, + "<": 584, + "=": 584, + ">": 584, + "?": 611, + "@": 975, + "A": 722, + "B": 722, + "C": 722, + "D": 722, + "E": 667, + "F": 611, + "G": 778, + "H": 722, + "I": 278, + "J": 556, + "K": 722, + "L": 611, + "M": 833, + "N": 722, + "O": 778, + "P": 667, + "Q": 778, + "R": 722, + "S": 667, + "T": 611, + "U": 722, + "V": 667, + "W": 944, + "X": 667, + "Y": 667, + "Z": 611, + "[": 333, + "\\": 278, + "]": 333, + "^": 584, + "_": 556, + "`": 333, + "a": 556, + "b": 611, + "c": 556, + "d": 611, + "e": 556, + "f": 333, + "g": 611, + "h": 611, + "i": 278, + "j": 278, + "k": 556, + "l": 278, + "m": 889, + "n": 611, + "o": 611, + "p": 611, + "q": 611, + "r": 389, + "s": 556, + "t": 333, + "u": 611, + "v": 556, + "w": 778, + "x": 556, + "y": 556, + "z": 500, + "{": 389, + "|": 280, + "}": 389, + "~": 584, + "\xa1": 333, + "\xa2": 556, + "\xa3": 556, + "\xa4": 556, + 
"\xa5": 556, + "\xa6": 280, + "\xa7": 556, + "\xa8": 333, + "\xa9": 737, + "\xaa": 370, + "\xab": 556, + "\xac": 584, + "\xae": 737, + "\xaf": 333, + "\xb0": 400, + "\xb1": 584, + "\xb2": 333, + "\xb3": 333, + "\xb4": 333, + "\xb5": 611, + "\xb6": 556, + "\xb7": 278, + "\xb8": 333, + "\xb9": 333, + "\xba": 365, + "\xbb": 556, + "\xbc": 834, + "\xbd": 834, + "\xbe": 834, + "\xbf": 611, + "\xc0": 722, + "\xc1": 722, + "\xc2": 722, + "\xc3": 722, + "\xc4": 722, + "\xc5": 722, + "\xc6": 1000, + "\xc7": 722, + "\xc8": 667, + "\xc9": 667, + "\xca": 667, + "\xcb": 667, + "\xcc": 278, + "\xcd": 278, + "\xce": 278, + "\xcf": 278, + "\xd0": 722, + "\xd1": 722, + "\xd2": 778, + "\xd3": 778, + "\xd4": 778, + "\xd5": 778, + "\xd6": 778, + "\xd7": 584, + "\xd8": 778, + "\xd9": 722, + "\xda": 722, + "\xdb": 722, + "\xdc": 722, + "\xdd": 667, + "\xde": 667, + "\xdf": 611, + "\xe0": 556, + "\xe1": 556, + "\xe2": 556, + "\xe3": 556, + "\xe4": 556, + "\xe5": 556, + "\xe6": 889, + "\xe7": 556, + "\xe8": 556, + "\xe9": 556, + "\xea": 556, + "\xeb": 556, + "\xec": 278, + "\xed": 278, + "\xee": 278, + "\xef": 278, + "\xf0": 611, + "\xf1": 611, + "\xf2": 611, + "\xf3": 611, + "\xf4": 611, + "\xf5": 611, + "\xf6": 611, + "\xf7": 584, + "\xf8": 611, + "\xf9": 611, + "\xfa": 611, + "\xfb": 611, + "\xfc": 611, + "\xfd": 556, + "\xfe": 611, + "\xff": 556, + "\u0100": 722, + "\u0101": 556, + "\u0102": 722, + "\u0103": 556, + "\u0104": 722, + "\u0105": 556, + "\u0106": 722, + "\u0107": 556, + "\u010c": 722, + "\u010d": 556, + "\u010e": 722, + "\u010f": 743, + "\u0110": 722, + "\u0111": 611, + "\u0112": 667, + "\u0113": 556, + "\u0116": 667, + "\u0117": 556, + "\u0118": 667, + "\u0119": 556, + "\u011a": 667, + "\u011b": 556, + "\u011e": 778, + "\u011f": 611, + "\u0122": 778, + "\u0123": 611, + "\u012a": 278, + "\u012b": 278, + "\u012e": 278, + "\u012f": 278, + "\u0130": 278, + "\u0131": 278, + "\u0136": 722, + "\u0137": 556, + "\u0139": 611, + "\u013a": 278, + "\u013b": 611, + "\u013c": 278, + 
"\u013d": 611, + "\u013e": 400, + "\u0141": 611, + "\u0142": 278, + "\u0143": 722, + "\u0144": 611, + "\u0145": 722, + "\u0146": 611, + "\u0147": 722, + "\u0148": 611, + "\u014c": 778, + "\u014d": 611, + "\u0150": 778, + "\u0151": 611, + "\u0152": 1000, + "\u0153": 944, + "\u0154": 722, + "\u0155": 389, + "\u0156": 722, + "\u0157": 389, + "\u0158": 722, + "\u0159": 389, + "\u015a": 667, + "\u015b": 556, + "\u015e": 667, + "\u015f": 556, + "\u0160": 667, + "\u0161": 556, + "\u0162": 611, + "\u0163": 333, + "\u0164": 611, + "\u0165": 389, + "\u016a": 722, + "\u016b": 611, + "\u016e": 722, + "\u016f": 611, + "\u0170": 722, + "\u0171": 611, + "\u0172": 722, + "\u0173": 611, + "\u0178": 667, + "\u0179": 611, + "\u017a": 500, + "\u017b": 611, + "\u017c": 500, + "\u017d": 611, + "\u017e": 500, + "\u0192": 556, + "\u0218": 667, + "\u0219": 556, + "\u02c6": 333, + "\u02c7": 333, + "\u02d8": 333, + "\u02d9": 333, + "\u02da": 333, + "\u02db": 333, + "\u02dc": 333, + "\u02dd": 333, + "\u2013": 556, + "\u2014": 1000, + "\u2018": 278, + "\u2019": 278, + "\u201a": 278, + "\u201c": 500, + "\u201d": 500, + "\u201e": 500, + "\u2020": 556, + "\u2021": 556, + "\u2022": 350, + "\u2026": 1000, + "\u2030": 1000, + "\u2039": 333, + "\u203a": 333, + "\u2044": 167, + "\u2122": 1000, + "\u2202": 494, + "\u2206": 612, + "\u2211": 600, + "\u2212": 584, + "\u221a": 549, + "\u2260": 549, + "\u2264": 549, + "\u2265": 549, + "\u25ca": 494, + "\uf6c3": 250, + "\ufb01": 611, + "\ufb02": 611, + }, + ), + "Helvetica-BoldOblique": ( + { + "FontName": "Helvetica-BoldOblique", + "Descent": -207.0, + "FontBBox": (-175.0, -228.0, 1114.0, 962.0), + "FontWeight": "Bold", + "CapHeight": 718.0, + "FontFamily": "Helvetica", + "Flags": 0, + "XHeight": 532.0, + "ItalicAngle": -12.0, + "Ascent": 718.0, + }, + { + " ": 278, + "!": 333, + '"': 474, + "#": 556, + "$": 556, + "%": 889, + "&": 722, + "'": 238, + "(": 333, + ")": 333, + "*": 389, + "+": 584, + ",": 278, + "-": 333, + ".": 278, + "/": 278, + "0": 556, + 
"1": 556, + "2": 556, + "3": 556, + "4": 556, + "5": 556, + "6": 556, + "7": 556, + "8": 556, + "9": 556, + ":": 333, + ";": 333, + "<": 584, + "=": 584, + ">": 584, + "?": 611, + "@": 975, + "A": 722, + "B": 722, + "C": 722, + "D": 722, + "E": 667, + "F": 611, + "G": 778, + "H": 722, + "I": 278, + "J": 556, + "K": 722, + "L": 611, + "M": 833, + "N": 722, + "O": 778, + "P": 667, + "Q": 778, + "R": 722, + "S": 667, + "T": 611, + "U": 722, + "V": 667, + "W": 944, + "X": 667, + "Y": 667, + "Z": 611, + "[": 333, + "\\": 278, + "]": 333, + "^": 584, + "_": 556, + "`": 333, + "a": 556, + "b": 611, + "c": 556, + "d": 611, + "e": 556, + "f": 333, + "g": 611, + "h": 611, + "i": 278, + "j": 278, + "k": 556, + "l": 278, + "m": 889, + "n": 611, + "o": 611, + "p": 611, + "q": 611, + "r": 389, + "s": 556, + "t": 333, + "u": 611, + "v": 556, + "w": 778, + "x": 556, + "y": 556, + "z": 500, + "{": 389, + "|": 280, + "}": 389, + "~": 584, + "\xa1": 333, + "\xa2": 556, + "\xa3": 556, + "\xa4": 556, + "\xa5": 556, + "\xa6": 280, + "\xa7": 556, + "\xa8": 333, + "\xa9": 737, + "\xaa": 370, + "\xab": 556, + "\xac": 584, + "\xae": 737, + "\xaf": 333, + "\xb0": 400, + "\xb1": 584, + "\xb2": 333, + "\xb3": 333, + "\xb4": 333, + "\xb5": 611, + "\xb6": 556, + "\xb7": 278, + "\xb8": 333, + "\xb9": 333, + "\xba": 365, + "\xbb": 556, + "\xbc": 834, + "\xbd": 834, + "\xbe": 834, + "\xbf": 611, + "\xc0": 722, + "\xc1": 722, + "\xc2": 722, + "\xc3": 722, + "\xc4": 722, + "\xc5": 722, + "\xc6": 1000, + "\xc7": 722, + "\xc8": 667, + "\xc9": 667, + "\xca": 667, + "\xcb": 667, + "\xcc": 278, + "\xcd": 278, + "\xce": 278, + "\xcf": 278, + "\xd0": 722, + "\xd1": 722, + "\xd2": 778, + "\xd3": 778, + "\xd4": 778, + "\xd5": 778, + "\xd6": 778, + "\xd7": 584, + "\xd8": 778, + "\xd9": 722, + "\xda": 722, + "\xdb": 722, + "\xdc": 722, + "\xdd": 667, + "\xde": 667, + "\xdf": 611, + "\xe0": 556, + "\xe1": 556, + "\xe2": 556, + "\xe3": 556, + "\xe4": 556, + "\xe5": 556, + "\xe6": 889, + "\xe7": 556, + "\xe8": 
556, + "\xe9": 556, + "\xea": 556, + "\xeb": 556, + "\xec": 278, + "\xed": 278, + "\xee": 278, + "\xef": 278, + "\xf0": 611, + "\xf1": 611, + "\xf2": 611, + "\xf3": 611, + "\xf4": 611, + "\xf5": 611, + "\xf6": 611, + "\xf7": 584, + "\xf8": 611, + "\xf9": 611, + "\xfa": 611, + "\xfb": 611, + "\xfc": 611, + "\xfd": 556, + "\xfe": 611, + "\xff": 556, + "\u0100": 722, + "\u0101": 556, + "\u0102": 722, + "\u0103": 556, + "\u0104": 722, + "\u0105": 556, + "\u0106": 722, + "\u0107": 556, + "\u010c": 722, + "\u010d": 556, + "\u010e": 722, + "\u010f": 743, + "\u0110": 722, + "\u0111": 611, + "\u0112": 667, + "\u0113": 556, + "\u0116": 667, + "\u0117": 556, + "\u0118": 667, + "\u0119": 556, + "\u011a": 667, + "\u011b": 556, + "\u011e": 778, + "\u011f": 611, + "\u0122": 778, + "\u0123": 611, + "\u012a": 278, + "\u012b": 278, + "\u012e": 278, + "\u012f": 278, + "\u0130": 278, + "\u0131": 278, + "\u0136": 722, + "\u0137": 556, + "\u0139": 611, + "\u013a": 278, + "\u013b": 611, + "\u013c": 278, + "\u013d": 611, + "\u013e": 400, + "\u0141": 611, + "\u0142": 278, + "\u0143": 722, + "\u0144": 611, + "\u0145": 722, + "\u0146": 611, + "\u0147": 722, + "\u0148": 611, + "\u014c": 778, + "\u014d": 611, + "\u0150": 778, + "\u0151": 611, + "\u0152": 1000, + "\u0153": 944, + "\u0154": 722, + "\u0155": 389, + "\u0156": 722, + "\u0157": 389, + "\u0158": 722, + "\u0159": 389, + "\u015a": 667, + "\u015b": 556, + "\u015e": 667, + "\u015f": 556, + "\u0160": 667, + "\u0161": 556, + "\u0162": 611, + "\u0163": 333, + "\u0164": 611, + "\u0165": 389, + "\u016a": 722, + "\u016b": 611, + "\u016e": 722, + "\u016f": 611, + "\u0170": 722, + "\u0171": 611, + "\u0172": 722, + "\u0173": 611, + "\u0178": 667, + "\u0179": 611, + "\u017a": 500, + "\u017b": 611, + "\u017c": 500, + "\u017d": 611, + "\u017e": 500, + "\u0192": 556, + "\u0218": 667, + "\u0219": 556, + "\u02c6": 333, + "\u02c7": 333, + "\u02d8": 333, + "\u02d9": 333, + "\u02da": 333, + "\u02db": 333, + "\u02dc": 333, + "\u02dd": 333, + "\u2013": 556, 
+ "\u2014": 1000, + "\u2018": 278, + "\u2019": 278, + "\u201a": 278, + "\u201c": 500, + "\u201d": 500, + "\u201e": 500, + "\u2020": 556, + "\u2021": 556, + "\u2022": 350, + "\u2026": 1000, + "\u2030": 1000, + "\u2039": 333, + "\u203a": 333, + "\u2044": 167, + "\u2122": 1000, + "\u2202": 494, + "\u2206": 612, + "\u2211": 600, + "\u2212": 584, + "\u221a": 549, + "\u2260": 549, + "\u2264": 549, + "\u2265": 549, + "\u25ca": 494, + "\uf6c3": 250, + "\ufb01": 611, + "\ufb02": 611, + }, + ), + "Helvetica-Oblique": ( + { + "FontName": "Helvetica-Oblique", + "Descent": -207.0, + "FontBBox": (-171.0, -225.0, 1116.0, 931.0), + "FontWeight": "Medium", + "CapHeight": 718.0, + "FontFamily": "Helvetica", + "Flags": 0, + "XHeight": 523.0, + "ItalicAngle": -12.0, + "Ascent": 718.0, + }, + { + " ": 278, + "!": 278, + '"': 355, + "#": 556, + "$": 556, + "%": 889, + "&": 667, + "'": 191, + "(": 333, + ")": 333, + "*": 389, + "+": 584, + ",": 278, + "-": 333, + ".": 278, + "/": 278, + "0": 556, + "1": 556, + "2": 556, + "3": 556, + "4": 556, + "5": 556, + "6": 556, + "7": 556, + "8": 556, + "9": 556, + ":": 278, + ";": 278, + "<": 584, + "=": 584, + ">": 584, + "?": 556, + "@": 1015, + "A": 667, + "B": 667, + "C": 722, + "D": 722, + "E": 667, + "F": 611, + "G": 778, + "H": 722, + "I": 278, + "J": 500, + "K": 667, + "L": 556, + "M": 833, + "N": 722, + "O": 778, + "P": 667, + "Q": 778, + "R": 722, + "S": 667, + "T": 611, + "U": 722, + "V": 667, + "W": 944, + "X": 667, + "Y": 667, + "Z": 611, + "[": 278, + "\\": 278, + "]": 278, + "^": 469, + "_": 556, + "`": 333, + "a": 556, + "b": 556, + "c": 500, + "d": 556, + "e": 556, + "f": 278, + "g": 556, + "h": 556, + "i": 222, + "j": 222, + "k": 500, + "l": 222, + "m": 833, + "n": 556, + "o": 556, + "p": 556, + "q": 556, + "r": 333, + "s": 500, + "t": 278, + "u": 556, + "v": 500, + "w": 722, + "x": 500, + "y": 500, + "z": 500, + "{": 334, + "|": 260, + "}": 334, + "~": 584, + "\xa1": 333, + "\xa2": 556, + "\xa3": 556, + "\xa4": 556, + "\xa5": 
556, + "\xa6": 260, + "\xa7": 556, + "\xa8": 333, + "\xa9": 737, + "\xaa": 370, + "\xab": 556, + "\xac": 584, + "\xae": 737, + "\xaf": 333, + "\xb0": 400, + "\xb1": 584, + "\xb2": 333, + "\xb3": 333, + "\xb4": 333, + "\xb5": 556, + "\xb6": 537, + "\xb7": 278, + "\xb8": 333, + "\xb9": 333, + "\xba": 365, + "\xbb": 556, + "\xbc": 834, + "\xbd": 834, + "\xbe": 834, + "\xbf": 611, + "\xc0": 667, + "\xc1": 667, + "\xc2": 667, + "\xc3": 667, + "\xc4": 667, + "\xc5": 667, + "\xc6": 1000, + "\xc7": 722, + "\xc8": 667, + "\xc9": 667, + "\xca": 667, + "\xcb": 667, + "\xcc": 278, + "\xcd": 278, + "\xce": 278, + "\xcf": 278, + "\xd0": 722, + "\xd1": 722, + "\xd2": 778, + "\xd3": 778, + "\xd4": 778, + "\xd5": 778, + "\xd6": 778, + "\xd7": 584, + "\xd8": 778, + "\xd9": 722, + "\xda": 722, + "\xdb": 722, + "\xdc": 722, + "\xdd": 667, + "\xde": 667, + "\xdf": 611, + "\xe0": 556, + "\xe1": 556, + "\xe2": 556, + "\xe3": 556, + "\xe4": 556, + "\xe5": 556, + "\xe6": 889, + "\xe7": 500, + "\xe8": 556, + "\xe9": 556, + "\xea": 556, + "\xeb": 556, + "\xec": 278, + "\xed": 278, + "\xee": 278, + "\xef": 278, + "\xf0": 556, + "\xf1": 556, + "\xf2": 556, + "\xf3": 556, + "\xf4": 556, + "\xf5": 556, + "\xf6": 556, + "\xf7": 584, + "\xf8": 611, + "\xf9": 556, + "\xfa": 556, + "\xfb": 556, + "\xfc": 556, + "\xfd": 500, + "\xfe": 556, + "\xff": 500, + "\u0100": 667, + "\u0101": 556, + "\u0102": 667, + "\u0103": 556, + "\u0104": 667, + "\u0105": 556, + "\u0106": 722, + "\u0107": 500, + "\u010c": 722, + "\u010d": 500, + "\u010e": 722, + "\u010f": 643, + "\u0110": 722, + "\u0111": 556, + "\u0112": 667, + "\u0113": 556, + "\u0116": 667, + "\u0117": 556, + "\u0118": 667, + "\u0119": 556, + "\u011a": 667, + "\u011b": 556, + "\u011e": 778, + "\u011f": 556, + "\u0122": 778, + "\u0123": 556, + "\u012a": 278, + "\u012b": 278, + "\u012e": 278, + "\u012f": 222, + "\u0130": 278, + "\u0131": 278, + "\u0136": 667, + "\u0137": 500, + "\u0139": 556, + "\u013a": 222, + "\u013b": 556, + "\u013c": 222, + "\u013d": 
556, + "\u013e": 299, + "\u0141": 556, + "\u0142": 222, + "\u0143": 722, + "\u0144": 556, + "\u0145": 722, + "\u0146": 556, + "\u0147": 722, + "\u0148": 556, + "\u014c": 778, + "\u014d": 556, + "\u0150": 778, + "\u0151": 556, + "\u0152": 1000, + "\u0153": 944, + "\u0154": 722, + "\u0155": 333, + "\u0156": 722, + "\u0157": 333, + "\u0158": 722, + "\u0159": 333, + "\u015a": 667, + "\u015b": 500, + "\u015e": 667, + "\u015f": 500, + "\u0160": 667, + "\u0161": 500, + "\u0162": 611, + "\u0163": 278, + "\u0164": 611, + "\u0165": 317, + "\u016a": 722, + "\u016b": 556, + "\u016e": 722, + "\u016f": 556, + "\u0170": 722, + "\u0171": 556, + "\u0172": 722, + "\u0173": 556, + "\u0178": 667, + "\u0179": 611, + "\u017a": 500, + "\u017b": 611, + "\u017c": 500, + "\u017d": 611, + "\u017e": 500, + "\u0192": 556, + "\u0218": 667, + "\u0219": 500, + "\u02c6": 333, + "\u02c7": 333, + "\u02d8": 333, + "\u02d9": 333, + "\u02da": 333, + "\u02db": 333, + "\u02dc": 333, + "\u02dd": 333, + "\u2013": 556, + "\u2014": 1000, + "\u2018": 222, + "\u2019": 222, + "\u201a": 222, + "\u201c": 333, + "\u201d": 333, + "\u201e": 333, + "\u2020": 556, + "\u2021": 556, + "\u2022": 350, + "\u2026": 1000, + "\u2030": 1000, + "\u2039": 333, + "\u203a": 333, + "\u2044": 167, + "\u2122": 1000, + "\u2202": 476, + "\u2206": 612, + "\u2211": 600, + "\u2212": 584, + "\u221a": 453, + "\u2260": 549, + "\u2264": 549, + "\u2265": 549, + "\u25ca": 471, + "\uf6c3": 250, + "\ufb01": 500, + "\ufb02": 500, + }, + ), + "Symbol": ( + { + "FontName": "Symbol", + "FontBBox": (-180.0, -293.0, 1090.0, 1010.0), + "FontWeight": "Medium", + "FontFamily": "Symbol", + "Flags": 0, + "ItalicAngle": 0.0, + }, + { + " ": 250, + "!": 333, + "#": 500, + "%": 833, + "&": 778, + "(": 333, + ")": 333, + "+": 549, + ",": 250, + ".": 250, + "/": 278, + "0": 500, + "1": 500, + "2": 500, + "3": 500, + "4": 500, + "5": 500, + "6": 500, + "7": 500, + "8": 500, + "9": 500, + ":": 278, + ";": 278, + "<": 549, + "=": 549, + ">": 549, + "?": 444, + "[": 
333, + "]": 333, + "_": 500, + "{": 480, + "|": 200, + "}": 480, + "\xac": 713, + "\xb0": 400, + "\xb1": 549, + "\xb5": 576, + "\xd7": 549, + "\xf7": 549, + "\u0192": 500, + "\u0391": 722, + "\u0392": 667, + "\u0393": 603, + "\u0395": 611, + "\u0396": 611, + "\u0397": 722, + "\u0398": 741, + "\u0399": 333, + "\u039a": 722, + "\u039b": 686, + "\u039c": 889, + "\u039d": 722, + "\u039e": 645, + "\u039f": 722, + "\u03a0": 768, + "\u03a1": 556, + "\u03a3": 592, + "\u03a4": 611, + "\u03a5": 690, + "\u03a6": 763, + "\u03a7": 722, + "\u03a8": 795, + "\u03b1": 631, + "\u03b2": 549, + "\u03b3": 411, + "\u03b4": 494, + "\u03b5": 439, + "\u03b6": 494, + "\u03b7": 603, + "\u03b8": 521, + "\u03b9": 329, + "\u03ba": 549, + "\u03bb": 549, + "\u03bd": 521, + "\u03be": 493, + "\u03bf": 549, + "\u03c0": 549, + "\u03c1": 549, + "\u03c2": 439, + "\u03c3": 603, + "\u03c4": 439, + "\u03c5": 576, + "\u03c6": 521, + "\u03c7": 549, + "\u03c8": 686, + "\u03c9": 686, + "\u03d1": 631, + "\u03d2": 620, + "\u03d5": 603, + "\u03d6": 713, + "\u2022": 460, + "\u2026": 1000, + "\u2032": 247, + "\u2033": 411, + "\u2044": 167, + "\u20ac": 750, + "\u2111": 686, + "\u2118": 987, + "\u211c": 795, + "\u2126": 768, + "\u2135": 823, + "\u2190": 987, + "\u2191": 603, + "\u2192": 987, + "\u2193": 603, + "\u2194": 1042, + "\u21b5": 658, + "\u21d0": 987, + "\u21d1": 603, + "\u21d2": 987, + "\u21d3": 603, + "\u21d4": 1042, + "\u2200": 713, + "\u2202": 494, + "\u2203": 549, + "\u2205": 823, + "\u2206": 612, + "\u2207": 713, + "\u2208": 713, + "\u2209": 713, + "\u220b": 439, + "\u220f": 823, + "\u2211": 713, + "\u2212": 549, + "\u2217": 500, + "\u221a": 549, + "\u221d": 713, + "\u221e": 713, + "\u2220": 768, + "\u2227": 603, + "\u2228": 603, + "\u2229": 768, + "\u222a": 768, + "\u222b": 274, + "\u2234": 863, + "\u223c": 549, + "\u2245": 549, + "\u2248": 549, + "\u2260": 549, + "\u2261": 549, + "\u2264": 549, + "\u2265": 549, + "\u2282": 713, + "\u2283": 713, + "\u2284": 713, + "\u2286": 713, + "\u2287": 713, + 
"\u2295": 768, + "\u2297": 768, + "\u22a5": 658, + "\u22c5": 250, + "\u2320": 686, + "\u2321": 686, + "\u2329": 329, + "\u232a": 329, + "\u25ca": 494, + "\u2660": 753, + "\u2663": 753, + "\u2665": 753, + "\u2666": 753, + "\uf6d9": 790, + "\uf6da": 790, + "\uf6db": 890, + "\uf8e5": 500, + "\uf8e6": 603, + "\uf8e7": 1000, + "\uf8e8": 790, + "\uf8e9": 790, + "\uf8ea": 786, + "\uf8eb": 384, + "\uf8ec": 384, + "\uf8ed": 384, + "\uf8ee": 384, + "\uf8ef": 384, + "\uf8f0": 384, + "\uf8f1": 494, + "\uf8f2": 494, + "\uf8f3": 494, + "\uf8f4": 494, + "\uf8f5": 686, + "\uf8f6": 384, + "\uf8f7": 384, + "\uf8f8": 384, + "\uf8f9": 384, + "\uf8fa": 384, + "\uf8fb": 384, + "\uf8fc": 494, + "\uf8fd": 494, + "\uf8fe": 494, + "\uf8ff": 790, + }, + ), + "Times-Bold": ( + { + "FontName": "Times-Bold", + "Descent": -217.0, + "FontBBox": (-168.0, -218.0, 1000.0, 935.0), + "FontWeight": "Bold", + "CapHeight": 676.0, + "FontFamily": "Times", + "Flags": 0, + "XHeight": 461.0, + "ItalicAngle": 0.0, + "Ascent": 683.0, + }, + { + " ": 250, + "!": 333, + '"': 555, + "#": 500, + "$": 500, + "%": 1000, + "&": 833, + "'": 278, + "(": 333, + ")": 333, + "*": 500, + "+": 570, + ",": 250, + "-": 333, + ".": 250, + "/": 278, + "0": 500, + "1": 500, + "2": 500, + "3": 500, + "4": 500, + "5": 500, + "6": 500, + "7": 500, + "8": 500, + "9": 500, + ":": 333, + ";": 333, + "<": 570, + "=": 570, + ">": 570, + "?": 500, + "@": 930, + "A": 722, + "B": 667, + "C": 722, + "D": 722, + "E": 667, + "F": 611, + "G": 778, + "H": 778, + "I": 389, + "J": 500, + "K": 778, + "L": 667, + "M": 944, + "N": 722, + "O": 778, + "P": 611, + "Q": 778, + "R": 722, + "S": 556, + "T": 667, + "U": 722, + "V": 722, + "W": 1000, + "X": 722, + "Y": 722, + "Z": 667, + "[": 333, + "\\": 278, + "]": 333, + "^": 581, + "_": 500, + "`": 333, + "a": 500, + "b": 556, + "c": 444, + "d": 556, + "e": 444, + "f": 333, + "g": 500, + "h": 556, + "i": 278, + "j": 333, + "k": 556, + "l": 278, + "m": 833, + "n": 556, + "o": 500, + "p": 556, + "q": 556, 
+ "r": 444, + "s": 389, + "t": 333, + "u": 556, + "v": 500, + "w": 722, + "x": 500, + "y": 500, + "z": 444, + "{": 394, + "|": 220, + "}": 394, + "~": 520, + "\xa1": 333, + "\xa2": 500, + "\xa3": 500, + "\xa4": 500, + "\xa5": 500, + "\xa6": 220, + "\xa7": 500, + "\xa8": 333, + "\xa9": 747, + "\xaa": 300, + "\xab": 500, + "\xac": 570, + "\xae": 747, + "\xaf": 333, + "\xb0": 400, + "\xb1": 570, + "\xb2": 300, + "\xb3": 300, + "\xb4": 333, + "\xb5": 556, + "\xb6": 540, + "\xb7": 250, + "\xb8": 333, + "\xb9": 300, + "\xba": 330, + "\xbb": 500, + "\xbc": 750, + "\xbd": 750, + "\xbe": 750, + "\xbf": 500, + "\xc0": 722, + "\xc1": 722, + "\xc2": 722, + "\xc3": 722, + "\xc4": 722, + "\xc5": 722, + "\xc6": 1000, + "\xc7": 722, + "\xc8": 667, + "\xc9": 667, + "\xca": 667, + "\xcb": 667, + "\xcc": 389, + "\xcd": 389, + "\xce": 389, + "\xcf": 389, + "\xd0": 722, + "\xd1": 722, + "\xd2": 778, + "\xd3": 778, + "\xd4": 778, + "\xd5": 778, + "\xd6": 778, + "\xd7": 570, + "\xd8": 778, + "\xd9": 722, + "\xda": 722, + "\xdb": 722, + "\xdc": 722, + "\xdd": 722, + "\xde": 611, + "\xdf": 556, + "\xe0": 500, + "\xe1": 500, + "\xe2": 500, + "\xe3": 500, + "\xe4": 500, + "\xe5": 500, + "\xe6": 722, + "\xe7": 444, + "\xe8": 444, + "\xe9": 444, + "\xea": 444, + "\xeb": 444, + "\xec": 278, + "\xed": 278, + "\xee": 278, + "\xef": 278, + "\xf0": 500, + "\xf1": 556, + "\xf2": 500, + "\xf3": 500, + "\xf4": 500, + "\xf5": 500, + "\xf6": 500, + "\xf7": 570, + "\xf8": 500, + "\xf9": 556, + "\xfa": 556, + "\xfb": 556, + "\xfc": 556, + "\xfd": 500, + "\xfe": 556, + "\xff": 500, + "\u0100": 722, + "\u0101": 500, + "\u0102": 722, + "\u0103": 500, + "\u0104": 722, + "\u0105": 500, + "\u0106": 722, + "\u0107": 444, + "\u010c": 722, + "\u010d": 444, + "\u010e": 722, + "\u010f": 672, + "\u0110": 722, + "\u0111": 556, + "\u0112": 667, + "\u0113": 444, + "\u0116": 667, + "\u0117": 444, + "\u0118": 667, + "\u0119": 444, + "\u011a": 667, + "\u011b": 444, + "\u011e": 778, + "\u011f": 500, + "\u0122": 778, + 
"\u0123": 500, + "\u012a": 389, + "\u012b": 278, + "\u012e": 389, + "\u012f": 278, + "\u0130": 389, + "\u0131": 278, + "\u0136": 778, + "\u0137": 556, + "\u0139": 667, + "\u013a": 278, + "\u013b": 667, + "\u013c": 278, + "\u013d": 667, + "\u013e": 394, + "\u0141": 667, + "\u0142": 278, + "\u0143": 722, + "\u0144": 556, + "\u0145": 722, + "\u0146": 556, + "\u0147": 722, + "\u0148": 556, + "\u014c": 778, + "\u014d": 500, + "\u0150": 778, + "\u0151": 500, + "\u0152": 1000, + "\u0153": 722, + "\u0154": 722, + "\u0155": 444, + "\u0156": 722, + "\u0157": 444, + "\u0158": 722, + "\u0159": 444, + "\u015a": 556, + "\u015b": 389, + "\u015e": 556, + "\u015f": 389, + "\u0160": 556, + "\u0161": 389, + "\u0162": 667, + "\u0163": 333, + "\u0164": 667, + "\u0165": 416, + "\u016a": 722, + "\u016b": 556, + "\u016e": 722, + "\u016f": 556, + "\u0170": 722, + "\u0171": 556, + "\u0172": 722, + "\u0173": 556, + "\u0178": 722, + "\u0179": 667, + "\u017a": 444, + "\u017b": 667, + "\u017c": 444, + "\u017d": 667, + "\u017e": 444, + "\u0192": 500, + "\u0218": 556, + "\u0219": 389, + "\u02c6": 333, + "\u02c7": 333, + "\u02d8": 333, + "\u02d9": 333, + "\u02da": 333, + "\u02db": 333, + "\u02dc": 333, + "\u02dd": 333, + "\u2013": 500, + "\u2014": 1000, + "\u2018": 333, + "\u2019": 333, + "\u201a": 333, + "\u201c": 500, + "\u201d": 500, + "\u201e": 500, + "\u2020": 500, + "\u2021": 500, + "\u2022": 350, + "\u2026": 1000, + "\u2030": 1000, + "\u2039": 333, + "\u203a": 333, + "\u2044": 167, + "\u2122": 1000, + "\u2202": 494, + "\u2206": 612, + "\u2211": 600, + "\u2212": 570, + "\u221a": 549, + "\u2260": 549, + "\u2264": 549, + "\u2265": 549, + "\u25ca": 494, + "\uf6c3": 250, + "\ufb01": 556, + "\ufb02": 556, + }, + ), + "Times-BoldItalic": ( + { + "FontName": "Times-BoldItalic", + "Descent": -217.0, + "FontBBox": (-200.0, -218.0, 996.0, 921.0), + "FontWeight": "Bold", + "CapHeight": 669.0, + "FontFamily": "Times", + "Flags": 0, + "XHeight": 462.0, + "ItalicAngle": -15.0, + "Ascent": 683.0, + }, + { 
+ " ": 250, + "!": 389, + '"': 555, + "#": 500, + "$": 500, + "%": 833, + "&": 778, + "'": 278, + "(": 333, + ")": 333, + "*": 500, + "+": 570, + ",": 250, + "-": 333, + ".": 250, + "/": 278, + "0": 500, + "1": 500, + "2": 500, + "3": 500, + "4": 500, + "5": 500, + "6": 500, + "7": 500, + "8": 500, + "9": 500, + ":": 333, + ";": 333, + "<": 570, + "=": 570, + ">": 570, + "?": 500, + "@": 832, + "A": 667, + "B": 667, + "C": 667, + "D": 722, + "E": 667, + "F": 667, + "G": 722, + "H": 778, + "I": 389, + "J": 500, + "K": 667, + "L": 611, + "M": 889, + "N": 722, + "O": 722, + "P": 611, + "Q": 722, + "R": 667, + "S": 556, + "T": 611, + "U": 722, + "V": 667, + "W": 889, + "X": 667, + "Y": 611, + "Z": 611, + "[": 333, + "\\": 278, + "]": 333, + "^": 570, + "_": 500, + "`": 333, + "a": 500, + "b": 500, + "c": 444, + "d": 500, + "e": 444, + "f": 333, + "g": 500, + "h": 556, + "i": 278, + "j": 278, + "k": 500, + "l": 278, + "m": 778, + "n": 556, + "o": 500, + "p": 500, + "q": 500, + "r": 389, + "s": 389, + "t": 278, + "u": 556, + "v": 444, + "w": 667, + "x": 500, + "y": 444, + "z": 389, + "{": 348, + "|": 220, + "}": 348, + "~": 570, + "\xa1": 389, + "\xa2": 500, + "\xa3": 500, + "\xa4": 500, + "\xa5": 500, + "\xa6": 220, + "\xa7": 500, + "\xa8": 333, + "\xa9": 747, + "\xaa": 266, + "\xab": 500, + "\xac": 606, + "\xae": 747, + "\xaf": 333, + "\xb0": 400, + "\xb1": 570, + "\xb2": 300, + "\xb3": 300, + "\xb4": 333, + "\xb5": 576, + "\xb6": 500, + "\xb7": 250, + "\xb8": 333, + "\xb9": 300, + "\xba": 300, + "\xbb": 500, + "\xbc": 750, + "\xbd": 750, + "\xbe": 750, + "\xbf": 500, + "\xc0": 667, + "\xc1": 667, + "\xc2": 667, + "\xc3": 667, + "\xc4": 667, + "\xc5": 667, + "\xc6": 944, + "\xc7": 667, + "\xc8": 667, + "\xc9": 667, + "\xca": 667, + "\xcb": 667, + "\xcc": 389, + "\xcd": 389, + "\xce": 389, + "\xcf": 389, + "\xd0": 722, + "\xd1": 722, + "\xd2": 722, + "\xd3": 722, + "\xd4": 722, + "\xd5": 722, + "\xd6": 722, + "\xd7": 570, + "\xd8": 722, + "\xd9": 722, + "\xda": 722, + 
"\xdb": 722, + "\xdc": 722, + "\xdd": 611, + "\xde": 611, + "\xdf": 500, + "\xe0": 500, + "\xe1": 500, + "\xe2": 500, + "\xe3": 500, + "\xe4": 500, + "\xe5": 500, + "\xe6": 722, + "\xe7": 444, + "\xe8": 444, + "\xe9": 444, + "\xea": 444, + "\xeb": 444, + "\xec": 278, + "\xed": 278, + "\xee": 278, + "\xef": 278, + "\xf0": 500, + "\xf1": 556, + "\xf2": 500, + "\xf3": 500, + "\xf4": 500, + "\xf5": 500, + "\xf6": 500, + "\xf7": 570, + "\xf8": 500, + "\xf9": 556, + "\xfa": 556, + "\xfb": 556, + "\xfc": 556, + "\xfd": 444, + "\xfe": 500, + "\xff": 444, + "\u0100": 667, + "\u0101": 500, + "\u0102": 667, + "\u0103": 500, + "\u0104": 667, + "\u0105": 500, + "\u0106": 667, + "\u0107": 444, + "\u010c": 667, + "\u010d": 444, + "\u010e": 722, + "\u010f": 608, + "\u0110": 722, + "\u0111": 500, + "\u0112": 667, + "\u0113": 444, + "\u0116": 667, + "\u0117": 444, + "\u0118": 667, + "\u0119": 444, + "\u011a": 667, + "\u011b": 444, + "\u011e": 722, + "\u011f": 500, + "\u0122": 722, + "\u0123": 500, + "\u012a": 389, + "\u012b": 278, + "\u012e": 389, + "\u012f": 278, + "\u0130": 389, + "\u0131": 278, + "\u0136": 667, + "\u0137": 500, + "\u0139": 611, + "\u013a": 278, + "\u013b": 611, + "\u013c": 278, + "\u013d": 611, + "\u013e": 382, + "\u0141": 611, + "\u0142": 278, + "\u0143": 722, + "\u0144": 556, + "\u0145": 722, + "\u0146": 556, + "\u0147": 722, + "\u0148": 556, + "\u014c": 722, + "\u014d": 500, + "\u0150": 722, + "\u0151": 500, + "\u0152": 944, + "\u0153": 722, + "\u0154": 667, + "\u0155": 389, + "\u0156": 667, + "\u0157": 389, + "\u0158": 667, + "\u0159": 389, + "\u015a": 556, + "\u015b": 389, + "\u015e": 556, + "\u015f": 389, + "\u0160": 556, + "\u0161": 389, + "\u0162": 611, + "\u0163": 278, + "\u0164": 611, + "\u0165": 366, + "\u016a": 722, + "\u016b": 556, + "\u016e": 722, + "\u016f": 556, + "\u0170": 722, + "\u0171": 556, + "\u0172": 722, + "\u0173": 556, + "\u0178": 611, + "\u0179": 611, + "\u017a": 389, + "\u017b": 611, + "\u017c": 389, + "\u017d": 611, + "\u017e": 389, + 
"\u0192": 500, + "\u0218": 556, + "\u0219": 389, + "\u02c6": 333, + "\u02c7": 333, + "\u02d8": 333, + "\u02d9": 333, + "\u02da": 333, + "\u02db": 333, + "\u02dc": 333, + "\u02dd": 333, + "\u2013": 500, + "\u2014": 1000, + "\u2018": 333, + "\u2019": 333, + "\u201a": 333, + "\u201c": 500, + "\u201d": 500, + "\u201e": 500, + "\u2020": 500, + "\u2021": 500, + "\u2022": 350, + "\u2026": 1000, + "\u2030": 1000, + "\u2039": 333, + "\u203a": 333, + "\u2044": 167, + "\u2122": 1000, + "\u2202": 494, + "\u2206": 612, + "\u2211": 600, + "\u2212": 606, + "\u221a": 549, + "\u2260": 549, + "\u2264": 549, + "\u2265": 549, + "\u25ca": 494, + "\uf6c3": 250, + "\ufb01": 556, + "\ufb02": 556, + }, + ), + "Times-Italic": ( + { + "FontName": "Times-Italic", + "Descent": -217.0, + "FontBBox": (-169.0, -217.0, 1010.0, 883.0), + "FontWeight": "Medium", + "CapHeight": 653.0, + "FontFamily": "Times", + "Flags": 0, + "XHeight": 441.0, + "ItalicAngle": -15.5, + "Ascent": 683.0, + }, + { + " ": 250, + "!": 333, + '"': 420, + "#": 500, + "$": 500, + "%": 833, + "&": 778, + "'": 214, + "(": 333, + ")": 333, + "*": 500, + "+": 675, + ",": 250, + "-": 333, + ".": 250, + "/": 278, + "0": 500, + "1": 500, + "2": 500, + "3": 500, + "4": 500, + "5": 500, + "6": 500, + "7": 500, + "8": 500, + "9": 500, + ":": 333, + ";": 333, + "<": 675, + "=": 675, + ">": 675, + "?": 500, + "@": 920, + "A": 611, + "B": 611, + "C": 667, + "D": 722, + "E": 611, + "F": 611, + "G": 722, + "H": 722, + "I": 333, + "J": 444, + "K": 667, + "L": 556, + "M": 833, + "N": 667, + "O": 722, + "P": 611, + "Q": 722, + "R": 611, + "S": 500, + "T": 556, + "U": 722, + "V": 611, + "W": 833, + "X": 611, + "Y": 556, + "Z": 556, + "[": 389, + "\\": 278, + "]": 389, + "^": 422, + "_": 500, + "`": 333, + "a": 500, + "b": 500, + "c": 444, + "d": 500, + "e": 444, + "f": 278, + "g": 500, + "h": 500, + "i": 278, + "j": 278, + "k": 444, + "l": 278, + "m": 722, + "n": 500, + "o": 500, + "p": 500, + "q": 500, + "r": 389, + "s": 389, + "t": 278, + 
"u": 500, + "v": 444, + "w": 667, + "x": 444, + "y": 444, + "z": 389, + "{": 400, + "|": 275, + "}": 400, + "~": 541, + "\xa1": 389, + "\xa2": 500, + "\xa3": 500, + "\xa4": 500, + "\xa5": 500, + "\xa6": 275, + "\xa7": 500, + "\xa8": 333, + "\xa9": 760, + "\xaa": 276, + "\xab": 500, + "\xac": 675, + "\xae": 760, + "\xaf": 333, + "\xb0": 400, + "\xb1": 675, + "\xb2": 300, + "\xb3": 300, + "\xb4": 333, + "\xb5": 500, + "\xb6": 523, + "\xb7": 250, + "\xb8": 333, + "\xb9": 300, + "\xba": 310, + "\xbb": 500, + "\xbc": 750, + "\xbd": 750, + "\xbe": 750, + "\xbf": 500, + "\xc0": 611, + "\xc1": 611, + "\xc2": 611, + "\xc3": 611, + "\xc4": 611, + "\xc5": 611, + "\xc6": 889, + "\xc7": 667, + "\xc8": 611, + "\xc9": 611, + "\xca": 611, + "\xcb": 611, + "\xcc": 333, + "\xcd": 333, + "\xce": 333, + "\xcf": 333, + "\xd0": 722, + "\xd1": 667, + "\xd2": 722, + "\xd3": 722, + "\xd4": 722, + "\xd5": 722, + "\xd6": 722, + "\xd7": 675, + "\xd8": 722, + "\xd9": 722, + "\xda": 722, + "\xdb": 722, + "\xdc": 722, + "\xdd": 556, + "\xde": 611, + "\xdf": 500, + "\xe0": 500, + "\xe1": 500, + "\xe2": 500, + "\xe3": 500, + "\xe4": 500, + "\xe5": 500, + "\xe6": 667, + "\xe7": 444, + "\xe8": 444, + "\xe9": 444, + "\xea": 444, + "\xeb": 444, + "\xec": 278, + "\xed": 278, + "\xee": 278, + "\xef": 278, + "\xf0": 500, + "\xf1": 500, + "\xf2": 500, + "\xf3": 500, + "\xf4": 500, + "\xf5": 500, + "\xf6": 500, + "\xf7": 675, + "\xf8": 500, + "\xf9": 500, + "\xfa": 500, + "\xfb": 500, + "\xfc": 500, + "\xfd": 444, + "\xfe": 500, + "\xff": 444, + "\u0100": 611, + "\u0101": 500, + "\u0102": 611, + "\u0103": 500, + "\u0104": 611, + "\u0105": 500, + "\u0106": 667, + "\u0107": 444, + "\u010c": 667, + "\u010d": 444, + "\u010e": 722, + "\u010f": 544, + "\u0110": 722, + "\u0111": 500, + "\u0112": 611, + "\u0113": 444, + "\u0116": 611, + "\u0117": 444, + "\u0118": 611, + "\u0119": 444, + "\u011a": 611, + "\u011b": 444, + "\u011e": 722, + "\u011f": 500, + "\u0122": 722, + "\u0123": 500, + "\u012a": 333, + "\u012b": 
278, + "\u012e": 333, + "\u012f": 278, + "\u0130": 333, + "\u0131": 278, + "\u0136": 667, + "\u0137": 444, + "\u0139": 556, + "\u013a": 278, + "\u013b": 556, + "\u013c": 278, + "\u013d": 611, + "\u013e": 300, + "\u0141": 556, + "\u0142": 278, + "\u0143": 667, + "\u0144": 500, + "\u0145": 667, + "\u0146": 500, + "\u0147": 667, + "\u0148": 500, + "\u014c": 722, + "\u014d": 500, + "\u0150": 722, + "\u0151": 500, + "\u0152": 944, + "\u0153": 667, + "\u0154": 611, + "\u0155": 389, + "\u0156": 611, + "\u0157": 389, + "\u0158": 611, + "\u0159": 389, + "\u015a": 500, + "\u015b": 389, + "\u015e": 500, + "\u015f": 389, + "\u0160": 500, + "\u0161": 389, + "\u0162": 556, + "\u0163": 278, + "\u0164": 556, + "\u0165": 300, + "\u016a": 722, + "\u016b": 500, + "\u016e": 722, + "\u016f": 500, + "\u0170": 722, + "\u0171": 500, + "\u0172": 722, + "\u0173": 500, + "\u0178": 556, + "\u0179": 556, + "\u017a": 389, + "\u017b": 556, + "\u017c": 389, + "\u017d": 556, + "\u017e": 389, + "\u0192": 500, + "\u0218": 500, + "\u0219": 389, + "\u02c6": 333, + "\u02c7": 333, + "\u02d8": 333, + "\u02d9": 333, + "\u02da": 333, + "\u02db": 333, + "\u02dc": 333, + "\u02dd": 333, + "\u2013": 500, + "\u2014": 889, + "\u2018": 333, + "\u2019": 333, + "\u201a": 333, + "\u201c": 556, + "\u201d": 556, + "\u201e": 556, + "\u2020": 500, + "\u2021": 500, + "\u2022": 350, + "\u2026": 889, + "\u2030": 1000, + "\u2039": 333, + "\u203a": 333, + "\u2044": 167, + "\u2122": 980, + "\u2202": 476, + "\u2206": 612, + "\u2211": 600, + "\u2212": 675, + "\u221a": 453, + "\u2260": 549, + "\u2264": 549, + "\u2265": 549, + "\u25ca": 471, + "\uf6c3": 250, + "\ufb01": 500, + "\ufb02": 500, + }, + ), + "Times-Roman": ( + { + "FontName": "Times-Roman", + "Descent": -217.0, + "FontBBox": (-168.0, -218.0, 1000.0, 898.0), + "FontWeight": "Roman", + "CapHeight": 662.0, + "FontFamily": "Times", + "Flags": 0, + "XHeight": 450.0, + "ItalicAngle": 0.0, + "Ascent": 683.0, + }, + { + " ": 250, + "!": 333, + '"': 408, + "#": 500, + "$": 
500, + "%": 833, + "&": 778, + "'": 180, + "(": 333, + ")": 333, + "*": 500, + "+": 564, + ",": 250, + "-": 333, + ".": 250, + "/": 278, + "0": 500, + "1": 500, + "2": 500, + "3": 500, + "4": 500, + "5": 500, + "6": 500, + "7": 500, + "8": 500, + "9": 500, + ":": 278, + ";": 278, + "<": 564, + "=": 564, + ">": 564, + "?": 444, + "@": 921, + "A": 722, + "B": 667, + "C": 667, + "D": 722, + "E": 611, + "F": 556, + "G": 722, + "H": 722, + "I": 333, + "J": 389, + "K": 722, + "L": 611, + "M": 889, + "N": 722, + "O": 722, + "P": 556, + "Q": 722, + "R": 667, + "S": 556, + "T": 611, + "U": 722, + "V": 722, + "W": 944, + "X": 722, + "Y": 722, + "Z": 611, + "[": 333, + "\\": 278, + "]": 333, + "^": 469, + "_": 500, + "`": 333, + "a": 444, + "b": 500, + "c": 444, + "d": 500, + "e": 444, + "f": 333, + "g": 500, + "h": 500, + "i": 278, + "j": 278, + "k": 500, + "l": 278, + "m": 778, + "n": 500, + "o": 500, + "p": 500, + "q": 500, + "r": 333, + "s": 389, + "t": 278, + "u": 500, + "v": 500, + "w": 722, + "x": 500, + "y": 500, + "z": 444, + "{": 480, + "|": 200, + "}": 480, + "~": 541, + "\xa1": 333, + "\xa2": 500, + "\xa3": 500, + "\xa4": 500, + "\xa5": 500, + "\xa6": 200, + "\xa7": 500, + "\xa8": 333, + "\xa9": 760, + "\xaa": 276, + "\xab": 500, + "\xac": 564, + "\xae": 760, + "\xaf": 333, + "\xb0": 400, + "\xb1": 564, + "\xb2": 300, + "\xb3": 300, + "\xb4": 333, + "\xb5": 500, + "\xb6": 453, + "\xb7": 250, + "\xb8": 333, + "\xb9": 300, + "\xba": 310, + "\xbb": 500, + "\xbc": 750, + "\xbd": 750, + "\xbe": 750, + "\xbf": 444, + "\xc0": 722, + "\xc1": 722, + "\xc2": 722, + "\xc3": 722, + "\xc4": 722, + "\xc5": 722, + "\xc6": 889, + "\xc7": 667, + "\xc8": 611, + "\xc9": 611, + "\xca": 611, + "\xcb": 611, + "\xcc": 333, + "\xcd": 333, + "\xce": 333, + "\xcf": 333, + "\xd0": 722, + "\xd1": 722, + "\xd2": 722, + "\xd3": 722, + "\xd4": 722, + "\xd5": 722, + "\xd6": 722, + "\xd7": 564, + "\xd8": 722, + "\xd9": 722, + "\xda": 722, + "\xdb": 722, + "\xdc": 722, + "\xdd": 722, + "\xde": 
556, + "\xdf": 500, + "\xe0": 444, + "\xe1": 444, + "\xe2": 444, + "\xe3": 444, + "\xe4": 444, + "\xe5": 444, + "\xe6": 667, + "\xe7": 444, + "\xe8": 444, + "\xe9": 444, + "\xea": 444, + "\xeb": 444, + "\xec": 278, + "\xed": 278, + "\xee": 278, + "\xef": 278, + "\xf0": 500, + "\xf1": 500, + "\xf2": 500, + "\xf3": 500, + "\xf4": 500, + "\xf5": 500, + "\xf6": 500, + "\xf7": 564, + "\xf8": 500, + "\xf9": 500, + "\xfa": 500, + "\xfb": 500, + "\xfc": 500, + "\xfd": 500, + "\xfe": 500, + "\xff": 500, + "\u0100": 722, + "\u0101": 444, + "\u0102": 722, + "\u0103": 444, + "\u0104": 722, + "\u0105": 444, + "\u0106": 667, + "\u0107": 444, + "\u010c": 667, + "\u010d": 444, + "\u010e": 722, + "\u010f": 588, + "\u0110": 722, + "\u0111": 500, + "\u0112": 611, + "\u0113": 444, + "\u0116": 611, + "\u0117": 444, + "\u0118": 611, + "\u0119": 444, + "\u011a": 611, + "\u011b": 444, + "\u011e": 722, + "\u011f": 500, + "\u0122": 722, + "\u0123": 500, + "\u012a": 333, + "\u012b": 278, + "\u012e": 333, + "\u012f": 278, + "\u0130": 333, + "\u0131": 278, + "\u0136": 722, + "\u0137": 500, + "\u0139": 611, + "\u013a": 278, + "\u013b": 611, + "\u013c": 278, + "\u013d": 611, + "\u013e": 344, + "\u0141": 611, + "\u0142": 278, + "\u0143": 722, + "\u0144": 500, + "\u0145": 722, + "\u0146": 500, + "\u0147": 722, + "\u0148": 500, + "\u014c": 722, + "\u014d": 500, + "\u0150": 722, + "\u0151": 500, + "\u0152": 889, + "\u0153": 722, + "\u0154": 667, + "\u0155": 333, + "\u0156": 667, + "\u0157": 333, + "\u0158": 667, + "\u0159": 333, + "\u015a": 556, + "\u015b": 389, + "\u015e": 556, + "\u015f": 389, + "\u0160": 556, + "\u0161": 389, + "\u0162": 611, + "\u0163": 278, + "\u0164": 611, + "\u0165": 326, + "\u016a": 722, + "\u016b": 500, + "\u016e": 722, + "\u016f": 500, + "\u0170": 722, + "\u0171": 500, + "\u0172": 722, + "\u0173": 500, + "\u0178": 722, + "\u0179": 611, + "\u017a": 444, + "\u017b": 611, + "\u017c": 444, + "\u017d": 611, + "\u017e": 444, + "\u0192": 500, + "\u0218": 556, + "\u0219": 389, + 
"\u02c6": 333, + "\u02c7": 333, + "\u02d8": 333, + "\u02d9": 333, + "\u02da": 333, + "\u02db": 333, + "\u02dc": 333, + "\u02dd": 333, + "\u2013": 500, + "\u2014": 1000, + "\u2018": 333, + "\u2019": 333, + "\u201a": 333, + "\u201c": 444, + "\u201d": 444, + "\u201e": 444, + "\u2020": 500, + "\u2021": 500, + "\u2022": 350, + "\u2026": 1000, + "\u2030": 1000, + "\u2039": 333, + "\u203a": 333, + "\u2044": 167, + "\u2122": 980, + "\u2202": 476, + "\u2206": 612, + "\u2211": 600, + "\u2212": 564, + "\u221a": 453, + "\u2260": 549, + "\u2264": 549, + "\u2265": 549, + "\u25ca": 471, + "\uf6c3": 250, + "\ufb01": 556, + "\ufb02": 556, + }, + ), + "ZapfDingbats": ( + { + "FontName": "ZapfDingbats", + "FontBBox": (-1.0, -143.0, 981.0, 820.0), + "FontWeight": "Medium", + "FontFamily": "ITC", + "Flags": 0, + "ItalicAngle": 0.0, + }, + { + "\x01": 974, + "\x02": 961, + "\x03": 980, + "\x04": 719, + "\x05": 789, + "\x06": 494, + "\x07": 552, + "\x08": 537, + "\t": 577, + "\n": 692, + "\x0b": 960, + "\x0c": 939, + "\r": 549, + "\x0e": 855, + "\x0f": 911, + "\x10": 933, + "\x11": 945, + "\x12": 974, + "\x13": 755, + "\x14": 846, + "\x15": 762, + "\x16": 761, + "\x17": 571, + "\x18": 677, + "\x19": 763, + "\x1a": 760, + "\x1b": 759, + "\x1c": 754, + "\x1d": 786, + "\x1e": 788, + "\x1f": 788, + " ": 790, + "!": 793, + '"': 794, + "#": 816, + "$": 823, + "%": 789, + "&": 841, + "'": 823, + "(": 833, + ")": 816, + "*": 831, + "+": 923, + ",": 744, + "-": 723, + ".": 749, + "/": 790, + "0": 792, + "1": 695, + "2": 776, + "3": 768, + "4": 792, + "5": 759, + "6": 707, + "7": 708, + "8": 682, + "9": 701, + ":": 826, + ";": 815, + "<": 789, + "=": 789, + ">": 707, + "?": 687, + "@": 696, + "A": 689, + "B": 786, + "C": 787, + "D": 713, + "E": 791, + "F": 785, + "G": 791, + "H": 873, + "I": 761, + "J": 762, + "K": 759, + "L": 892, + "M": 892, + "N": 788, + "O": 784, + "Q": 438, + "R": 138, + "S": 277, + "T": 415, + "U": 509, + "V": 410, + "W": 234, + "X": 234, + "Y": 390, + "Z": 390, + "[": 276, 
+ "\\": 276, + "]": 317, + "^": 317, + "_": 334, + "`": 334, + "a": 392, + "b": 392, + "c": 668, + "d": 668, + "e": 732, + "f": 544, + "g": 544, + "h": 910, + "i": 911, + "j": 667, + "k": 760, + "l": 760, + "m": 626, + "n": 694, + "o": 595, + "p": 776, + "u": 690, + "v": 791, + "w": 790, + "x": 788, + "y": 788, + "z": 788, + "{": 788, + "|": 788, + "}": 788, + "~": 788, + "\x7f": 788, + "\x80": 788, + "\x81": 788, + "\x82": 788, + "\x83": 788, + "\x84": 788, + "\x85": 788, + "\x86": 788, + "\x87": 788, + "\x88": 788, + "\x89": 788, + "\x8a": 788, + "\x8b": 788, + "\x8c": 788, + "\x8d": 788, + "\x8e": 788, + "\x8f": 788, + "\x90": 788, + "\x91": 788, + "\x92": 788, + "\x93": 788, + "\x94": 788, + "\x95": 788, + "\x96": 788, + "\x97": 788, + "\x98": 788, + "\x99": 788, + "\x9a": 788, + "\x9b": 788, + "\x9c": 788, + "\x9d": 788, + "\x9e": 788, + "\x9f": 788, + "\xa0": 894, + "\xa1": 838, + "\xa2": 924, + "\xa3": 1016, + "\xa4": 458, + "\xa5": 924, + "\xa6": 918, + "\xa7": 927, + "\xa8": 928, + "\xa9": 928, + "\xaa": 834, + "\xab": 873, + "\xac": 828, + "\xad": 924, + "\xae": 917, + "\xaf": 930, + "\xb0": 931, + "\xb1": 463, + "\xb2": 883, + "\xb3": 836, + "\xb4": 867, + "\xb5": 696, + "\xb6": 874, + "\xb7": 760, + "\xb8": 946, + "\xb9": 865, + "\xba": 967, + "\xbb": 831, + "\xbc": 873, + "\xbd": 927, + "\xbe": 970, + "\xbf": 918, + "\xc0": 748, + "\xc1": 836, + "\xc2": 771, + "\xc3": 888, + "\xc4": 748, + "\xc5": 771, + "\xc6": 888, + "\xc7": 867, + "\xc8": 696, + "\xc9": 874, + "\xca": 974, + "\xcb": 762, + "\xcc": 759, + "\xcd": 509, + "\xce": 410, + }, + ), } diff --git a/pdfminer/glyphlist.py b/pdfminer/glyphlist.py index 6949ba8..46c3235 100644 --- a/pdfminer/glyphlist.py +++ b/pdfminer/glyphlist.py @@ -52,4286 +52,4286 @@ The following data was taken by # (2) Unicode scalar value glyphname2unicode = { - 'A': '\u0041', - 'AE': '\u00C6', - 'AEacute': '\u01FC', - 'AEmacron': '\u01E2', - 'AEsmall': '\uF7E6', - 'Aacute': '\u00C1', - 'Aacutesmall': '\uF7E1', - 
'Abreve': '\u0102', - 'Abreveacute': '\u1EAE', - 'Abrevecyrillic': '\u04D0', - 'Abrevedotbelow': '\u1EB6', - 'Abrevegrave': '\u1EB0', - 'Abrevehookabove': '\u1EB2', - 'Abrevetilde': '\u1EB4', - 'Acaron': '\u01CD', - 'Acircle': '\u24B6', - 'Acircumflex': '\u00C2', - 'Acircumflexacute': '\u1EA4', - 'Acircumflexdotbelow': '\u1EAC', - 'Acircumflexgrave': '\u1EA6', - 'Acircumflexhookabove': '\u1EA8', - 'Acircumflexsmall': '\uF7E2', - 'Acircumflextilde': '\u1EAA', - 'Acute': '\uF6C9', - 'Acutesmall': '\uF7B4', - 'Acyrillic': '\u0410', - 'Adblgrave': '\u0200', - 'Adieresis': '\u00C4', - 'Adieresiscyrillic': '\u04D2', - 'Adieresismacron': '\u01DE', - 'Adieresissmall': '\uF7E4', - 'Adotbelow': '\u1EA0', - 'Adotmacron': '\u01E0', - 'Agrave': '\u00C0', - 'Agravesmall': '\uF7E0', - 'Ahookabove': '\u1EA2', - 'Aiecyrillic': '\u04D4', - 'Ainvertedbreve': '\u0202', - 'Alpha': '\u0391', - 'Alphatonos': '\u0386', - 'Amacron': '\u0100', - 'Amonospace': '\uFF21', - 'Aogonek': '\u0104', - 'Aring': '\u00C5', - 'Aringacute': '\u01FA', - 'Aringbelow': '\u1E00', - 'Aringsmall': '\uF7E5', - 'Asmall': '\uF761', - 'Atilde': '\u00C3', - 'Atildesmall': '\uF7E3', - 'Aybarmenian': '\u0531', - 'B': '\u0042', - 'Bcircle': '\u24B7', - 'Bdotaccent': '\u1E02', - 'Bdotbelow': '\u1E04', - 'Becyrillic': '\u0411', - 'Benarmenian': '\u0532', - 'Beta': '\u0392', - 'Bhook': '\u0181', - 'Blinebelow': '\u1E06', - 'Bmonospace': '\uFF22', - 'Brevesmall': '\uF6F4', - 'Bsmall': '\uF762', - 'Btopbar': '\u0182', - 'C': '\u0043', - 'Caarmenian': '\u053E', - 'Cacute': '\u0106', - 'Caron': '\uF6CA', - 'Caronsmall': '\uF6F5', - 'Ccaron': '\u010C', - 'Ccedilla': '\u00C7', - 'Ccedillaacute': '\u1E08', - 'Ccedillasmall': '\uF7E7', - 'Ccircle': '\u24B8', - 'Ccircumflex': '\u0108', - 'Cdot': '\u010A', - 'Cdotaccent': '\u010A', - 'Cedillasmall': '\uF7B8', - 'Chaarmenian': '\u0549', - 'Cheabkhasiancyrillic': '\u04BC', - 'Checyrillic': '\u0427', - 'Chedescenderabkhasiancyrillic': '\u04BE', - 'Chedescendercyrillic': '\u04B6', - 
'Chedieresiscyrillic': '\u04F4', - 'Cheharmenian': '\u0543', - 'Chekhakassiancyrillic': '\u04CB', - 'Cheverticalstrokecyrillic': '\u04B8', - 'Chi': '\u03A7', - 'Chook': '\u0187', - 'Circumflexsmall': '\uF6F6', - 'Cmonospace': '\uFF23', - 'Coarmenian': '\u0551', - 'Csmall': '\uF763', - 'D': '\u0044', - 'DZ': '\u01F1', - 'DZcaron': '\u01C4', - 'Daarmenian': '\u0534', - 'Dafrican': '\u0189', - 'Dcaron': '\u010E', - 'Dcedilla': '\u1E10', - 'Dcircle': '\u24B9', - 'Dcircumflexbelow': '\u1E12', - 'Dcroat': '\u0110', - 'Ddotaccent': '\u1E0A', - 'Ddotbelow': '\u1E0C', - 'Decyrillic': '\u0414', - 'Deicoptic': '\u03EE', - 'Delta': '\u2206', - 'Deltagreek': '\u0394', - 'Dhook': '\u018A', - 'Dieresis': '\uF6CB', - 'DieresisAcute': '\uF6CC', - 'DieresisGrave': '\uF6CD', - 'Dieresissmall': '\uF7A8', - 'Digammagreek': '\u03DC', - 'Djecyrillic': '\u0402', - 'Dlinebelow': '\u1E0E', - 'Dmonospace': '\uFF24', - 'Dotaccentsmall': '\uF6F7', - 'Dslash': '\u0110', - 'Dsmall': '\uF764', - 'Dtopbar': '\u018B', - 'Dz': '\u01F2', - 'Dzcaron': '\u01C5', - 'Dzeabkhasiancyrillic': '\u04E0', - 'Dzecyrillic': '\u0405', - 'Dzhecyrillic': '\u040F', - 'E': '\u0045', - 'Eacute': '\u00C9', - 'Eacutesmall': '\uF7E9', - 'Ebreve': '\u0114', - 'Ecaron': '\u011A', - 'Ecedillabreve': '\u1E1C', - 'Echarmenian': '\u0535', - 'Ecircle': '\u24BA', - 'Ecircumflex': '\u00CA', - 'Ecircumflexacute': '\u1EBE', - 'Ecircumflexbelow': '\u1E18', - 'Ecircumflexdotbelow': '\u1EC6', - 'Ecircumflexgrave': '\u1EC0', - 'Ecircumflexhookabove': '\u1EC2', - 'Ecircumflexsmall': '\uF7EA', - 'Ecircumflextilde': '\u1EC4', - 'Ecyrillic': '\u0404', - 'Edblgrave': '\u0204', - 'Edieresis': '\u00CB', - 'Edieresissmall': '\uF7EB', - 'Edot': '\u0116', - 'Edotaccent': '\u0116', - 'Edotbelow': '\u1EB8', - 'Efcyrillic': '\u0424', - 'Egrave': '\u00C8', - 'Egravesmall': '\uF7E8', - 'Eharmenian': '\u0537', - 'Ehookabove': '\u1EBA', - 'Eightroman': '\u2167', - 'Einvertedbreve': '\u0206', - 'Eiotifiedcyrillic': '\u0464', - 'Elcyrillic': '\u041B', - 
'Elevenroman': '\u216A', - 'Emacron': '\u0112', - 'Emacronacute': '\u1E16', - 'Emacrongrave': '\u1E14', - 'Emcyrillic': '\u041C', - 'Emonospace': '\uFF25', - 'Encyrillic': '\u041D', - 'Endescendercyrillic': '\u04A2', - 'Eng': '\u014A', - 'Enghecyrillic': '\u04A4', - 'Enhookcyrillic': '\u04C7', - 'Eogonek': '\u0118', - 'Eopen': '\u0190', - 'Epsilon': '\u0395', - 'Epsilontonos': '\u0388', - 'Ercyrillic': '\u0420', - 'Ereversed': '\u018E', - 'Ereversedcyrillic': '\u042D', - 'Escyrillic': '\u0421', - 'Esdescendercyrillic': '\u04AA', - 'Esh': '\u01A9', - 'Esmall': '\uF765', - 'Eta': '\u0397', - 'Etarmenian': '\u0538', - 'Etatonos': '\u0389', - 'Eth': '\u00D0', - 'Ethsmall': '\uF7F0', - 'Etilde': '\u1EBC', - 'Etildebelow': '\u1E1A', - 'Euro': '\u20AC', - 'Ezh': '\u01B7', - 'Ezhcaron': '\u01EE', - 'Ezhreversed': '\u01B8', - 'F': '\u0046', - 'Fcircle': '\u24BB', - 'Fdotaccent': '\u1E1E', - 'Feharmenian': '\u0556', - 'Feicoptic': '\u03E4', - 'Fhook': '\u0191', - 'Fitacyrillic': '\u0472', - 'Fiveroman': '\u2164', - 'Fmonospace': '\uFF26', - 'Fourroman': '\u2163', - 'Fsmall': '\uF766', - 'G': '\u0047', - 'GBsquare': '\u3387', - 'Gacute': '\u01F4', - 'Gamma': '\u0393', - 'Gammaafrican': '\u0194', - 'Gangiacoptic': '\u03EA', - 'Gbreve': '\u011E', - 'Gcaron': '\u01E6', - 'Gcedilla': '\u0122', - 'Gcircle': '\u24BC', - 'Gcircumflex': '\u011C', - 'Gcommaaccent': '\u0122', - 'Gdot': '\u0120', - 'Gdotaccent': '\u0120', - 'Gecyrillic': '\u0413', - 'Ghadarmenian': '\u0542', - 'Ghemiddlehookcyrillic': '\u0494', - 'Ghestrokecyrillic': '\u0492', - 'Gheupturncyrillic': '\u0490', - 'Ghook': '\u0193', - 'Gimarmenian': '\u0533', - 'Gjecyrillic': '\u0403', - 'Gmacron': '\u1E20', - 'Gmonospace': '\uFF27', - 'Grave': '\uF6CE', - 'Gravesmall': '\uF760', - 'Gsmall': '\uF767', - 'Gsmallhook': '\u029B', - 'Gstroke': '\u01E4', - 'H': '\u0048', - 'H18533': '\u25CF', - 'H18543': '\u25AA', - 'H18551': '\u25AB', - 'H22073': '\u25A1', - 'HPsquare': '\u33CB', - 'Haabkhasiancyrillic': '\u04A8', - 
'Hadescendercyrillic': '\u04B2', - 'Hardsigncyrillic': '\u042A', - 'Hbar': '\u0126', - 'Hbrevebelow': '\u1E2A', - 'Hcedilla': '\u1E28', - 'Hcircle': '\u24BD', - 'Hcircumflex': '\u0124', - 'Hdieresis': '\u1E26', - 'Hdotaccent': '\u1E22', - 'Hdotbelow': '\u1E24', - 'Hmonospace': '\uFF28', - 'Hoarmenian': '\u0540', - 'Horicoptic': '\u03E8', - 'Hsmall': '\uF768', - 'Hungarumlaut': '\uF6CF', - 'Hungarumlautsmall': '\uF6F8', - 'Hzsquare': '\u3390', - 'I': '\u0049', - 'IAcyrillic': '\u042F', - 'IJ': '\u0132', - 'IUcyrillic': '\u042E', - 'Iacute': '\u00CD', - 'Iacutesmall': '\uF7ED', - 'Ibreve': '\u012C', - 'Icaron': '\u01CF', - 'Icircle': '\u24BE', - 'Icircumflex': '\u00CE', - 'Icircumflexsmall': '\uF7EE', - 'Icyrillic': '\u0406', - 'Idblgrave': '\u0208', - 'Idieresis': '\u00CF', - 'Idieresisacute': '\u1E2E', - 'Idieresiscyrillic': '\u04E4', - 'Idieresissmall': '\uF7EF', - 'Idot': '\u0130', - 'Idotaccent': '\u0130', - 'Idotbelow': '\u1ECA', - 'Iebrevecyrillic': '\u04D6', - 'Iecyrillic': '\u0415', - 'Ifraktur': '\u2111', - 'Igrave': '\u00CC', - 'Igravesmall': '\uF7EC', - 'Ihookabove': '\u1EC8', - 'Iicyrillic': '\u0418', - 'Iinvertedbreve': '\u020A', - 'Iishortcyrillic': '\u0419', - 'Imacron': '\u012A', - 'Imacroncyrillic': '\u04E2', - 'Imonospace': '\uFF29', - 'Iniarmenian': '\u053B', - 'Iocyrillic': '\u0401', - 'Iogonek': '\u012E', - 'Iota': '\u0399', - 'Iotaafrican': '\u0196', - 'Iotadieresis': '\u03AA', - 'Iotatonos': '\u038A', - 'Ismall': '\uF769', - 'Istroke': '\u0197', - 'Itilde': '\u0128', - 'Itildebelow': '\u1E2C', - 'Izhitsacyrillic': '\u0474', - 'Izhitsadblgravecyrillic': '\u0476', - 'J': '\u004A', - 'Jaarmenian': '\u0541', - 'Jcircle': '\u24BF', - 'Jcircumflex': '\u0134', - 'Jecyrillic': '\u0408', - 'Jheharmenian': '\u054B', - 'Jmonospace': '\uFF2A', - 'Jsmall': '\uF76A', - 'K': '\u004B', - 'KBsquare': '\u3385', - 'KKsquare': '\u33CD', - 'Kabashkircyrillic': '\u04A0', - 'Kacute': '\u1E30', - 'Kacyrillic': '\u041A', - 'Kadescendercyrillic': '\u049A', - 
'Kahookcyrillic': '\u04C3', - 'Kappa': '\u039A', - 'Kastrokecyrillic': '\u049E', - 'Kaverticalstrokecyrillic': '\u049C', - 'Kcaron': '\u01E8', - 'Kcedilla': '\u0136', - 'Kcircle': '\u24C0', - 'Kcommaaccent': '\u0136', - 'Kdotbelow': '\u1E32', - 'Keharmenian': '\u0554', - 'Kenarmenian': '\u053F', - 'Khacyrillic': '\u0425', - 'Kheicoptic': '\u03E6', - 'Khook': '\u0198', - 'Kjecyrillic': '\u040C', - 'Klinebelow': '\u1E34', - 'Kmonospace': '\uFF2B', - 'Koppacyrillic': '\u0480', - 'Koppagreek': '\u03DE', - 'Ksicyrillic': '\u046E', - 'Ksmall': '\uF76B', - 'L': '\u004C', - 'LJ': '\u01C7', - 'LL': '\uF6BF', - 'Lacute': '\u0139', - 'Lambda': '\u039B', - 'Lcaron': '\u013D', - 'Lcedilla': '\u013B', - 'Lcircle': '\u24C1', - 'Lcircumflexbelow': '\u1E3C', - 'Lcommaaccent': '\u013B', - 'Ldot': '\u013F', - 'Ldotaccent': '\u013F', - 'Ldotbelow': '\u1E36', - 'Ldotbelowmacron': '\u1E38', - 'Liwnarmenian': '\u053C', - 'Lj': '\u01C8', - 'Ljecyrillic': '\u0409', - 'Llinebelow': '\u1E3A', - 'Lmonospace': '\uFF2C', - 'Lslash': '\u0141', - 'Lslashsmall': '\uF6F9', - 'Lsmall': '\uF76C', - 'M': '\u004D', - 'MBsquare': '\u3386', - 'Macron': '\uF6D0', - 'Macronsmall': '\uF7AF', - 'Macute': '\u1E3E', - 'Mcircle': '\u24C2', - 'Mdotaccent': '\u1E40', - 'Mdotbelow': '\u1E42', - 'Menarmenian': '\u0544', - 'Mmonospace': '\uFF2D', - 'Msmall': '\uF76D', - 'Mturned': '\u019C', - 'Mu': '\u039C', - 'N': '\u004E', - 'NJ': '\u01CA', - 'Nacute': '\u0143', - 'Ncaron': '\u0147', - 'Ncedilla': '\u0145', - 'Ncircle': '\u24C3', - 'Ncircumflexbelow': '\u1E4A', - 'Ncommaaccent': '\u0145', - 'Ndotaccent': '\u1E44', - 'Ndotbelow': '\u1E46', - 'Nhookleft': '\u019D', - 'Nineroman': '\u2168', - 'Nj': '\u01CB', - 'Njecyrillic': '\u040A', - 'Nlinebelow': '\u1E48', - 'Nmonospace': '\uFF2E', - 'Nowarmenian': '\u0546', - 'Nsmall': '\uF76E', - 'Ntilde': '\u00D1', - 'Ntildesmall': '\uF7F1', - 'Nu': '\u039D', - 'O': '\u004F', - 'OE': '\u0152', - 'OEsmall': '\uF6FA', - 'Oacute': '\u00D3', - 'Oacutesmall': '\uF7F3', - 
'Obarredcyrillic': '\u04E8', - 'Obarreddieresiscyrillic': '\u04EA', - 'Obreve': '\u014E', - 'Ocaron': '\u01D1', - 'Ocenteredtilde': '\u019F', - 'Ocircle': '\u24C4', - 'Ocircumflex': '\u00D4', - 'Ocircumflexacute': '\u1ED0', - 'Ocircumflexdotbelow': '\u1ED8', - 'Ocircumflexgrave': '\u1ED2', - 'Ocircumflexhookabove': '\u1ED4', - 'Ocircumflexsmall': '\uF7F4', - 'Ocircumflextilde': '\u1ED6', - 'Ocyrillic': '\u041E', - 'Odblacute': '\u0150', - 'Odblgrave': '\u020C', - 'Odieresis': '\u00D6', - 'Odieresiscyrillic': '\u04E6', - 'Odieresissmall': '\uF7F6', - 'Odotbelow': '\u1ECC', - 'Ogoneksmall': '\uF6FB', - 'Ograve': '\u00D2', - 'Ogravesmall': '\uF7F2', - 'Oharmenian': '\u0555', - 'Ohm': '\u2126', - 'Ohookabove': '\u1ECE', - 'Ohorn': '\u01A0', - 'Ohornacute': '\u1EDA', - 'Ohorndotbelow': '\u1EE2', - 'Ohorngrave': '\u1EDC', - 'Ohornhookabove': '\u1EDE', - 'Ohorntilde': '\u1EE0', - 'Ohungarumlaut': '\u0150', - 'Oi': '\u01A2', - 'Oinvertedbreve': '\u020E', - 'Omacron': '\u014C', - 'Omacronacute': '\u1E52', - 'Omacrongrave': '\u1E50', - 'Omega': '\u2126', - 'Omegacyrillic': '\u0460', - 'Omegagreek': '\u03A9', - 'Omegaroundcyrillic': '\u047A', - 'Omegatitlocyrillic': '\u047C', - 'Omegatonos': '\u038F', - 'Omicron': '\u039F', - 'Omicrontonos': '\u038C', - 'Omonospace': '\uFF2F', - 'Oneroman': '\u2160', - 'Oogonek': '\u01EA', - 'Oogonekmacron': '\u01EC', - 'Oopen': '\u0186', - 'Oslash': '\u00D8', - 'Oslashacute': '\u01FE', - 'Oslashsmall': '\uF7F8', - 'Osmall': '\uF76F', - 'Ostrokeacute': '\u01FE', - 'Otcyrillic': '\u047E', - 'Otilde': '\u00D5', - 'Otildeacute': '\u1E4C', - 'Otildedieresis': '\u1E4E', - 'Otildesmall': '\uF7F5', - 'P': '\u0050', - 'Pacute': '\u1E54', - 'Pcircle': '\u24C5', - 'Pdotaccent': '\u1E56', - 'Pecyrillic': '\u041F', - 'Peharmenian': '\u054A', - 'Pemiddlehookcyrillic': '\u04A6', - 'Phi': '\u03A6', - 'Phook': '\u01A4', - 'Pi': '\u03A0', - 'Piwrarmenian': '\u0553', - 'Pmonospace': '\uFF30', - 'Psi': '\u03A8', - 'Psicyrillic': '\u0470', - 'Psmall': '\uF770', 
- 'Q': '\u0051', - 'Qcircle': '\u24C6', - 'Qmonospace': '\uFF31', - 'Qsmall': '\uF771', - 'R': '\u0052', - 'Raarmenian': '\u054C', - 'Racute': '\u0154', - 'Rcaron': '\u0158', - 'Rcedilla': '\u0156', - 'Rcircle': '\u24C7', - 'Rcommaaccent': '\u0156', - 'Rdblgrave': '\u0210', - 'Rdotaccent': '\u1E58', - 'Rdotbelow': '\u1E5A', - 'Rdotbelowmacron': '\u1E5C', - 'Reharmenian': '\u0550', - 'Rfraktur': '\u211C', - 'Rho': '\u03A1', - 'Ringsmall': '\uF6FC', - 'Rinvertedbreve': '\u0212', - 'Rlinebelow': '\u1E5E', - 'Rmonospace': '\uFF32', - 'Rsmall': '\uF772', - 'Rsmallinverted': '\u0281', - 'Rsmallinvertedsuperior': '\u02B6', - 'S': '\u0053', - 'SF010000': '\u250C', - 'SF020000': '\u2514', - 'SF030000': '\u2510', - 'SF040000': '\u2518', - 'SF050000': '\u253C', - 'SF060000': '\u252C', - 'SF070000': '\u2534', - 'SF080000': '\u251C', - 'SF090000': '\u2524', - 'SF100000': '\u2500', - 'SF110000': '\u2502', - 'SF190000': '\u2561', - 'SF200000': '\u2562', - 'SF210000': '\u2556', - 'SF220000': '\u2555', - 'SF230000': '\u2563', - 'SF240000': '\u2551', - 'SF250000': '\u2557', - 'SF260000': '\u255D', - 'SF270000': '\u255C', - 'SF280000': '\u255B', - 'SF360000': '\u255E', - 'SF370000': '\u255F', - 'SF380000': '\u255A', - 'SF390000': '\u2554', - 'SF400000': '\u2569', - 'SF410000': '\u2566', - 'SF420000': '\u2560', - 'SF430000': '\u2550', - 'SF440000': '\u256C', - 'SF450000': '\u2567', - 'SF460000': '\u2568', - 'SF470000': '\u2564', - 'SF480000': '\u2565', - 'SF490000': '\u2559', - 'SF500000': '\u2558', - 'SF510000': '\u2552', - 'SF520000': '\u2553', - 'SF530000': '\u256B', - 'SF540000': '\u256A', - 'Sacute': '\u015A', - 'Sacutedotaccent': '\u1E64', - 'Sampigreek': '\u03E0', - 'Scaron': '\u0160', - 'Scarondotaccent': '\u1E66', - 'Scaronsmall': '\uF6FD', - 'Scedilla': '\u015E', - 'Schwa': '\u018F', - 'Schwacyrillic': '\u04D8', - 'Schwadieresiscyrillic': '\u04DA', - 'Scircle': '\u24C8', - 'Scircumflex': '\u015C', - 'Scommaaccent': '\u0218', - 'Sdotaccent': '\u1E60', - 'Sdotbelow': '\u1E62', 
- 'Sdotbelowdotaccent': '\u1E68', - 'Seharmenian': '\u054D', - 'Sevenroman': '\u2166', - 'Shaarmenian': '\u0547', - 'Shacyrillic': '\u0428', - 'Shchacyrillic': '\u0429', - 'Sheicoptic': '\u03E2', - 'Shhacyrillic': '\u04BA', - 'Shimacoptic': '\u03EC', - 'Sigma': '\u03A3', - 'Sixroman': '\u2165', - 'Smonospace': '\uFF33', - 'Softsigncyrillic': '\u042C', - 'Ssmall': '\uF773', - 'Stigmagreek': '\u03DA', - 'T': '\u0054', - 'Tau': '\u03A4', - 'Tbar': '\u0166', - 'Tcaron': '\u0164', - 'Tcedilla': '\u0162', - 'Tcircle': '\u24C9', - 'Tcircumflexbelow': '\u1E70', - 'Tcommaaccent': '\u0162', - 'Tdotaccent': '\u1E6A', - 'Tdotbelow': '\u1E6C', - 'Tecyrillic': '\u0422', - 'Tedescendercyrillic': '\u04AC', - 'Tenroman': '\u2169', - 'Tetsecyrillic': '\u04B4', - 'Theta': '\u0398', - 'Thook': '\u01AC', - 'Thorn': '\u00DE', - 'Thornsmall': '\uF7FE', - 'Threeroman': '\u2162', - 'Tildesmall': '\uF6FE', - 'Tiwnarmenian': '\u054F', - 'Tlinebelow': '\u1E6E', - 'Tmonospace': '\uFF34', - 'Toarmenian': '\u0539', - 'Tonefive': '\u01BC', - 'Tonesix': '\u0184', - 'Tonetwo': '\u01A7', - 'Tretroflexhook': '\u01AE', - 'Tsecyrillic': '\u0426', - 'Tshecyrillic': '\u040B', - 'Tsmall': '\uF774', - 'Twelveroman': '\u216B', - 'Tworoman': '\u2161', - 'U': '\u0055', - 'Uacute': '\u00DA', - 'Uacutesmall': '\uF7FA', - 'Ubreve': '\u016C', - 'Ucaron': '\u01D3', - 'Ucircle': '\u24CA', - 'Ucircumflex': '\u00DB', - 'Ucircumflexbelow': '\u1E76', - 'Ucircumflexsmall': '\uF7FB', - 'Ucyrillic': '\u0423', - 'Udblacute': '\u0170', - 'Udblgrave': '\u0214', - 'Udieresis': '\u00DC', - 'Udieresisacute': '\u01D7', - 'Udieresisbelow': '\u1E72', - 'Udieresiscaron': '\u01D9', - 'Udieresiscyrillic': '\u04F0', - 'Udieresisgrave': '\u01DB', - 'Udieresismacron': '\u01D5', - 'Udieresissmall': '\uF7FC', - 'Udotbelow': '\u1EE4', - 'Ugrave': '\u00D9', - 'Ugravesmall': '\uF7F9', - 'Uhookabove': '\u1EE6', - 'Uhorn': '\u01AF', - 'Uhornacute': '\u1EE8', - 'Uhorndotbelow': '\u1EF0', - 'Uhorngrave': '\u1EEA', - 'Uhornhookabove': '\u1EEC', - 
'Uhorntilde': '\u1EEE', - 'Uhungarumlaut': '\u0170', - 'Uhungarumlautcyrillic': '\u04F2', - 'Uinvertedbreve': '\u0216', - 'Ukcyrillic': '\u0478', - 'Umacron': '\u016A', - 'Umacroncyrillic': '\u04EE', - 'Umacrondieresis': '\u1E7A', - 'Umonospace': '\uFF35', - 'Uogonek': '\u0172', - 'Upsilon': '\u03A5', - 'Upsilon1': '\u03D2', - 'Upsilonacutehooksymbolgreek': '\u03D3', - 'Upsilonafrican': '\u01B1', - 'Upsilondieresis': '\u03AB', - 'Upsilondieresishooksymbolgreek': '\u03D4', - 'Upsilonhooksymbol': '\u03D2', - 'Upsilontonos': '\u038E', - 'Uring': '\u016E', - 'Ushortcyrillic': '\u040E', - 'Usmall': '\uF775', - 'Ustraightcyrillic': '\u04AE', - 'Ustraightstrokecyrillic': '\u04B0', - 'Utilde': '\u0168', - 'Utildeacute': '\u1E78', - 'Utildebelow': '\u1E74', - 'V': '\u0056', - 'Vcircle': '\u24CB', - 'Vdotbelow': '\u1E7E', - 'Vecyrillic': '\u0412', - 'Vewarmenian': '\u054E', - 'Vhook': '\u01B2', - 'Vmonospace': '\uFF36', - 'Voarmenian': '\u0548', - 'Vsmall': '\uF776', - 'Vtilde': '\u1E7C', - 'W': '\u0057', - 'Wacute': '\u1E82', - 'Wcircle': '\u24CC', - 'Wcircumflex': '\u0174', - 'Wdieresis': '\u1E84', - 'Wdotaccent': '\u1E86', - 'Wdotbelow': '\u1E88', - 'Wgrave': '\u1E80', - 'Wmonospace': '\uFF37', - 'Wsmall': '\uF777', - 'X': '\u0058', - 'Xcircle': '\u24CD', - 'Xdieresis': '\u1E8C', - 'Xdotaccent': '\u1E8A', - 'Xeharmenian': '\u053D', - 'Xi': '\u039E', - 'Xmonospace': '\uFF38', - 'Xsmall': '\uF778', - 'Y': '\u0059', - 'Yacute': '\u00DD', - 'Yacutesmall': '\uF7FD', - 'Yatcyrillic': '\u0462', - 'Ycircle': '\u24CE', - 'Ycircumflex': '\u0176', - 'Ydieresis': '\u0178', - 'Ydieresissmall': '\uF7FF', - 'Ydotaccent': '\u1E8E', - 'Ydotbelow': '\u1EF4', - 'Yericyrillic': '\u042B', - 'Yerudieresiscyrillic': '\u04F8', - 'Ygrave': '\u1EF2', - 'Yhook': '\u01B3', - 'Yhookabove': '\u1EF6', - 'Yiarmenian': '\u0545', - 'Yicyrillic': '\u0407', - 'Yiwnarmenian': '\u0552', - 'Ymonospace': '\uFF39', - 'Ysmall': '\uF779', - 'Ytilde': '\u1EF8', - 'Yusbigcyrillic': '\u046A', - 
'Yusbigiotifiedcyrillic': '\u046C', - 'Yuslittlecyrillic': '\u0466', - 'Yuslittleiotifiedcyrillic': '\u0468', - 'Z': '\u005A', - 'Zaarmenian': '\u0536', - 'Zacute': '\u0179', - 'Zcaron': '\u017D', - 'Zcaronsmall': '\uF6FF', - 'Zcircle': '\u24CF', - 'Zcircumflex': '\u1E90', - 'Zdot': '\u017B', - 'Zdotaccent': '\u017B', - 'Zdotbelow': '\u1E92', - 'Zecyrillic': '\u0417', - 'Zedescendercyrillic': '\u0498', - 'Zedieresiscyrillic': '\u04DE', - 'Zeta': '\u0396', - 'Zhearmenian': '\u053A', - 'Zhebrevecyrillic': '\u04C1', - 'Zhecyrillic': '\u0416', - 'Zhedescendercyrillic': '\u0496', - 'Zhedieresiscyrillic': '\u04DC', - 'Zlinebelow': '\u1E94', - 'Zmonospace': '\uFF3A', - 'Zsmall': '\uF77A', - 'Zstroke': '\u01B5', - 'a': '\u0061', - 'aabengali': '\u0986', - 'aacute': '\u00E1', - 'aadeva': '\u0906', - 'aagujarati': '\u0A86', - 'aagurmukhi': '\u0A06', - 'aamatragurmukhi': '\u0A3E', - 'aarusquare': '\u3303', - 'aavowelsignbengali': '\u09BE', - 'aavowelsigndeva': '\u093E', - 'aavowelsigngujarati': '\u0ABE', - 'abbreviationmarkarmenian': '\u055F', - 'abbreviationsigndeva': '\u0970', - 'abengali': '\u0985', - 'abopomofo': '\u311A', - 'abreve': '\u0103', - 'abreveacute': '\u1EAF', - 'abrevecyrillic': '\u04D1', - 'abrevedotbelow': '\u1EB7', - 'abrevegrave': '\u1EB1', - 'abrevehookabove': '\u1EB3', - 'abrevetilde': '\u1EB5', - 'acaron': '\u01CE', - 'acircle': '\u24D0', - 'acircumflex': '\u00E2', - 'acircumflexacute': '\u1EA5', - 'acircumflexdotbelow': '\u1EAD', - 'acircumflexgrave': '\u1EA7', - 'acircumflexhookabove': '\u1EA9', - 'acircumflextilde': '\u1EAB', - 'acute': '\u00B4', - 'acutebelowcmb': '\u0317', - 'acutecmb': '\u0301', - 'acutecomb': '\u0301', - 'acutedeva': '\u0954', - 'acutelowmod': '\u02CF', - 'acutetonecmb': '\u0341', - 'acyrillic': '\u0430', - 'adblgrave': '\u0201', - 'addakgurmukhi': '\u0A71', - 'adeva': '\u0905', - 'adieresis': '\u00E4', - 'adieresiscyrillic': '\u04D3', - 'adieresismacron': '\u01DF', - 'adotbelow': '\u1EA1', - 'adotmacron': '\u01E1', - 'ae': 
'\u00E6', - 'aeacute': '\u01FD', - 'aekorean': '\u3150', - 'aemacron': '\u01E3', - 'afii00208': '\u2015', - 'afii08941': '\u20A4', - 'afii10017': '\u0410', - 'afii10018': '\u0411', - 'afii10019': '\u0412', - 'afii10020': '\u0413', - 'afii10021': '\u0414', - 'afii10022': '\u0415', - 'afii10023': '\u0401', - 'afii10024': '\u0416', - 'afii10025': '\u0417', - 'afii10026': '\u0418', - 'afii10027': '\u0419', - 'afii10028': '\u041A', - 'afii10029': '\u041B', - 'afii10030': '\u041C', - 'afii10031': '\u041D', - 'afii10032': '\u041E', - 'afii10033': '\u041F', - 'afii10034': '\u0420', - 'afii10035': '\u0421', - 'afii10036': '\u0422', - 'afii10037': '\u0423', - 'afii10038': '\u0424', - 'afii10039': '\u0425', - 'afii10040': '\u0426', - 'afii10041': '\u0427', - 'afii10042': '\u0428', - 'afii10043': '\u0429', - 'afii10044': '\u042A', - 'afii10045': '\u042B', - 'afii10046': '\u042C', - 'afii10047': '\u042D', - 'afii10048': '\u042E', - 'afii10049': '\u042F', - 'afii10050': '\u0490', - 'afii10051': '\u0402', - 'afii10052': '\u0403', - 'afii10053': '\u0404', - 'afii10054': '\u0405', - 'afii10055': '\u0406', - 'afii10056': '\u0407', - 'afii10057': '\u0408', - 'afii10058': '\u0409', - 'afii10059': '\u040A', - 'afii10060': '\u040B', - 'afii10061': '\u040C', - 'afii10062': '\u040E', - 'afii10063': '\uF6C4', - 'afii10064': '\uF6C5', - 'afii10065': '\u0430', - 'afii10066': '\u0431', - 'afii10067': '\u0432', - 'afii10068': '\u0433', - 'afii10069': '\u0434', - 'afii10070': '\u0435', - 'afii10071': '\u0451', - 'afii10072': '\u0436', - 'afii10073': '\u0437', - 'afii10074': '\u0438', - 'afii10075': '\u0439', - 'afii10076': '\u043A', - 'afii10077': '\u043B', - 'afii10078': '\u043C', - 'afii10079': '\u043D', - 'afii10080': '\u043E', - 'afii10081': '\u043F', - 'afii10082': '\u0440', - 'afii10083': '\u0441', - 'afii10084': '\u0442', - 'afii10085': '\u0443', - 'afii10086': '\u0444', - 'afii10087': '\u0445', - 'afii10088': '\u0446', - 'afii10089': '\u0447', - 'afii10090': '\u0448', - 'afii10091': 
'\u0449', - 'afii10092': '\u044A', - 'afii10093': '\u044B', - 'afii10094': '\u044C', - 'afii10095': '\u044D', - 'afii10096': '\u044E', - 'afii10097': '\u044F', - 'afii10098': '\u0491', - 'afii10099': '\u0452', - 'afii10100': '\u0453', - 'afii10101': '\u0454', - 'afii10102': '\u0455', - 'afii10103': '\u0456', - 'afii10104': '\u0457', - 'afii10105': '\u0458', - 'afii10106': '\u0459', - 'afii10107': '\u045A', - 'afii10108': '\u045B', - 'afii10109': '\u045C', - 'afii10110': '\u045E', - 'afii10145': '\u040F', - 'afii10146': '\u0462', - 'afii10147': '\u0472', - 'afii10148': '\u0474', - 'afii10192': '\uF6C6', - 'afii10193': '\u045F', - 'afii10194': '\u0463', - 'afii10195': '\u0473', - 'afii10196': '\u0475', - 'afii10831': '\uF6C7', - 'afii10832': '\uF6C8', - 'afii10846': '\u04D9', - 'afii299': '\u200E', - 'afii300': '\u200F', - 'afii301': '\u200D', - 'afii57381': '\u066A', - 'afii57388': '\u060C', - 'afii57392': '\u0660', - 'afii57393': '\u0661', - 'afii57394': '\u0662', - 'afii57395': '\u0663', - 'afii57396': '\u0664', - 'afii57397': '\u0665', - 'afii57398': '\u0666', - 'afii57399': '\u0667', - 'afii57400': '\u0668', - 'afii57401': '\u0669', - 'afii57403': '\u061B', - 'afii57407': '\u061F', - 'afii57409': '\u0621', - 'afii57410': '\u0622', - 'afii57411': '\u0623', - 'afii57412': '\u0624', - 'afii57413': '\u0625', - 'afii57414': '\u0626', - 'afii57415': '\u0627', - 'afii57416': '\u0628', - 'afii57417': '\u0629', - 'afii57418': '\u062A', - 'afii57419': '\u062B', - 'afii57420': '\u062C', - 'afii57421': '\u062D', - 'afii57422': '\u062E', - 'afii57423': '\u062F', - 'afii57424': '\u0630', - 'afii57425': '\u0631', - 'afii57426': '\u0632', - 'afii57427': '\u0633', - 'afii57428': '\u0634', - 'afii57429': '\u0635', - 'afii57430': '\u0636', - 'afii57431': '\u0637', - 'afii57432': '\u0638', - 'afii57433': '\u0639', - 'afii57434': '\u063A', - 'afii57440': '\u0640', - 'afii57441': '\u0641', - 'afii57442': '\u0642', - 'afii57443': '\u0643', - 'afii57444': '\u0644', - 'afii57445': 
'\u0645', - 'afii57446': '\u0646', - 'afii57448': '\u0648', - 'afii57449': '\u0649', - 'afii57450': '\u064A', - 'afii57451': '\u064B', - 'afii57452': '\u064C', - 'afii57453': '\u064D', - 'afii57454': '\u064E', - 'afii57455': '\u064F', - 'afii57456': '\u0650', - 'afii57457': '\u0651', - 'afii57458': '\u0652', - 'afii57470': '\u0647', - 'afii57505': '\u06A4', - 'afii57506': '\u067E', - 'afii57507': '\u0686', - 'afii57508': '\u0698', - 'afii57509': '\u06AF', - 'afii57511': '\u0679', - 'afii57512': '\u0688', - 'afii57513': '\u0691', - 'afii57514': '\u06BA', - 'afii57519': '\u06D2', - 'afii57534': '\u06D5', - 'afii57636': '\u20AA', - 'afii57645': '\u05BE', - 'afii57658': '\u05C3', - 'afii57664': '\u05D0', - 'afii57665': '\u05D1', - 'afii57666': '\u05D2', - 'afii57667': '\u05D3', - 'afii57668': '\u05D4', - 'afii57669': '\u05D5', - 'afii57670': '\u05D6', - 'afii57671': '\u05D7', - 'afii57672': '\u05D8', - 'afii57673': '\u05D9', - 'afii57674': '\u05DA', - 'afii57675': '\u05DB', - 'afii57676': '\u05DC', - 'afii57677': '\u05DD', - 'afii57678': '\u05DE', - 'afii57679': '\u05DF', - 'afii57680': '\u05E0', - 'afii57681': '\u05E1', - 'afii57682': '\u05E2', - 'afii57683': '\u05E3', - 'afii57684': '\u05E4', - 'afii57685': '\u05E5', - 'afii57686': '\u05E6', - 'afii57687': '\u05E7', - 'afii57688': '\u05E8', - 'afii57689': '\u05E9', - 'afii57690': '\u05EA', - 'afii57694': '\uFB2A', - 'afii57695': '\uFB2B', - 'afii57700': '\uFB4B', - 'afii57705': '\uFB1F', - 'afii57716': '\u05F0', - 'afii57717': '\u05F1', - 'afii57718': '\u05F2', - 'afii57723': '\uFB35', - 'afii57793': '\u05B4', - 'afii57794': '\u05B5', - 'afii57795': '\u05B6', - 'afii57796': '\u05BB', - 'afii57797': '\u05B8', - 'afii57798': '\u05B7', - 'afii57799': '\u05B0', - 'afii57800': '\u05B2', - 'afii57801': '\u05B1', - 'afii57802': '\u05B3', - 'afii57803': '\u05C2', - 'afii57804': '\u05C1', - 'afii57806': '\u05B9', - 'afii57807': '\u05BC', - 'afii57839': '\u05BD', - 'afii57841': '\u05BF', - 'afii57842': '\u05C0', - 'afii57929': 
'\u02BC', - 'afii61248': '\u2105', - 'afii61289': '\u2113', - 'afii61352': '\u2116', - 'afii61573': '\u202C', - 'afii61574': '\u202D', - 'afii61575': '\u202E', - 'afii61664': '\u200C', - 'afii63167': '\u066D', - 'afii64937': '\u02BD', - 'agrave': '\u00E0', - 'agujarati': '\u0A85', - 'agurmukhi': '\u0A05', - 'ahiragana': '\u3042', - 'ahookabove': '\u1EA3', - 'aibengali': '\u0990', - 'aibopomofo': '\u311E', - 'aideva': '\u0910', - 'aiecyrillic': '\u04D5', - 'aigujarati': '\u0A90', - 'aigurmukhi': '\u0A10', - 'aimatragurmukhi': '\u0A48', - 'ainarabic': '\u0639', - 'ainfinalarabic': '\uFECA', - 'aininitialarabic': '\uFECB', - 'ainmedialarabic': '\uFECC', - 'ainvertedbreve': '\u0203', - 'aivowelsignbengali': '\u09C8', - 'aivowelsigndeva': '\u0948', - 'aivowelsigngujarati': '\u0AC8', - 'akatakana': '\u30A2', - 'akatakanahalfwidth': '\uFF71', - 'akorean': '\u314F', - 'alef': '\u05D0', - 'alefarabic': '\u0627', - 'alefdageshhebrew': '\uFB30', - 'aleffinalarabic': '\uFE8E', - 'alefhamzaabovearabic': '\u0623', - 'alefhamzaabovefinalarabic': '\uFE84', - 'alefhamzabelowarabic': '\u0625', - 'alefhamzabelowfinalarabic': '\uFE88', - 'alefhebrew': '\u05D0', - 'aleflamedhebrew': '\uFB4F', - 'alefmaddaabovearabic': '\u0622', - 'alefmaddaabovefinalarabic': '\uFE82', - 'alefmaksuraarabic': '\u0649', - 'alefmaksurafinalarabic': '\uFEF0', - 'alefmaksurainitialarabic': '\uFEF3', - 'alefmaksuramedialarabic': '\uFEF4', - 'alefpatahhebrew': '\uFB2E', - 'alefqamatshebrew': '\uFB2F', - 'aleph': '\u2135', - 'allequal': '\u224C', - 'alpha': '\u03B1', - 'alphatonos': '\u03AC', - 'amacron': '\u0101', - 'amonospace': '\uFF41', - 'ampersand': '\u0026', - 'ampersandmonospace': '\uFF06', - 'ampersandsmall': '\uF726', - 'amsquare': '\u33C2', - 'anbopomofo': '\u3122', - 'angbopomofo': '\u3124', - 'angkhankhuthai': '\u0E5A', - 'angle': '\u2220', - 'anglebracketleft': '\u3008', - 'anglebracketleftvertical': '\uFE3F', - 'anglebracketright': '\u3009', - 'anglebracketrightvertical': '\uFE40', - 'angleleft': 
'\u2329', - 'angleright': '\u232A', - 'angstrom': '\u212B', - 'anoteleia': '\u0387', - 'anudattadeva': '\u0952', - 'anusvarabengali': '\u0982', - 'anusvaradeva': '\u0902', - 'anusvaragujarati': '\u0A82', - 'aogonek': '\u0105', - 'apaatosquare': '\u3300', - 'aparen': '\u249C', - 'apostrophearmenian': '\u055A', - 'apostrophemod': '\u02BC', - 'apple': '\uF8FF', - 'approaches': '\u2250', - 'approxequal': '\u2248', - 'approxequalorimage': '\u2252', - 'approximatelyequal': '\u2245', - 'araeaekorean': '\u318E', - 'araeakorean': '\u318D', - 'arc': '\u2312', - 'arighthalfring': '\u1E9A', - 'aring': '\u00E5', - 'aringacute': '\u01FB', - 'aringbelow': '\u1E01', - 'arrowboth': '\u2194', - 'arrowdashdown': '\u21E3', - 'arrowdashleft': '\u21E0', - 'arrowdashright': '\u21E2', - 'arrowdashup': '\u21E1', - 'arrowdblboth': '\u21D4', - 'arrowdbldown': '\u21D3', - 'arrowdblleft': '\u21D0', - 'arrowdblright': '\u21D2', - 'arrowdblup': '\u21D1', - 'arrowdown': '\u2193', - 'arrowdownleft': '\u2199', - 'arrowdownright': '\u2198', - 'arrowdownwhite': '\u21E9', - 'arrowheaddownmod': '\u02C5', - 'arrowheadleftmod': '\u02C2', - 'arrowheadrightmod': '\u02C3', - 'arrowheadupmod': '\u02C4', - 'arrowhorizex': '\uF8E7', - 'arrowleft': '\u2190', - 'arrowleftdbl': '\u21D0', - 'arrowleftdblstroke': '\u21CD', - 'arrowleftoverright': '\u21C6', - 'arrowleftwhite': '\u21E6', - 'arrowright': '\u2192', - 'arrowrightdblstroke': '\u21CF', - 'arrowrightheavy': '\u279E', - 'arrowrightoverleft': '\u21C4', - 'arrowrightwhite': '\u21E8', - 'arrowtableft': '\u21E4', - 'arrowtabright': '\u21E5', - 'arrowup': '\u2191', - 'arrowupdn': '\u2195', - 'arrowupdnbse': '\u21A8', - 'arrowupdownbase': '\u21A8', - 'arrowupleft': '\u2196', - 'arrowupleftofdown': '\u21C5', - 'arrowupright': '\u2197', - 'arrowupwhite': '\u21E7', - 'arrowvertex': '\uF8E6', - 'asciicircum': '\u005E', - 'asciicircummonospace': '\uFF3E', - 'asciitilde': '\u007E', - 'asciitildemonospace': '\uFF5E', - 'ascript': '\u0251', - 'ascriptturned': '\u0252', - 
'asmallhiragana': '\u3041', - 'asmallkatakana': '\u30A1', - 'asmallkatakanahalfwidth': '\uFF67', - 'asterisk': '\u002A', - 'asteriskaltonearabic': '\u066D', - 'asteriskarabic': '\u066D', - 'asteriskmath': '\u2217', - 'asteriskmonospace': '\uFF0A', - 'asterisksmall': '\uFE61', - 'asterism': '\u2042', - 'asuperior': '\uF6E9', - 'asymptoticallyequal': '\u2243', - 'at': '\u0040', - 'atilde': '\u00E3', - 'atmonospace': '\uFF20', - 'atsmall': '\uFE6B', - 'aturned': '\u0250', - 'aubengali': '\u0994', - 'aubopomofo': '\u3120', - 'audeva': '\u0914', - 'augujarati': '\u0A94', - 'augurmukhi': '\u0A14', - 'aulengthmarkbengali': '\u09D7', - 'aumatragurmukhi': '\u0A4C', - 'auvowelsignbengali': '\u09CC', - 'auvowelsigndeva': '\u094C', - 'auvowelsigngujarati': '\u0ACC', - 'avagrahadeva': '\u093D', - 'aybarmenian': '\u0561', - 'ayin': '\u05E2', - 'ayinaltonehebrew': '\uFB20', - 'ayinhebrew': '\u05E2', - 'b': '\u0062', - 'babengali': '\u09AC', - 'backslash': '\u005C', - 'backslashmonospace': '\uFF3C', - 'badeva': '\u092C', - 'bagujarati': '\u0AAC', - 'bagurmukhi': '\u0A2C', - 'bahiragana': '\u3070', - 'bahtthai': '\u0E3F', - 'bakatakana': '\u30D0', - 'bar': '\u007C', - 'barmonospace': '\uFF5C', - 'bbopomofo': '\u3105', - 'bcircle': '\u24D1', - 'bdotaccent': '\u1E03', - 'bdotbelow': '\u1E05', - 'beamedsixteenthnotes': '\u266C', - 'because': '\u2235', - 'becyrillic': '\u0431', - 'beharabic': '\u0628', - 'behfinalarabic': '\uFE90', - 'behinitialarabic': '\uFE91', - 'behiragana': '\u3079', - 'behmedialarabic': '\uFE92', - 'behmeeminitialarabic': '\uFC9F', - 'behmeemisolatedarabic': '\uFC08', - 'behnoonfinalarabic': '\uFC6D', - 'bekatakana': '\u30D9', - 'benarmenian': '\u0562', - 'bet': '\u05D1', - 'beta': '\u03B2', - 'betasymbolgreek': '\u03D0', - 'betdagesh': '\uFB31', - 'betdageshhebrew': '\uFB31', - 'bethebrew': '\u05D1', - 'betrafehebrew': '\uFB4C', - 'bhabengali': '\u09AD', - 'bhadeva': '\u092D', - 'bhagujarati': '\u0AAD', - 'bhagurmukhi': '\u0A2D', - 'bhook': '\u0253', - 
'bihiragana': '\u3073', - 'bikatakana': '\u30D3', - 'bilabialclick': '\u0298', - 'bindigurmukhi': '\u0A02', - 'birusquare': '\u3331', - 'blackcircle': '\u25CF', - 'blackdiamond': '\u25C6', - 'blackdownpointingtriangle': '\u25BC', - 'blackleftpointingpointer': '\u25C4', - 'blackleftpointingtriangle': '\u25C0', - 'blacklenticularbracketleft': '\u3010', - 'blacklenticularbracketleftvertical': '\uFE3B', - 'blacklenticularbracketright': '\u3011', - 'blacklenticularbracketrightvertical': '\uFE3C', - 'blacklowerlefttriangle': '\u25E3', - 'blacklowerrighttriangle': '\u25E2', - 'blackrectangle': '\u25AC', - 'blackrightpointingpointer': '\u25BA', - 'blackrightpointingtriangle': '\u25B6', - 'blacksmallsquare': '\u25AA', - 'blacksmilingface': '\u263B', - 'blacksquare': '\u25A0', - 'blackstar': '\u2605', - 'blackupperlefttriangle': '\u25E4', - 'blackupperrighttriangle': '\u25E5', - 'blackuppointingsmalltriangle': '\u25B4', - 'blackuppointingtriangle': '\u25B2', - 'blank': '\u2423', - 'blinebelow': '\u1E07', - 'block': '\u2588', - 'bmonospace': '\uFF42', - 'bobaimaithai': '\u0E1A', - 'bohiragana': '\u307C', - 'bokatakana': '\u30DC', - 'bparen': '\u249D', - 'bqsquare': '\u33C3', - 'braceex': '\uF8F4', - 'braceleft': '\u007B', - 'braceleftbt': '\uF8F3', - 'braceleftmid': '\uF8F2', - 'braceleftmonospace': '\uFF5B', - 'braceleftsmall': '\uFE5B', - 'bracelefttp': '\uF8F1', - 'braceleftvertical': '\uFE37', - 'braceright': '\u007D', - 'bracerightbt': '\uF8FE', - 'bracerightmid': '\uF8FD', - 'bracerightmonospace': '\uFF5D', - 'bracerightsmall': '\uFE5C', - 'bracerighttp': '\uF8FC', - 'bracerightvertical': '\uFE38', - 'bracketleft': '\u005B', - 'bracketleftbt': '\uF8F0', - 'bracketleftex': '\uF8EF', - 'bracketleftmonospace': '\uFF3B', - 'bracketlefttp': '\uF8EE', - 'bracketright': '\u005D', - 'bracketrightbt': '\uF8FB', - 'bracketrightex': '\uF8FA', - 'bracketrightmonospace': '\uFF3D', - 'bracketrighttp': '\uF8F9', - 'breve': '\u02D8', - 'brevebelowcmb': '\u032E', - 'brevecmb': '\u0306', 
- 'breveinvertedbelowcmb': '\u032F', - 'breveinvertedcmb': '\u0311', - 'breveinverteddoublecmb': '\u0361', - 'bridgebelowcmb': '\u032A', - 'bridgeinvertedbelowcmb': '\u033A', - 'brokenbar': '\u00A6', - 'bstroke': '\u0180', - 'bsuperior': '\uF6EA', - 'btopbar': '\u0183', - 'buhiragana': '\u3076', - 'bukatakana': '\u30D6', - 'bullet': '\u2022', - 'bulletinverse': '\u25D8', - 'bulletoperator': '\u2219', - 'bullseye': '\u25CE', - 'c': '\u0063', - 'caarmenian': '\u056E', - 'cabengali': '\u099A', - 'cacute': '\u0107', - 'cadeva': '\u091A', - 'cagujarati': '\u0A9A', - 'cagurmukhi': '\u0A1A', - 'calsquare': '\u3388', - 'candrabindubengali': '\u0981', - 'candrabinducmb': '\u0310', - 'candrabindudeva': '\u0901', - 'candrabindugujarati': '\u0A81', - 'capslock': '\u21EA', - 'careof': '\u2105', - 'caron': '\u02C7', - 'caronbelowcmb': '\u032C', - 'caroncmb': '\u030C', - 'carriagereturn': '\u21B5', - 'cbopomofo': '\u3118', - 'ccaron': '\u010D', - 'ccedilla': '\u00E7', - 'ccedillaacute': '\u1E09', - 'ccircle': '\u24D2', - 'ccircumflex': '\u0109', - 'ccurl': '\u0255', - 'cdot': '\u010B', - 'cdotaccent': '\u010B', - 'cdsquare': '\u33C5', - 'cedilla': '\u00B8', - 'cedillacmb': '\u0327', - 'cent': '\u00A2', - 'centigrade': '\u2103', - 'centinferior': '\uF6DF', - 'centmonospace': '\uFFE0', - 'centoldstyle': '\uF7A2', - 'centsuperior': '\uF6E0', - 'chaarmenian': '\u0579', - 'chabengali': '\u099B', - 'chadeva': '\u091B', - 'chagujarati': '\u0A9B', - 'chagurmukhi': '\u0A1B', - 'chbopomofo': '\u3114', - 'cheabkhasiancyrillic': '\u04BD', - 'checkmark': '\u2713', - 'checyrillic': '\u0447', - 'chedescenderabkhasiancyrillic': '\u04BF', - 'chedescendercyrillic': '\u04B7', - 'chedieresiscyrillic': '\u04F5', - 'cheharmenian': '\u0573', - 'chekhakassiancyrillic': '\u04CC', - 'cheverticalstrokecyrillic': '\u04B9', - 'chi': '\u03C7', - 'chieuchacirclekorean': '\u3277', - 'chieuchaparenkorean': '\u3217', - 'chieuchcirclekorean': '\u3269', - 'chieuchkorean': '\u314A', - 'chieuchparenkorean': '\u3209', 
- 'chochangthai': '\u0E0A', - 'chochanthai': '\u0E08', - 'chochingthai': '\u0E09', - 'chochoethai': '\u0E0C', - 'chook': '\u0188', - 'cieucacirclekorean': '\u3276', - 'cieucaparenkorean': '\u3216', - 'cieuccirclekorean': '\u3268', - 'cieuckorean': '\u3148', - 'cieucparenkorean': '\u3208', - 'cieucuparenkorean': '\u321C', - 'circle': '\u25CB', - 'circlemultiply': '\u2297', - 'circleot': '\u2299', - 'circleplus': '\u2295', - 'circlepostalmark': '\u3036', - 'circlewithlefthalfblack': '\u25D0', - 'circlewithrighthalfblack': '\u25D1', - 'circumflex': '\u02C6', - 'circumflexbelowcmb': '\u032D', - 'circumflexcmb': '\u0302', - 'clear': '\u2327', - 'clickalveolar': '\u01C2', - 'clickdental': '\u01C0', - 'clicklateral': '\u01C1', - 'clickretroflex': '\u01C3', - 'club': '\u2663', - 'clubsuitblack': '\u2663', - 'clubsuitwhite': '\u2667', - 'cmcubedsquare': '\u33A4', - 'cmonospace': '\uFF43', - 'cmsquaredsquare': '\u33A0', - 'coarmenian': '\u0581', - 'colon': '\u003A', - 'colonmonetary': '\u20A1', - 'colonmonospace': '\uFF1A', - 'colonsign': '\u20A1', - 'colonsmall': '\uFE55', - 'colontriangularhalfmod': '\u02D1', - 'colontriangularmod': '\u02D0', - 'comma': '\u002C', - 'commaabovecmb': '\u0313', - 'commaaboverightcmb': '\u0315', - 'commaaccent': '\uF6C3', - 'commaarabic': '\u060C', - 'commaarmenian': '\u055D', - 'commainferior': '\uF6E1', - 'commamonospace': '\uFF0C', - 'commareversedabovecmb': '\u0314', - 'commareversedmod': '\u02BD', - 'commasmall': '\uFE50', - 'commasuperior': '\uF6E2', - 'commaturnedabovecmb': '\u0312', - 'commaturnedmod': '\u02BB', - 'compass': '\u263C', - 'congruent': '\u2245', - 'contourintegral': '\u222E', - 'control': '\u2303', - 'controlACK': '\u0006', - 'controlBEL': '\u0007', - 'controlBS': '\u0008', - 'controlCAN': '\u0018', - 'controlCR': '\u000D', - 'controlDC1': '\u0011', - 'controlDC2': '\u0012', - 'controlDC3': '\u0013', - 'controlDC4': '\u0014', - 'controlDEL': '\u007F', - 'controlDLE': '\u0010', - 'controlEM': '\u0019', - 'controlENQ': 
'\u0005', - 'controlEOT': '\u0004', - 'controlESC': '\u001B', - 'controlETB': '\u0017', - 'controlETX': '\u0003', - 'controlFF': '\u000C', - 'controlFS': '\u001C', - 'controlGS': '\u001D', - 'controlHT': '\u0009', - 'controlLF': '\u000A', - 'controlNAK': '\u0015', - 'controlRS': '\u001E', - 'controlSI': '\u000F', - 'controlSO': '\u000E', - 'controlSOT': '\u0002', - 'controlSTX': '\u0001', - 'controlSUB': '\u001A', - 'controlSYN': '\u0016', - 'controlUS': '\u001F', - 'controlVT': '\u000B', - 'copyright': '\u00A9', - 'copyrightsans': '\uF8E9', - 'copyrightserif': '\uF6D9', - 'cornerbracketleft': '\u300C', - 'cornerbracketlefthalfwidth': '\uFF62', - 'cornerbracketleftvertical': '\uFE41', - 'cornerbracketright': '\u300D', - 'cornerbracketrighthalfwidth': '\uFF63', - 'cornerbracketrightvertical': '\uFE42', - 'corporationsquare': '\u337F', - 'cosquare': '\u33C7', - 'coverkgsquare': '\u33C6', - 'cparen': '\u249E', - 'cruzeiro': '\u20A2', - 'cstretched': '\u0297', - 'curlyand': '\u22CF', - 'curlyor': '\u22CE', - 'currency': '\u00A4', - 'cyrBreve': '\uF6D1', - 'cyrFlex': '\uF6D2', - 'cyrbreve': '\uF6D4', - 'cyrflex': '\uF6D5', - 'd': '\u0064', - 'daarmenian': '\u0564', - 'dabengali': '\u09A6', - 'dadarabic': '\u0636', - 'dadeva': '\u0926', - 'dadfinalarabic': '\uFEBE', - 'dadinitialarabic': '\uFEBF', - 'dadmedialarabic': '\uFEC0', - 'dagesh': '\u05BC', - 'dageshhebrew': '\u05BC', - 'dagger': '\u2020', - 'daggerdbl': '\u2021', - 'dagujarati': '\u0AA6', - 'dagurmukhi': '\u0A26', - 'dahiragana': '\u3060', - 'dakatakana': '\u30C0', - 'dalarabic': '\u062F', - 'dalet': '\u05D3', - 'daletdagesh': '\uFB33', - 'daletdageshhebrew': '\uFB33', - 'dalethatafpatah': '\u05D3\u05B2', - 'dalethatafpatahhebrew': '\u05D3\u05B2', - 'dalethatafsegol': '\u05D3\u05B1', - 'dalethatafsegolhebrew': '\u05D3\u05B1', - 'dalethebrew': '\u05D3', - 'dalethiriq': '\u05D3\u05B4', - 'dalethiriqhebrew': '\u05D3\u05B4', - 'daletholam': '\u05D3\u05B9', - 'daletholamhebrew': '\u05D3\u05B9', - 'daletpatah': 
'\u05D3\u05B7', - 'daletpatahhebrew': '\u05D3\u05B7', - 'daletqamats': '\u05D3\u05B8', - 'daletqamatshebrew': '\u05D3\u05B8', - 'daletqubuts': '\u05D3\u05BB', - 'daletqubutshebrew': '\u05D3\u05BB', - 'daletsegol': '\u05D3\u05B6', - 'daletsegolhebrew': '\u05D3\u05B6', - 'daletsheva': '\u05D3\u05B0', - 'daletshevahebrew': '\u05D3\u05B0', - 'dalettsere': '\u05D3\u05B5', - 'dalettserehebrew': '\u05D3\u05B5', - 'dalfinalarabic': '\uFEAA', - 'dammaarabic': '\u064F', - 'dammalowarabic': '\u064F', - 'dammatanaltonearabic': '\u064C', - 'dammatanarabic': '\u064C', - 'danda': '\u0964', - 'dargahebrew': '\u05A7', - 'dargalefthebrew': '\u05A7', - 'dasiapneumatacyrilliccmb': '\u0485', - 'dblGrave': '\uF6D3', - 'dblanglebracketleft': '\u300A', - 'dblanglebracketleftvertical': '\uFE3D', - 'dblanglebracketright': '\u300B', - 'dblanglebracketrightvertical': '\uFE3E', - 'dblarchinvertedbelowcmb': '\u032B', - 'dblarrowleft': '\u21D4', - 'dblarrowright': '\u21D2', - 'dbldanda': '\u0965', - 'dblgrave': '\uF6D6', - 'dblgravecmb': '\u030F', - 'dblintegral': '\u222C', - 'dbllowline': '\u2017', - 'dbllowlinecmb': '\u0333', - 'dbloverlinecmb': '\u033F', - 'dblprimemod': '\u02BA', - 'dblverticalbar': '\u2016', - 'dblverticallineabovecmb': '\u030E', - 'dbopomofo': '\u3109', - 'dbsquare': '\u33C8', - 'dcaron': '\u010F', - 'dcedilla': '\u1E11', - 'dcircle': '\u24D3', - 'dcircumflexbelow': '\u1E13', - 'dcroat': '\u0111', - 'ddabengali': '\u09A1', - 'ddadeva': '\u0921', - 'ddagujarati': '\u0AA1', - 'ddagurmukhi': '\u0A21', - 'ddalarabic': '\u0688', - 'ddalfinalarabic': '\uFB89', - 'dddhadeva': '\u095C', - 'ddhabengali': '\u09A2', - 'ddhadeva': '\u0922', - 'ddhagujarati': '\u0AA2', - 'ddhagurmukhi': '\u0A22', - 'ddotaccent': '\u1E0B', - 'ddotbelow': '\u1E0D', - 'decimalseparatorarabic': '\u066B', - 'decimalseparatorpersian': '\u066B', - 'decyrillic': '\u0434', - 'degree': '\u00B0', - 'dehihebrew': '\u05AD', - 'dehiragana': '\u3067', - 'deicoptic': '\u03EF', - 'dekatakana': '\u30C7', - 'deleteleft': 
'\u232B', - 'deleteright': '\u2326', - 'delta': '\u03B4', - 'deltaturned': '\u018D', - 'denominatorminusonenumeratorbengali': '\u09F8', - 'dezh': '\u02A4', - 'dhabengali': '\u09A7', - 'dhadeva': '\u0927', - 'dhagujarati': '\u0AA7', - 'dhagurmukhi': '\u0A27', - 'dhook': '\u0257', - 'dialytikatonos': '\u0385', - 'dialytikatonoscmb': '\u0344', - 'diamond': '\u2666', - 'diamondsuitwhite': '\u2662', - 'dieresis': '\u00A8', - 'dieresisacute': '\uF6D7', - 'dieresisbelowcmb': '\u0324', - 'dieresiscmb': '\u0308', - 'dieresisgrave': '\uF6D8', - 'dieresistonos': '\u0385', - 'dihiragana': '\u3062', - 'dikatakana': '\u30C2', - 'dittomark': '\u3003', - 'divide': '\u00F7', - 'divides': '\u2223', - 'divisionslash': '\u2215', - 'djecyrillic': '\u0452', - 'dkshade': '\u2593', - 'dlinebelow': '\u1E0F', - 'dlsquare': '\u3397', - 'dmacron': '\u0111', - 'dmonospace': '\uFF44', - 'dnblock': '\u2584', - 'dochadathai': '\u0E0E', - 'dodekthai': '\u0E14', - 'dohiragana': '\u3069', - 'dokatakana': '\u30C9', - 'dollar': '\u0024', - 'dollarinferior': '\uF6E3', - 'dollarmonospace': '\uFF04', - 'dollaroldstyle': '\uF724', - 'dollarsmall': '\uFE69', - 'dollarsuperior': '\uF6E4', - 'dong': '\u20AB', - 'dorusquare': '\u3326', - 'dotaccent': '\u02D9', - 'dotaccentcmb': '\u0307', - 'dotbelowcmb': '\u0323', - 'dotbelowcomb': '\u0323', - 'dotkatakana': '\u30FB', - 'dotlessi': '\u0131', - 'dotlessj': '\uF6BE', - 'dotlessjstrokehook': '\u0284', - 'dotmath': '\u22C5', - 'dottedcircle': '\u25CC', - 'doubleyodpatah': '\uFB1F', - 'doubleyodpatahhebrew': '\uFB1F', - 'downtackbelowcmb': '\u031E', - 'downtackmod': '\u02D5', - 'dparen': '\u249F', - 'dsuperior': '\uF6EB', - 'dtail': '\u0256', - 'dtopbar': '\u018C', - 'duhiragana': '\u3065', - 'dukatakana': '\u30C5', - 'dz': '\u01F3', - 'dzaltone': '\u02A3', - 'dzcaron': '\u01C6', - 'dzcurl': '\u02A5', - 'dzeabkhasiancyrillic': '\u04E1', - 'dzecyrillic': '\u0455', - 'dzhecyrillic': '\u045F', - 'e': '\u0065', - 'eacute': '\u00E9', - 'earth': '\u2641', - 'ebengali': 
'\u098F', - 'ebopomofo': '\u311C', - 'ebreve': '\u0115', - 'ecandradeva': '\u090D', - 'ecandragujarati': '\u0A8D', - 'ecandravowelsigndeva': '\u0945', - 'ecandravowelsigngujarati': '\u0AC5', - 'ecaron': '\u011B', - 'ecedillabreve': '\u1E1D', - 'echarmenian': '\u0565', - 'echyiwnarmenian': '\u0587', - 'ecircle': '\u24D4', - 'ecircumflex': '\u00EA', - 'ecircumflexacute': '\u1EBF', - 'ecircumflexbelow': '\u1E19', - 'ecircumflexdotbelow': '\u1EC7', - 'ecircumflexgrave': '\u1EC1', - 'ecircumflexhookabove': '\u1EC3', - 'ecircumflextilde': '\u1EC5', - 'ecyrillic': '\u0454', - 'edblgrave': '\u0205', - 'edeva': '\u090F', - 'edieresis': '\u00EB', - 'edot': '\u0117', - 'edotaccent': '\u0117', - 'edotbelow': '\u1EB9', - 'eegurmukhi': '\u0A0F', - 'eematragurmukhi': '\u0A47', - 'efcyrillic': '\u0444', - 'egrave': '\u00E8', - 'egujarati': '\u0A8F', - 'eharmenian': '\u0567', - 'ehbopomofo': '\u311D', - 'ehiragana': '\u3048', - 'ehookabove': '\u1EBB', - 'eibopomofo': '\u311F', - 'eight': '\u0038', - 'eightarabic': '\u0668', - 'eightbengali': '\u09EE', - 'eightcircle': '\u2467', - 'eightcircleinversesansserif': '\u2791', - 'eightdeva': '\u096E', - 'eighteencircle': '\u2471', - 'eighteenparen': '\u2485', - 'eighteenperiod': '\u2499', - 'eightgujarati': '\u0AEE', - 'eightgurmukhi': '\u0A6E', - 'eighthackarabic': '\u0668', - 'eighthangzhou': '\u3028', - 'eighthnotebeamed': '\u266B', - 'eightideographicparen': '\u3227', - 'eightinferior': '\u2088', - 'eightmonospace': '\uFF18', - 'eightoldstyle': '\uF738', - 'eightparen': '\u247B', - 'eightperiod': '\u248F', - 'eightpersian': '\u06F8', - 'eightroman': '\u2177', - 'eightsuperior': '\u2078', - 'eightthai': '\u0E58', - 'einvertedbreve': '\u0207', - 'eiotifiedcyrillic': '\u0465', - 'ekatakana': '\u30A8', - 'ekatakanahalfwidth': '\uFF74', - 'ekonkargurmukhi': '\u0A74', - 'ekorean': '\u3154', - 'elcyrillic': '\u043B', - 'element': '\u2208', - 'elevencircle': '\u246A', - 'elevenparen': '\u247E', - 'elevenperiod': '\u2492', - 'elevenroman': 
'\u217A', - 'ellipsis': '\u2026', - 'ellipsisvertical': '\u22EE', - 'emacron': '\u0113', - 'emacronacute': '\u1E17', - 'emacrongrave': '\u1E15', - 'emcyrillic': '\u043C', - 'emdash': '\u2014', - 'emdashvertical': '\uFE31', - 'emonospace': '\uFF45', - 'emphasismarkarmenian': '\u055B', - 'emptyset': '\u2205', - 'enbopomofo': '\u3123', - 'encyrillic': '\u043D', - 'endash': '\u2013', - 'endashvertical': '\uFE32', - 'endescendercyrillic': '\u04A3', - 'eng': '\u014B', - 'engbopomofo': '\u3125', - 'enghecyrillic': '\u04A5', - 'enhookcyrillic': '\u04C8', - 'enspace': '\u2002', - 'eogonek': '\u0119', - 'eokorean': '\u3153', - 'eopen': '\u025B', - 'eopenclosed': '\u029A', - 'eopenreversed': '\u025C', - 'eopenreversedclosed': '\u025E', - 'eopenreversedhook': '\u025D', - 'eparen': '\u24A0', - 'epsilon': '\u03B5', - 'epsilontonos': '\u03AD', - 'equal': '\u003D', - 'equalmonospace': '\uFF1D', - 'equalsmall': '\uFE66', - 'equalsuperior': '\u207C', - 'equivalence': '\u2261', - 'erbopomofo': '\u3126', - 'ercyrillic': '\u0440', - 'ereversed': '\u0258', - 'ereversedcyrillic': '\u044D', - 'escyrillic': '\u0441', - 'esdescendercyrillic': '\u04AB', - 'esh': '\u0283', - 'eshcurl': '\u0286', - 'eshortdeva': '\u090E', - 'eshortvowelsigndeva': '\u0946', - 'eshreversedloop': '\u01AA', - 'eshsquatreversed': '\u0285', - 'esmallhiragana': '\u3047', - 'esmallkatakana': '\u30A7', - 'esmallkatakanahalfwidth': '\uFF6A', - 'estimated': '\u212E', - 'esuperior': '\uF6EC', - 'eta': '\u03B7', - 'etarmenian': '\u0568', - 'etatonos': '\u03AE', - 'eth': '\u00F0', - 'etilde': '\u1EBD', - 'etildebelow': '\u1E1B', - 'etnahtafoukhhebrew': '\u0591', - 'etnahtafoukhlefthebrew': '\u0591', - 'etnahtahebrew': '\u0591', - 'etnahtalefthebrew': '\u0591', - 'eturned': '\u01DD', - 'eukorean': '\u3161', - 'euro': '\u20AC', - 'evowelsignbengali': '\u09C7', - 'evowelsigndeva': '\u0947', - 'evowelsigngujarati': '\u0AC7', - 'exclam': '\u0021', - 'exclamarmenian': '\u055C', - 'exclamdbl': '\u203C', - 'exclamdown': '\u00A1', - 
'exclamdownsmall': '\uF7A1', - 'exclammonospace': '\uFF01', - 'exclamsmall': '\uF721', - 'existential': '\u2203', - 'ezh': '\u0292', - 'ezhcaron': '\u01EF', - 'ezhcurl': '\u0293', - 'ezhreversed': '\u01B9', - 'ezhtail': '\u01BA', - 'f': '\u0066', - 'fadeva': '\u095E', - 'fagurmukhi': '\u0A5E', - 'fahrenheit': '\u2109', - 'fathaarabic': '\u064E', - 'fathalowarabic': '\u064E', - 'fathatanarabic': '\u064B', - 'fbopomofo': '\u3108', - 'fcircle': '\u24D5', - 'fdotaccent': '\u1E1F', - 'feharabic': '\u0641', - 'feharmenian': '\u0586', - 'fehfinalarabic': '\uFED2', - 'fehinitialarabic': '\uFED3', - 'fehmedialarabic': '\uFED4', - 'feicoptic': '\u03E5', - 'female': '\u2640', - 'ff': '\uFB00', - 'ffi': '\uFB03', - 'ffl': '\uFB04', - 'fi': '\uFB01', - 'fifteencircle': '\u246E', - 'fifteenparen': '\u2482', - 'fifteenperiod': '\u2496', - 'figuredash': '\u2012', - 'filledbox': '\u25A0', - 'filledrect': '\u25AC', - 'finalkaf': '\u05DA', - 'finalkafdagesh': '\uFB3A', - 'finalkafdageshhebrew': '\uFB3A', - 'finalkafhebrew': '\u05DA', - 'finalkafqamats': '\u05DA\u05B8', - 'finalkafqamatshebrew': '\u05DA\u05B8', - 'finalkafsheva': '\u05DA\u05B0', - 'finalkafshevahebrew': '\u05DA\u05B0', - 'finalmem': '\u05DD', - 'finalmemhebrew': '\u05DD', - 'finalnun': '\u05DF', - 'finalnunhebrew': '\u05DF', - 'finalpe': '\u05E3', - 'finalpehebrew': '\u05E3', - 'finaltsadi': '\u05E5', - 'finaltsadihebrew': '\u05E5', - 'firsttonechinese': '\u02C9', - 'fisheye': '\u25C9', - 'fitacyrillic': '\u0473', - 'five': '\u0035', - 'fivearabic': '\u0665', - 'fivebengali': '\u09EB', - 'fivecircle': '\u2464', - 'fivecircleinversesansserif': '\u278E', - 'fivedeva': '\u096B', - 'fiveeighths': '\u215D', - 'fivegujarati': '\u0AEB', - 'fivegurmukhi': '\u0A6B', - 'fivehackarabic': '\u0665', - 'fivehangzhou': '\u3025', - 'fiveideographicparen': '\u3224', - 'fiveinferior': '\u2085', - 'fivemonospace': '\uFF15', - 'fiveoldstyle': '\uF735', - 'fiveparen': '\u2478', - 'fiveperiod': '\u248C', - 'fivepersian': '\u06F5', - 
'fiveroman': '\u2174', - 'fivesuperior': '\u2075', - 'fivethai': '\u0E55', - 'fl': '\uFB02', - 'florin': '\u0192', - 'fmonospace': '\uFF46', - 'fmsquare': '\u3399', - 'fofanthai': '\u0E1F', - 'fofathai': '\u0E1D', - 'fongmanthai': '\u0E4F', - 'forall': '\u2200', - 'four': '\u0034', - 'fourarabic': '\u0664', - 'fourbengali': '\u09EA', - 'fourcircle': '\u2463', - 'fourcircleinversesansserif': '\u278D', - 'fourdeva': '\u096A', - 'fourgujarati': '\u0AEA', - 'fourgurmukhi': '\u0A6A', - 'fourhackarabic': '\u0664', - 'fourhangzhou': '\u3024', - 'fourideographicparen': '\u3223', - 'fourinferior': '\u2084', - 'fourmonospace': '\uFF14', - 'fournumeratorbengali': '\u09F7', - 'fouroldstyle': '\uF734', - 'fourparen': '\u2477', - 'fourperiod': '\u248B', - 'fourpersian': '\u06F4', - 'fourroman': '\u2173', - 'foursuperior': '\u2074', - 'fourteencircle': '\u246D', - 'fourteenparen': '\u2481', - 'fourteenperiod': '\u2495', - 'fourthai': '\u0E54', - 'fourthtonechinese': '\u02CB', - 'fparen': '\u24A1', - 'fraction': '\u2044', - 'franc': '\u20A3', - 'g': '\u0067', - 'gabengali': '\u0997', - 'gacute': '\u01F5', - 'gadeva': '\u0917', - 'gafarabic': '\u06AF', - 'gaffinalarabic': '\uFB93', - 'gafinitialarabic': '\uFB94', - 'gafmedialarabic': '\uFB95', - 'gagujarati': '\u0A97', - 'gagurmukhi': '\u0A17', - 'gahiragana': '\u304C', - 'gakatakana': '\u30AC', - 'gamma': '\u03B3', - 'gammalatinsmall': '\u0263', - 'gammasuperior': '\u02E0', - 'gangiacoptic': '\u03EB', - 'gbopomofo': '\u310D', - 'gbreve': '\u011F', - 'gcaron': '\u01E7', - 'gcedilla': '\u0123', - 'gcircle': '\u24D6', - 'gcircumflex': '\u011D', - 'gcommaaccent': '\u0123', - 'gdot': '\u0121', - 'gdotaccent': '\u0121', - 'gecyrillic': '\u0433', - 'gehiragana': '\u3052', - 'gekatakana': '\u30B2', - 'geometricallyequal': '\u2251', - 'gereshaccenthebrew': '\u059C', - 'gereshhebrew': '\u05F3', - 'gereshmuqdamhebrew': '\u059D', - 'germandbls': '\u00DF', - 'gershayimaccenthebrew': '\u059E', - 'gershayimhebrew': '\u05F4', - 'getamark': 
'\u3013', - 'ghabengali': '\u0998', - 'ghadarmenian': '\u0572', - 'ghadeva': '\u0918', - 'ghagujarati': '\u0A98', - 'ghagurmukhi': '\u0A18', - 'ghainarabic': '\u063A', - 'ghainfinalarabic': '\uFECE', - 'ghaininitialarabic': '\uFECF', - 'ghainmedialarabic': '\uFED0', - 'ghemiddlehookcyrillic': '\u0495', - 'ghestrokecyrillic': '\u0493', - 'gheupturncyrillic': '\u0491', - 'ghhadeva': '\u095A', - 'ghhagurmukhi': '\u0A5A', - 'ghook': '\u0260', - 'ghzsquare': '\u3393', - 'gihiragana': '\u304E', - 'gikatakana': '\u30AE', - 'gimarmenian': '\u0563', - 'gimel': '\u05D2', - 'gimeldagesh': '\uFB32', - 'gimeldageshhebrew': '\uFB32', - 'gimelhebrew': '\u05D2', - 'gjecyrillic': '\u0453', - 'glottalinvertedstroke': '\u01BE', - 'glottalstop': '\u0294', - 'glottalstopinverted': '\u0296', - 'glottalstopmod': '\u02C0', - 'glottalstopreversed': '\u0295', - 'glottalstopreversedmod': '\u02C1', - 'glottalstopreversedsuperior': '\u02E4', - 'glottalstopstroke': '\u02A1', - 'glottalstopstrokereversed': '\u02A2', - 'gmacron': '\u1E21', - 'gmonospace': '\uFF47', - 'gohiragana': '\u3054', - 'gokatakana': '\u30B4', - 'gparen': '\u24A2', - 'gpasquare': '\u33AC', - 'gradient': '\u2207', - 'grave': '\u0060', - 'gravebelowcmb': '\u0316', - 'gravecmb': '\u0300', - 'gravecomb': '\u0300', - 'gravedeva': '\u0953', - 'gravelowmod': '\u02CE', - 'gravemonospace': '\uFF40', - 'gravetonecmb': '\u0340', - 'greater': '\u003E', - 'greaterequal': '\u2265', - 'greaterequalorless': '\u22DB', - 'greatermonospace': '\uFF1E', - 'greaterorequivalent': '\u2273', - 'greaterorless': '\u2277', - 'greateroverequal': '\u2267', - 'greatersmall': '\uFE65', - 'gscript': '\u0261', - 'gstroke': '\u01E5', - 'guhiragana': '\u3050', - 'guillemotleft': '\u00AB', - 'guillemotright': '\u00BB', - 'guilsinglleft': '\u2039', - 'guilsinglright': '\u203A', - 'gukatakana': '\u30B0', - 'guramusquare': '\u3318', - 'gysquare': '\u33C9', - 'h': '\u0068', - 'haabkhasiancyrillic': '\u04A9', - 'haaltonearabic': '\u06C1', - 'habengali': '\u09B9', - 
'hadescendercyrillic': '\u04B3', - 'hadeva': '\u0939', - 'hagujarati': '\u0AB9', - 'hagurmukhi': '\u0A39', - 'haharabic': '\u062D', - 'hahfinalarabic': '\uFEA2', - 'hahinitialarabic': '\uFEA3', - 'hahiragana': '\u306F', - 'hahmedialarabic': '\uFEA4', - 'haitusquare': '\u332A', - 'hakatakana': '\u30CF', - 'hakatakanahalfwidth': '\uFF8A', - 'halantgurmukhi': '\u0A4D', - 'hamzaarabic': '\u0621', - 'hamzadammaarabic': '\u0621\u064F', - 'hamzadammatanarabic': '\u0621\u064C', - 'hamzafathaarabic': '\u0621\u064E', - 'hamzafathatanarabic': '\u0621\u064B', - 'hamzalowarabic': '\u0621', - 'hamzalowkasraarabic': '\u0621\u0650', - 'hamzalowkasratanarabic': '\u0621\u064D', - 'hamzasukunarabic': '\u0621\u0652', - 'hangulfiller': '\u3164', - 'hardsigncyrillic': '\u044A', - 'harpoonleftbarbup': '\u21BC', - 'harpoonrightbarbup': '\u21C0', - 'hasquare': '\u33CA', - 'hatafpatah': '\u05B2', - 'hatafpatah16': '\u05B2', - 'hatafpatah23': '\u05B2', - 'hatafpatah2f': '\u05B2', - 'hatafpatahhebrew': '\u05B2', - 'hatafpatahnarrowhebrew': '\u05B2', - 'hatafpatahquarterhebrew': '\u05B2', - 'hatafpatahwidehebrew': '\u05B2', - 'hatafqamats': '\u05B3', - 'hatafqamats1b': '\u05B3', - 'hatafqamats28': '\u05B3', - 'hatafqamats34': '\u05B3', - 'hatafqamatshebrew': '\u05B3', - 'hatafqamatsnarrowhebrew': '\u05B3', - 'hatafqamatsquarterhebrew': '\u05B3', - 'hatafqamatswidehebrew': '\u05B3', - 'hatafsegol': '\u05B1', - 'hatafsegol17': '\u05B1', - 'hatafsegol24': '\u05B1', - 'hatafsegol30': '\u05B1', - 'hatafsegolhebrew': '\u05B1', - 'hatafsegolnarrowhebrew': '\u05B1', - 'hatafsegolquarterhebrew': '\u05B1', - 'hatafsegolwidehebrew': '\u05B1', - 'hbar': '\u0127', - 'hbopomofo': '\u310F', - 'hbrevebelow': '\u1E2B', - 'hcedilla': '\u1E29', - 'hcircle': '\u24D7', - 'hcircumflex': '\u0125', - 'hdieresis': '\u1E27', - 'hdotaccent': '\u1E23', - 'hdotbelow': '\u1E25', - 'he': '\u05D4', - 'heart': '\u2665', - 'heartsuitblack': '\u2665', - 'heartsuitwhite': '\u2661', - 'hedagesh': '\uFB34', - 'hedageshhebrew': 
'\uFB34', - 'hehaltonearabic': '\u06C1', - 'heharabic': '\u0647', - 'hehebrew': '\u05D4', - 'hehfinalaltonearabic': '\uFBA7', - 'hehfinalalttwoarabic': '\uFEEA', - 'hehfinalarabic': '\uFEEA', - 'hehhamzaabovefinalarabic': '\uFBA5', - 'hehhamzaaboveisolatedarabic': '\uFBA4', - 'hehinitialaltonearabic': '\uFBA8', - 'hehinitialarabic': '\uFEEB', - 'hehiragana': '\u3078', - 'hehmedialaltonearabic': '\uFBA9', - 'hehmedialarabic': '\uFEEC', - 'heiseierasquare': '\u337B', - 'hekatakana': '\u30D8', - 'hekatakanahalfwidth': '\uFF8D', - 'hekutaarusquare': '\u3336', - 'henghook': '\u0267', - 'herutusquare': '\u3339', - 'het': '\u05D7', - 'hethebrew': '\u05D7', - 'hhook': '\u0266', - 'hhooksuperior': '\u02B1', - 'hieuhacirclekorean': '\u327B', - 'hieuhaparenkorean': '\u321B', - 'hieuhcirclekorean': '\u326D', - 'hieuhkorean': '\u314E', - 'hieuhparenkorean': '\u320D', - 'hihiragana': '\u3072', - 'hikatakana': '\u30D2', - 'hikatakanahalfwidth': '\uFF8B', - 'hiriq': '\u05B4', - 'hiriq14': '\u05B4', - 'hiriq21': '\u05B4', - 'hiriq2d': '\u05B4', - 'hiriqhebrew': '\u05B4', - 'hiriqnarrowhebrew': '\u05B4', - 'hiriqquarterhebrew': '\u05B4', - 'hiriqwidehebrew': '\u05B4', - 'hlinebelow': '\u1E96', - 'hmonospace': '\uFF48', - 'hoarmenian': '\u0570', - 'hohipthai': '\u0E2B', - 'hohiragana': '\u307B', - 'hokatakana': '\u30DB', - 'hokatakanahalfwidth': '\uFF8E', - 'holam': '\u05B9', - 'holam19': '\u05B9', - 'holam26': '\u05B9', - 'holam32': '\u05B9', - 'holamhebrew': '\u05B9', - 'holamnarrowhebrew': '\u05B9', - 'holamquarterhebrew': '\u05B9', - 'holamwidehebrew': '\u05B9', - 'honokhukthai': '\u0E2E', - 'hookabovecomb': '\u0309', - 'hookcmb': '\u0309', - 'hookpalatalizedbelowcmb': '\u0321', - 'hookretroflexbelowcmb': '\u0322', - 'hoonsquare': '\u3342', - 'horicoptic': '\u03E9', - 'horizontalbar': '\u2015', - 'horncmb': '\u031B', - 'hotsprings': '\u2668', - 'house': '\u2302', - 'hparen': '\u24A3', - 'hsuperior': '\u02B0', - 'hturned': '\u0265', - 'huhiragana': '\u3075', - 'huiitosquare': 
'\u3333', - 'hukatakana': '\u30D5', - 'hukatakanahalfwidth': '\uFF8C', - 'hungarumlaut': '\u02DD', - 'hungarumlautcmb': '\u030B', - 'hv': '\u0195', - 'hyphen': '\u002D', - 'hypheninferior': '\uF6E5', - 'hyphenmonospace': '\uFF0D', - 'hyphensmall': '\uFE63', - 'hyphensuperior': '\uF6E6', - 'hyphentwo': '\u2010', - 'i': '\u0069', - 'iacute': '\u00ED', - 'iacyrillic': '\u044F', - 'ibengali': '\u0987', - 'ibopomofo': '\u3127', - 'ibreve': '\u012D', - 'icaron': '\u01D0', - 'icircle': '\u24D8', - 'icircumflex': '\u00EE', - 'icyrillic': '\u0456', - 'idblgrave': '\u0209', - 'ideographearthcircle': '\u328F', - 'ideographfirecircle': '\u328B', - 'ideographicallianceparen': '\u323F', - 'ideographiccallparen': '\u323A', - 'ideographiccentrecircle': '\u32A5', - 'ideographicclose': '\u3006', - 'ideographiccomma': '\u3001', - 'ideographiccommaleft': '\uFF64', - 'ideographiccongratulationparen': '\u3237', - 'ideographiccorrectcircle': '\u32A3', - 'ideographicearthparen': '\u322F', - 'ideographicenterpriseparen': '\u323D', - 'ideographicexcellentcircle': '\u329D', - 'ideographicfestivalparen': '\u3240', - 'ideographicfinancialcircle': '\u3296', - 'ideographicfinancialparen': '\u3236', - 'ideographicfireparen': '\u322B', - 'ideographichaveparen': '\u3232', - 'ideographichighcircle': '\u32A4', - 'ideographiciterationmark': '\u3005', - 'ideographiclaborcircle': '\u3298', - 'ideographiclaborparen': '\u3238', - 'ideographicleftcircle': '\u32A7', - 'ideographiclowcircle': '\u32A6', - 'ideographicmedicinecircle': '\u32A9', - 'ideographicmetalparen': '\u322E', - 'ideographicmoonparen': '\u322A', - 'ideographicnameparen': '\u3234', - 'ideographicperiod': '\u3002', - 'ideographicprintcircle': '\u329E', - 'ideographicreachparen': '\u3243', - 'ideographicrepresentparen': '\u3239', - 'ideographicresourceparen': '\u323E', - 'ideographicrightcircle': '\u32A8', - 'ideographicsecretcircle': '\u3299', - 'ideographicselfparen': '\u3242', - 'ideographicsocietyparen': '\u3233', - 'ideographicspace': 
'\u3000', - 'ideographicspecialparen': '\u3235', - 'ideographicstockparen': '\u3231', - 'ideographicstudyparen': '\u323B', - 'ideographicsunparen': '\u3230', - 'ideographicsuperviseparen': '\u323C', - 'ideographicwaterparen': '\u322C', - 'ideographicwoodparen': '\u322D', - 'ideographiczero': '\u3007', - 'ideographmetalcircle': '\u328E', - 'ideographmooncircle': '\u328A', - 'ideographnamecircle': '\u3294', - 'ideographsuncircle': '\u3290', - 'ideographwatercircle': '\u328C', - 'ideographwoodcircle': '\u328D', - 'ideva': '\u0907', - 'idieresis': '\u00EF', - 'idieresisacute': '\u1E2F', - 'idieresiscyrillic': '\u04E5', - 'idotbelow': '\u1ECB', - 'iebrevecyrillic': '\u04D7', - 'iecyrillic': '\u0435', - 'ieungacirclekorean': '\u3275', - 'ieungaparenkorean': '\u3215', - 'ieungcirclekorean': '\u3267', - 'ieungkorean': '\u3147', - 'ieungparenkorean': '\u3207', - 'igrave': '\u00EC', - 'igujarati': '\u0A87', - 'igurmukhi': '\u0A07', - 'ihiragana': '\u3044', - 'ihookabove': '\u1EC9', - 'iibengali': '\u0988', - 'iicyrillic': '\u0438', - 'iideva': '\u0908', - 'iigujarati': '\u0A88', - 'iigurmukhi': '\u0A08', - 'iimatragurmukhi': '\u0A40', - 'iinvertedbreve': '\u020B', - 'iishortcyrillic': '\u0439', - 'iivowelsignbengali': '\u09C0', - 'iivowelsigndeva': '\u0940', - 'iivowelsigngujarati': '\u0AC0', - 'ij': '\u0133', - 'ikatakana': '\u30A4', - 'ikatakanahalfwidth': '\uFF72', - 'ikorean': '\u3163', - 'ilde': '\u02DC', - 'iluyhebrew': '\u05AC', - 'imacron': '\u012B', - 'imacroncyrillic': '\u04E3', - 'imageorapproximatelyequal': '\u2253', - 'imatragurmukhi': '\u0A3F', - 'imonospace': '\uFF49', - 'increment': '\u2206', - 'infinity': '\u221E', - 'iniarmenian': '\u056B', - 'integral': '\u222B', - 'integralbottom': '\u2321', - 'integralbt': '\u2321', - 'integralex': '\uF8F5', - 'integraltop': '\u2320', - 'integraltp': '\u2320', - 'intersection': '\u2229', - 'intisquare': '\u3305', - 'invbullet': '\u25D8', - 'invcircle': '\u25D9', - 'invsmileface': '\u263B', - 'iocyrillic': '\u0451', - 
'iogonek': '\u012F', - 'iota': '\u03B9', - 'iotadieresis': '\u03CA', - 'iotadieresistonos': '\u0390', - 'iotalatin': '\u0269', - 'iotatonos': '\u03AF', - 'iparen': '\u24A4', - 'irigurmukhi': '\u0A72', - 'ismallhiragana': '\u3043', - 'ismallkatakana': '\u30A3', - 'ismallkatakanahalfwidth': '\uFF68', - 'issharbengali': '\u09FA', - 'istroke': '\u0268', - 'isuperior': '\uF6ED', - 'iterationhiragana': '\u309D', - 'iterationkatakana': '\u30FD', - 'itilde': '\u0129', - 'itildebelow': '\u1E2D', - 'iubopomofo': '\u3129', - 'iucyrillic': '\u044E', - 'ivowelsignbengali': '\u09BF', - 'ivowelsigndeva': '\u093F', - 'ivowelsigngujarati': '\u0ABF', - 'izhitsacyrillic': '\u0475', - 'izhitsadblgravecyrillic': '\u0477', - 'j': '\u006A', - 'jaarmenian': '\u0571', - 'jabengali': '\u099C', - 'jadeva': '\u091C', - 'jagujarati': '\u0A9C', - 'jagurmukhi': '\u0A1C', - 'jbopomofo': '\u3110', - 'jcaron': '\u01F0', - 'jcircle': '\u24D9', - 'jcircumflex': '\u0135', - 'jcrossedtail': '\u029D', - 'jdotlessstroke': '\u025F', - 'jecyrillic': '\u0458', - 'jeemarabic': '\u062C', - 'jeemfinalarabic': '\uFE9E', - 'jeeminitialarabic': '\uFE9F', - 'jeemmedialarabic': '\uFEA0', - 'jeharabic': '\u0698', - 'jehfinalarabic': '\uFB8B', - 'jhabengali': '\u099D', - 'jhadeva': '\u091D', - 'jhagujarati': '\u0A9D', - 'jhagurmukhi': '\u0A1D', - 'jheharmenian': '\u057B', - 'jis': '\u3004', - 'jmonospace': '\uFF4A', - 'jparen': '\u24A5', - 'jsuperior': '\u02B2', - 'k': '\u006B', - 'kabashkircyrillic': '\u04A1', - 'kabengali': '\u0995', - 'kacute': '\u1E31', - 'kacyrillic': '\u043A', - 'kadescendercyrillic': '\u049B', - 'kadeva': '\u0915', - 'kaf': '\u05DB', - 'kafarabic': '\u0643', - 'kafdagesh': '\uFB3B', - 'kafdageshhebrew': '\uFB3B', - 'kaffinalarabic': '\uFEDA', - 'kafhebrew': '\u05DB', - 'kafinitialarabic': '\uFEDB', - 'kafmedialarabic': '\uFEDC', - 'kafrafehebrew': '\uFB4D', - 'kagujarati': '\u0A95', - 'kagurmukhi': '\u0A15', - 'kahiragana': '\u304B', - 'kahookcyrillic': '\u04C4', - 'kakatakana': '\u30AB', - 
'kakatakanahalfwidth': '\uFF76', - 'kappa': '\u03BA', - 'kappasymbolgreek': '\u03F0', - 'kapyeounmieumkorean': '\u3171', - 'kapyeounphieuphkorean': '\u3184', - 'kapyeounpieupkorean': '\u3178', - 'kapyeounssangpieupkorean': '\u3179', - 'karoriisquare': '\u330D', - 'kashidaautoarabic': '\u0640', - 'kashidaautonosidebearingarabic': '\u0640', - 'kasmallkatakana': '\u30F5', - 'kasquare': '\u3384', - 'kasraarabic': '\u0650', - 'kasratanarabic': '\u064D', - 'kastrokecyrillic': '\u049F', - 'katahiraprolongmarkhalfwidth': '\uFF70', - 'kaverticalstrokecyrillic': '\u049D', - 'kbopomofo': '\u310E', - 'kcalsquare': '\u3389', - 'kcaron': '\u01E9', - 'kcedilla': '\u0137', - 'kcircle': '\u24DA', - 'kcommaaccent': '\u0137', - 'kdotbelow': '\u1E33', - 'keharmenian': '\u0584', - 'kehiragana': '\u3051', - 'kekatakana': '\u30B1', - 'kekatakanahalfwidth': '\uFF79', - 'kenarmenian': '\u056F', - 'kesmallkatakana': '\u30F6', - 'kgreenlandic': '\u0138', - 'khabengali': '\u0996', - 'khacyrillic': '\u0445', - 'khadeva': '\u0916', - 'khagujarati': '\u0A96', - 'khagurmukhi': '\u0A16', - 'khaharabic': '\u062E', - 'khahfinalarabic': '\uFEA6', - 'khahinitialarabic': '\uFEA7', - 'khahmedialarabic': '\uFEA8', - 'kheicoptic': '\u03E7', - 'khhadeva': '\u0959', - 'khhagurmukhi': '\u0A59', - 'khieukhacirclekorean': '\u3278', - 'khieukhaparenkorean': '\u3218', - 'khieukhcirclekorean': '\u326A', - 'khieukhkorean': '\u314B', - 'khieukhparenkorean': '\u320A', - 'khokhaithai': '\u0E02', - 'khokhonthai': '\u0E05', - 'khokhuatthai': '\u0E03', - 'khokhwaithai': '\u0E04', - 'khomutthai': '\u0E5B', - 'khook': '\u0199', - 'khorakhangthai': '\u0E06', - 'khzsquare': '\u3391', - 'kihiragana': '\u304D', - 'kikatakana': '\u30AD', - 'kikatakanahalfwidth': '\uFF77', - 'kiroguramusquare': '\u3315', - 'kiromeetorusquare': '\u3316', - 'kirosquare': '\u3314', - 'kiyeokacirclekorean': '\u326E', - 'kiyeokaparenkorean': '\u320E', - 'kiyeokcirclekorean': '\u3260', - 'kiyeokkorean': '\u3131', - 'kiyeokparenkorean': '\u3200', - 
'kiyeoksioskorean': '\u3133', - 'kjecyrillic': '\u045C', - 'klinebelow': '\u1E35', - 'klsquare': '\u3398', - 'kmcubedsquare': '\u33A6', - 'kmonospace': '\uFF4B', - 'kmsquaredsquare': '\u33A2', - 'kohiragana': '\u3053', - 'kohmsquare': '\u33C0', - 'kokaithai': '\u0E01', - 'kokatakana': '\u30B3', - 'kokatakanahalfwidth': '\uFF7A', - 'kooposquare': '\u331E', - 'koppacyrillic': '\u0481', - 'koreanstandardsymbol': '\u327F', - 'koroniscmb': '\u0343', - 'kparen': '\u24A6', - 'kpasquare': '\u33AA', - 'ksicyrillic': '\u046F', - 'ktsquare': '\u33CF', - 'kturned': '\u029E', - 'kuhiragana': '\u304F', - 'kukatakana': '\u30AF', - 'kukatakanahalfwidth': '\uFF78', - 'kvsquare': '\u33B8', - 'kwsquare': '\u33BE', - 'l': '\u006C', - 'labengali': '\u09B2', - 'lacute': '\u013A', - 'ladeva': '\u0932', - 'lagujarati': '\u0AB2', - 'lagurmukhi': '\u0A32', - 'lakkhangyaothai': '\u0E45', - 'lamaleffinalarabic': '\uFEFC', - 'lamalefhamzaabovefinalarabic': '\uFEF8', - 'lamalefhamzaaboveisolatedarabic': '\uFEF7', - 'lamalefhamzabelowfinalarabic': '\uFEFA', - 'lamalefhamzabelowisolatedarabic': '\uFEF9', - 'lamalefisolatedarabic': '\uFEFB', - 'lamalefmaddaabovefinalarabic': '\uFEF6', - 'lamalefmaddaaboveisolatedarabic': '\uFEF5', - 'lamarabic': '\u0644', - 'lambda': '\u03BB', - 'lambdastroke': '\u019B', - 'lamed': '\u05DC', - 'lameddagesh': '\uFB3C', - 'lameddageshhebrew': '\uFB3C', - 'lamedhebrew': '\u05DC', - 'lamedholam': '\u05DC\u05B9', - 'lamedholamdagesh': '\u05DC\u05B9\u05BC', - 'lamedholamdageshhebrew': '\u05DC\u05B9\u05BC', - 'lamedholamhebrew': '\u05DC\u05B9', - 'lamfinalarabic': '\uFEDE', - 'lamhahinitialarabic': '\uFCCA', - 'laminitialarabic': '\uFEDF', - 'lamjeeminitialarabic': '\uFCC9', - 'lamkhahinitialarabic': '\uFCCB', - 'lamlamhehisolatedarabic': '\uFDF2', - 'lammedialarabic': '\uFEE0', - 'lammeemhahinitialarabic': '\uFD88', - 'lammeeminitialarabic': '\uFCCC', - 'lammeemjeeminitialarabic': '\uFEDF\uFEE4\uFEA0', - 'lammeemkhahinitialarabic': '\uFEDF\uFEE4\uFEA8', - 'largecircle': 
'\u25EF', - 'lbar': '\u019A', - 'lbelt': '\u026C', - 'lbopomofo': '\u310C', - 'lcaron': '\u013E', - 'lcedilla': '\u013C', - 'lcircle': '\u24DB', - 'lcircumflexbelow': '\u1E3D', - 'lcommaaccent': '\u013C', - 'ldot': '\u0140', - 'ldotaccent': '\u0140', - 'ldotbelow': '\u1E37', - 'ldotbelowmacron': '\u1E39', - 'leftangleabovecmb': '\u031A', - 'lefttackbelowcmb': '\u0318', - 'less': '\u003C', - 'lessequal': '\u2264', - 'lessequalorgreater': '\u22DA', - 'lessmonospace': '\uFF1C', - 'lessorequivalent': '\u2272', - 'lessorgreater': '\u2276', - 'lessoverequal': '\u2266', - 'lesssmall': '\uFE64', - 'lezh': '\u026E', - 'lfblock': '\u258C', - 'lhookretroflex': '\u026D', - 'lira': '\u20A4', - 'liwnarmenian': '\u056C', - 'lj': '\u01C9', - 'ljecyrillic': '\u0459', - 'll': '\uF6C0', - 'lladeva': '\u0933', - 'llagujarati': '\u0AB3', - 'llinebelow': '\u1E3B', - 'llladeva': '\u0934', - 'llvocalicbengali': '\u09E1', - 'llvocalicdeva': '\u0961', - 'llvocalicvowelsignbengali': '\u09E3', - 'llvocalicvowelsigndeva': '\u0963', - 'lmiddletilde': '\u026B', - 'lmonospace': '\uFF4C', - 'lmsquare': '\u33D0', - 'lochulathai': '\u0E2C', - 'logicaland': '\u2227', - 'logicalnot': '\u00AC', - 'logicalnotreversed': '\u2310', - 'logicalor': '\u2228', - 'lolingthai': '\u0E25', - 'longs': '\u017F', - 'lowlinecenterline': '\uFE4E', - 'lowlinecmb': '\u0332', - 'lowlinedashed': '\uFE4D', - 'lozenge': '\u25CA', - 'lparen': '\u24A7', - 'lslash': '\u0142', - 'lsquare': '\u2113', - 'lsuperior': '\uF6EE', - 'ltshade': '\u2591', - 'luthai': '\u0E26', - 'lvocalicbengali': '\u098C', - 'lvocalicdeva': '\u090C', - 'lvocalicvowelsignbengali': '\u09E2', - 'lvocalicvowelsigndeva': '\u0962', - 'lxsquare': '\u33D3', - 'm': '\u006D', - 'mabengali': '\u09AE', - 'macron': '\u00AF', - 'macronbelowcmb': '\u0331', - 'macroncmb': '\u0304', - 'macronlowmod': '\u02CD', - 'macronmonospace': '\uFFE3', - 'macute': '\u1E3F', - 'madeva': '\u092E', - 'magujarati': '\u0AAE', - 'magurmukhi': '\u0A2E', - 'mahapakhhebrew': '\u05A4', - 
'mahapakhlefthebrew': '\u05A4', - 'mahiragana': '\u307E', - 'maichattawalowleftthai': '\uF895', - 'maichattawalowrightthai': '\uF894', - 'maichattawathai': '\u0E4B', - 'maichattawaupperleftthai': '\uF893', - 'maieklowleftthai': '\uF88C', - 'maieklowrightthai': '\uF88B', - 'maiekthai': '\u0E48', - 'maiekupperleftthai': '\uF88A', - 'maihanakatleftthai': '\uF884', - 'maihanakatthai': '\u0E31', - 'maitaikhuleftthai': '\uF889', - 'maitaikhuthai': '\u0E47', - 'maitholowleftthai': '\uF88F', - 'maitholowrightthai': '\uF88E', - 'maithothai': '\u0E49', - 'maithoupperleftthai': '\uF88D', - 'maitrilowleftthai': '\uF892', - 'maitrilowrightthai': '\uF891', - 'maitrithai': '\u0E4A', - 'maitriupperleftthai': '\uF890', - 'maiyamokthai': '\u0E46', - 'makatakana': '\u30DE', - 'makatakanahalfwidth': '\uFF8F', - 'male': '\u2642', - 'mansyonsquare': '\u3347', - 'maqafhebrew': '\u05BE', - 'mars': '\u2642', - 'masoracirclehebrew': '\u05AF', - 'masquare': '\u3383', - 'mbopomofo': '\u3107', - 'mbsquare': '\u33D4', - 'mcircle': '\u24DC', - 'mcubedsquare': '\u33A5', - 'mdotaccent': '\u1E41', - 'mdotbelow': '\u1E43', - 'meemarabic': '\u0645', - 'meemfinalarabic': '\uFEE2', - 'meeminitialarabic': '\uFEE3', - 'meemmedialarabic': '\uFEE4', - 'meemmeeminitialarabic': '\uFCD1', - 'meemmeemisolatedarabic': '\uFC48', - 'meetorusquare': '\u334D', - 'mehiragana': '\u3081', - 'meizierasquare': '\u337E', - 'mekatakana': '\u30E1', - 'mekatakanahalfwidth': '\uFF92', - 'mem': '\u05DE', - 'memdagesh': '\uFB3E', - 'memdageshhebrew': '\uFB3E', - 'memhebrew': '\u05DE', - 'menarmenian': '\u0574', - 'merkhahebrew': '\u05A5', - 'merkhakefulahebrew': '\u05A6', - 'merkhakefulalefthebrew': '\u05A6', - 'merkhalefthebrew': '\u05A5', - 'mhook': '\u0271', - 'mhzsquare': '\u3392', - 'middledotkatakanahalfwidth': '\uFF65', - 'middot': '\u00B7', - 'mieumacirclekorean': '\u3272', - 'mieumaparenkorean': '\u3212', - 'mieumcirclekorean': '\u3264', - 'mieumkorean': '\u3141', - 'mieumpansioskorean': '\u3170', - 
'mieumparenkorean': '\u3204', - 'mieumpieupkorean': '\u316E', - 'mieumsioskorean': '\u316F', - 'mihiragana': '\u307F', - 'mikatakana': '\u30DF', - 'mikatakanahalfwidth': '\uFF90', - 'minus': '\u2212', - 'minusbelowcmb': '\u0320', - 'minuscircle': '\u2296', - 'minusmod': '\u02D7', - 'minusplus': '\u2213', - 'minute': '\u2032', - 'miribaarusquare': '\u334A', - 'mirisquare': '\u3349', - 'mlonglegturned': '\u0270', - 'mlsquare': '\u3396', - 'mmcubedsquare': '\u33A3', - 'mmonospace': '\uFF4D', - 'mmsquaredsquare': '\u339F', - 'mohiragana': '\u3082', - 'mohmsquare': '\u33C1', - 'mokatakana': '\u30E2', - 'mokatakanahalfwidth': '\uFF93', - 'molsquare': '\u33D6', - 'momathai': '\u0E21', - 'moverssquare': '\u33A7', - 'moverssquaredsquare': '\u33A8', - 'mparen': '\u24A8', - 'mpasquare': '\u33AB', - 'mssquare': '\u33B3', - 'msuperior': '\uF6EF', - 'mturned': '\u026F', - 'mu': '\u00B5', - 'mu1': '\u00B5', - 'muasquare': '\u3382', - 'muchgreater': '\u226B', - 'muchless': '\u226A', - 'mufsquare': '\u338C', - 'mugreek': '\u03BC', - 'mugsquare': '\u338D', - 'muhiragana': '\u3080', - 'mukatakana': '\u30E0', - 'mukatakanahalfwidth': '\uFF91', - 'mulsquare': '\u3395', - 'multiply': '\u00D7', - 'mumsquare': '\u339B', - 'munahhebrew': '\u05A3', - 'munahlefthebrew': '\u05A3', - 'musicalnote': '\u266A', - 'musicalnotedbl': '\u266B', - 'musicflatsign': '\u266D', - 'musicsharpsign': '\u266F', - 'mussquare': '\u33B2', - 'muvsquare': '\u33B6', - 'muwsquare': '\u33BC', - 'mvmegasquare': '\u33B9', - 'mvsquare': '\u33B7', - 'mwmegasquare': '\u33BF', - 'mwsquare': '\u33BD', - 'n': '\u006E', - 'nabengali': '\u09A8', - 'nabla': '\u2207', - 'nacute': '\u0144', - 'nadeva': '\u0928', - 'nagujarati': '\u0AA8', - 'nagurmukhi': '\u0A28', - 'nahiragana': '\u306A', - 'nakatakana': '\u30CA', - 'nakatakanahalfwidth': '\uFF85', - 'napostrophe': '\u0149', - 'nasquare': '\u3381', - 'nbopomofo': '\u310B', - 'nbspace': '\u00A0', - 'ncaron': '\u0148', - 'ncedilla': '\u0146', - 'ncircle': '\u24DD', - 
'ncircumflexbelow': '\u1E4B', - 'ncommaaccent': '\u0146', - 'ndotaccent': '\u1E45', - 'ndotbelow': '\u1E47', - 'nehiragana': '\u306D', - 'nekatakana': '\u30CD', - 'nekatakanahalfwidth': '\uFF88', - 'newsheqelsign': '\u20AA', - 'nfsquare': '\u338B', - 'ngabengali': '\u0999', - 'ngadeva': '\u0919', - 'ngagujarati': '\u0A99', - 'ngagurmukhi': '\u0A19', - 'ngonguthai': '\u0E07', - 'nhiragana': '\u3093', - 'nhookleft': '\u0272', - 'nhookretroflex': '\u0273', - 'nieunacirclekorean': '\u326F', - 'nieunaparenkorean': '\u320F', - 'nieuncieuckorean': '\u3135', - 'nieuncirclekorean': '\u3261', - 'nieunhieuhkorean': '\u3136', - 'nieunkorean': '\u3134', - 'nieunpansioskorean': '\u3168', - 'nieunparenkorean': '\u3201', - 'nieunsioskorean': '\u3167', - 'nieuntikeutkorean': '\u3166', - 'nihiragana': '\u306B', - 'nikatakana': '\u30CB', - 'nikatakanahalfwidth': '\uFF86', - 'nikhahitleftthai': '\uF899', - 'nikhahitthai': '\u0E4D', - 'nine': '\u0039', - 'ninearabic': '\u0669', - 'ninebengali': '\u09EF', - 'ninecircle': '\u2468', - 'ninecircleinversesansserif': '\u2792', - 'ninedeva': '\u096F', - 'ninegujarati': '\u0AEF', - 'ninegurmukhi': '\u0A6F', - 'ninehackarabic': '\u0669', - 'ninehangzhou': '\u3029', - 'nineideographicparen': '\u3228', - 'nineinferior': '\u2089', - 'ninemonospace': '\uFF19', - 'nineoldstyle': '\uF739', - 'nineparen': '\u247C', - 'nineperiod': '\u2490', - 'ninepersian': '\u06F9', - 'nineroman': '\u2178', - 'ninesuperior': '\u2079', - 'nineteencircle': '\u2472', - 'nineteenparen': '\u2486', - 'nineteenperiod': '\u249A', - 'ninethai': '\u0E59', - 'nj': '\u01CC', - 'njecyrillic': '\u045A', - 'nkatakana': '\u30F3', - 'nkatakanahalfwidth': '\uFF9D', - 'nlegrightlong': '\u019E', - 'nlinebelow': '\u1E49', - 'nmonospace': '\uFF4E', - 'nmsquare': '\u339A', - 'nnabengali': '\u09A3', - 'nnadeva': '\u0923', - 'nnagujarati': '\u0AA3', - 'nnagurmukhi': '\u0A23', - 'nnnadeva': '\u0929', - 'nohiragana': '\u306E', - 'nokatakana': '\u30CE', - 'nokatakanahalfwidth': '\uFF89', - 
'nonbreakingspace': '\u00A0', - 'nonenthai': '\u0E13', - 'nonuthai': '\u0E19', - 'noonarabic': '\u0646', - 'noonfinalarabic': '\uFEE6', - 'noonghunnaarabic': '\u06BA', - 'noonghunnafinalarabic': '\uFB9F', - 'noonhehinitialarabic': '\uFEE7\uFEEC', - 'nooninitialarabic': '\uFEE7', - 'noonjeeminitialarabic': '\uFCD2', - 'noonjeemisolatedarabic': '\uFC4B', - 'noonmedialarabic': '\uFEE8', - 'noonmeeminitialarabic': '\uFCD5', - 'noonmeemisolatedarabic': '\uFC4E', - 'noonnoonfinalarabic': '\uFC8D', - 'notcontains': '\u220C', - 'notelement': '\u2209', - 'notelementof': '\u2209', - 'notequal': '\u2260', - 'notgreater': '\u226F', - 'notgreaternorequal': '\u2271', - 'notgreaternorless': '\u2279', - 'notidentical': '\u2262', - 'notless': '\u226E', - 'notlessnorequal': '\u2270', - 'notparallel': '\u2226', - 'notprecedes': '\u2280', - 'notsubset': '\u2284', - 'notsucceeds': '\u2281', - 'notsuperset': '\u2285', - 'nowarmenian': '\u0576', - 'nparen': '\u24A9', - 'nssquare': '\u33B1', - 'nsuperior': '\u207F', - 'ntilde': '\u00F1', - 'nu': '\u03BD', - 'nuhiragana': '\u306C', - 'nukatakana': '\u30CC', - 'nukatakanahalfwidth': '\uFF87', - 'nuktabengali': '\u09BC', - 'nuktadeva': '\u093C', - 'nuktagujarati': '\u0ABC', - 'nuktagurmukhi': '\u0A3C', - 'numbersign': '\u0023', - 'numbersignmonospace': '\uFF03', - 'numbersignsmall': '\uFE5F', - 'numeralsigngreek': '\u0374', - 'numeralsignlowergreek': '\u0375', - 'numero': '\u2116', - 'nun': '\u05E0', - 'nundagesh': '\uFB40', - 'nundageshhebrew': '\uFB40', - 'nunhebrew': '\u05E0', - 'nvsquare': '\u33B5', - 'nwsquare': '\u33BB', - 'nyabengali': '\u099E', - 'nyadeva': '\u091E', - 'nyagujarati': '\u0A9E', - 'nyagurmukhi': '\u0A1E', - 'o': '\u006F', - 'oacute': '\u00F3', - 'oangthai': '\u0E2D', - 'obarred': '\u0275', - 'obarredcyrillic': '\u04E9', - 'obarreddieresiscyrillic': '\u04EB', - 'obengali': '\u0993', - 'obopomofo': '\u311B', - 'obreve': '\u014F', - 'ocandradeva': '\u0911', - 'ocandragujarati': '\u0A91', - 'ocandravowelsigndeva': 
'\u0949', - 'ocandravowelsigngujarati': '\u0AC9', - 'ocaron': '\u01D2', - 'ocircle': '\u24DE', - 'ocircumflex': '\u00F4', - 'ocircumflexacute': '\u1ED1', - 'ocircumflexdotbelow': '\u1ED9', - 'ocircumflexgrave': '\u1ED3', - 'ocircumflexhookabove': '\u1ED5', - 'ocircumflextilde': '\u1ED7', - 'ocyrillic': '\u043E', - 'odblacute': '\u0151', - 'odblgrave': '\u020D', - 'odeva': '\u0913', - 'odieresis': '\u00F6', - 'odieresiscyrillic': '\u04E7', - 'odotbelow': '\u1ECD', - 'oe': '\u0153', - 'oekorean': '\u315A', - 'ogonek': '\u02DB', - 'ogonekcmb': '\u0328', - 'ograve': '\u00F2', - 'ogujarati': '\u0A93', - 'oharmenian': '\u0585', - 'ohiragana': '\u304A', - 'ohookabove': '\u1ECF', - 'ohorn': '\u01A1', - 'ohornacute': '\u1EDB', - 'ohorndotbelow': '\u1EE3', - 'ohorngrave': '\u1EDD', - 'ohornhookabove': '\u1EDF', - 'ohorntilde': '\u1EE1', - 'ohungarumlaut': '\u0151', - 'oi': '\u01A3', - 'oinvertedbreve': '\u020F', - 'okatakana': '\u30AA', - 'okatakanahalfwidth': '\uFF75', - 'okorean': '\u3157', - 'olehebrew': '\u05AB', - 'omacron': '\u014D', - 'omacronacute': '\u1E53', - 'omacrongrave': '\u1E51', - 'omdeva': '\u0950', - 'omega': '\u03C9', - 'omega1': '\u03D6', - 'omegacyrillic': '\u0461', - 'omegalatinclosed': '\u0277', - 'omegaroundcyrillic': '\u047B', - 'omegatitlocyrillic': '\u047D', - 'omegatonos': '\u03CE', - 'omgujarati': '\u0AD0', - 'omicron': '\u03BF', - 'omicrontonos': '\u03CC', - 'omonospace': '\uFF4F', - 'one': '\u0031', - 'onearabic': '\u0661', - 'onebengali': '\u09E7', - 'onecircle': '\u2460', - 'onecircleinversesansserif': '\u278A', - 'onedeva': '\u0967', - 'onedotenleader': '\u2024', - 'oneeighth': '\u215B', - 'onefitted': '\uF6DC', - 'onegujarati': '\u0AE7', - 'onegurmukhi': '\u0A67', - 'onehackarabic': '\u0661', - 'onehalf': '\u00BD', - 'onehangzhou': '\u3021', - 'oneideographicparen': '\u3220', - 'oneinferior': '\u2081', - 'onemonospace': '\uFF11', - 'onenumeratorbengali': '\u09F4', - 'oneoldstyle': '\uF731', - 'oneparen': '\u2474', - 'oneperiod': '\u2488', - 
'onepersian': '\u06F1', - 'onequarter': '\u00BC', - 'oneroman': '\u2170', - 'onesuperior': '\u00B9', - 'onethai': '\u0E51', - 'onethird': '\u2153', - 'oogonek': '\u01EB', - 'oogonekmacron': '\u01ED', - 'oogurmukhi': '\u0A13', - 'oomatragurmukhi': '\u0A4B', - 'oopen': '\u0254', - 'oparen': '\u24AA', - 'openbullet': '\u25E6', - 'option': '\u2325', - 'ordfeminine': '\u00AA', - 'ordmasculine': '\u00BA', - 'orthogonal': '\u221F', - 'oshortdeva': '\u0912', - 'oshortvowelsigndeva': '\u094A', - 'oslash': '\u00F8', - 'oslashacute': '\u01FF', - 'osmallhiragana': '\u3049', - 'osmallkatakana': '\u30A9', - 'osmallkatakanahalfwidth': '\uFF6B', - 'ostrokeacute': '\u01FF', - 'osuperior': '\uF6F0', - 'otcyrillic': '\u047F', - 'otilde': '\u00F5', - 'otildeacute': '\u1E4D', - 'otildedieresis': '\u1E4F', - 'oubopomofo': '\u3121', - 'overline': '\u203E', - 'overlinecenterline': '\uFE4A', - 'overlinecmb': '\u0305', - 'overlinedashed': '\uFE49', - 'overlinedblwavy': '\uFE4C', - 'overlinewavy': '\uFE4B', - 'overscore': '\u00AF', - 'ovowelsignbengali': '\u09CB', - 'ovowelsigndeva': '\u094B', - 'ovowelsigngujarati': '\u0ACB', - 'p': '\u0070', - 'paampssquare': '\u3380', - 'paasentosquare': '\u332B', - 'pabengali': '\u09AA', - 'pacute': '\u1E55', - 'padeva': '\u092A', - 'pagedown': '\u21DF', - 'pageup': '\u21DE', - 'pagujarati': '\u0AAA', - 'pagurmukhi': '\u0A2A', - 'pahiragana': '\u3071', - 'paiyannoithai': '\u0E2F', - 'pakatakana': '\u30D1', - 'palatalizationcyrilliccmb': '\u0484', - 'palochkacyrillic': '\u04C0', - 'pansioskorean': '\u317F', - 'paragraph': '\u00B6', - 'parallel': '\u2225', - 'parenleft': '\u0028', - 'parenleftaltonearabic': '\uFD3E', - 'parenleftbt': '\uF8ED', - 'parenleftex': '\uF8EC', - 'parenleftinferior': '\u208D', - 'parenleftmonospace': '\uFF08', - 'parenleftsmall': '\uFE59', - 'parenleftsuperior': '\u207D', - 'parenlefttp': '\uF8EB', - 'parenleftvertical': '\uFE35', - 'parenright': '\u0029', - 'parenrightaltonearabic': '\uFD3F', - 'parenrightbt': '\uF8F8', - 
'parenrightex': '\uF8F7', - 'parenrightinferior': '\u208E', - 'parenrightmonospace': '\uFF09', - 'parenrightsmall': '\uFE5A', - 'parenrightsuperior': '\u207E', - 'parenrighttp': '\uF8F6', - 'parenrightvertical': '\uFE36', - 'partialdiff': '\u2202', - 'paseqhebrew': '\u05C0', - 'pashtahebrew': '\u0599', - 'pasquare': '\u33A9', - 'patah': '\u05B7', - 'patah11': '\u05B7', - 'patah1d': '\u05B7', - 'patah2a': '\u05B7', - 'patahhebrew': '\u05B7', - 'patahnarrowhebrew': '\u05B7', - 'patahquarterhebrew': '\u05B7', - 'patahwidehebrew': '\u05B7', - 'pazerhebrew': '\u05A1', - 'pbopomofo': '\u3106', - 'pcircle': '\u24DF', - 'pdotaccent': '\u1E57', - 'pe': '\u05E4', - 'pecyrillic': '\u043F', - 'pedagesh': '\uFB44', - 'pedageshhebrew': '\uFB44', - 'peezisquare': '\u333B', - 'pefinaldageshhebrew': '\uFB43', - 'peharabic': '\u067E', - 'peharmenian': '\u057A', - 'pehebrew': '\u05E4', - 'pehfinalarabic': '\uFB57', - 'pehinitialarabic': '\uFB58', - 'pehiragana': '\u307A', - 'pehmedialarabic': '\uFB59', - 'pekatakana': '\u30DA', - 'pemiddlehookcyrillic': '\u04A7', - 'perafehebrew': '\uFB4E', - 'percent': '\u0025', - 'percentarabic': '\u066A', - 'percentmonospace': '\uFF05', - 'percentsmall': '\uFE6A', - 'period': '\u002E', - 'periodarmenian': '\u0589', - 'periodcentered': '\u00B7', - 'periodhalfwidth': '\uFF61', - 'periodinferior': '\uF6E7', - 'periodmonospace': '\uFF0E', - 'periodsmall': '\uFE52', - 'periodsuperior': '\uF6E8', - 'perispomenigreekcmb': '\u0342', - 'perpendicular': '\u22A5', - 'perthousand': '\u2030', - 'peseta': '\u20A7', - 'pfsquare': '\u338A', - 'phabengali': '\u09AB', - 'phadeva': '\u092B', - 'phagujarati': '\u0AAB', - 'phagurmukhi': '\u0A2B', - 'phi': '\u03C6', - 'phi1': '\u03D5', - 'phieuphacirclekorean': '\u327A', - 'phieuphaparenkorean': '\u321A', - 'phieuphcirclekorean': '\u326C', - 'phieuphkorean': '\u314D', - 'phieuphparenkorean': '\u320C', - 'philatin': '\u0278', - 'phinthuthai': '\u0E3A', - 'phisymbolgreek': '\u03D5', - 'phook': '\u01A5', - 'phophanthai': 
'\u0E1E', - 'phophungthai': '\u0E1C', - 'phosamphaothai': '\u0E20', - 'pi': '\u03C0', - 'pieupacirclekorean': '\u3273', - 'pieupaparenkorean': '\u3213', - 'pieupcieuckorean': '\u3176', - 'pieupcirclekorean': '\u3265', - 'pieupkiyeokkorean': '\u3172', - 'pieupkorean': '\u3142', - 'pieupparenkorean': '\u3205', - 'pieupsioskiyeokkorean': '\u3174', - 'pieupsioskorean': '\u3144', - 'pieupsiostikeutkorean': '\u3175', - 'pieupthieuthkorean': '\u3177', - 'pieuptikeutkorean': '\u3173', - 'pihiragana': '\u3074', - 'pikatakana': '\u30D4', - 'pisymbolgreek': '\u03D6', - 'piwrarmenian': '\u0583', - 'plus': '\u002B', - 'plusbelowcmb': '\u031F', - 'pluscircle': '\u2295', - 'plusminus': '\u00B1', - 'plusmod': '\u02D6', - 'plusmonospace': '\uFF0B', - 'plussmall': '\uFE62', - 'plussuperior': '\u207A', - 'pmonospace': '\uFF50', - 'pmsquare': '\u33D8', - 'pohiragana': '\u307D', - 'pointingindexdownwhite': '\u261F', - 'pointingindexleftwhite': '\u261C', - 'pointingindexrightwhite': '\u261E', - 'pointingindexupwhite': '\u261D', - 'pokatakana': '\u30DD', - 'poplathai': '\u0E1B', - 'postalmark': '\u3012', - 'postalmarkface': '\u3020', - 'pparen': '\u24AB', - 'precedes': '\u227A', - 'prescription': '\u211E', - 'primemod': '\u02B9', - 'primereversed': '\u2035', - 'product': '\u220F', - 'projective': '\u2305', - 'prolongedkana': '\u30FC', - 'propellor': '\u2318', - 'propersubset': '\u2282', - 'propersuperset': '\u2283', - 'proportion': '\u2237', - 'proportional': '\u221D', - 'psi': '\u03C8', - 'psicyrillic': '\u0471', - 'psilipneumatacyrilliccmb': '\u0486', - 'pssquare': '\u33B0', - 'puhiragana': '\u3077', - 'pukatakana': '\u30D7', - 'pvsquare': '\u33B4', - 'pwsquare': '\u33BA', - 'q': '\u0071', - 'qadeva': '\u0958', - 'qadmahebrew': '\u05A8', - 'qafarabic': '\u0642', - 'qaffinalarabic': '\uFED6', - 'qafinitialarabic': '\uFED7', - 'qafmedialarabic': '\uFED8', - 'qamats': '\u05B8', - 'qamats10': '\u05B8', - 'qamats1a': '\u05B8', - 'qamats1c': '\u05B8', - 'qamats27': '\u05B8', - 'qamats29': 
'\u05B8', - 'qamats33': '\u05B8', - 'qamatsde': '\u05B8', - 'qamatshebrew': '\u05B8', - 'qamatsnarrowhebrew': '\u05B8', - 'qamatsqatanhebrew': '\u05B8', - 'qamatsqatannarrowhebrew': '\u05B8', - 'qamatsqatanquarterhebrew': '\u05B8', - 'qamatsqatanwidehebrew': '\u05B8', - 'qamatsquarterhebrew': '\u05B8', - 'qamatswidehebrew': '\u05B8', - 'qarneyparahebrew': '\u059F', - 'qbopomofo': '\u3111', - 'qcircle': '\u24E0', - 'qhook': '\u02A0', - 'qmonospace': '\uFF51', - 'qof': '\u05E7', - 'qofdagesh': '\uFB47', - 'qofdageshhebrew': '\uFB47', - 'qofhatafpatah': '\u05E7\u05B2', - 'qofhatafpatahhebrew': '\u05E7\u05B2', - 'qofhatafsegol': '\u05E7\u05B1', - 'qofhatafsegolhebrew': '\u05E7\u05B1', - 'qofhebrew': '\u05E7', - 'qofhiriq': '\u05E7\u05B4', - 'qofhiriqhebrew': '\u05E7\u05B4', - 'qofholam': '\u05E7\u05B9', - 'qofholamhebrew': '\u05E7\u05B9', - 'qofpatah': '\u05E7\u05B7', - 'qofpatahhebrew': '\u05E7\u05B7', - 'qofqamats': '\u05E7\u05B8', - 'qofqamatshebrew': '\u05E7\u05B8', - 'qofqubuts': '\u05E7\u05BB', - 'qofqubutshebrew': '\u05E7\u05BB', - 'qofsegol': '\u05E7\u05B6', - 'qofsegolhebrew': '\u05E7\u05B6', - 'qofsheva': '\u05E7\u05B0', - 'qofshevahebrew': '\u05E7\u05B0', - 'qoftsere': '\u05E7\u05B5', - 'qoftserehebrew': '\u05E7\u05B5', - 'qparen': '\u24AC', - 'quarternote': '\u2669', - 'qubuts': '\u05BB', - 'qubuts18': '\u05BB', - 'qubuts25': '\u05BB', - 'qubuts31': '\u05BB', - 'qubutshebrew': '\u05BB', - 'qubutsnarrowhebrew': '\u05BB', - 'qubutsquarterhebrew': '\u05BB', - 'qubutswidehebrew': '\u05BB', - 'question': '\u003F', - 'questionarabic': '\u061F', - 'questionarmenian': '\u055E', - 'questiondown': '\u00BF', - 'questiondownsmall': '\uF7BF', - 'questiongreek': '\u037E', - 'questionmonospace': '\uFF1F', - 'questionsmall': '\uF73F', - 'quotedbl': '\u0022', - 'quotedblbase': '\u201E', - 'quotedblleft': '\u201C', - 'quotedblmonospace': '\uFF02', - 'quotedblprime': '\u301E', - 'quotedblprimereversed': '\u301D', - 'quotedblright': '\u201D', - 'quoteleft': '\u2018', - 
'quoteleftreversed': '\u201B', - 'quotereversed': '\u201B', - 'quoteright': '\u2019', - 'quoterightn': '\u0149', - 'quotesinglbase': '\u201A', - 'quotesingle': '\u0027', - 'quotesinglemonospace': '\uFF07', - 'r': '\u0072', - 'raarmenian': '\u057C', - 'rabengali': '\u09B0', - 'racute': '\u0155', - 'radeva': '\u0930', - 'radical': '\u221A', - 'radicalex': '\uF8E5', - 'radoverssquare': '\u33AE', - 'radoverssquaredsquare': '\u33AF', - 'radsquare': '\u33AD', - 'rafe': '\u05BF', - 'rafehebrew': '\u05BF', - 'ragujarati': '\u0AB0', - 'ragurmukhi': '\u0A30', - 'rahiragana': '\u3089', - 'rakatakana': '\u30E9', - 'rakatakanahalfwidth': '\uFF97', - 'ralowerdiagonalbengali': '\u09F1', - 'ramiddlediagonalbengali': '\u09F0', - 'ramshorn': '\u0264', - 'ratio': '\u2236', - 'rbopomofo': '\u3116', - 'rcaron': '\u0159', - 'rcedilla': '\u0157', - 'rcircle': '\u24E1', - 'rcommaaccent': '\u0157', - 'rdblgrave': '\u0211', - 'rdotaccent': '\u1E59', - 'rdotbelow': '\u1E5B', - 'rdotbelowmacron': '\u1E5D', - 'referencemark': '\u203B', - 'reflexsubset': '\u2286', - 'reflexsuperset': '\u2287', - 'registered': '\u00AE', - 'registersans': '\uF8E8', - 'registerserif': '\uF6DA', - 'reharabic': '\u0631', - 'reharmenian': '\u0580', - 'rehfinalarabic': '\uFEAE', - 'rehiragana': '\u308C', - 'rehyehaleflamarabic': '\u0631\uFEF3\uFE8E\u0644', - 'rekatakana': '\u30EC', - 'rekatakanahalfwidth': '\uFF9A', - 'resh': '\u05E8', - 'reshdageshhebrew': '\uFB48', - 'reshhatafpatah': '\u05E8\u05B2', - 'reshhatafpatahhebrew': '\u05E8\u05B2', - 'reshhatafsegol': '\u05E8\u05B1', - 'reshhatafsegolhebrew': '\u05E8\u05B1', - 'reshhebrew': '\u05E8', - 'reshhiriq': '\u05E8\u05B4', - 'reshhiriqhebrew': '\u05E8\u05B4', - 'reshholam': '\u05E8\u05B9', - 'reshholamhebrew': '\u05E8\u05B9', - 'reshpatah': '\u05E8\u05B7', - 'reshpatahhebrew': '\u05E8\u05B7', - 'reshqamats': '\u05E8\u05B8', - 'reshqamatshebrew': '\u05E8\u05B8', - 'reshqubuts': '\u05E8\u05BB', - 'reshqubutshebrew': '\u05E8\u05BB', - 'reshsegol': '\u05E8\u05B6', - 
'reshsegolhebrew': '\u05E8\u05B6', - 'reshsheva': '\u05E8\u05B0', - 'reshshevahebrew': '\u05E8\u05B0', - 'reshtsere': '\u05E8\u05B5', - 'reshtserehebrew': '\u05E8\u05B5', - 'reversedtilde': '\u223D', - 'reviahebrew': '\u0597', - 'reviamugrashhebrew': '\u0597', - 'revlogicalnot': '\u2310', - 'rfishhook': '\u027E', - 'rfishhookreversed': '\u027F', - 'rhabengali': '\u09DD', - 'rhadeva': '\u095D', - 'rho': '\u03C1', - 'rhook': '\u027D', - 'rhookturned': '\u027B', - 'rhookturnedsuperior': '\u02B5', - 'rhosymbolgreek': '\u03F1', - 'rhotichookmod': '\u02DE', - 'rieulacirclekorean': '\u3271', - 'rieulaparenkorean': '\u3211', - 'rieulcirclekorean': '\u3263', - 'rieulhieuhkorean': '\u3140', - 'rieulkiyeokkorean': '\u313A', - 'rieulkiyeoksioskorean': '\u3169', - 'rieulkorean': '\u3139', - 'rieulmieumkorean': '\u313B', - 'rieulpansioskorean': '\u316C', - 'rieulparenkorean': '\u3203', - 'rieulphieuphkorean': '\u313F', - 'rieulpieupkorean': '\u313C', - 'rieulpieupsioskorean': '\u316B', - 'rieulsioskorean': '\u313D', - 'rieulthieuthkorean': '\u313E', - 'rieultikeutkorean': '\u316A', - 'rieulyeorinhieuhkorean': '\u316D', - 'rightangle': '\u221F', - 'righttackbelowcmb': '\u0319', - 'righttriangle': '\u22BF', - 'rihiragana': '\u308A', - 'rikatakana': '\u30EA', - 'rikatakanahalfwidth': '\uFF98', - 'ring': '\u02DA', - 'ringbelowcmb': '\u0325', - 'ringcmb': '\u030A', - 'ringhalfleft': '\u02BF', - 'ringhalfleftarmenian': '\u0559', - 'ringhalfleftbelowcmb': '\u031C', - 'ringhalfleftcentered': '\u02D3', - 'ringhalfright': '\u02BE', - 'ringhalfrightbelowcmb': '\u0339', - 'ringhalfrightcentered': '\u02D2', - 'rinvertedbreve': '\u0213', - 'rittorusquare': '\u3351', - 'rlinebelow': '\u1E5F', - 'rlongleg': '\u027C', - 'rlonglegturned': '\u027A', - 'rmonospace': '\uFF52', - 'rohiragana': '\u308D', - 'rokatakana': '\u30ED', - 'rokatakanahalfwidth': '\uFF9B', - 'roruathai': '\u0E23', - 'rparen': '\u24AD', - 'rrabengali': '\u09DC', - 'rradeva': '\u0931', - 'rragurmukhi': '\u0A5C', - 'rreharabic': 
'\u0691', - 'rrehfinalarabic': '\uFB8D', - 'rrvocalicbengali': '\u09E0', - 'rrvocalicdeva': '\u0960', - 'rrvocalicgujarati': '\u0AE0', - 'rrvocalicvowelsignbengali': '\u09C4', - 'rrvocalicvowelsigndeva': '\u0944', - 'rrvocalicvowelsigngujarati': '\u0AC4', - 'rsuperior': '\uF6F1', - 'rtblock': '\u2590', - 'rturned': '\u0279', - 'rturnedsuperior': '\u02B4', - 'ruhiragana': '\u308B', - 'rukatakana': '\u30EB', - 'rukatakanahalfwidth': '\uFF99', - 'rupeemarkbengali': '\u09F2', - 'rupeesignbengali': '\u09F3', - 'rupiah': '\uF6DD', - 'ruthai': '\u0E24', - 'rvocalicbengali': '\u098B', - 'rvocalicdeva': '\u090B', - 'rvocalicgujarati': '\u0A8B', - 'rvocalicvowelsignbengali': '\u09C3', - 'rvocalicvowelsigndeva': '\u0943', - 'rvocalicvowelsigngujarati': '\u0AC3', - 's': '\u0073', - 'sabengali': '\u09B8', - 'sacute': '\u015B', - 'sacutedotaccent': '\u1E65', - 'sadarabic': '\u0635', - 'sadeva': '\u0938', - 'sadfinalarabic': '\uFEBA', - 'sadinitialarabic': '\uFEBB', - 'sadmedialarabic': '\uFEBC', - 'sagujarati': '\u0AB8', - 'sagurmukhi': '\u0A38', - 'sahiragana': '\u3055', - 'sakatakana': '\u30B5', - 'sakatakanahalfwidth': '\uFF7B', - 'sallallahoualayhewasallamarabic': '\uFDFA', - 'samekh': '\u05E1', - 'samekhdagesh': '\uFB41', - 'samekhdageshhebrew': '\uFB41', - 'samekhhebrew': '\u05E1', - 'saraaathai': '\u0E32', - 'saraaethai': '\u0E41', - 'saraaimaimalaithai': '\u0E44', - 'saraaimaimuanthai': '\u0E43', - 'saraamthai': '\u0E33', - 'saraathai': '\u0E30', - 'saraethai': '\u0E40', - 'saraiileftthai': '\uF886', - 'saraiithai': '\u0E35', - 'saraileftthai': '\uF885', - 'saraithai': '\u0E34', - 'saraothai': '\u0E42', - 'saraueeleftthai': '\uF888', - 'saraueethai': '\u0E37', - 'saraueleftthai': '\uF887', - 'sarauethai': '\u0E36', - 'sarauthai': '\u0E38', - 'sarauuthai': '\u0E39', - 'sbopomofo': '\u3119', - 'scaron': '\u0161', - 'scarondotaccent': '\u1E67', - 'scedilla': '\u015F', - 'schwa': '\u0259', - 'schwacyrillic': '\u04D9', - 'schwadieresiscyrillic': '\u04DB', - 'schwahook': 
'\u025A', - 'scircle': '\u24E2', - 'scircumflex': '\u015D', - 'scommaaccent': '\u0219', - 'sdotaccent': '\u1E61', - 'sdotbelow': '\u1E63', - 'sdotbelowdotaccent': '\u1E69', - 'seagullbelowcmb': '\u033C', - 'second': '\u2033', - 'secondtonechinese': '\u02CA', - 'section': '\u00A7', - 'seenarabic': '\u0633', - 'seenfinalarabic': '\uFEB2', - 'seeninitialarabic': '\uFEB3', - 'seenmedialarabic': '\uFEB4', - 'segol': '\u05B6', - 'segol13': '\u05B6', - 'segol1f': '\u05B6', - 'segol2c': '\u05B6', - 'segolhebrew': '\u05B6', - 'segolnarrowhebrew': '\u05B6', - 'segolquarterhebrew': '\u05B6', - 'segoltahebrew': '\u0592', - 'segolwidehebrew': '\u05B6', - 'seharmenian': '\u057D', - 'sehiragana': '\u305B', - 'sekatakana': '\u30BB', - 'sekatakanahalfwidth': '\uFF7E', - 'semicolon': '\u003B', - 'semicolonarabic': '\u061B', - 'semicolonmonospace': '\uFF1B', - 'semicolonsmall': '\uFE54', - 'semivoicedmarkkana': '\u309C', - 'semivoicedmarkkanahalfwidth': '\uFF9F', - 'sentisquare': '\u3322', - 'sentosquare': '\u3323', - 'seven': '\u0037', - 'sevenarabic': '\u0667', - 'sevenbengali': '\u09ED', - 'sevencircle': '\u2466', - 'sevencircleinversesansserif': '\u2790', - 'sevendeva': '\u096D', - 'seveneighths': '\u215E', - 'sevengujarati': '\u0AED', - 'sevengurmukhi': '\u0A6D', - 'sevenhackarabic': '\u0667', - 'sevenhangzhou': '\u3027', - 'sevenideographicparen': '\u3226', - 'seveninferior': '\u2087', - 'sevenmonospace': '\uFF17', - 'sevenoldstyle': '\uF737', - 'sevenparen': '\u247A', - 'sevenperiod': '\u248E', - 'sevenpersian': '\u06F7', - 'sevenroman': '\u2176', - 'sevensuperior': '\u2077', - 'seventeencircle': '\u2470', - 'seventeenparen': '\u2484', - 'seventeenperiod': '\u2498', - 'seventhai': '\u0E57', - 'sfthyphen': '\u00AD', - 'shaarmenian': '\u0577', - 'shabengali': '\u09B6', - 'shacyrillic': '\u0448', - 'shaddaarabic': '\u0651', - 'shaddadammaarabic': '\uFC61', - 'shaddadammatanarabic': '\uFC5E', - 'shaddafathaarabic': '\uFC60', - 'shaddafathatanarabic': '\u0651\u064B', - 
'shaddakasraarabic': '\uFC62', - 'shaddakasratanarabic': '\uFC5F', - 'shade': '\u2592', - 'shadedark': '\u2593', - 'shadelight': '\u2591', - 'shademedium': '\u2592', - 'shadeva': '\u0936', - 'shagujarati': '\u0AB6', - 'shagurmukhi': '\u0A36', - 'shalshelethebrew': '\u0593', - 'shbopomofo': '\u3115', - 'shchacyrillic': '\u0449', - 'sheenarabic': '\u0634', - 'sheenfinalarabic': '\uFEB6', - 'sheeninitialarabic': '\uFEB7', - 'sheenmedialarabic': '\uFEB8', - 'sheicoptic': '\u03E3', - 'sheqel': '\u20AA', - 'sheqelhebrew': '\u20AA', - 'sheva': '\u05B0', - 'sheva115': '\u05B0', - 'sheva15': '\u05B0', - 'sheva22': '\u05B0', - 'sheva2e': '\u05B0', - 'shevahebrew': '\u05B0', - 'shevanarrowhebrew': '\u05B0', - 'shevaquarterhebrew': '\u05B0', - 'shevawidehebrew': '\u05B0', - 'shhacyrillic': '\u04BB', - 'shimacoptic': '\u03ED', - 'shin': '\u05E9', - 'shindagesh': '\uFB49', - 'shindageshhebrew': '\uFB49', - 'shindageshshindot': '\uFB2C', - 'shindageshshindothebrew': '\uFB2C', - 'shindageshsindot': '\uFB2D', - 'shindageshsindothebrew': '\uFB2D', - 'shindothebrew': '\u05C1', - 'shinhebrew': '\u05E9', - 'shinshindot': '\uFB2A', - 'shinshindothebrew': '\uFB2A', - 'shinsindot': '\uFB2B', - 'shinsindothebrew': '\uFB2B', - 'shook': '\u0282', - 'sigma': '\u03C3', - 'sigma1': '\u03C2', - 'sigmafinal': '\u03C2', - 'sigmalunatesymbolgreek': '\u03F2', - 'sihiragana': '\u3057', - 'sikatakana': '\u30B7', - 'sikatakanahalfwidth': '\uFF7C', - 'siluqhebrew': '\u05BD', - 'siluqlefthebrew': '\u05BD', - 'similar': '\u223C', - 'sindothebrew': '\u05C2', - 'siosacirclekorean': '\u3274', - 'siosaparenkorean': '\u3214', - 'sioscieuckorean': '\u317E', - 'sioscirclekorean': '\u3266', - 'sioskiyeokkorean': '\u317A', - 'sioskorean': '\u3145', - 'siosnieunkorean': '\u317B', - 'siosparenkorean': '\u3206', - 'siospieupkorean': '\u317D', - 'siostikeutkorean': '\u317C', - 'six': '\u0036', - 'sixarabic': '\u0666', - 'sixbengali': '\u09EC', - 'sixcircle': '\u2465', - 'sixcircleinversesansserif': '\u278F', - 
'sixdeva': '\u096C', - 'sixgujarati': '\u0AEC', - 'sixgurmukhi': '\u0A6C', - 'sixhackarabic': '\u0666', - 'sixhangzhou': '\u3026', - 'sixideographicparen': '\u3225', - 'sixinferior': '\u2086', - 'sixmonospace': '\uFF16', - 'sixoldstyle': '\uF736', - 'sixparen': '\u2479', - 'sixperiod': '\u248D', - 'sixpersian': '\u06F6', - 'sixroman': '\u2175', - 'sixsuperior': '\u2076', - 'sixteencircle': '\u246F', - 'sixteencurrencydenominatorbengali': '\u09F9', - 'sixteenparen': '\u2483', - 'sixteenperiod': '\u2497', - 'sixthai': '\u0E56', - 'slash': '\u002F', - 'slashmonospace': '\uFF0F', - 'slong': '\u017F', - 'slongdotaccent': '\u1E9B', - 'smileface': '\u263A', - 'smonospace': '\uFF53', - 'sofpasuqhebrew': '\u05C3', - 'softhyphen': '\u00AD', - 'softsigncyrillic': '\u044C', - 'sohiragana': '\u305D', - 'sokatakana': '\u30BD', - 'sokatakanahalfwidth': '\uFF7F', - 'soliduslongoverlaycmb': '\u0338', - 'solidusshortoverlaycmb': '\u0337', - 'sorusithai': '\u0E29', - 'sosalathai': '\u0E28', - 'sosothai': '\u0E0B', - 'sosuathai': '\u0E2A', - 'space': '\u0020', - 'spacehackarabic': '\u0020', - 'spade': '\u2660', - 'spadesuitblack': '\u2660', - 'spadesuitwhite': '\u2664', - 'sparen': '\u24AE', - 'squarebelowcmb': '\u033B', - 'squarecc': '\u33C4', - 'squarecm': '\u339D', - 'squarediagonalcrosshatchfill': '\u25A9', - 'squarehorizontalfill': '\u25A4', - 'squarekg': '\u338F', - 'squarekm': '\u339E', - 'squarekmcapital': '\u33CE', - 'squareln': '\u33D1', - 'squarelog': '\u33D2', - 'squaremg': '\u338E', - 'squaremil': '\u33D5', - 'squaremm': '\u339C', - 'squaremsquared': '\u33A1', - 'squareorthogonalcrosshatchfill': '\u25A6', - 'squareupperlefttolowerrightfill': '\u25A7', - 'squareupperrighttolowerleftfill': '\u25A8', - 'squareverticalfill': '\u25A5', - 'squarewhitewithsmallblack': '\u25A3', - 'srsquare': '\u33DB', - 'ssabengali': '\u09B7', - 'ssadeva': '\u0937', - 'ssagujarati': '\u0AB7', - 'ssangcieuckorean': '\u3149', - 'ssanghieuhkorean': '\u3185', - 'ssangieungkorean': '\u3180', - 
'ssangkiyeokkorean': '\u3132', - 'ssangnieunkorean': '\u3165', - 'ssangpieupkorean': '\u3143', - 'ssangsioskorean': '\u3146', - 'ssangtikeutkorean': '\u3138', - 'ssuperior': '\uF6F2', - 'sterling': '\u00A3', - 'sterlingmonospace': '\uFFE1', - 'strokelongoverlaycmb': '\u0336', - 'strokeshortoverlaycmb': '\u0335', - 'subset': '\u2282', - 'subsetnotequal': '\u228A', - 'subsetorequal': '\u2286', - 'succeeds': '\u227B', - 'suchthat': '\u220B', - 'suhiragana': '\u3059', - 'sukatakana': '\u30B9', - 'sukatakanahalfwidth': '\uFF7D', - 'sukunarabic': '\u0652', - 'summation': '\u2211', - 'sun': '\u263C', - 'superset': '\u2283', - 'supersetnotequal': '\u228B', - 'supersetorequal': '\u2287', - 'svsquare': '\u33DC', - 'syouwaerasquare': '\u337C', - 't': '\u0074', - 'tabengali': '\u09A4', - 'tackdown': '\u22A4', - 'tackleft': '\u22A3', - 'tadeva': '\u0924', - 'tagujarati': '\u0AA4', - 'tagurmukhi': '\u0A24', - 'taharabic': '\u0637', - 'tahfinalarabic': '\uFEC2', - 'tahinitialarabic': '\uFEC3', - 'tahiragana': '\u305F', - 'tahmedialarabic': '\uFEC4', - 'taisyouerasquare': '\u337D', - 'takatakana': '\u30BF', - 'takatakanahalfwidth': '\uFF80', - 'tatweelarabic': '\u0640', - 'tau': '\u03C4', - 'tav': '\u05EA', - 'tavdages': '\uFB4A', - 'tavdagesh': '\uFB4A', - 'tavdageshhebrew': '\uFB4A', - 'tavhebrew': '\u05EA', - 'tbar': '\u0167', - 'tbopomofo': '\u310A', - 'tcaron': '\u0165', - 'tccurl': '\u02A8', - 'tcedilla': '\u0163', - 'tcheharabic': '\u0686', - 'tchehfinalarabic': '\uFB7B', - 'tchehinitialarabic': '\uFB7C', - 'tchehmedialarabic': '\uFB7D', - 'tchehmeeminitialarabic': '\uFB7C\uFEE4', - 'tcircle': '\u24E3', - 'tcircumflexbelow': '\u1E71', - 'tcommaaccent': '\u0163', - 'tdieresis': '\u1E97', - 'tdotaccent': '\u1E6B', - 'tdotbelow': '\u1E6D', - 'tecyrillic': '\u0442', - 'tedescendercyrillic': '\u04AD', - 'teharabic': '\u062A', - 'tehfinalarabic': '\uFE96', - 'tehhahinitialarabic': '\uFCA2', - 'tehhahisolatedarabic': '\uFC0C', - 'tehinitialarabic': '\uFE97', - 'tehiragana': 
'\u3066', - 'tehjeeminitialarabic': '\uFCA1', - 'tehjeemisolatedarabic': '\uFC0B', - 'tehmarbutaarabic': '\u0629', - 'tehmarbutafinalarabic': '\uFE94', - 'tehmedialarabic': '\uFE98', - 'tehmeeminitialarabic': '\uFCA4', - 'tehmeemisolatedarabic': '\uFC0E', - 'tehnoonfinalarabic': '\uFC73', - 'tekatakana': '\u30C6', - 'tekatakanahalfwidth': '\uFF83', - 'telephone': '\u2121', - 'telephoneblack': '\u260E', - 'telishagedolahebrew': '\u05A0', - 'telishaqetanahebrew': '\u05A9', - 'tencircle': '\u2469', - 'tenideographicparen': '\u3229', - 'tenparen': '\u247D', - 'tenperiod': '\u2491', - 'tenroman': '\u2179', - 'tesh': '\u02A7', - 'tet': '\u05D8', - 'tetdagesh': '\uFB38', - 'tetdageshhebrew': '\uFB38', - 'tethebrew': '\u05D8', - 'tetsecyrillic': '\u04B5', - 'tevirhebrew': '\u059B', - 'tevirlefthebrew': '\u059B', - 'thabengali': '\u09A5', - 'thadeva': '\u0925', - 'thagujarati': '\u0AA5', - 'thagurmukhi': '\u0A25', - 'thalarabic': '\u0630', - 'thalfinalarabic': '\uFEAC', - 'thanthakhatlowleftthai': '\uF898', - 'thanthakhatlowrightthai': '\uF897', - 'thanthakhatthai': '\u0E4C', - 'thanthakhatupperleftthai': '\uF896', - 'theharabic': '\u062B', - 'thehfinalarabic': '\uFE9A', - 'thehinitialarabic': '\uFE9B', - 'thehmedialarabic': '\uFE9C', - 'thereexists': '\u2203', - 'therefore': '\u2234', - 'theta': '\u03B8', - 'theta1': '\u03D1', - 'thetasymbolgreek': '\u03D1', - 'thieuthacirclekorean': '\u3279', - 'thieuthaparenkorean': '\u3219', - 'thieuthcirclekorean': '\u326B', - 'thieuthkorean': '\u314C', - 'thieuthparenkorean': '\u320B', - 'thirteencircle': '\u246C', - 'thirteenparen': '\u2480', - 'thirteenperiod': '\u2494', - 'thonangmonthothai': '\u0E11', - 'thook': '\u01AD', - 'thophuthaothai': '\u0E12', - 'thorn': '\u00FE', - 'thothahanthai': '\u0E17', - 'thothanthai': '\u0E10', - 'thothongthai': '\u0E18', - 'thothungthai': '\u0E16', - 'thousandcyrillic': '\u0482', - 'thousandsseparatorarabic': '\u066C', - 'thousandsseparatorpersian': '\u066C', - 'three': '\u0033', - 'threearabic': 
'\u0663', - 'threebengali': '\u09E9', - 'threecircle': '\u2462', - 'threecircleinversesansserif': '\u278C', - 'threedeva': '\u0969', - 'threeeighths': '\u215C', - 'threegujarati': '\u0AE9', - 'threegurmukhi': '\u0A69', - 'threehackarabic': '\u0663', - 'threehangzhou': '\u3023', - 'threeideographicparen': '\u3222', - 'threeinferior': '\u2083', - 'threemonospace': '\uFF13', - 'threenumeratorbengali': '\u09F6', - 'threeoldstyle': '\uF733', - 'threeparen': '\u2476', - 'threeperiod': '\u248A', - 'threepersian': '\u06F3', - 'threequarters': '\u00BE', - 'threequartersemdash': '\uF6DE', - 'threeroman': '\u2172', - 'threesuperior': '\u00B3', - 'threethai': '\u0E53', - 'thzsquare': '\u3394', - 'tihiragana': '\u3061', - 'tikatakana': '\u30C1', - 'tikatakanahalfwidth': '\uFF81', - 'tikeutacirclekorean': '\u3270', - 'tikeutaparenkorean': '\u3210', - 'tikeutcirclekorean': '\u3262', - 'tikeutkorean': '\u3137', - 'tikeutparenkorean': '\u3202', - 'tilde': '\u02DC', - 'tildebelowcmb': '\u0330', - 'tildecmb': '\u0303', - 'tildecomb': '\u0303', - 'tildedoublecmb': '\u0360', - 'tildeoperator': '\u223C', - 'tildeoverlaycmb': '\u0334', - 'tildeverticalcmb': '\u033E', - 'timescircle': '\u2297', - 'tipehahebrew': '\u0596', - 'tipehalefthebrew': '\u0596', - 'tippigurmukhi': '\u0A70', - 'titlocyrilliccmb': '\u0483', - 'tiwnarmenian': '\u057F', - 'tlinebelow': '\u1E6F', - 'tmonospace': '\uFF54', - 'toarmenian': '\u0569', - 'tohiragana': '\u3068', - 'tokatakana': '\u30C8', - 'tokatakanahalfwidth': '\uFF84', - 'tonebarextrahighmod': '\u02E5', - 'tonebarextralowmod': '\u02E9', - 'tonebarhighmod': '\u02E6', - 'tonebarlowmod': '\u02E8', - 'tonebarmidmod': '\u02E7', - 'tonefive': '\u01BD', - 'tonesix': '\u0185', - 'tonetwo': '\u01A8', - 'tonos': '\u0384', - 'tonsquare': '\u3327', - 'topatakthai': '\u0E0F', - 'tortoiseshellbracketleft': '\u3014', - 'tortoiseshellbracketleftsmall': '\uFE5D', - 'tortoiseshellbracketleftvertical': '\uFE39', - 'tortoiseshellbracketright': '\u3015', - 
'tortoiseshellbracketrightsmall': '\uFE5E', - 'tortoiseshellbracketrightvertical': '\uFE3A', - 'totaothai': '\u0E15', - 'tpalatalhook': '\u01AB', - 'tparen': '\u24AF', - 'trademark': '\u2122', - 'trademarksans': '\uF8EA', - 'trademarkserif': '\uF6DB', - 'tretroflexhook': '\u0288', - 'triagdn': '\u25BC', - 'triaglf': '\u25C4', - 'triagrt': '\u25BA', - 'triagup': '\u25B2', - 'ts': '\u02A6', - 'tsadi': '\u05E6', - 'tsadidagesh': '\uFB46', - 'tsadidageshhebrew': '\uFB46', - 'tsadihebrew': '\u05E6', - 'tsecyrillic': '\u0446', - 'tsere': '\u05B5', - 'tsere12': '\u05B5', - 'tsere1e': '\u05B5', - 'tsere2b': '\u05B5', - 'tserehebrew': '\u05B5', - 'tserenarrowhebrew': '\u05B5', - 'tserequarterhebrew': '\u05B5', - 'tserewidehebrew': '\u05B5', - 'tshecyrillic': '\u045B', - 'tsuperior': '\uF6F3', - 'ttabengali': '\u099F', - 'ttadeva': '\u091F', - 'ttagujarati': '\u0A9F', - 'ttagurmukhi': '\u0A1F', - 'tteharabic': '\u0679', - 'ttehfinalarabic': '\uFB67', - 'ttehinitialarabic': '\uFB68', - 'ttehmedialarabic': '\uFB69', - 'tthabengali': '\u09A0', - 'tthadeva': '\u0920', - 'tthagujarati': '\u0AA0', - 'tthagurmukhi': '\u0A20', - 'tturned': '\u0287', - 'tuhiragana': '\u3064', - 'tukatakana': '\u30C4', - 'tukatakanahalfwidth': '\uFF82', - 'tusmallhiragana': '\u3063', - 'tusmallkatakana': '\u30C3', - 'tusmallkatakanahalfwidth': '\uFF6F', - 'twelvecircle': '\u246B', - 'twelveparen': '\u247F', - 'twelveperiod': '\u2493', - 'twelveroman': '\u217B', - 'twentycircle': '\u2473', - 'twentyhangzhou': '\u5344', - 'twentyparen': '\u2487', - 'twentyperiod': '\u249B', - 'two': '\u0032', - 'twoarabic': '\u0662', - 'twobengali': '\u09E8', - 'twocircle': '\u2461', - 'twocircleinversesansserif': '\u278B', - 'twodeva': '\u0968', - 'twodotenleader': '\u2025', - 'twodotleader': '\u2025', - 'twodotleadervertical': '\uFE30', - 'twogujarati': '\u0AE8', - 'twogurmukhi': '\u0A68', - 'twohackarabic': '\u0662', - 'twohangzhou': '\u3022', - 'twoideographicparen': '\u3221', - 'twoinferior': '\u2082', - 
'twomonospace': '\uFF12', - 'twonumeratorbengali': '\u09F5', - 'twooldstyle': '\uF732', - 'twoparen': '\u2475', - 'twoperiod': '\u2489', - 'twopersian': '\u06F2', - 'tworoman': '\u2171', - 'twostroke': '\u01BB', - 'twosuperior': '\u00B2', - 'twothai': '\u0E52', - 'twothirds': '\u2154', - 'u': '\u0075', - 'uacute': '\u00FA', - 'ubar': '\u0289', - 'ubengali': '\u0989', - 'ubopomofo': '\u3128', - 'ubreve': '\u016D', - 'ucaron': '\u01D4', - 'ucircle': '\u24E4', - 'ucircumflex': '\u00FB', - 'ucircumflexbelow': '\u1E77', - 'ucyrillic': '\u0443', - 'udattadeva': '\u0951', - 'udblacute': '\u0171', - 'udblgrave': '\u0215', - 'udeva': '\u0909', - 'udieresis': '\u00FC', - 'udieresisacute': '\u01D8', - 'udieresisbelow': '\u1E73', - 'udieresiscaron': '\u01DA', - 'udieresiscyrillic': '\u04F1', - 'udieresisgrave': '\u01DC', - 'udieresismacron': '\u01D6', - 'udotbelow': '\u1EE5', - 'ugrave': '\u00F9', - 'ugujarati': '\u0A89', - 'ugurmukhi': '\u0A09', - 'uhiragana': '\u3046', - 'uhookabove': '\u1EE7', - 'uhorn': '\u01B0', - 'uhornacute': '\u1EE9', - 'uhorndotbelow': '\u1EF1', - 'uhorngrave': '\u1EEB', - 'uhornhookabove': '\u1EED', - 'uhorntilde': '\u1EEF', - 'uhungarumlaut': '\u0171', - 'uhungarumlautcyrillic': '\u04F3', - 'uinvertedbreve': '\u0217', - 'ukatakana': '\u30A6', - 'ukatakanahalfwidth': '\uFF73', - 'ukcyrillic': '\u0479', - 'ukorean': '\u315C', - 'umacron': '\u016B', - 'umacroncyrillic': '\u04EF', - 'umacrondieresis': '\u1E7B', - 'umatragurmukhi': '\u0A41', - 'umonospace': '\uFF55', - 'underscore': '\u005F', - 'underscoredbl': '\u2017', - 'underscoremonospace': '\uFF3F', - 'underscorevertical': '\uFE33', - 'underscorewavy': '\uFE4F', - 'union': '\u222A', - 'universal': '\u2200', - 'uogonek': '\u0173', - 'uparen': '\u24B0', - 'upblock': '\u2580', - 'upperdothebrew': '\u05C4', - 'upsilon': '\u03C5', - 'upsilondieresis': '\u03CB', - 'upsilondieresistonos': '\u03B0', - 'upsilonlatin': '\u028A', - 'upsilontonos': '\u03CD', - 'uptackbelowcmb': '\u031D', - 'uptackmod': 
'\u02D4', - 'uragurmukhi': '\u0A73', - 'uring': '\u016F', - 'ushortcyrillic': '\u045E', - 'usmallhiragana': '\u3045', - 'usmallkatakana': '\u30A5', - 'usmallkatakanahalfwidth': '\uFF69', - 'ustraightcyrillic': '\u04AF', - 'ustraightstrokecyrillic': '\u04B1', - 'utilde': '\u0169', - 'utildeacute': '\u1E79', - 'utildebelow': '\u1E75', - 'uubengali': '\u098A', - 'uudeva': '\u090A', - 'uugujarati': '\u0A8A', - 'uugurmukhi': '\u0A0A', - 'uumatragurmukhi': '\u0A42', - 'uuvowelsignbengali': '\u09C2', - 'uuvowelsigndeva': '\u0942', - 'uuvowelsigngujarati': '\u0AC2', - 'uvowelsignbengali': '\u09C1', - 'uvowelsigndeva': '\u0941', - 'uvowelsigngujarati': '\u0AC1', - 'v': '\u0076', - 'vadeva': '\u0935', - 'vagujarati': '\u0AB5', - 'vagurmukhi': '\u0A35', - 'vakatakana': '\u30F7', - 'vav': '\u05D5', - 'vavdagesh': '\uFB35', - 'vavdagesh65': '\uFB35', - 'vavdageshhebrew': '\uFB35', - 'vavhebrew': '\u05D5', - 'vavholam': '\uFB4B', - 'vavholamhebrew': '\uFB4B', - 'vavvavhebrew': '\u05F0', - 'vavyodhebrew': '\u05F1', - 'vcircle': '\u24E5', - 'vdotbelow': '\u1E7F', - 'vecyrillic': '\u0432', - 'veharabic': '\u06A4', - 'vehfinalarabic': '\uFB6B', - 'vehinitialarabic': '\uFB6C', - 'vehmedialarabic': '\uFB6D', - 'vekatakana': '\u30F9', - 'venus': '\u2640', - 'verticalbar': '\u007C', - 'verticallineabovecmb': '\u030D', - 'verticallinebelowcmb': '\u0329', - 'verticallinelowmod': '\u02CC', - 'verticallinemod': '\u02C8', - 'vewarmenian': '\u057E', - 'vhook': '\u028B', - 'vikatakana': '\u30F8', - 'viramabengali': '\u09CD', - 'viramadeva': '\u094D', - 'viramagujarati': '\u0ACD', - 'visargabengali': '\u0983', - 'visargadeva': '\u0903', - 'visargagujarati': '\u0A83', - 'vmonospace': '\uFF56', - 'voarmenian': '\u0578', - 'voicediterationhiragana': '\u309E', - 'voicediterationkatakana': '\u30FE', - 'voicedmarkkana': '\u309B', - 'voicedmarkkanahalfwidth': '\uFF9E', - 'vokatakana': '\u30FA', - 'vparen': '\u24B1', - 'vtilde': '\u1E7D', - 'vturned': '\u028C', - 'vuhiragana': '\u3094', - 'vukatakana': 
'\u30F4', - 'w': '\u0077', - 'wacute': '\u1E83', - 'waekorean': '\u3159', - 'wahiragana': '\u308F', - 'wakatakana': '\u30EF', - 'wakatakanahalfwidth': '\uFF9C', - 'wakorean': '\u3158', - 'wasmallhiragana': '\u308E', - 'wasmallkatakana': '\u30EE', - 'wattosquare': '\u3357', - 'wavedash': '\u301C', - 'wavyunderscorevertical': '\uFE34', - 'wawarabic': '\u0648', - 'wawfinalarabic': '\uFEEE', - 'wawhamzaabovearabic': '\u0624', - 'wawhamzaabovefinalarabic': '\uFE86', - 'wbsquare': '\u33DD', - 'wcircle': '\u24E6', - 'wcircumflex': '\u0175', - 'wdieresis': '\u1E85', - 'wdotaccent': '\u1E87', - 'wdotbelow': '\u1E89', - 'wehiragana': '\u3091', - 'weierstrass': '\u2118', - 'wekatakana': '\u30F1', - 'wekorean': '\u315E', - 'weokorean': '\u315D', - 'wgrave': '\u1E81', - 'whitebullet': '\u25E6', - 'whitecircle': '\u25CB', - 'whitecircleinverse': '\u25D9', - 'whitecornerbracketleft': '\u300E', - 'whitecornerbracketleftvertical': '\uFE43', - 'whitecornerbracketright': '\u300F', - 'whitecornerbracketrightvertical': '\uFE44', - 'whitediamond': '\u25C7', - 'whitediamondcontainingblacksmalldiamond': '\u25C8', - 'whitedownpointingsmalltriangle': '\u25BF', - 'whitedownpointingtriangle': '\u25BD', - 'whiteleftpointingsmalltriangle': '\u25C3', - 'whiteleftpointingtriangle': '\u25C1', - 'whitelenticularbracketleft': '\u3016', - 'whitelenticularbracketright': '\u3017', - 'whiterightpointingsmalltriangle': '\u25B9', - 'whiterightpointingtriangle': '\u25B7', - 'whitesmallsquare': '\u25AB', - 'whitesmilingface': '\u263A', - 'whitesquare': '\u25A1', - 'whitestar': '\u2606', - 'whitetelephone': '\u260F', - 'whitetortoiseshellbracketleft': '\u3018', - 'whitetortoiseshellbracketright': '\u3019', - 'whiteuppointingsmalltriangle': '\u25B5', - 'whiteuppointingtriangle': '\u25B3', - 'wihiragana': '\u3090', - 'wikatakana': '\u30F0', - 'wikorean': '\u315F', - 'wmonospace': '\uFF57', - 'wohiragana': '\u3092', - 'wokatakana': '\u30F2', - 'wokatakanahalfwidth': '\uFF66', - 'won': '\u20A9', - 
'wonmonospace': '\uFFE6', - 'wowaenthai': '\u0E27', - 'wparen': '\u24B2', - 'wring': '\u1E98', - 'wsuperior': '\u02B7', - 'wturned': '\u028D', - 'wynn': '\u01BF', - 'x': '\u0078', - 'xabovecmb': '\u033D', - 'xbopomofo': '\u3112', - 'xcircle': '\u24E7', - 'xdieresis': '\u1E8D', - 'xdotaccent': '\u1E8B', - 'xeharmenian': '\u056D', - 'xi': '\u03BE', - 'xmonospace': '\uFF58', - 'xparen': '\u24B3', - 'xsuperior': '\u02E3', - 'y': '\u0079', - 'yaadosquare': '\u334E', - 'yabengali': '\u09AF', - 'yacute': '\u00FD', - 'yadeva': '\u092F', - 'yaekorean': '\u3152', - 'yagujarati': '\u0AAF', - 'yagurmukhi': '\u0A2F', - 'yahiragana': '\u3084', - 'yakatakana': '\u30E4', - 'yakatakanahalfwidth': '\uFF94', - 'yakorean': '\u3151', - 'yamakkanthai': '\u0E4E', - 'yasmallhiragana': '\u3083', - 'yasmallkatakana': '\u30E3', - 'yasmallkatakanahalfwidth': '\uFF6C', - 'yatcyrillic': '\u0463', - 'ycircle': '\u24E8', - 'ycircumflex': '\u0177', - 'ydieresis': '\u00FF', - 'ydotaccent': '\u1E8F', - 'ydotbelow': '\u1EF5', - 'yeharabic': '\u064A', - 'yehbarreearabic': '\u06D2', - 'yehbarreefinalarabic': '\uFBAF', - 'yehfinalarabic': '\uFEF2', - 'yehhamzaabovearabic': '\u0626', - 'yehhamzaabovefinalarabic': '\uFE8A', - 'yehhamzaaboveinitialarabic': '\uFE8B', - 'yehhamzaabovemedialarabic': '\uFE8C', - 'yehinitialarabic': '\uFEF3', - 'yehmedialarabic': '\uFEF4', - 'yehmeeminitialarabic': '\uFCDD', - 'yehmeemisolatedarabic': '\uFC58', - 'yehnoonfinalarabic': '\uFC94', - 'yehthreedotsbelowarabic': '\u06D1', - 'yekorean': '\u3156', - 'yen': '\u00A5', - 'yenmonospace': '\uFFE5', - 'yeokorean': '\u3155', - 'yeorinhieuhkorean': '\u3186', - 'yerahbenyomohebrew': '\u05AA', - 'yerahbenyomolefthebrew': '\u05AA', - 'yericyrillic': '\u044B', - 'yerudieresiscyrillic': '\u04F9', - 'yesieungkorean': '\u3181', - 'yesieungpansioskorean': '\u3183', - 'yesieungsioskorean': '\u3182', - 'yetivhebrew': '\u059A', - 'ygrave': '\u1EF3', - 'yhook': '\u01B4', - 'yhookabove': '\u1EF7', - 'yiarmenian': '\u0575', - 'yicyrillic': 
'\u0457', - 'yikorean': '\u3162', - 'yinyang': '\u262F', - 'yiwnarmenian': '\u0582', - 'ymonospace': '\uFF59', - 'yod': '\u05D9', - 'yoddagesh': '\uFB39', - 'yoddageshhebrew': '\uFB39', - 'yodhebrew': '\u05D9', - 'yodyodhebrew': '\u05F2', - 'yodyodpatahhebrew': '\uFB1F', - 'yohiragana': '\u3088', - 'yoikorean': '\u3189', - 'yokatakana': '\u30E8', - 'yokatakanahalfwidth': '\uFF96', - 'yokorean': '\u315B', - 'yosmallhiragana': '\u3087', - 'yosmallkatakana': '\u30E7', - 'yosmallkatakanahalfwidth': '\uFF6E', - 'yotgreek': '\u03F3', - 'yoyaekorean': '\u3188', - 'yoyakorean': '\u3187', - 'yoyakthai': '\u0E22', - 'yoyingthai': '\u0E0D', - 'yparen': '\u24B4', - 'ypogegrammeni': '\u037A', - 'ypogegrammenigreekcmb': '\u0345', - 'yr': '\u01A6', - 'yring': '\u1E99', - 'ysuperior': '\u02B8', - 'ytilde': '\u1EF9', - 'yturned': '\u028E', - 'yuhiragana': '\u3086', - 'yuikorean': '\u318C', - 'yukatakana': '\u30E6', - 'yukatakanahalfwidth': '\uFF95', - 'yukorean': '\u3160', - 'yusbigcyrillic': '\u046B', - 'yusbigiotifiedcyrillic': '\u046D', - 'yuslittlecyrillic': '\u0467', - 'yuslittleiotifiedcyrillic': '\u0469', - 'yusmallhiragana': '\u3085', - 'yusmallkatakana': '\u30E5', - 'yusmallkatakanahalfwidth': '\uFF6D', - 'yuyekorean': '\u318B', - 'yuyeokorean': '\u318A', - 'yyabengali': '\u09DF', - 'yyadeva': '\u095F', - 'z': '\u007A', - 'zaarmenian': '\u0566', - 'zacute': '\u017A', - 'zadeva': '\u095B', - 'zagurmukhi': '\u0A5B', - 'zaharabic': '\u0638', - 'zahfinalarabic': '\uFEC6', - 'zahinitialarabic': '\uFEC7', - 'zahiragana': '\u3056', - 'zahmedialarabic': '\uFEC8', - 'zainarabic': '\u0632', - 'zainfinalarabic': '\uFEB0', - 'zakatakana': '\u30B6', - 'zaqefgadolhebrew': '\u0595', - 'zaqefqatanhebrew': '\u0594', - 'zarqahebrew': '\u0598', - 'zayin': '\u05D6', - 'zayindagesh': '\uFB36', - 'zayindageshhebrew': '\uFB36', - 'zayinhebrew': '\u05D6', - 'zbopomofo': '\u3117', - 'zcaron': '\u017E', - 'zcircle': '\u24E9', - 'zcircumflex': '\u1E91', - 'zcurl': '\u0291', - 'zdot': '\u017C', - 
'zdotaccent': '\u017C', - 'zdotbelow': '\u1E93', - 'zecyrillic': '\u0437', - 'zedescendercyrillic': '\u0499', - 'zedieresiscyrillic': '\u04DF', - 'zehiragana': '\u305C', - 'zekatakana': '\u30BC', - 'zero': '\u0030', - 'zeroarabic': '\u0660', - 'zerobengali': '\u09E6', - 'zerodeva': '\u0966', - 'zerogujarati': '\u0AE6', - 'zerogurmukhi': '\u0A66', - 'zerohackarabic': '\u0660', - 'zeroinferior': '\u2080', - 'zeromonospace': '\uFF10', - 'zerooldstyle': '\uF730', - 'zeropersian': '\u06F0', - 'zerosuperior': '\u2070', - 'zerothai': '\u0E50', - 'zerowidthjoiner': '\uFEFF', - 'zerowidthnonjoiner': '\u200C', - 'zerowidthspace': '\u200B', - 'zeta': '\u03B6', - 'zhbopomofo': '\u3113', - 'zhearmenian': '\u056A', - 'zhebrevecyrillic': '\u04C2', - 'zhecyrillic': '\u0436', - 'zhedescendercyrillic': '\u0497', - 'zhedieresiscyrillic': '\u04DD', - 'zihiragana': '\u3058', - 'zikatakana': '\u30B8', - 'zinorhebrew': '\u05AE', - 'zlinebelow': '\u1E95', - 'zmonospace': '\uFF5A', - 'zohiragana': '\u305E', - 'zokatakana': '\u30BE', - 'zparen': '\u24B5', - 'zretroflexhook': '\u0290', - 'zstroke': '\u01B6', - 'zuhiragana': '\u305A', - 'zukatakana': '\u30BA', + "A": "\u0041", + "AE": "\u00C6", + "AEacute": "\u01FC", + "AEmacron": "\u01E2", + "AEsmall": "\uF7E6", + "Aacute": "\u00C1", + "Aacutesmall": "\uF7E1", + "Abreve": "\u0102", + "Abreveacute": "\u1EAE", + "Abrevecyrillic": "\u04D0", + "Abrevedotbelow": "\u1EB6", + "Abrevegrave": "\u1EB0", + "Abrevehookabove": "\u1EB2", + "Abrevetilde": "\u1EB4", + "Acaron": "\u01CD", + "Acircle": "\u24B6", + "Acircumflex": "\u00C2", + "Acircumflexacute": "\u1EA4", + "Acircumflexdotbelow": "\u1EAC", + "Acircumflexgrave": "\u1EA6", + "Acircumflexhookabove": "\u1EA8", + "Acircumflexsmall": "\uF7E2", + "Acircumflextilde": "\u1EAA", + "Acute": "\uF6C9", + "Acutesmall": "\uF7B4", + "Acyrillic": "\u0410", + "Adblgrave": "\u0200", + "Adieresis": "\u00C4", + "Adieresiscyrillic": "\u04D2", + "Adieresismacron": "\u01DE", + "Adieresissmall": "\uF7E4", + 
"Adotbelow": "\u1EA0", + "Adotmacron": "\u01E0", + "Agrave": "\u00C0", + "Agravesmall": "\uF7E0", + "Ahookabove": "\u1EA2", + "Aiecyrillic": "\u04D4", + "Ainvertedbreve": "\u0202", + "Alpha": "\u0391", + "Alphatonos": "\u0386", + "Amacron": "\u0100", + "Amonospace": "\uFF21", + "Aogonek": "\u0104", + "Aring": "\u00C5", + "Aringacute": "\u01FA", + "Aringbelow": "\u1E00", + "Aringsmall": "\uF7E5", + "Asmall": "\uF761", + "Atilde": "\u00C3", + "Atildesmall": "\uF7E3", + "Aybarmenian": "\u0531", + "B": "\u0042", + "Bcircle": "\u24B7", + "Bdotaccent": "\u1E02", + "Bdotbelow": "\u1E04", + "Becyrillic": "\u0411", + "Benarmenian": "\u0532", + "Beta": "\u0392", + "Bhook": "\u0181", + "Blinebelow": "\u1E06", + "Bmonospace": "\uFF22", + "Brevesmall": "\uF6F4", + "Bsmall": "\uF762", + "Btopbar": "\u0182", + "C": "\u0043", + "Caarmenian": "\u053E", + "Cacute": "\u0106", + "Caron": "\uF6CA", + "Caronsmall": "\uF6F5", + "Ccaron": "\u010C", + "Ccedilla": "\u00C7", + "Ccedillaacute": "\u1E08", + "Ccedillasmall": "\uF7E7", + "Ccircle": "\u24B8", + "Ccircumflex": "\u0108", + "Cdot": "\u010A", + "Cdotaccent": "\u010A", + "Cedillasmall": "\uF7B8", + "Chaarmenian": "\u0549", + "Cheabkhasiancyrillic": "\u04BC", + "Checyrillic": "\u0427", + "Chedescenderabkhasiancyrillic": "\u04BE", + "Chedescendercyrillic": "\u04B6", + "Chedieresiscyrillic": "\u04F4", + "Cheharmenian": "\u0543", + "Chekhakassiancyrillic": "\u04CB", + "Cheverticalstrokecyrillic": "\u04B8", + "Chi": "\u03A7", + "Chook": "\u0187", + "Circumflexsmall": "\uF6F6", + "Cmonospace": "\uFF23", + "Coarmenian": "\u0551", + "Csmall": "\uF763", + "D": "\u0044", + "DZ": "\u01F1", + "DZcaron": "\u01C4", + "Daarmenian": "\u0534", + "Dafrican": "\u0189", + "Dcaron": "\u010E", + "Dcedilla": "\u1E10", + "Dcircle": "\u24B9", + "Dcircumflexbelow": "\u1E12", + "Dcroat": "\u0110", + "Ddotaccent": "\u1E0A", + "Ddotbelow": "\u1E0C", + "Decyrillic": "\u0414", + "Deicoptic": "\u03EE", + "Delta": "\u2206", + "Deltagreek": "\u0394", + "Dhook": 
"\u018A", + "Dieresis": "\uF6CB", + "DieresisAcute": "\uF6CC", + "DieresisGrave": "\uF6CD", + "Dieresissmall": "\uF7A8", + "Digammagreek": "\u03DC", + "Djecyrillic": "\u0402", + "Dlinebelow": "\u1E0E", + "Dmonospace": "\uFF24", + "Dotaccentsmall": "\uF6F7", + "Dslash": "\u0110", + "Dsmall": "\uF764", + "Dtopbar": "\u018B", + "Dz": "\u01F2", + "Dzcaron": "\u01C5", + "Dzeabkhasiancyrillic": "\u04E0", + "Dzecyrillic": "\u0405", + "Dzhecyrillic": "\u040F", + "E": "\u0045", + "Eacute": "\u00C9", + "Eacutesmall": "\uF7E9", + "Ebreve": "\u0114", + "Ecaron": "\u011A", + "Ecedillabreve": "\u1E1C", + "Echarmenian": "\u0535", + "Ecircle": "\u24BA", + "Ecircumflex": "\u00CA", + "Ecircumflexacute": "\u1EBE", + "Ecircumflexbelow": "\u1E18", + "Ecircumflexdotbelow": "\u1EC6", + "Ecircumflexgrave": "\u1EC0", + "Ecircumflexhookabove": "\u1EC2", + "Ecircumflexsmall": "\uF7EA", + "Ecircumflextilde": "\u1EC4", + "Ecyrillic": "\u0404", + "Edblgrave": "\u0204", + "Edieresis": "\u00CB", + "Edieresissmall": "\uF7EB", + "Edot": "\u0116", + "Edotaccent": "\u0116", + "Edotbelow": "\u1EB8", + "Efcyrillic": "\u0424", + "Egrave": "\u00C8", + "Egravesmall": "\uF7E8", + "Eharmenian": "\u0537", + "Ehookabove": "\u1EBA", + "Eightroman": "\u2167", + "Einvertedbreve": "\u0206", + "Eiotifiedcyrillic": "\u0464", + "Elcyrillic": "\u041B", + "Elevenroman": "\u216A", + "Emacron": "\u0112", + "Emacronacute": "\u1E16", + "Emacrongrave": "\u1E14", + "Emcyrillic": "\u041C", + "Emonospace": "\uFF25", + "Encyrillic": "\u041D", + "Endescendercyrillic": "\u04A2", + "Eng": "\u014A", + "Enghecyrillic": "\u04A4", + "Enhookcyrillic": "\u04C7", + "Eogonek": "\u0118", + "Eopen": "\u0190", + "Epsilon": "\u0395", + "Epsilontonos": "\u0388", + "Ercyrillic": "\u0420", + "Ereversed": "\u018E", + "Ereversedcyrillic": "\u042D", + "Escyrillic": "\u0421", + "Esdescendercyrillic": "\u04AA", + "Esh": "\u01A9", + "Esmall": "\uF765", + "Eta": "\u0397", + "Etarmenian": "\u0538", + "Etatonos": "\u0389", + "Eth": "\u00D0", + 
"Ethsmall": "\uF7F0", + "Etilde": "\u1EBC", + "Etildebelow": "\u1E1A", + "Euro": "\u20AC", + "Ezh": "\u01B7", + "Ezhcaron": "\u01EE", + "Ezhreversed": "\u01B8", + "F": "\u0046", + "Fcircle": "\u24BB", + "Fdotaccent": "\u1E1E", + "Feharmenian": "\u0556", + "Feicoptic": "\u03E4", + "Fhook": "\u0191", + "Fitacyrillic": "\u0472", + "Fiveroman": "\u2164", + "Fmonospace": "\uFF26", + "Fourroman": "\u2163", + "Fsmall": "\uF766", + "G": "\u0047", + "GBsquare": "\u3387", + "Gacute": "\u01F4", + "Gamma": "\u0393", + "Gammaafrican": "\u0194", + "Gangiacoptic": "\u03EA", + "Gbreve": "\u011E", + "Gcaron": "\u01E6", + "Gcedilla": "\u0122", + "Gcircle": "\u24BC", + "Gcircumflex": "\u011C", + "Gcommaaccent": "\u0122", + "Gdot": "\u0120", + "Gdotaccent": "\u0120", + "Gecyrillic": "\u0413", + "Ghadarmenian": "\u0542", + "Ghemiddlehookcyrillic": "\u0494", + "Ghestrokecyrillic": "\u0492", + "Gheupturncyrillic": "\u0490", + "Ghook": "\u0193", + "Gimarmenian": "\u0533", + "Gjecyrillic": "\u0403", + "Gmacron": "\u1E20", + "Gmonospace": "\uFF27", + "Grave": "\uF6CE", + "Gravesmall": "\uF760", + "Gsmall": "\uF767", + "Gsmallhook": "\u029B", + "Gstroke": "\u01E4", + "H": "\u0048", + "H18533": "\u25CF", + "H18543": "\u25AA", + "H18551": "\u25AB", + "H22073": "\u25A1", + "HPsquare": "\u33CB", + "Haabkhasiancyrillic": "\u04A8", + "Hadescendercyrillic": "\u04B2", + "Hardsigncyrillic": "\u042A", + "Hbar": "\u0126", + "Hbrevebelow": "\u1E2A", + "Hcedilla": "\u1E28", + "Hcircle": "\u24BD", + "Hcircumflex": "\u0124", + "Hdieresis": "\u1E26", + "Hdotaccent": "\u1E22", + "Hdotbelow": "\u1E24", + "Hmonospace": "\uFF28", + "Hoarmenian": "\u0540", + "Horicoptic": "\u03E8", + "Hsmall": "\uF768", + "Hungarumlaut": "\uF6CF", + "Hungarumlautsmall": "\uF6F8", + "Hzsquare": "\u3390", + "I": "\u0049", + "IAcyrillic": "\u042F", + "IJ": "\u0132", + "IUcyrillic": "\u042E", + "Iacute": "\u00CD", + "Iacutesmall": "\uF7ED", + "Ibreve": "\u012C", + "Icaron": "\u01CF", + "Icircle": "\u24BE", + "Icircumflex": "\u00CE", 
+ "Icircumflexsmall": "\uF7EE", + "Icyrillic": "\u0406", + "Idblgrave": "\u0208", + "Idieresis": "\u00CF", + "Idieresisacute": "\u1E2E", + "Idieresiscyrillic": "\u04E4", + "Idieresissmall": "\uF7EF", + "Idot": "\u0130", + "Idotaccent": "\u0130", + "Idotbelow": "\u1ECA", + "Iebrevecyrillic": "\u04D6", + "Iecyrillic": "\u0415", + "Ifraktur": "\u2111", + "Igrave": "\u00CC", + "Igravesmall": "\uF7EC", + "Ihookabove": "\u1EC8", + "Iicyrillic": "\u0418", + "Iinvertedbreve": "\u020A", + "Iishortcyrillic": "\u0419", + "Imacron": "\u012A", + "Imacroncyrillic": "\u04E2", + "Imonospace": "\uFF29", + "Iniarmenian": "\u053B", + "Iocyrillic": "\u0401", + "Iogonek": "\u012E", + "Iota": "\u0399", + "Iotaafrican": "\u0196", + "Iotadieresis": "\u03AA", + "Iotatonos": "\u038A", + "Ismall": "\uF769", + "Istroke": "\u0197", + "Itilde": "\u0128", + "Itildebelow": "\u1E2C", + "Izhitsacyrillic": "\u0474", + "Izhitsadblgravecyrillic": "\u0476", + "J": "\u004A", + "Jaarmenian": "\u0541", + "Jcircle": "\u24BF", + "Jcircumflex": "\u0134", + "Jecyrillic": "\u0408", + "Jheharmenian": "\u054B", + "Jmonospace": "\uFF2A", + "Jsmall": "\uF76A", + "K": "\u004B", + "KBsquare": "\u3385", + "KKsquare": "\u33CD", + "Kabashkircyrillic": "\u04A0", + "Kacute": "\u1E30", + "Kacyrillic": "\u041A", + "Kadescendercyrillic": "\u049A", + "Kahookcyrillic": "\u04C3", + "Kappa": "\u039A", + "Kastrokecyrillic": "\u049E", + "Kaverticalstrokecyrillic": "\u049C", + "Kcaron": "\u01E8", + "Kcedilla": "\u0136", + "Kcircle": "\u24C0", + "Kcommaaccent": "\u0136", + "Kdotbelow": "\u1E32", + "Keharmenian": "\u0554", + "Kenarmenian": "\u053F", + "Khacyrillic": "\u0425", + "Kheicoptic": "\u03E6", + "Khook": "\u0198", + "Kjecyrillic": "\u040C", + "Klinebelow": "\u1E34", + "Kmonospace": "\uFF2B", + "Koppacyrillic": "\u0480", + "Koppagreek": "\u03DE", + "Ksicyrillic": "\u046E", + "Ksmall": "\uF76B", + "L": "\u004C", + "LJ": "\u01C7", + "LL": "\uF6BF", + "Lacute": "\u0139", + "Lambda": "\u039B", + "Lcaron": "\u013D", + "Lcedilla": 
"\u013B", + "Lcircle": "\u24C1", + "Lcircumflexbelow": "\u1E3C", + "Lcommaaccent": "\u013B", + "Ldot": "\u013F", + "Ldotaccent": "\u013F", + "Ldotbelow": "\u1E36", + "Ldotbelowmacron": "\u1E38", + "Liwnarmenian": "\u053C", + "Lj": "\u01C8", + "Ljecyrillic": "\u0409", + "Llinebelow": "\u1E3A", + "Lmonospace": "\uFF2C", + "Lslash": "\u0141", + "Lslashsmall": "\uF6F9", + "Lsmall": "\uF76C", + "M": "\u004D", + "MBsquare": "\u3386", + "Macron": "\uF6D0", + "Macronsmall": "\uF7AF", + "Macute": "\u1E3E", + "Mcircle": "\u24C2", + "Mdotaccent": "\u1E40", + "Mdotbelow": "\u1E42", + "Menarmenian": "\u0544", + "Mmonospace": "\uFF2D", + "Msmall": "\uF76D", + "Mturned": "\u019C", + "Mu": "\u039C", + "N": "\u004E", + "NJ": "\u01CA", + "Nacute": "\u0143", + "Ncaron": "\u0147", + "Ncedilla": "\u0145", + "Ncircle": "\u24C3", + "Ncircumflexbelow": "\u1E4A", + "Ncommaaccent": "\u0145", + "Ndotaccent": "\u1E44", + "Ndotbelow": "\u1E46", + "Nhookleft": "\u019D", + "Nineroman": "\u2168", + "Nj": "\u01CB", + "Njecyrillic": "\u040A", + "Nlinebelow": "\u1E48", + "Nmonospace": "\uFF2E", + "Nowarmenian": "\u0546", + "Nsmall": "\uF76E", + "Ntilde": "\u00D1", + "Ntildesmall": "\uF7F1", + "Nu": "\u039D", + "O": "\u004F", + "OE": "\u0152", + "OEsmall": "\uF6FA", + "Oacute": "\u00D3", + "Oacutesmall": "\uF7F3", + "Obarredcyrillic": "\u04E8", + "Obarreddieresiscyrillic": "\u04EA", + "Obreve": "\u014E", + "Ocaron": "\u01D1", + "Ocenteredtilde": "\u019F", + "Ocircle": "\u24C4", + "Ocircumflex": "\u00D4", + "Ocircumflexacute": "\u1ED0", + "Ocircumflexdotbelow": "\u1ED8", + "Ocircumflexgrave": "\u1ED2", + "Ocircumflexhookabove": "\u1ED4", + "Ocircumflexsmall": "\uF7F4", + "Ocircumflextilde": "\u1ED6", + "Ocyrillic": "\u041E", + "Odblacute": "\u0150", + "Odblgrave": "\u020C", + "Odieresis": "\u00D6", + "Odieresiscyrillic": "\u04E6", + "Odieresissmall": "\uF7F6", + "Odotbelow": "\u1ECC", + "Ogoneksmall": "\uF6FB", + "Ograve": "\u00D2", + "Ogravesmall": "\uF7F2", + "Oharmenian": "\u0555", + "Ohm": 
"\u2126", + "Ohookabove": "\u1ECE", + "Ohorn": "\u01A0", + "Ohornacute": "\u1EDA", + "Ohorndotbelow": "\u1EE2", + "Ohorngrave": "\u1EDC", + "Ohornhookabove": "\u1EDE", + "Ohorntilde": "\u1EE0", + "Ohungarumlaut": "\u0150", + "Oi": "\u01A2", + "Oinvertedbreve": "\u020E", + "Omacron": "\u014C", + "Omacronacute": "\u1E52", + "Omacrongrave": "\u1E50", + "Omega": "\u2126", + "Omegacyrillic": "\u0460", + "Omegagreek": "\u03A9", + "Omegaroundcyrillic": "\u047A", + "Omegatitlocyrillic": "\u047C", + "Omegatonos": "\u038F", + "Omicron": "\u039F", + "Omicrontonos": "\u038C", + "Omonospace": "\uFF2F", + "Oneroman": "\u2160", + "Oogonek": "\u01EA", + "Oogonekmacron": "\u01EC", + "Oopen": "\u0186", + "Oslash": "\u00D8", + "Oslashacute": "\u01FE", + "Oslashsmall": "\uF7F8", + "Osmall": "\uF76F", + "Ostrokeacute": "\u01FE", + "Otcyrillic": "\u047E", + "Otilde": "\u00D5", + "Otildeacute": "\u1E4C", + "Otildedieresis": "\u1E4E", + "Otildesmall": "\uF7F5", + "P": "\u0050", + "Pacute": "\u1E54", + "Pcircle": "\u24C5", + "Pdotaccent": "\u1E56", + "Pecyrillic": "\u041F", + "Peharmenian": "\u054A", + "Pemiddlehookcyrillic": "\u04A6", + "Phi": "\u03A6", + "Phook": "\u01A4", + "Pi": "\u03A0", + "Piwrarmenian": "\u0553", + "Pmonospace": "\uFF30", + "Psi": "\u03A8", + "Psicyrillic": "\u0470", + "Psmall": "\uF770", + "Q": "\u0051", + "Qcircle": "\u24C6", + "Qmonospace": "\uFF31", + "Qsmall": "\uF771", + "R": "\u0052", + "Raarmenian": "\u054C", + "Racute": "\u0154", + "Rcaron": "\u0158", + "Rcedilla": "\u0156", + "Rcircle": "\u24C7", + "Rcommaaccent": "\u0156", + "Rdblgrave": "\u0210", + "Rdotaccent": "\u1E58", + "Rdotbelow": "\u1E5A", + "Rdotbelowmacron": "\u1E5C", + "Reharmenian": "\u0550", + "Rfraktur": "\u211C", + "Rho": "\u03A1", + "Ringsmall": "\uF6FC", + "Rinvertedbreve": "\u0212", + "Rlinebelow": "\u1E5E", + "Rmonospace": "\uFF32", + "Rsmall": "\uF772", + "Rsmallinverted": "\u0281", + "Rsmallinvertedsuperior": "\u02B6", + "S": "\u0053", + "SF010000": "\u250C", + "SF020000": "\u2514", + 
"SF030000": "\u2510", + "SF040000": "\u2518", + "SF050000": "\u253C", + "SF060000": "\u252C", + "SF070000": "\u2534", + "SF080000": "\u251C", + "SF090000": "\u2524", + "SF100000": "\u2500", + "SF110000": "\u2502", + "SF190000": "\u2561", + "SF200000": "\u2562", + "SF210000": "\u2556", + "SF220000": "\u2555", + "SF230000": "\u2563", + "SF240000": "\u2551", + "SF250000": "\u2557", + "SF260000": "\u255D", + "SF270000": "\u255C", + "SF280000": "\u255B", + "SF360000": "\u255E", + "SF370000": "\u255F", + "SF380000": "\u255A", + "SF390000": "\u2554", + "SF400000": "\u2569", + "SF410000": "\u2566", + "SF420000": "\u2560", + "SF430000": "\u2550", + "SF440000": "\u256C", + "SF450000": "\u2567", + "SF460000": "\u2568", + "SF470000": "\u2564", + "SF480000": "\u2565", + "SF490000": "\u2559", + "SF500000": "\u2558", + "SF510000": "\u2552", + "SF520000": "\u2553", + "SF530000": "\u256B", + "SF540000": "\u256A", + "Sacute": "\u015A", + "Sacutedotaccent": "\u1E64", + "Sampigreek": "\u03E0", + "Scaron": "\u0160", + "Scarondotaccent": "\u1E66", + "Scaronsmall": "\uF6FD", + "Scedilla": "\u015E", + "Schwa": "\u018F", + "Schwacyrillic": "\u04D8", + "Schwadieresiscyrillic": "\u04DA", + "Scircle": "\u24C8", + "Scircumflex": "\u015C", + "Scommaaccent": "\u0218", + "Sdotaccent": "\u1E60", + "Sdotbelow": "\u1E62", + "Sdotbelowdotaccent": "\u1E68", + "Seharmenian": "\u054D", + "Sevenroman": "\u2166", + "Shaarmenian": "\u0547", + "Shacyrillic": "\u0428", + "Shchacyrillic": "\u0429", + "Sheicoptic": "\u03E2", + "Shhacyrillic": "\u04BA", + "Shimacoptic": "\u03EC", + "Sigma": "\u03A3", + "Sixroman": "\u2165", + "Smonospace": "\uFF33", + "Softsigncyrillic": "\u042C", + "Ssmall": "\uF773", + "Stigmagreek": "\u03DA", + "T": "\u0054", + "Tau": "\u03A4", + "Tbar": "\u0166", + "Tcaron": "\u0164", + "Tcedilla": "\u0162", + "Tcircle": "\u24C9", + "Tcircumflexbelow": "\u1E70", + "Tcommaaccent": "\u0162", + "Tdotaccent": "\u1E6A", + "Tdotbelow": "\u1E6C", + "Tecyrillic": "\u0422", + "Tedescendercyrillic": 
"\u04AC", + "Tenroman": "\u2169", + "Tetsecyrillic": "\u04B4", + "Theta": "\u0398", + "Thook": "\u01AC", + "Thorn": "\u00DE", + "Thornsmall": "\uF7FE", + "Threeroman": "\u2162", + "Tildesmall": "\uF6FE", + "Tiwnarmenian": "\u054F", + "Tlinebelow": "\u1E6E", + "Tmonospace": "\uFF34", + "Toarmenian": "\u0539", + "Tonefive": "\u01BC", + "Tonesix": "\u0184", + "Tonetwo": "\u01A7", + "Tretroflexhook": "\u01AE", + "Tsecyrillic": "\u0426", + "Tshecyrillic": "\u040B", + "Tsmall": "\uF774", + "Twelveroman": "\u216B", + "Tworoman": "\u2161", + "U": "\u0055", + "Uacute": "\u00DA", + "Uacutesmall": "\uF7FA", + "Ubreve": "\u016C", + "Ucaron": "\u01D3", + "Ucircle": "\u24CA", + "Ucircumflex": "\u00DB", + "Ucircumflexbelow": "\u1E76", + "Ucircumflexsmall": "\uF7FB", + "Ucyrillic": "\u0423", + "Udblacute": "\u0170", + "Udblgrave": "\u0214", + "Udieresis": "\u00DC", + "Udieresisacute": "\u01D7", + "Udieresisbelow": "\u1E72", + "Udieresiscaron": "\u01D9", + "Udieresiscyrillic": "\u04F0", + "Udieresisgrave": "\u01DB", + "Udieresismacron": "\u01D5", + "Udieresissmall": "\uF7FC", + "Udotbelow": "\u1EE4", + "Ugrave": "\u00D9", + "Ugravesmall": "\uF7F9", + "Uhookabove": "\u1EE6", + "Uhorn": "\u01AF", + "Uhornacute": "\u1EE8", + "Uhorndotbelow": "\u1EF0", + "Uhorngrave": "\u1EEA", + "Uhornhookabove": "\u1EEC", + "Uhorntilde": "\u1EEE", + "Uhungarumlaut": "\u0170", + "Uhungarumlautcyrillic": "\u04F2", + "Uinvertedbreve": "\u0216", + "Ukcyrillic": "\u0478", + "Umacron": "\u016A", + "Umacroncyrillic": "\u04EE", + "Umacrondieresis": "\u1E7A", + "Umonospace": "\uFF35", + "Uogonek": "\u0172", + "Upsilon": "\u03A5", + "Upsilon1": "\u03D2", + "Upsilonacutehooksymbolgreek": "\u03D3", + "Upsilonafrican": "\u01B1", + "Upsilondieresis": "\u03AB", + "Upsilondieresishooksymbolgreek": "\u03D4", + "Upsilonhooksymbol": "\u03D2", + "Upsilontonos": "\u038E", + "Uring": "\u016E", + "Ushortcyrillic": "\u040E", + "Usmall": "\uF775", + "Ustraightcyrillic": "\u04AE", + "Ustraightstrokecyrillic": "\u04B0", + 
"Utilde": "\u0168", + "Utildeacute": "\u1E78", + "Utildebelow": "\u1E74", + "V": "\u0056", + "Vcircle": "\u24CB", + "Vdotbelow": "\u1E7E", + "Vecyrillic": "\u0412", + "Vewarmenian": "\u054E", + "Vhook": "\u01B2", + "Vmonospace": "\uFF36", + "Voarmenian": "\u0548", + "Vsmall": "\uF776", + "Vtilde": "\u1E7C", + "W": "\u0057", + "Wacute": "\u1E82", + "Wcircle": "\u24CC", + "Wcircumflex": "\u0174", + "Wdieresis": "\u1E84", + "Wdotaccent": "\u1E86", + "Wdotbelow": "\u1E88", + "Wgrave": "\u1E80", + "Wmonospace": "\uFF37", + "Wsmall": "\uF777", + "X": "\u0058", + "Xcircle": "\u24CD", + "Xdieresis": "\u1E8C", + "Xdotaccent": "\u1E8A", + "Xeharmenian": "\u053D", + "Xi": "\u039E", + "Xmonospace": "\uFF38", + "Xsmall": "\uF778", + "Y": "\u0059", + "Yacute": "\u00DD", + "Yacutesmall": "\uF7FD", + "Yatcyrillic": "\u0462", + "Ycircle": "\u24CE", + "Ycircumflex": "\u0176", + "Ydieresis": "\u0178", + "Ydieresissmall": "\uF7FF", + "Ydotaccent": "\u1E8E", + "Ydotbelow": "\u1EF4", + "Yericyrillic": "\u042B", + "Yerudieresiscyrillic": "\u04F8", + "Ygrave": "\u1EF2", + "Yhook": "\u01B3", + "Yhookabove": "\u1EF6", + "Yiarmenian": "\u0545", + "Yicyrillic": "\u0407", + "Yiwnarmenian": "\u0552", + "Ymonospace": "\uFF39", + "Ysmall": "\uF779", + "Ytilde": "\u1EF8", + "Yusbigcyrillic": "\u046A", + "Yusbigiotifiedcyrillic": "\u046C", + "Yuslittlecyrillic": "\u0466", + "Yuslittleiotifiedcyrillic": "\u0468", + "Z": "\u005A", + "Zaarmenian": "\u0536", + "Zacute": "\u0179", + "Zcaron": "\u017D", + "Zcaronsmall": "\uF6FF", + "Zcircle": "\u24CF", + "Zcircumflex": "\u1E90", + "Zdot": "\u017B", + "Zdotaccent": "\u017B", + "Zdotbelow": "\u1E92", + "Zecyrillic": "\u0417", + "Zedescendercyrillic": "\u0498", + "Zedieresiscyrillic": "\u04DE", + "Zeta": "\u0396", + "Zhearmenian": "\u053A", + "Zhebrevecyrillic": "\u04C1", + "Zhecyrillic": "\u0416", + "Zhedescendercyrillic": "\u0496", + "Zhedieresiscyrillic": "\u04DC", + "Zlinebelow": "\u1E94", + "Zmonospace": "\uFF3A", + "Zsmall": "\uF77A", + "Zstroke": 
"\u01B5", + "a": "\u0061", + "aabengali": "\u0986", + "aacute": "\u00E1", + "aadeva": "\u0906", + "aagujarati": "\u0A86", + "aagurmukhi": "\u0A06", + "aamatragurmukhi": "\u0A3E", + "aarusquare": "\u3303", + "aavowelsignbengali": "\u09BE", + "aavowelsigndeva": "\u093E", + "aavowelsigngujarati": "\u0ABE", + "abbreviationmarkarmenian": "\u055F", + "abbreviationsigndeva": "\u0970", + "abengali": "\u0985", + "abopomofo": "\u311A", + "abreve": "\u0103", + "abreveacute": "\u1EAF", + "abrevecyrillic": "\u04D1", + "abrevedotbelow": "\u1EB7", + "abrevegrave": "\u1EB1", + "abrevehookabove": "\u1EB3", + "abrevetilde": "\u1EB5", + "acaron": "\u01CE", + "acircle": "\u24D0", + "acircumflex": "\u00E2", + "acircumflexacute": "\u1EA5", + "acircumflexdotbelow": "\u1EAD", + "acircumflexgrave": "\u1EA7", + "acircumflexhookabove": "\u1EA9", + "acircumflextilde": "\u1EAB", + "acute": "\u00B4", + "acutebelowcmb": "\u0317", + "acutecmb": "\u0301", + "acutecomb": "\u0301", + "acutedeva": "\u0954", + "acutelowmod": "\u02CF", + "acutetonecmb": "\u0341", + "acyrillic": "\u0430", + "adblgrave": "\u0201", + "addakgurmukhi": "\u0A71", + "adeva": "\u0905", + "adieresis": "\u00E4", + "adieresiscyrillic": "\u04D3", + "adieresismacron": "\u01DF", + "adotbelow": "\u1EA1", + "adotmacron": "\u01E1", + "ae": "\u00E6", + "aeacute": "\u01FD", + "aekorean": "\u3150", + "aemacron": "\u01E3", + "afii00208": "\u2015", + "afii08941": "\u20A4", + "afii10017": "\u0410", + "afii10018": "\u0411", + "afii10019": "\u0412", + "afii10020": "\u0413", + "afii10021": "\u0414", + "afii10022": "\u0415", + "afii10023": "\u0401", + "afii10024": "\u0416", + "afii10025": "\u0417", + "afii10026": "\u0418", + "afii10027": "\u0419", + "afii10028": "\u041A", + "afii10029": "\u041B", + "afii10030": "\u041C", + "afii10031": "\u041D", + "afii10032": "\u041E", + "afii10033": "\u041F", + "afii10034": "\u0420", + "afii10035": "\u0421", + "afii10036": "\u0422", + "afii10037": "\u0423", + "afii10038": "\u0424", + "afii10039": "\u0425", + 
"afii10040": "\u0426", + "afii10041": "\u0427", + "afii10042": "\u0428", + "afii10043": "\u0429", + "afii10044": "\u042A", + "afii10045": "\u042B", + "afii10046": "\u042C", + "afii10047": "\u042D", + "afii10048": "\u042E", + "afii10049": "\u042F", + "afii10050": "\u0490", + "afii10051": "\u0402", + "afii10052": "\u0403", + "afii10053": "\u0404", + "afii10054": "\u0405", + "afii10055": "\u0406", + "afii10056": "\u0407", + "afii10057": "\u0408", + "afii10058": "\u0409", + "afii10059": "\u040A", + "afii10060": "\u040B", + "afii10061": "\u040C", + "afii10062": "\u040E", + "afii10063": "\uF6C4", + "afii10064": "\uF6C5", + "afii10065": "\u0430", + "afii10066": "\u0431", + "afii10067": "\u0432", + "afii10068": "\u0433", + "afii10069": "\u0434", + "afii10070": "\u0435", + "afii10071": "\u0451", + "afii10072": "\u0436", + "afii10073": "\u0437", + "afii10074": "\u0438", + "afii10075": "\u0439", + "afii10076": "\u043A", + "afii10077": "\u043B", + "afii10078": "\u043C", + "afii10079": "\u043D", + "afii10080": "\u043E", + "afii10081": "\u043F", + "afii10082": "\u0440", + "afii10083": "\u0441", + "afii10084": "\u0442", + "afii10085": "\u0443", + "afii10086": "\u0444", + "afii10087": "\u0445", + "afii10088": "\u0446", + "afii10089": "\u0447", + "afii10090": "\u0448", + "afii10091": "\u0449", + "afii10092": "\u044A", + "afii10093": "\u044B", + "afii10094": "\u044C", + "afii10095": "\u044D", + "afii10096": "\u044E", + "afii10097": "\u044F", + "afii10098": "\u0491", + "afii10099": "\u0452", + "afii10100": "\u0453", + "afii10101": "\u0454", + "afii10102": "\u0455", + "afii10103": "\u0456", + "afii10104": "\u0457", + "afii10105": "\u0458", + "afii10106": "\u0459", + "afii10107": "\u045A", + "afii10108": "\u045B", + "afii10109": "\u045C", + "afii10110": "\u045E", + "afii10145": "\u040F", + "afii10146": "\u0462", + "afii10147": "\u0472", + "afii10148": "\u0474", + "afii10192": "\uF6C6", + "afii10193": "\u045F", + "afii10194": "\u0463", + "afii10195": "\u0473", + "afii10196": "\u0475", + 
"afii10831": "\uF6C7", + "afii10832": "\uF6C8", + "afii10846": "\u04D9", + "afii299": "\u200E", + "afii300": "\u200F", + "afii301": "\u200D", + "afii57381": "\u066A", + "afii57388": "\u060C", + "afii57392": "\u0660", + "afii57393": "\u0661", + "afii57394": "\u0662", + "afii57395": "\u0663", + "afii57396": "\u0664", + "afii57397": "\u0665", + "afii57398": "\u0666", + "afii57399": "\u0667", + "afii57400": "\u0668", + "afii57401": "\u0669", + "afii57403": "\u061B", + "afii57407": "\u061F", + "afii57409": "\u0621", + "afii57410": "\u0622", + "afii57411": "\u0623", + "afii57412": "\u0624", + "afii57413": "\u0625", + "afii57414": "\u0626", + "afii57415": "\u0627", + "afii57416": "\u0628", + "afii57417": "\u0629", + "afii57418": "\u062A", + "afii57419": "\u062B", + "afii57420": "\u062C", + "afii57421": "\u062D", + "afii57422": "\u062E", + "afii57423": "\u062F", + "afii57424": "\u0630", + "afii57425": "\u0631", + "afii57426": "\u0632", + "afii57427": "\u0633", + "afii57428": "\u0634", + "afii57429": "\u0635", + "afii57430": "\u0636", + "afii57431": "\u0637", + "afii57432": "\u0638", + "afii57433": "\u0639", + "afii57434": "\u063A", + "afii57440": "\u0640", + "afii57441": "\u0641", + "afii57442": "\u0642", + "afii57443": "\u0643", + "afii57444": "\u0644", + "afii57445": "\u0645", + "afii57446": "\u0646", + "afii57448": "\u0648", + "afii57449": "\u0649", + "afii57450": "\u064A", + "afii57451": "\u064B", + "afii57452": "\u064C", + "afii57453": "\u064D", + "afii57454": "\u064E", + "afii57455": "\u064F", + "afii57456": "\u0650", + "afii57457": "\u0651", + "afii57458": "\u0652", + "afii57470": "\u0647", + "afii57505": "\u06A4", + "afii57506": "\u067E", + "afii57507": "\u0686", + "afii57508": "\u0698", + "afii57509": "\u06AF", + "afii57511": "\u0679", + "afii57512": "\u0688", + "afii57513": "\u0691", + "afii57514": "\u06BA", + "afii57519": "\u06D2", + "afii57534": "\u06D5", + "afii57636": "\u20AA", + "afii57645": "\u05BE", + "afii57658": "\u05C3", + "afii57664": "\u05D0", + 
"afii57665": "\u05D1", + "afii57666": "\u05D2", + "afii57667": "\u05D3", + "afii57668": "\u05D4", + "afii57669": "\u05D5", + "afii57670": "\u05D6", + "afii57671": "\u05D7", + "afii57672": "\u05D8", + "afii57673": "\u05D9", + "afii57674": "\u05DA", + "afii57675": "\u05DB", + "afii57676": "\u05DC", + "afii57677": "\u05DD", + "afii57678": "\u05DE", + "afii57679": "\u05DF", + "afii57680": "\u05E0", + "afii57681": "\u05E1", + "afii57682": "\u05E2", + "afii57683": "\u05E3", + "afii57684": "\u05E4", + "afii57685": "\u05E5", + "afii57686": "\u05E6", + "afii57687": "\u05E7", + "afii57688": "\u05E8", + "afii57689": "\u05E9", + "afii57690": "\u05EA", + "afii57694": "\uFB2A", + "afii57695": "\uFB2B", + "afii57700": "\uFB4B", + "afii57705": "\uFB1F", + "afii57716": "\u05F0", + "afii57717": "\u05F1", + "afii57718": "\u05F2", + "afii57723": "\uFB35", + "afii57793": "\u05B4", + "afii57794": "\u05B5", + "afii57795": "\u05B6", + "afii57796": "\u05BB", + "afii57797": "\u05B8", + "afii57798": "\u05B7", + "afii57799": "\u05B0", + "afii57800": "\u05B2", + "afii57801": "\u05B1", + "afii57802": "\u05B3", + "afii57803": "\u05C2", + "afii57804": "\u05C1", + "afii57806": "\u05B9", + "afii57807": "\u05BC", + "afii57839": "\u05BD", + "afii57841": "\u05BF", + "afii57842": "\u05C0", + "afii57929": "\u02BC", + "afii61248": "\u2105", + "afii61289": "\u2113", + "afii61352": "\u2116", + "afii61573": "\u202C", + "afii61574": "\u202D", + "afii61575": "\u202E", + "afii61664": "\u200C", + "afii63167": "\u066D", + "afii64937": "\u02BD", + "agrave": "\u00E0", + "agujarati": "\u0A85", + "agurmukhi": "\u0A05", + "ahiragana": "\u3042", + "ahookabove": "\u1EA3", + "aibengali": "\u0990", + "aibopomofo": "\u311E", + "aideva": "\u0910", + "aiecyrillic": "\u04D5", + "aigujarati": "\u0A90", + "aigurmukhi": "\u0A10", + "aimatragurmukhi": "\u0A48", + "ainarabic": "\u0639", + "ainfinalarabic": "\uFECA", + "aininitialarabic": "\uFECB", + "ainmedialarabic": "\uFECC", + "ainvertedbreve": "\u0203", + 
"aivowelsignbengali": "\u09C8", + "aivowelsigndeva": "\u0948", + "aivowelsigngujarati": "\u0AC8", + "akatakana": "\u30A2", + "akatakanahalfwidth": "\uFF71", + "akorean": "\u314F", + "alef": "\u05D0", + "alefarabic": "\u0627", + "alefdageshhebrew": "\uFB30", + "aleffinalarabic": "\uFE8E", + "alefhamzaabovearabic": "\u0623", + "alefhamzaabovefinalarabic": "\uFE84", + "alefhamzabelowarabic": "\u0625", + "alefhamzabelowfinalarabic": "\uFE88", + "alefhebrew": "\u05D0", + "aleflamedhebrew": "\uFB4F", + "alefmaddaabovearabic": "\u0622", + "alefmaddaabovefinalarabic": "\uFE82", + "alefmaksuraarabic": "\u0649", + "alefmaksurafinalarabic": "\uFEF0", + "alefmaksurainitialarabic": "\uFEF3", + "alefmaksuramedialarabic": "\uFEF4", + "alefpatahhebrew": "\uFB2E", + "alefqamatshebrew": "\uFB2F", + "aleph": "\u2135", + "allequal": "\u224C", + "alpha": "\u03B1", + "alphatonos": "\u03AC", + "amacron": "\u0101", + "amonospace": "\uFF41", + "ampersand": "\u0026", + "ampersandmonospace": "\uFF06", + "ampersandsmall": "\uF726", + "amsquare": "\u33C2", + "anbopomofo": "\u3122", + "angbopomofo": "\u3124", + "angkhankhuthai": "\u0E5A", + "angle": "\u2220", + "anglebracketleft": "\u3008", + "anglebracketleftvertical": "\uFE3F", + "anglebracketright": "\u3009", + "anglebracketrightvertical": "\uFE40", + "angleleft": "\u2329", + "angleright": "\u232A", + "angstrom": "\u212B", + "anoteleia": "\u0387", + "anudattadeva": "\u0952", + "anusvarabengali": "\u0982", + "anusvaradeva": "\u0902", + "anusvaragujarati": "\u0A82", + "aogonek": "\u0105", + "apaatosquare": "\u3300", + "aparen": "\u249C", + "apostrophearmenian": "\u055A", + "apostrophemod": "\u02BC", + "apple": "\uF8FF", + "approaches": "\u2250", + "approxequal": "\u2248", + "approxequalorimage": "\u2252", + "approximatelyequal": "\u2245", + "araeaekorean": "\u318E", + "araeakorean": "\u318D", + "arc": "\u2312", + "arighthalfring": "\u1E9A", + "aring": "\u00E5", + "aringacute": "\u01FB", + "aringbelow": "\u1E01", + "arrowboth": "\u2194", + 
"arrowdashdown": "\u21E3", + "arrowdashleft": "\u21E0", + "arrowdashright": "\u21E2", + "arrowdashup": "\u21E1", + "arrowdblboth": "\u21D4", + "arrowdbldown": "\u21D3", + "arrowdblleft": "\u21D0", + "arrowdblright": "\u21D2", + "arrowdblup": "\u21D1", + "arrowdown": "\u2193", + "arrowdownleft": "\u2199", + "arrowdownright": "\u2198", + "arrowdownwhite": "\u21E9", + "arrowheaddownmod": "\u02C5", + "arrowheadleftmod": "\u02C2", + "arrowheadrightmod": "\u02C3", + "arrowheadupmod": "\u02C4", + "arrowhorizex": "\uF8E7", + "arrowleft": "\u2190", + "arrowleftdbl": "\u21D0", + "arrowleftdblstroke": "\u21CD", + "arrowleftoverright": "\u21C6", + "arrowleftwhite": "\u21E6", + "arrowright": "\u2192", + "arrowrightdblstroke": "\u21CF", + "arrowrightheavy": "\u279E", + "arrowrightoverleft": "\u21C4", + "arrowrightwhite": "\u21E8", + "arrowtableft": "\u21E4", + "arrowtabright": "\u21E5", + "arrowup": "\u2191", + "arrowupdn": "\u2195", + "arrowupdnbse": "\u21A8", + "arrowupdownbase": "\u21A8", + "arrowupleft": "\u2196", + "arrowupleftofdown": "\u21C5", + "arrowupright": "\u2197", + "arrowupwhite": "\u21E7", + "arrowvertex": "\uF8E6", + "asciicircum": "\u005E", + "asciicircummonospace": "\uFF3E", + "asciitilde": "\u007E", + "asciitildemonospace": "\uFF5E", + "ascript": "\u0251", + "ascriptturned": "\u0252", + "asmallhiragana": "\u3041", + "asmallkatakana": "\u30A1", + "asmallkatakanahalfwidth": "\uFF67", + "asterisk": "\u002A", + "asteriskaltonearabic": "\u066D", + "asteriskarabic": "\u066D", + "asteriskmath": "\u2217", + "asteriskmonospace": "\uFF0A", + "asterisksmall": "\uFE61", + "asterism": "\u2042", + "asuperior": "\uF6E9", + "asymptoticallyequal": "\u2243", + "at": "\u0040", + "atilde": "\u00E3", + "atmonospace": "\uFF20", + "atsmall": "\uFE6B", + "aturned": "\u0250", + "aubengali": "\u0994", + "aubopomofo": "\u3120", + "audeva": "\u0914", + "augujarati": "\u0A94", + "augurmukhi": "\u0A14", + "aulengthmarkbengali": "\u09D7", + "aumatragurmukhi": "\u0A4C", + 
"auvowelsignbengali": "\u09CC", + "auvowelsigndeva": "\u094C", + "auvowelsigngujarati": "\u0ACC", + "avagrahadeva": "\u093D", + "aybarmenian": "\u0561", + "ayin": "\u05E2", + "ayinaltonehebrew": "\uFB20", + "ayinhebrew": "\u05E2", + "b": "\u0062", + "babengali": "\u09AC", + "backslash": "\u005C", + "backslashmonospace": "\uFF3C", + "badeva": "\u092C", + "bagujarati": "\u0AAC", + "bagurmukhi": "\u0A2C", + "bahiragana": "\u3070", + "bahtthai": "\u0E3F", + "bakatakana": "\u30D0", + "bar": "\u007C", + "barmonospace": "\uFF5C", + "bbopomofo": "\u3105", + "bcircle": "\u24D1", + "bdotaccent": "\u1E03", + "bdotbelow": "\u1E05", + "beamedsixteenthnotes": "\u266C", + "because": "\u2235", + "becyrillic": "\u0431", + "beharabic": "\u0628", + "behfinalarabic": "\uFE90", + "behinitialarabic": "\uFE91", + "behiragana": "\u3079", + "behmedialarabic": "\uFE92", + "behmeeminitialarabic": "\uFC9F", + "behmeemisolatedarabic": "\uFC08", + "behnoonfinalarabic": "\uFC6D", + "bekatakana": "\u30D9", + "benarmenian": "\u0562", + "bet": "\u05D1", + "beta": "\u03B2", + "betasymbolgreek": "\u03D0", + "betdagesh": "\uFB31", + "betdageshhebrew": "\uFB31", + "bethebrew": "\u05D1", + "betrafehebrew": "\uFB4C", + "bhabengali": "\u09AD", + "bhadeva": "\u092D", + "bhagujarati": "\u0AAD", + "bhagurmukhi": "\u0A2D", + "bhook": "\u0253", + "bihiragana": "\u3073", + "bikatakana": "\u30D3", + "bilabialclick": "\u0298", + "bindigurmukhi": "\u0A02", + "birusquare": "\u3331", + "blackcircle": "\u25CF", + "blackdiamond": "\u25C6", + "blackdownpointingtriangle": "\u25BC", + "blackleftpointingpointer": "\u25C4", + "blackleftpointingtriangle": "\u25C0", + "blacklenticularbracketleft": "\u3010", + "blacklenticularbracketleftvertical": "\uFE3B", + "blacklenticularbracketright": "\u3011", + "blacklenticularbracketrightvertical": "\uFE3C", + "blacklowerlefttriangle": "\u25E3", + "blacklowerrighttriangle": "\u25E2", + "blackrectangle": "\u25AC", + "blackrightpointingpointer": "\u25BA", + "blackrightpointingtriangle": 
"\u25B6", + "blacksmallsquare": "\u25AA", + "blacksmilingface": "\u263B", + "blacksquare": "\u25A0", + "blackstar": "\u2605", + "blackupperlefttriangle": "\u25E4", + "blackupperrighttriangle": "\u25E5", + "blackuppointingsmalltriangle": "\u25B4", + "blackuppointingtriangle": "\u25B2", + "blank": "\u2423", + "blinebelow": "\u1E07", + "block": "\u2588", + "bmonospace": "\uFF42", + "bobaimaithai": "\u0E1A", + "bohiragana": "\u307C", + "bokatakana": "\u30DC", + "bparen": "\u249D", + "bqsquare": "\u33C3", + "braceex": "\uF8F4", + "braceleft": "\u007B", + "braceleftbt": "\uF8F3", + "braceleftmid": "\uF8F2", + "braceleftmonospace": "\uFF5B", + "braceleftsmall": "\uFE5B", + "bracelefttp": "\uF8F1", + "braceleftvertical": "\uFE37", + "braceright": "\u007D", + "bracerightbt": "\uF8FE", + "bracerightmid": "\uF8FD", + "bracerightmonospace": "\uFF5D", + "bracerightsmall": "\uFE5C", + "bracerighttp": "\uF8FC", + "bracerightvertical": "\uFE38", + "bracketleft": "\u005B", + "bracketleftbt": "\uF8F0", + "bracketleftex": "\uF8EF", + "bracketleftmonospace": "\uFF3B", + "bracketlefttp": "\uF8EE", + "bracketright": "\u005D", + "bracketrightbt": "\uF8FB", + "bracketrightex": "\uF8FA", + "bracketrightmonospace": "\uFF3D", + "bracketrighttp": "\uF8F9", + "breve": "\u02D8", + "brevebelowcmb": "\u032E", + "brevecmb": "\u0306", + "breveinvertedbelowcmb": "\u032F", + "breveinvertedcmb": "\u0311", + "breveinverteddoublecmb": "\u0361", + "bridgebelowcmb": "\u032A", + "bridgeinvertedbelowcmb": "\u033A", + "brokenbar": "\u00A6", + "bstroke": "\u0180", + "bsuperior": "\uF6EA", + "btopbar": "\u0183", + "buhiragana": "\u3076", + "bukatakana": "\u30D6", + "bullet": "\u2022", + "bulletinverse": "\u25D8", + "bulletoperator": "\u2219", + "bullseye": "\u25CE", + "c": "\u0063", + "caarmenian": "\u056E", + "cabengali": "\u099A", + "cacute": "\u0107", + "cadeva": "\u091A", + "cagujarati": "\u0A9A", + "cagurmukhi": "\u0A1A", + "calsquare": "\u3388", + "candrabindubengali": "\u0981", + "candrabinducmb": 
"\u0310", + "candrabindudeva": "\u0901", + "candrabindugujarati": "\u0A81", + "capslock": "\u21EA", + "careof": "\u2105", + "caron": "\u02C7", + "caronbelowcmb": "\u032C", + "caroncmb": "\u030C", + "carriagereturn": "\u21B5", + "cbopomofo": "\u3118", + "ccaron": "\u010D", + "ccedilla": "\u00E7", + "ccedillaacute": "\u1E09", + "ccircle": "\u24D2", + "ccircumflex": "\u0109", + "ccurl": "\u0255", + "cdot": "\u010B", + "cdotaccent": "\u010B", + "cdsquare": "\u33C5", + "cedilla": "\u00B8", + "cedillacmb": "\u0327", + "cent": "\u00A2", + "centigrade": "\u2103", + "centinferior": "\uF6DF", + "centmonospace": "\uFFE0", + "centoldstyle": "\uF7A2", + "centsuperior": "\uF6E0", + "chaarmenian": "\u0579", + "chabengali": "\u099B", + "chadeva": "\u091B", + "chagujarati": "\u0A9B", + "chagurmukhi": "\u0A1B", + "chbopomofo": "\u3114", + "cheabkhasiancyrillic": "\u04BD", + "checkmark": "\u2713", + "checyrillic": "\u0447", + "chedescenderabkhasiancyrillic": "\u04BF", + "chedescendercyrillic": "\u04B7", + "chedieresiscyrillic": "\u04F5", + "cheharmenian": "\u0573", + "chekhakassiancyrillic": "\u04CC", + "cheverticalstrokecyrillic": "\u04B9", + "chi": "\u03C7", + "chieuchacirclekorean": "\u3277", + "chieuchaparenkorean": "\u3217", + "chieuchcirclekorean": "\u3269", + "chieuchkorean": "\u314A", + "chieuchparenkorean": "\u3209", + "chochangthai": "\u0E0A", + "chochanthai": "\u0E08", + "chochingthai": "\u0E09", + "chochoethai": "\u0E0C", + "chook": "\u0188", + "cieucacirclekorean": "\u3276", + "cieucaparenkorean": "\u3216", + "cieuccirclekorean": "\u3268", + "cieuckorean": "\u3148", + "cieucparenkorean": "\u3208", + "cieucuparenkorean": "\u321C", + "circle": "\u25CB", + "circlemultiply": "\u2297", + "circleot": "\u2299", + "circleplus": "\u2295", + "circlepostalmark": "\u3036", + "circlewithlefthalfblack": "\u25D0", + "circlewithrighthalfblack": "\u25D1", + "circumflex": "\u02C6", + "circumflexbelowcmb": "\u032D", + "circumflexcmb": "\u0302", + "clear": "\u2327", + "clickalveolar": 
"\u01C2", + "clickdental": "\u01C0", + "clicklateral": "\u01C1", + "clickretroflex": "\u01C3", + "club": "\u2663", + "clubsuitblack": "\u2663", + "clubsuitwhite": "\u2667", + "cmcubedsquare": "\u33A4", + "cmonospace": "\uFF43", + "cmsquaredsquare": "\u33A0", + "coarmenian": "\u0581", + "colon": "\u003A", + "colonmonetary": "\u20A1", + "colonmonospace": "\uFF1A", + "colonsign": "\u20A1", + "colonsmall": "\uFE55", + "colontriangularhalfmod": "\u02D1", + "colontriangularmod": "\u02D0", + "comma": "\u002C", + "commaabovecmb": "\u0313", + "commaaboverightcmb": "\u0315", + "commaaccent": "\uF6C3", + "commaarabic": "\u060C", + "commaarmenian": "\u055D", + "commainferior": "\uF6E1", + "commamonospace": "\uFF0C", + "commareversedabovecmb": "\u0314", + "commareversedmod": "\u02BD", + "commasmall": "\uFE50", + "commasuperior": "\uF6E2", + "commaturnedabovecmb": "\u0312", + "commaturnedmod": "\u02BB", + "compass": "\u263C", + "congruent": "\u2245", + "contourintegral": "\u222E", + "control": "\u2303", + "controlACK": "\u0006", + "controlBEL": "\u0007", + "controlBS": "\u0008", + "controlCAN": "\u0018", + "controlCR": "\u000D", + "controlDC1": "\u0011", + "controlDC2": "\u0012", + "controlDC3": "\u0013", + "controlDC4": "\u0014", + "controlDEL": "\u007F", + "controlDLE": "\u0010", + "controlEM": "\u0019", + "controlENQ": "\u0005", + "controlEOT": "\u0004", + "controlESC": "\u001B", + "controlETB": "\u0017", + "controlETX": "\u0003", + "controlFF": "\u000C", + "controlFS": "\u001C", + "controlGS": "\u001D", + "controlHT": "\u0009", + "controlLF": "\u000A", + "controlNAK": "\u0015", + "controlRS": "\u001E", + "controlSI": "\u000F", + "controlSO": "\u000E", + "controlSOT": "\u0002", + "controlSTX": "\u0001", + "controlSUB": "\u001A", + "controlSYN": "\u0016", + "controlUS": "\u001F", + "controlVT": "\u000B", + "copyright": "\u00A9", + "copyrightsans": "\uF8E9", + "copyrightserif": "\uF6D9", + "cornerbracketleft": "\u300C", + "cornerbracketlefthalfwidth": "\uFF62", + 
"cornerbracketleftvertical": "\uFE41", + "cornerbracketright": "\u300D", + "cornerbracketrighthalfwidth": "\uFF63", + "cornerbracketrightvertical": "\uFE42", + "corporationsquare": "\u337F", + "cosquare": "\u33C7", + "coverkgsquare": "\u33C6", + "cparen": "\u249E", + "cruzeiro": "\u20A2", + "cstretched": "\u0297", + "curlyand": "\u22CF", + "curlyor": "\u22CE", + "currency": "\u00A4", + "cyrBreve": "\uF6D1", + "cyrFlex": "\uF6D2", + "cyrbreve": "\uF6D4", + "cyrflex": "\uF6D5", + "d": "\u0064", + "daarmenian": "\u0564", + "dabengali": "\u09A6", + "dadarabic": "\u0636", + "dadeva": "\u0926", + "dadfinalarabic": "\uFEBE", + "dadinitialarabic": "\uFEBF", + "dadmedialarabic": "\uFEC0", + "dagesh": "\u05BC", + "dageshhebrew": "\u05BC", + "dagger": "\u2020", + "daggerdbl": "\u2021", + "dagujarati": "\u0AA6", + "dagurmukhi": "\u0A26", + "dahiragana": "\u3060", + "dakatakana": "\u30C0", + "dalarabic": "\u062F", + "dalet": "\u05D3", + "daletdagesh": "\uFB33", + "daletdageshhebrew": "\uFB33", + "dalethatafpatah": "\u05D3\u05B2", + "dalethatafpatahhebrew": "\u05D3\u05B2", + "dalethatafsegol": "\u05D3\u05B1", + "dalethatafsegolhebrew": "\u05D3\u05B1", + "dalethebrew": "\u05D3", + "dalethiriq": "\u05D3\u05B4", + "dalethiriqhebrew": "\u05D3\u05B4", + "daletholam": "\u05D3\u05B9", + "daletholamhebrew": "\u05D3\u05B9", + "daletpatah": "\u05D3\u05B7", + "daletpatahhebrew": "\u05D3\u05B7", + "daletqamats": "\u05D3\u05B8", + "daletqamatshebrew": "\u05D3\u05B8", + "daletqubuts": "\u05D3\u05BB", + "daletqubutshebrew": "\u05D3\u05BB", + "daletsegol": "\u05D3\u05B6", + "daletsegolhebrew": "\u05D3\u05B6", + "daletsheva": "\u05D3\u05B0", + "daletshevahebrew": "\u05D3\u05B0", + "dalettsere": "\u05D3\u05B5", + "dalettserehebrew": "\u05D3\u05B5", + "dalfinalarabic": "\uFEAA", + "dammaarabic": "\u064F", + "dammalowarabic": "\u064F", + "dammatanaltonearabic": "\u064C", + "dammatanarabic": "\u064C", + "danda": "\u0964", + "dargahebrew": "\u05A7", + "dargalefthebrew": "\u05A7", + 
"dasiapneumatacyrilliccmb": "\u0485", + "dblGrave": "\uF6D3", + "dblanglebracketleft": "\u300A", + "dblanglebracketleftvertical": "\uFE3D", + "dblanglebracketright": "\u300B", + "dblanglebracketrightvertical": "\uFE3E", + "dblarchinvertedbelowcmb": "\u032B", + "dblarrowleft": "\u21D4", + "dblarrowright": "\u21D2", + "dbldanda": "\u0965", + "dblgrave": "\uF6D6", + "dblgravecmb": "\u030F", + "dblintegral": "\u222C", + "dbllowline": "\u2017", + "dbllowlinecmb": "\u0333", + "dbloverlinecmb": "\u033F", + "dblprimemod": "\u02BA", + "dblverticalbar": "\u2016", + "dblverticallineabovecmb": "\u030E", + "dbopomofo": "\u3109", + "dbsquare": "\u33C8", + "dcaron": "\u010F", + "dcedilla": "\u1E11", + "dcircle": "\u24D3", + "dcircumflexbelow": "\u1E13", + "dcroat": "\u0111", + "ddabengali": "\u09A1", + "ddadeva": "\u0921", + "ddagujarati": "\u0AA1", + "ddagurmukhi": "\u0A21", + "ddalarabic": "\u0688", + "ddalfinalarabic": "\uFB89", + "dddhadeva": "\u095C", + "ddhabengali": "\u09A2", + "ddhadeva": "\u0922", + "ddhagujarati": "\u0AA2", + "ddhagurmukhi": "\u0A22", + "ddotaccent": "\u1E0B", + "ddotbelow": "\u1E0D", + "decimalseparatorarabic": "\u066B", + "decimalseparatorpersian": "\u066B", + "decyrillic": "\u0434", + "degree": "\u00B0", + "dehihebrew": "\u05AD", + "dehiragana": "\u3067", + "deicoptic": "\u03EF", + "dekatakana": "\u30C7", + "deleteleft": "\u232B", + "deleteright": "\u2326", + "delta": "\u03B4", + "deltaturned": "\u018D", + "denominatorminusonenumeratorbengali": "\u09F8", + "dezh": "\u02A4", + "dhabengali": "\u09A7", + "dhadeva": "\u0927", + "dhagujarati": "\u0AA7", + "dhagurmukhi": "\u0A27", + "dhook": "\u0257", + "dialytikatonos": "\u0385", + "dialytikatonoscmb": "\u0344", + "diamond": "\u2666", + "diamondsuitwhite": "\u2662", + "dieresis": "\u00A8", + "dieresisacute": "\uF6D7", + "dieresisbelowcmb": "\u0324", + "dieresiscmb": "\u0308", + "dieresisgrave": "\uF6D8", + "dieresistonos": "\u0385", + "dihiragana": "\u3062", + "dikatakana": "\u30C2", + "dittomark": 
"\u3003", + "divide": "\u00F7", + "divides": "\u2223", + "divisionslash": "\u2215", + "djecyrillic": "\u0452", + "dkshade": "\u2593", + "dlinebelow": "\u1E0F", + "dlsquare": "\u3397", + "dmacron": "\u0111", + "dmonospace": "\uFF44", + "dnblock": "\u2584", + "dochadathai": "\u0E0E", + "dodekthai": "\u0E14", + "dohiragana": "\u3069", + "dokatakana": "\u30C9", + "dollar": "\u0024", + "dollarinferior": "\uF6E3", + "dollarmonospace": "\uFF04", + "dollaroldstyle": "\uF724", + "dollarsmall": "\uFE69", + "dollarsuperior": "\uF6E4", + "dong": "\u20AB", + "dorusquare": "\u3326", + "dotaccent": "\u02D9", + "dotaccentcmb": "\u0307", + "dotbelowcmb": "\u0323", + "dotbelowcomb": "\u0323", + "dotkatakana": "\u30FB", + "dotlessi": "\u0131", + "dotlessj": "\uF6BE", + "dotlessjstrokehook": "\u0284", + "dotmath": "\u22C5", + "dottedcircle": "\u25CC", + "doubleyodpatah": "\uFB1F", + "doubleyodpatahhebrew": "\uFB1F", + "downtackbelowcmb": "\u031E", + "downtackmod": "\u02D5", + "dparen": "\u249F", + "dsuperior": "\uF6EB", + "dtail": "\u0256", + "dtopbar": "\u018C", + "duhiragana": "\u3065", + "dukatakana": "\u30C5", + "dz": "\u01F3", + "dzaltone": "\u02A3", + "dzcaron": "\u01C6", + "dzcurl": "\u02A5", + "dzeabkhasiancyrillic": "\u04E1", + "dzecyrillic": "\u0455", + "dzhecyrillic": "\u045F", + "e": "\u0065", + "eacute": "\u00E9", + "earth": "\u2641", + "ebengali": "\u098F", + "ebopomofo": "\u311C", + "ebreve": "\u0115", + "ecandradeva": "\u090D", + "ecandragujarati": "\u0A8D", + "ecandravowelsigndeva": "\u0945", + "ecandravowelsigngujarati": "\u0AC5", + "ecaron": "\u011B", + "ecedillabreve": "\u1E1D", + "echarmenian": "\u0565", + "echyiwnarmenian": "\u0587", + "ecircle": "\u24D4", + "ecircumflex": "\u00EA", + "ecircumflexacute": "\u1EBF", + "ecircumflexbelow": "\u1E19", + "ecircumflexdotbelow": "\u1EC7", + "ecircumflexgrave": "\u1EC1", + "ecircumflexhookabove": "\u1EC3", + "ecircumflextilde": "\u1EC5", + "ecyrillic": "\u0454", + "edblgrave": "\u0205", + "edeva": "\u090F", + "edieresis": 
"\u00EB", + "edot": "\u0117", + "edotaccent": "\u0117", + "edotbelow": "\u1EB9", + "eegurmukhi": "\u0A0F", + "eematragurmukhi": "\u0A47", + "efcyrillic": "\u0444", + "egrave": "\u00E8", + "egujarati": "\u0A8F", + "eharmenian": "\u0567", + "ehbopomofo": "\u311D", + "ehiragana": "\u3048", + "ehookabove": "\u1EBB", + "eibopomofo": "\u311F", + "eight": "\u0038", + "eightarabic": "\u0668", + "eightbengali": "\u09EE", + "eightcircle": "\u2467", + "eightcircleinversesansserif": "\u2791", + "eightdeva": "\u096E", + "eighteencircle": "\u2471", + "eighteenparen": "\u2485", + "eighteenperiod": "\u2499", + "eightgujarati": "\u0AEE", + "eightgurmukhi": "\u0A6E", + "eighthackarabic": "\u0668", + "eighthangzhou": "\u3028", + "eighthnotebeamed": "\u266B", + "eightideographicparen": "\u3227", + "eightinferior": "\u2088", + "eightmonospace": "\uFF18", + "eightoldstyle": "\uF738", + "eightparen": "\u247B", + "eightperiod": "\u248F", + "eightpersian": "\u06F8", + "eightroman": "\u2177", + "eightsuperior": "\u2078", + "eightthai": "\u0E58", + "einvertedbreve": "\u0207", + "eiotifiedcyrillic": "\u0465", + "ekatakana": "\u30A8", + "ekatakanahalfwidth": "\uFF74", + "ekonkargurmukhi": "\u0A74", + "ekorean": "\u3154", + "elcyrillic": "\u043B", + "element": "\u2208", + "elevencircle": "\u246A", + "elevenparen": "\u247E", + "elevenperiod": "\u2492", + "elevenroman": "\u217A", + "ellipsis": "\u2026", + "ellipsisvertical": "\u22EE", + "emacron": "\u0113", + "emacronacute": "\u1E17", + "emacrongrave": "\u1E15", + "emcyrillic": "\u043C", + "emdash": "\u2014", + "emdashvertical": "\uFE31", + "emonospace": "\uFF45", + "emphasismarkarmenian": "\u055B", + "emptyset": "\u2205", + "enbopomofo": "\u3123", + "encyrillic": "\u043D", + "endash": "\u2013", + "endashvertical": "\uFE32", + "endescendercyrillic": "\u04A3", + "eng": "\u014B", + "engbopomofo": "\u3125", + "enghecyrillic": "\u04A5", + "enhookcyrillic": "\u04C8", + "enspace": "\u2002", + "eogonek": "\u0119", + "eokorean": "\u3153", + "eopen": 
"\u025B", + "eopenclosed": "\u029A", + "eopenreversed": "\u025C", + "eopenreversedclosed": "\u025E", + "eopenreversedhook": "\u025D", + "eparen": "\u24A0", + "epsilon": "\u03B5", + "epsilontonos": "\u03AD", + "equal": "\u003D", + "equalmonospace": "\uFF1D", + "equalsmall": "\uFE66", + "equalsuperior": "\u207C", + "equivalence": "\u2261", + "erbopomofo": "\u3126", + "ercyrillic": "\u0440", + "ereversed": "\u0258", + "ereversedcyrillic": "\u044D", + "escyrillic": "\u0441", + "esdescendercyrillic": "\u04AB", + "esh": "\u0283", + "eshcurl": "\u0286", + "eshortdeva": "\u090E", + "eshortvowelsigndeva": "\u0946", + "eshreversedloop": "\u01AA", + "eshsquatreversed": "\u0285", + "esmallhiragana": "\u3047", + "esmallkatakana": "\u30A7", + "esmallkatakanahalfwidth": "\uFF6A", + "estimated": "\u212E", + "esuperior": "\uF6EC", + "eta": "\u03B7", + "etarmenian": "\u0568", + "etatonos": "\u03AE", + "eth": "\u00F0", + "etilde": "\u1EBD", + "etildebelow": "\u1E1B", + "etnahtafoukhhebrew": "\u0591", + "etnahtafoukhlefthebrew": "\u0591", + "etnahtahebrew": "\u0591", + "etnahtalefthebrew": "\u0591", + "eturned": "\u01DD", + "eukorean": "\u3161", + "euro": "\u20AC", + "evowelsignbengali": "\u09C7", + "evowelsigndeva": "\u0947", + "evowelsigngujarati": "\u0AC7", + "exclam": "\u0021", + "exclamarmenian": "\u055C", + "exclamdbl": "\u203C", + "exclamdown": "\u00A1", + "exclamdownsmall": "\uF7A1", + "exclammonospace": "\uFF01", + "exclamsmall": "\uF721", + "existential": "\u2203", + "ezh": "\u0292", + "ezhcaron": "\u01EF", + "ezhcurl": "\u0293", + "ezhreversed": "\u01B9", + "ezhtail": "\u01BA", + "f": "\u0066", + "fadeva": "\u095E", + "fagurmukhi": "\u0A5E", + "fahrenheit": "\u2109", + "fathaarabic": "\u064E", + "fathalowarabic": "\u064E", + "fathatanarabic": "\u064B", + "fbopomofo": "\u3108", + "fcircle": "\u24D5", + "fdotaccent": "\u1E1F", + "feharabic": "\u0641", + "feharmenian": "\u0586", + "fehfinalarabic": "\uFED2", + "fehinitialarabic": "\uFED3", + "fehmedialarabic": "\uFED4", + 
"feicoptic": "\u03E5", + "female": "\u2640", + "ff": "\uFB00", + "ffi": "\uFB03", + "ffl": "\uFB04", + "fi": "\uFB01", + "fifteencircle": "\u246E", + "fifteenparen": "\u2482", + "fifteenperiod": "\u2496", + "figuredash": "\u2012", + "filledbox": "\u25A0", + "filledrect": "\u25AC", + "finalkaf": "\u05DA", + "finalkafdagesh": "\uFB3A", + "finalkafdageshhebrew": "\uFB3A", + "finalkafhebrew": "\u05DA", + "finalkafqamats": "\u05DA\u05B8", + "finalkafqamatshebrew": "\u05DA\u05B8", + "finalkafsheva": "\u05DA\u05B0", + "finalkafshevahebrew": "\u05DA\u05B0", + "finalmem": "\u05DD", + "finalmemhebrew": "\u05DD", + "finalnun": "\u05DF", + "finalnunhebrew": "\u05DF", + "finalpe": "\u05E3", + "finalpehebrew": "\u05E3", + "finaltsadi": "\u05E5", + "finaltsadihebrew": "\u05E5", + "firsttonechinese": "\u02C9", + "fisheye": "\u25C9", + "fitacyrillic": "\u0473", + "five": "\u0035", + "fivearabic": "\u0665", + "fivebengali": "\u09EB", + "fivecircle": "\u2464", + "fivecircleinversesansserif": "\u278E", + "fivedeva": "\u096B", + "fiveeighths": "\u215D", + "fivegujarati": "\u0AEB", + "fivegurmukhi": "\u0A6B", + "fivehackarabic": "\u0665", + "fivehangzhou": "\u3025", + "fiveideographicparen": "\u3224", + "fiveinferior": "\u2085", + "fivemonospace": "\uFF15", + "fiveoldstyle": "\uF735", + "fiveparen": "\u2478", + "fiveperiod": "\u248C", + "fivepersian": "\u06F5", + "fiveroman": "\u2174", + "fivesuperior": "\u2075", + "fivethai": "\u0E55", + "fl": "\uFB02", + "florin": "\u0192", + "fmonospace": "\uFF46", + "fmsquare": "\u3399", + "fofanthai": "\u0E1F", + "fofathai": "\u0E1D", + "fongmanthai": "\u0E4F", + "forall": "\u2200", + "four": "\u0034", + "fourarabic": "\u0664", + "fourbengali": "\u09EA", + "fourcircle": "\u2463", + "fourcircleinversesansserif": "\u278D", + "fourdeva": "\u096A", + "fourgujarati": "\u0AEA", + "fourgurmukhi": "\u0A6A", + "fourhackarabic": "\u0664", + "fourhangzhou": "\u3024", + "fourideographicparen": "\u3223", + "fourinferior": "\u2084", + "fourmonospace": "\uFF14", 
+ "fournumeratorbengali": "\u09F7", + "fouroldstyle": "\uF734", + "fourparen": "\u2477", + "fourperiod": "\u248B", + "fourpersian": "\u06F4", + "fourroman": "\u2173", + "foursuperior": "\u2074", + "fourteencircle": "\u246D", + "fourteenparen": "\u2481", + "fourteenperiod": "\u2495", + "fourthai": "\u0E54", + "fourthtonechinese": "\u02CB", + "fparen": "\u24A1", + "fraction": "\u2044", + "franc": "\u20A3", + "g": "\u0067", + "gabengali": "\u0997", + "gacute": "\u01F5", + "gadeva": "\u0917", + "gafarabic": "\u06AF", + "gaffinalarabic": "\uFB93", + "gafinitialarabic": "\uFB94", + "gafmedialarabic": "\uFB95", + "gagujarati": "\u0A97", + "gagurmukhi": "\u0A17", + "gahiragana": "\u304C", + "gakatakana": "\u30AC", + "gamma": "\u03B3", + "gammalatinsmall": "\u0263", + "gammasuperior": "\u02E0", + "gangiacoptic": "\u03EB", + "gbopomofo": "\u310D", + "gbreve": "\u011F", + "gcaron": "\u01E7", + "gcedilla": "\u0123", + "gcircle": "\u24D6", + "gcircumflex": "\u011D", + "gcommaaccent": "\u0123", + "gdot": "\u0121", + "gdotaccent": "\u0121", + "gecyrillic": "\u0433", + "gehiragana": "\u3052", + "gekatakana": "\u30B2", + "geometricallyequal": "\u2251", + "gereshaccenthebrew": "\u059C", + "gereshhebrew": "\u05F3", + "gereshmuqdamhebrew": "\u059D", + "germandbls": "\u00DF", + "gershayimaccenthebrew": "\u059E", + "gershayimhebrew": "\u05F4", + "getamark": "\u3013", + "ghabengali": "\u0998", + "ghadarmenian": "\u0572", + "ghadeva": "\u0918", + "ghagujarati": "\u0A98", + "ghagurmukhi": "\u0A18", + "ghainarabic": "\u063A", + "ghainfinalarabic": "\uFECE", + "ghaininitialarabic": "\uFECF", + "ghainmedialarabic": "\uFED0", + "ghemiddlehookcyrillic": "\u0495", + "ghestrokecyrillic": "\u0493", + "gheupturncyrillic": "\u0491", + "ghhadeva": "\u095A", + "ghhagurmukhi": "\u0A5A", + "ghook": "\u0260", + "ghzsquare": "\u3393", + "gihiragana": "\u304E", + "gikatakana": "\u30AE", + "gimarmenian": "\u0563", + "gimel": "\u05D2", + "gimeldagesh": "\uFB32", + "gimeldageshhebrew": "\uFB32", + 
"gimelhebrew": "\u05D2", + "gjecyrillic": "\u0453", + "glottalinvertedstroke": "\u01BE", + "glottalstop": "\u0294", + "glottalstopinverted": "\u0296", + "glottalstopmod": "\u02C0", + "glottalstopreversed": "\u0295", + "glottalstopreversedmod": "\u02C1", + "glottalstopreversedsuperior": "\u02E4", + "glottalstopstroke": "\u02A1", + "glottalstopstrokereversed": "\u02A2", + "gmacron": "\u1E21", + "gmonospace": "\uFF47", + "gohiragana": "\u3054", + "gokatakana": "\u30B4", + "gparen": "\u24A2", + "gpasquare": "\u33AC", + "gradient": "\u2207", + "grave": "\u0060", + "gravebelowcmb": "\u0316", + "gravecmb": "\u0300", + "gravecomb": "\u0300", + "gravedeva": "\u0953", + "gravelowmod": "\u02CE", + "gravemonospace": "\uFF40", + "gravetonecmb": "\u0340", + "greater": "\u003E", + "greaterequal": "\u2265", + "greaterequalorless": "\u22DB", + "greatermonospace": "\uFF1E", + "greaterorequivalent": "\u2273", + "greaterorless": "\u2277", + "greateroverequal": "\u2267", + "greatersmall": "\uFE65", + "gscript": "\u0261", + "gstroke": "\u01E5", + "guhiragana": "\u3050", + "guillemotleft": "\u00AB", + "guillemotright": "\u00BB", + "guilsinglleft": "\u2039", + "guilsinglright": "\u203A", + "gukatakana": "\u30B0", + "guramusquare": "\u3318", + "gysquare": "\u33C9", + "h": "\u0068", + "haabkhasiancyrillic": "\u04A9", + "haaltonearabic": "\u06C1", + "habengali": "\u09B9", + "hadescendercyrillic": "\u04B3", + "hadeva": "\u0939", + "hagujarati": "\u0AB9", + "hagurmukhi": "\u0A39", + "haharabic": "\u062D", + "hahfinalarabic": "\uFEA2", + "hahinitialarabic": "\uFEA3", + "hahiragana": "\u306F", + "hahmedialarabic": "\uFEA4", + "haitusquare": "\u332A", + "hakatakana": "\u30CF", + "hakatakanahalfwidth": "\uFF8A", + "halantgurmukhi": "\u0A4D", + "hamzaarabic": "\u0621", + "hamzadammaarabic": "\u0621\u064F", + "hamzadammatanarabic": "\u0621\u064C", + "hamzafathaarabic": "\u0621\u064E", + "hamzafathatanarabic": "\u0621\u064B", + "hamzalowarabic": "\u0621", + "hamzalowkasraarabic": "\u0621\u0650", + 
"hamzalowkasratanarabic": "\u0621\u064D", + "hamzasukunarabic": "\u0621\u0652", + "hangulfiller": "\u3164", + "hardsigncyrillic": "\u044A", + "harpoonleftbarbup": "\u21BC", + "harpoonrightbarbup": "\u21C0", + "hasquare": "\u33CA", + "hatafpatah": "\u05B2", + "hatafpatah16": "\u05B2", + "hatafpatah23": "\u05B2", + "hatafpatah2f": "\u05B2", + "hatafpatahhebrew": "\u05B2", + "hatafpatahnarrowhebrew": "\u05B2", + "hatafpatahquarterhebrew": "\u05B2", + "hatafpatahwidehebrew": "\u05B2", + "hatafqamats": "\u05B3", + "hatafqamats1b": "\u05B3", + "hatafqamats28": "\u05B3", + "hatafqamats34": "\u05B3", + "hatafqamatshebrew": "\u05B3", + "hatafqamatsnarrowhebrew": "\u05B3", + "hatafqamatsquarterhebrew": "\u05B3", + "hatafqamatswidehebrew": "\u05B3", + "hatafsegol": "\u05B1", + "hatafsegol17": "\u05B1", + "hatafsegol24": "\u05B1", + "hatafsegol30": "\u05B1", + "hatafsegolhebrew": "\u05B1", + "hatafsegolnarrowhebrew": "\u05B1", + "hatafsegolquarterhebrew": "\u05B1", + "hatafsegolwidehebrew": "\u05B1", + "hbar": "\u0127", + "hbopomofo": "\u310F", + "hbrevebelow": "\u1E2B", + "hcedilla": "\u1E29", + "hcircle": "\u24D7", + "hcircumflex": "\u0125", + "hdieresis": "\u1E27", + "hdotaccent": "\u1E23", + "hdotbelow": "\u1E25", + "he": "\u05D4", + "heart": "\u2665", + "heartsuitblack": "\u2665", + "heartsuitwhite": "\u2661", + "hedagesh": "\uFB34", + "hedageshhebrew": "\uFB34", + "hehaltonearabic": "\u06C1", + "heharabic": "\u0647", + "hehebrew": "\u05D4", + "hehfinalaltonearabic": "\uFBA7", + "hehfinalalttwoarabic": "\uFEEA", + "hehfinalarabic": "\uFEEA", + "hehhamzaabovefinalarabic": "\uFBA5", + "hehhamzaaboveisolatedarabic": "\uFBA4", + "hehinitialaltonearabic": "\uFBA8", + "hehinitialarabic": "\uFEEB", + "hehiragana": "\u3078", + "hehmedialaltonearabic": "\uFBA9", + "hehmedialarabic": "\uFEEC", + "heiseierasquare": "\u337B", + "hekatakana": "\u30D8", + "hekatakanahalfwidth": "\uFF8D", + "hekutaarusquare": "\u3336", + "henghook": "\u0267", + "herutusquare": "\u3339", + "het": 
"\u05D7", + "hethebrew": "\u05D7", + "hhook": "\u0266", + "hhooksuperior": "\u02B1", + "hieuhacirclekorean": "\u327B", + "hieuhaparenkorean": "\u321B", + "hieuhcirclekorean": "\u326D", + "hieuhkorean": "\u314E", + "hieuhparenkorean": "\u320D", + "hihiragana": "\u3072", + "hikatakana": "\u30D2", + "hikatakanahalfwidth": "\uFF8B", + "hiriq": "\u05B4", + "hiriq14": "\u05B4", + "hiriq21": "\u05B4", + "hiriq2d": "\u05B4", + "hiriqhebrew": "\u05B4", + "hiriqnarrowhebrew": "\u05B4", + "hiriqquarterhebrew": "\u05B4", + "hiriqwidehebrew": "\u05B4", + "hlinebelow": "\u1E96", + "hmonospace": "\uFF48", + "hoarmenian": "\u0570", + "hohipthai": "\u0E2B", + "hohiragana": "\u307B", + "hokatakana": "\u30DB", + "hokatakanahalfwidth": "\uFF8E", + "holam": "\u05B9", + "holam19": "\u05B9", + "holam26": "\u05B9", + "holam32": "\u05B9", + "holamhebrew": "\u05B9", + "holamnarrowhebrew": "\u05B9", + "holamquarterhebrew": "\u05B9", + "holamwidehebrew": "\u05B9", + "honokhukthai": "\u0E2E", + "hookabovecomb": "\u0309", + "hookcmb": "\u0309", + "hookpalatalizedbelowcmb": "\u0321", + "hookretroflexbelowcmb": "\u0322", + "hoonsquare": "\u3342", + "horicoptic": "\u03E9", + "horizontalbar": "\u2015", + "horncmb": "\u031B", + "hotsprings": "\u2668", + "house": "\u2302", + "hparen": "\u24A3", + "hsuperior": "\u02B0", + "hturned": "\u0265", + "huhiragana": "\u3075", + "huiitosquare": "\u3333", + "hukatakana": "\u30D5", + "hukatakanahalfwidth": "\uFF8C", + "hungarumlaut": "\u02DD", + "hungarumlautcmb": "\u030B", + "hv": "\u0195", + "hyphen": "\u002D", + "hypheninferior": "\uF6E5", + "hyphenmonospace": "\uFF0D", + "hyphensmall": "\uFE63", + "hyphensuperior": "\uF6E6", + "hyphentwo": "\u2010", + "i": "\u0069", + "iacute": "\u00ED", + "iacyrillic": "\u044F", + "ibengali": "\u0987", + "ibopomofo": "\u3127", + "ibreve": "\u012D", + "icaron": "\u01D0", + "icircle": "\u24D8", + "icircumflex": "\u00EE", + "icyrillic": "\u0456", + "idblgrave": "\u0209", + "ideographearthcircle": "\u328F", + 
"ideographfirecircle": "\u328B", + "ideographicallianceparen": "\u323F", + "ideographiccallparen": "\u323A", + "ideographiccentrecircle": "\u32A5", + "ideographicclose": "\u3006", + "ideographiccomma": "\u3001", + "ideographiccommaleft": "\uFF64", + "ideographiccongratulationparen": "\u3237", + "ideographiccorrectcircle": "\u32A3", + "ideographicearthparen": "\u322F", + "ideographicenterpriseparen": "\u323D", + "ideographicexcellentcircle": "\u329D", + "ideographicfestivalparen": "\u3240", + "ideographicfinancialcircle": "\u3296", + "ideographicfinancialparen": "\u3236", + "ideographicfireparen": "\u322B", + "ideographichaveparen": "\u3232", + "ideographichighcircle": "\u32A4", + "ideographiciterationmark": "\u3005", + "ideographiclaborcircle": "\u3298", + "ideographiclaborparen": "\u3238", + "ideographicleftcircle": "\u32A7", + "ideographiclowcircle": "\u32A6", + "ideographicmedicinecircle": "\u32A9", + "ideographicmetalparen": "\u322E", + "ideographicmoonparen": "\u322A", + "ideographicnameparen": "\u3234", + "ideographicperiod": "\u3002", + "ideographicprintcircle": "\u329E", + "ideographicreachparen": "\u3243", + "ideographicrepresentparen": "\u3239", + "ideographicresourceparen": "\u323E", + "ideographicrightcircle": "\u32A8", + "ideographicsecretcircle": "\u3299", + "ideographicselfparen": "\u3242", + "ideographicsocietyparen": "\u3233", + "ideographicspace": "\u3000", + "ideographicspecialparen": "\u3235", + "ideographicstockparen": "\u3231", + "ideographicstudyparen": "\u323B", + "ideographicsunparen": "\u3230", + "ideographicsuperviseparen": "\u323C", + "ideographicwaterparen": "\u322C", + "ideographicwoodparen": "\u322D", + "ideographiczero": "\u3007", + "ideographmetalcircle": "\u328E", + "ideographmooncircle": "\u328A", + "ideographnamecircle": "\u3294", + "ideographsuncircle": "\u3290", + "ideographwatercircle": "\u328C", + "ideographwoodcircle": "\u328D", + "ideva": "\u0907", + "idieresis": "\u00EF", + "idieresisacute": "\u1E2F", + 
"idieresiscyrillic": "\u04E5", + "idotbelow": "\u1ECB", + "iebrevecyrillic": "\u04D7", + "iecyrillic": "\u0435", + "ieungacirclekorean": "\u3275", + "ieungaparenkorean": "\u3215", + "ieungcirclekorean": "\u3267", + "ieungkorean": "\u3147", + "ieungparenkorean": "\u3207", + "igrave": "\u00EC", + "igujarati": "\u0A87", + "igurmukhi": "\u0A07", + "ihiragana": "\u3044", + "ihookabove": "\u1EC9", + "iibengali": "\u0988", + "iicyrillic": "\u0438", + "iideva": "\u0908", + "iigujarati": "\u0A88", + "iigurmukhi": "\u0A08", + "iimatragurmukhi": "\u0A40", + "iinvertedbreve": "\u020B", + "iishortcyrillic": "\u0439", + "iivowelsignbengali": "\u09C0", + "iivowelsigndeva": "\u0940", + "iivowelsigngujarati": "\u0AC0", + "ij": "\u0133", + "ikatakana": "\u30A4", + "ikatakanahalfwidth": "\uFF72", + "ikorean": "\u3163", + "ilde": "\u02DC", + "iluyhebrew": "\u05AC", + "imacron": "\u012B", + "imacroncyrillic": "\u04E3", + "imageorapproximatelyequal": "\u2253", + "imatragurmukhi": "\u0A3F", + "imonospace": "\uFF49", + "increment": "\u2206", + "infinity": "\u221E", + "iniarmenian": "\u056B", + "integral": "\u222B", + "integralbottom": "\u2321", + "integralbt": "\u2321", + "integralex": "\uF8F5", + "integraltop": "\u2320", + "integraltp": "\u2320", + "intersection": "\u2229", + "intisquare": "\u3305", + "invbullet": "\u25D8", + "invcircle": "\u25D9", + "invsmileface": "\u263B", + "iocyrillic": "\u0451", + "iogonek": "\u012F", + "iota": "\u03B9", + "iotadieresis": "\u03CA", + "iotadieresistonos": "\u0390", + "iotalatin": "\u0269", + "iotatonos": "\u03AF", + "iparen": "\u24A4", + "irigurmukhi": "\u0A72", + "ismallhiragana": "\u3043", + "ismallkatakana": "\u30A3", + "ismallkatakanahalfwidth": "\uFF68", + "issharbengali": "\u09FA", + "istroke": "\u0268", + "isuperior": "\uF6ED", + "iterationhiragana": "\u309D", + "iterationkatakana": "\u30FD", + "itilde": "\u0129", + "itildebelow": "\u1E2D", + "iubopomofo": "\u3129", + "iucyrillic": "\u044E", + "ivowelsignbengali": "\u09BF", + 
"ivowelsigndeva": "\u093F", + "ivowelsigngujarati": "\u0ABF", + "izhitsacyrillic": "\u0475", + "izhitsadblgravecyrillic": "\u0477", + "j": "\u006A", + "jaarmenian": "\u0571", + "jabengali": "\u099C", + "jadeva": "\u091C", + "jagujarati": "\u0A9C", + "jagurmukhi": "\u0A1C", + "jbopomofo": "\u3110", + "jcaron": "\u01F0", + "jcircle": "\u24D9", + "jcircumflex": "\u0135", + "jcrossedtail": "\u029D", + "jdotlessstroke": "\u025F", + "jecyrillic": "\u0458", + "jeemarabic": "\u062C", + "jeemfinalarabic": "\uFE9E", + "jeeminitialarabic": "\uFE9F", + "jeemmedialarabic": "\uFEA0", + "jeharabic": "\u0698", + "jehfinalarabic": "\uFB8B", + "jhabengali": "\u099D", + "jhadeva": "\u091D", + "jhagujarati": "\u0A9D", + "jhagurmukhi": "\u0A1D", + "jheharmenian": "\u057B", + "jis": "\u3004", + "jmonospace": "\uFF4A", + "jparen": "\u24A5", + "jsuperior": "\u02B2", + "k": "\u006B", + "kabashkircyrillic": "\u04A1", + "kabengali": "\u0995", + "kacute": "\u1E31", + "kacyrillic": "\u043A", + "kadescendercyrillic": "\u049B", + "kadeva": "\u0915", + "kaf": "\u05DB", + "kafarabic": "\u0643", + "kafdagesh": "\uFB3B", + "kafdageshhebrew": "\uFB3B", + "kaffinalarabic": "\uFEDA", + "kafhebrew": "\u05DB", + "kafinitialarabic": "\uFEDB", + "kafmedialarabic": "\uFEDC", + "kafrafehebrew": "\uFB4D", + "kagujarati": "\u0A95", + "kagurmukhi": "\u0A15", + "kahiragana": "\u304B", + "kahookcyrillic": "\u04C4", + "kakatakana": "\u30AB", + "kakatakanahalfwidth": "\uFF76", + "kappa": "\u03BA", + "kappasymbolgreek": "\u03F0", + "kapyeounmieumkorean": "\u3171", + "kapyeounphieuphkorean": "\u3184", + "kapyeounpieupkorean": "\u3178", + "kapyeounssangpieupkorean": "\u3179", + "karoriisquare": "\u330D", + "kashidaautoarabic": "\u0640", + "kashidaautonosidebearingarabic": "\u0640", + "kasmallkatakana": "\u30F5", + "kasquare": "\u3384", + "kasraarabic": "\u0650", + "kasratanarabic": "\u064D", + "kastrokecyrillic": "\u049F", + "katahiraprolongmarkhalfwidth": "\uFF70", + "kaverticalstrokecyrillic": "\u049D", + 
"kbopomofo": "\u310E", + "kcalsquare": "\u3389", + "kcaron": "\u01E9", + "kcedilla": "\u0137", + "kcircle": "\u24DA", + "kcommaaccent": "\u0137", + "kdotbelow": "\u1E33", + "keharmenian": "\u0584", + "kehiragana": "\u3051", + "kekatakana": "\u30B1", + "kekatakanahalfwidth": "\uFF79", + "kenarmenian": "\u056F", + "kesmallkatakana": "\u30F6", + "kgreenlandic": "\u0138", + "khabengali": "\u0996", + "khacyrillic": "\u0445", + "khadeva": "\u0916", + "khagujarati": "\u0A96", + "khagurmukhi": "\u0A16", + "khaharabic": "\u062E", + "khahfinalarabic": "\uFEA6", + "khahinitialarabic": "\uFEA7", + "khahmedialarabic": "\uFEA8", + "kheicoptic": "\u03E7", + "khhadeva": "\u0959", + "khhagurmukhi": "\u0A59", + "khieukhacirclekorean": "\u3278", + "khieukhaparenkorean": "\u3218", + "khieukhcirclekorean": "\u326A", + "khieukhkorean": "\u314B", + "khieukhparenkorean": "\u320A", + "khokhaithai": "\u0E02", + "khokhonthai": "\u0E05", + "khokhuatthai": "\u0E03", + "khokhwaithai": "\u0E04", + "khomutthai": "\u0E5B", + "khook": "\u0199", + "khorakhangthai": "\u0E06", + "khzsquare": "\u3391", + "kihiragana": "\u304D", + "kikatakana": "\u30AD", + "kikatakanahalfwidth": "\uFF77", + "kiroguramusquare": "\u3315", + "kiromeetorusquare": "\u3316", + "kirosquare": "\u3314", + "kiyeokacirclekorean": "\u326E", + "kiyeokaparenkorean": "\u320E", + "kiyeokcirclekorean": "\u3260", + "kiyeokkorean": "\u3131", + "kiyeokparenkorean": "\u3200", + "kiyeoksioskorean": "\u3133", + "kjecyrillic": "\u045C", + "klinebelow": "\u1E35", + "klsquare": "\u3398", + "kmcubedsquare": "\u33A6", + "kmonospace": "\uFF4B", + "kmsquaredsquare": "\u33A2", + "kohiragana": "\u3053", + "kohmsquare": "\u33C0", + "kokaithai": "\u0E01", + "kokatakana": "\u30B3", + "kokatakanahalfwidth": "\uFF7A", + "kooposquare": "\u331E", + "koppacyrillic": "\u0481", + "koreanstandardsymbol": "\u327F", + "koroniscmb": "\u0343", + "kparen": "\u24A6", + "kpasquare": "\u33AA", + "ksicyrillic": "\u046F", + "ktsquare": "\u33CF", + "kturned": "\u029E", + 
"kuhiragana": "\u304F", + "kukatakana": "\u30AF", + "kukatakanahalfwidth": "\uFF78", + "kvsquare": "\u33B8", + "kwsquare": "\u33BE", + "l": "\u006C", + "labengali": "\u09B2", + "lacute": "\u013A", + "ladeva": "\u0932", + "lagujarati": "\u0AB2", + "lagurmukhi": "\u0A32", + "lakkhangyaothai": "\u0E45", + "lamaleffinalarabic": "\uFEFC", + "lamalefhamzaabovefinalarabic": "\uFEF8", + "lamalefhamzaaboveisolatedarabic": "\uFEF7", + "lamalefhamzabelowfinalarabic": "\uFEFA", + "lamalefhamzabelowisolatedarabic": "\uFEF9", + "lamalefisolatedarabic": "\uFEFB", + "lamalefmaddaabovefinalarabic": "\uFEF6", + "lamalefmaddaaboveisolatedarabic": "\uFEF5", + "lamarabic": "\u0644", + "lambda": "\u03BB", + "lambdastroke": "\u019B", + "lamed": "\u05DC", + "lameddagesh": "\uFB3C", + "lameddageshhebrew": "\uFB3C", + "lamedhebrew": "\u05DC", + "lamedholam": "\u05DC\u05B9", + "lamedholamdagesh": "\u05DC\u05B9\u05BC", + "lamedholamdageshhebrew": "\u05DC\u05B9\u05BC", + "lamedholamhebrew": "\u05DC\u05B9", + "lamfinalarabic": "\uFEDE", + "lamhahinitialarabic": "\uFCCA", + "laminitialarabic": "\uFEDF", + "lamjeeminitialarabic": "\uFCC9", + "lamkhahinitialarabic": "\uFCCB", + "lamlamhehisolatedarabic": "\uFDF2", + "lammedialarabic": "\uFEE0", + "lammeemhahinitialarabic": "\uFD88", + "lammeeminitialarabic": "\uFCCC", + "lammeemjeeminitialarabic": "\uFEDF\uFEE4\uFEA0", + "lammeemkhahinitialarabic": "\uFEDF\uFEE4\uFEA8", + "largecircle": "\u25EF", + "lbar": "\u019A", + "lbelt": "\u026C", + "lbopomofo": "\u310C", + "lcaron": "\u013E", + "lcedilla": "\u013C", + "lcircle": "\u24DB", + "lcircumflexbelow": "\u1E3D", + "lcommaaccent": "\u013C", + "ldot": "\u0140", + "ldotaccent": "\u0140", + "ldotbelow": "\u1E37", + "ldotbelowmacron": "\u1E39", + "leftangleabovecmb": "\u031A", + "lefttackbelowcmb": "\u0318", + "less": "\u003C", + "lessequal": "\u2264", + "lessequalorgreater": "\u22DA", + "lessmonospace": "\uFF1C", + "lessorequivalent": "\u2272", + "lessorgreater": "\u2276", + "lessoverequal": "\u2266", + 
"lesssmall": "\uFE64", + "lezh": "\u026E", + "lfblock": "\u258C", + "lhookretroflex": "\u026D", + "lira": "\u20A4", + "liwnarmenian": "\u056C", + "lj": "\u01C9", + "ljecyrillic": "\u0459", + "ll": "\uF6C0", + "lladeva": "\u0933", + "llagujarati": "\u0AB3", + "llinebelow": "\u1E3B", + "llladeva": "\u0934", + "llvocalicbengali": "\u09E1", + "llvocalicdeva": "\u0961", + "llvocalicvowelsignbengali": "\u09E3", + "llvocalicvowelsigndeva": "\u0963", + "lmiddletilde": "\u026B", + "lmonospace": "\uFF4C", + "lmsquare": "\u33D0", + "lochulathai": "\u0E2C", + "logicaland": "\u2227", + "logicalnot": "\u00AC", + "logicalnotreversed": "\u2310", + "logicalor": "\u2228", + "lolingthai": "\u0E25", + "longs": "\u017F", + "lowlinecenterline": "\uFE4E", + "lowlinecmb": "\u0332", + "lowlinedashed": "\uFE4D", + "lozenge": "\u25CA", + "lparen": "\u24A7", + "lslash": "\u0142", + "lsquare": "\u2113", + "lsuperior": "\uF6EE", + "ltshade": "\u2591", + "luthai": "\u0E26", + "lvocalicbengali": "\u098C", + "lvocalicdeva": "\u090C", + "lvocalicvowelsignbengali": "\u09E2", + "lvocalicvowelsigndeva": "\u0962", + "lxsquare": "\u33D3", + "m": "\u006D", + "mabengali": "\u09AE", + "macron": "\u00AF", + "macronbelowcmb": "\u0331", + "macroncmb": "\u0304", + "macronlowmod": "\u02CD", + "macronmonospace": "\uFFE3", + "macute": "\u1E3F", + "madeva": "\u092E", + "magujarati": "\u0AAE", + "magurmukhi": "\u0A2E", + "mahapakhhebrew": "\u05A4", + "mahapakhlefthebrew": "\u05A4", + "mahiragana": "\u307E", + "maichattawalowleftthai": "\uF895", + "maichattawalowrightthai": "\uF894", + "maichattawathai": "\u0E4B", + "maichattawaupperleftthai": "\uF893", + "maieklowleftthai": "\uF88C", + "maieklowrightthai": "\uF88B", + "maiekthai": "\u0E48", + "maiekupperleftthai": "\uF88A", + "maihanakatleftthai": "\uF884", + "maihanakatthai": "\u0E31", + "maitaikhuleftthai": "\uF889", + "maitaikhuthai": "\u0E47", + "maitholowleftthai": "\uF88F", + "maitholowrightthai": "\uF88E", + "maithothai": "\u0E49", + "maithoupperleftthai": 
"\uF88D", + "maitrilowleftthai": "\uF892", + "maitrilowrightthai": "\uF891", + "maitrithai": "\u0E4A", + "maitriupperleftthai": "\uF890", + "maiyamokthai": "\u0E46", + "makatakana": "\u30DE", + "makatakanahalfwidth": "\uFF8F", + "male": "\u2642", + "mansyonsquare": "\u3347", + "maqafhebrew": "\u05BE", + "mars": "\u2642", + "masoracirclehebrew": "\u05AF", + "masquare": "\u3383", + "mbopomofo": "\u3107", + "mbsquare": "\u33D4", + "mcircle": "\u24DC", + "mcubedsquare": "\u33A5", + "mdotaccent": "\u1E41", + "mdotbelow": "\u1E43", + "meemarabic": "\u0645", + "meemfinalarabic": "\uFEE2", + "meeminitialarabic": "\uFEE3", + "meemmedialarabic": "\uFEE4", + "meemmeeminitialarabic": "\uFCD1", + "meemmeemisolatedarabic": "\uFC48", + "meetorusquare": "\u334D", + "mehiragana": "\u3081", + "meizierasquare": "\u337E", + "mekatakana": "\u30E1", + "mekatakanahalfwidth": "\uFF92", + "mem": "\u05DE", + "memdagesh": "\uFB3E", + "memdageshhebrew": "\uFB3E", + "memhebrew": "\u05DE", + "menarmenian": "\u0574", + "merkhahebrew": "\u05A5", + "merkhakefulahebrew": "\u05A6", + "merkhakefulalefthebrew": "\u05A6", + "merkhalefthebrew": "\u05A5", + "mhook": "\u0271", + "mhzsquare": "\u3392", + "middledotkatakanahalfwidth": "\uFF65", + "middot": "\u00B7", + "mieumacirclekorean": "\u3272", + "mieumaparenkorean": "\u3212", + "mieumcirclekorean": "\u3264", + "mieumkorean": "\u3141", + "mieumpansioskorean": "\u3170", + "mieumparenkorean": "\u3204", + "mieumpieupkorean": "\u316E", + "mieumsioskorean": "\u316F", + "mihiragana": "\u307F", + "mikatakana": "\u30DF", + "mikatakanahalfwidth": "\uFF90", + "minus": "\u2212", + "minusbelowcmb": "\u0320", + "minuscircle": "\u2296", + "minusmod": "\u02D7", + "minusplus": "\u2213", + "minute": "\u2032", + "miribaarusquare": "\u334A", + "mirisquare": "\u3349", + "mlonglegturned": "\u0270", + "mlsquare": "\u3396", + "mmcubedsquare": "\u33A3", + "mmonospace": "\uFF4D", + "mmsquaredsquare": "\u339F", + "mohiragana": "\u3082", + "mohmsquare": "\u33C1", + "mokatakana": 
"\u30E2", + "mokatakanahalfwidth": "\uFF93", + "molsquare": "\u33D6", + "momathai": "\u0E21", + "moverssquare": "\u33A7", + "moverssquaredsquare": "\u33A8", + "mparen": "\u24A8", + "mpasquare": "\u33AB", + "mssquare": "\u33B3", + "msuperior": "\uF6EF", + "mturned": "\u026F", + "mu": "\u00B5", + "mu1": "\u00B5", + "muasquare": "\u3382", + "muchgreater": "\u226B", + "muchless": "\u226A", + "mufsquare": "\u338C", + "mugreek": "\u03BC", + "mugsquare": "\u338D", + "muhiragana": "\u3080", + "mukatakana": "\u30E0", + "mukatakanahalfwidth": "\uFF91", + "mulsquare": "\u3395", + "multiply": "\u00D7", + "mumsquare": "\u339B", + "munahhebrew": "\u05A3", + "munahlefthebrew": "\u05A3", + "musicalnote": "\u266A", + "musicalnotedbl": "\u266B", + "musicflatsign": "\u266D", + "musicsharpsign": "\u266F", + "mussquare": "\u33B2", + "muvsquare": "\u33B6", + "muwsquare": "\u33BC", + "mvmegasquare": "\u33B9", + "mvsquare": "\u33B7", + "mwmegasquare": "\u33BF", + "mwsquare": "\u33BD", + "n": "\u006E", + "nabengali": "\u09A8", + "nabla": "\u2207", + "nacute": "\u0144", + "nadeva": "\u0928", + "nagujarati": "\u0AA8", + "nagurmukhi": "\u0A28", + "nahiragana": "\u306A", + "nakatakana": "\u30CA", + "nakatakanahalfwidth": "\uFF85", + "napostrophe": "\u0149", + "nasquare": "\u3381", + "nbopomofo": "\u310B", + "nbspace": "\u00A0", + "ncaron": "\u0148", + "ncedilla": "\u0146", + "ncircle": "\u24DD", + "ncircumflexbelow": "\u1E4B", + "ncommaaccent": "\u0146", + "ndotaccent": "\u1E45", + "ndotbelow": "\u1E47", + "nehiragana": "\u306D", + "nekatakana": "\u30CD", + "nekatakanahalfwidth": "\uFF88", + "newsheqelsign": "\u20AA", + "nfsquare": "\u338B", + "ngabengali": "\u0999", + "ngadeva": "\u0919", + "ngagujarati": "\u0A99", + "ngagurmukhi": "\u0A19", + "ngonguthai": "\u0E07", + "nhiragana": "\u3093", + "nhookleft": "\u0272", + "nhookretroflex": "\u0273", + "nieunacirclekorean": "\u326F", + "nieunaparenkorean": "\u320F", + "nieuncieuckorean": "\u3135", + "nieuncirclekorean": "\u3261", + 
"nieunhieuhkorean": "\u3136", + "nieunkorean": "\u3134", + "nieunpansioskorean": "\u3168", + "nieunparenkorean": "\u3201", + "nieunsioskorean": "\u3167", + "nieuntikeutkorean": "\u3166", + "nihiragana": "\u306B", + "nikatakana": "\u30CB", + "nikatakanahalfwidth": "\uFF86", + "nikhahitleftthai": "\uF899", + "nikhahitthai": "\u0E4D", + "nine": "\u0039", + "ninearabic": "\u0669", + "ninebengali": "\u09EF", + "ninecircle": "\u2468", + "ninecircleinversesansserif": "\u2792", + "ninedeva": "\u096F", + "ninegujarati": "\u0AEF", + "ninegurmukhi": "\u0A6F", + "ninehackarabic": "\u0669", + "ninehangzhou": "\u3029", + "nineideographicparen": "\u3228", + "nineinferior": "\u2089", + "ninemonospace": "\uFF19", + "nineoldstyle": "\uF739", + "nineparen": "\u247C", + "nineperiod": "\u2490", + "ninepersian": "\u06F9", + "nineroman": "\u2178", + "ninesuperior": "\u2079", + "nineteencircle": "\u2472", + "nineteenparen": "\u2486", + "nineteenperiod": "\u249A", + "ninethai": "\u0E59", + "nj": "\u01CC", + "njecyrillic": "\u045A", + "nkatakana": "\u30F3", + "nkatakanahalfwidth": "\uFF9D", + "nlegrightlong": "\u019E", + "nlinebelow": "\u1E49", + "nmonospace": "\uFF4E", + "nmsquare": "\u339A", + "nnabengali": "\u09A3", + "nnadeva": "\u0923", + "nnagujarati": "\u0AA3", + "nnagurmukhi": "\u0A23", + "nnnadeva": "\u0929", + "nohiragana": "\u306E", + "nokatakana": "\u30CE", + "nokatakanahalfwidth": "\uFF89", + "nonbreakingspace": "\u00A0", + "nonenthai": "\u0E13", + "nonuthai": "\u0E19", + "noonarabic": "\u0646", + "noonfinalarabic": "\uFEE6", + "noonghunnaarabic": "\u06BA", + "noonghunnafinalarabic": "\uFB9F", + "noonhehinitialarabic": "\uFEE7\uFEEC", + "nooninitialarabic": "\uFEE7", + "noonjeeminitialarabic": "\uFCD2", + "noonjeemisolatedarabic": "\uFC4B", + "noonmedialarabic": "\uFEE8", + "noonmeeminitialarabic": "\uFCD5", + "noonmeemisolatedarabic": "\uFC4E", + "noonnoonfinalarabic": "\uFC8D", + "notcontains": "\u220C", + "notelement": "\u2209", + "notelementof": "\u2209", + "notequal": 
"\u2260", + "notgreater": "\u226F", + "notgreaternorequal": "\u2271", + "notgreaternorless": "\u2279", + "notidentical": "\u2262", + "notless": "\u226E", + "notlessnorequal": "\u2270", + "notparallel": "\u2226", + "notprecedes": "\u2280", + "notsubset": "\u2284", + "notsucceeds": "\u2281", + "notsuperset": "\u2285", + "nowarmenian": "\u0576", + "nparen": "\u24A9", + "nssquare": "\u33B1", + "nsuperior": "\u207F", + "ntilde": "\u00F1", + "nu": "\u03BD", + "nuhiragana": "\u306C", + "nukatakana": "\u30CC", + "nukatakanahalfwidth": "\uFF87", + "nuktabengali": "\u09BC", + "nuktadeva": "\u093C", + "nuktagujarati": "\u0ABC", + "nuktagurmukhi": "\u0A3C", + "numbersign": "\u0023", + "numbersignmonospace": "\uFF03", + "numbersignsmall": "\uFE5F", + "numeralsigngreek": "\u0374", + "numeralsignlowergreek": "\u0375", + "numero": "\u2116", + "nun": "\u05E0", + "nundagesh": "\uFB40", + "nundageshhebrew": "\uFB40", + "nunhebrew": "\u05E0", + "nvsquare": "\u33B5", + "nwsquare": "\u33BB", + "nyabengali": "\u099E", + "nyadeva": "\u091E", + "nyagujarati": "\u0A9E", + "nyagurmukhi": "\u0A1E", + "o": "\u006F", + "oacute": "\u00F3", + "oangthai": "\u0E2D", + "obarred": "\u0275", + "obarredcyrillic": "\u04E9", + "obarreddieresiscyrillic": "\u04EB", + "obengali": "\u0993", + "obopomofo": "\u311B", + "obreve": "\u014F", + "ocandradeva": "\u0911", + "ocandragujarati": "\u0A91", + "ocandravowelsigndeva": "\u0949", + "ocandravowelsigngujarati": "\u0AC9", + "ocaron": "\u01D2", + "ocircle": "\u24DE", + "ocircumflex": "\u00F4", + "ocircumflexacute": "\u1ED1", + "ocircumflexdotbelow": "\u1ED9", + "ocircumflexgrave": "\u1ED3", + "ocircumflexhookabove": "\u1ED5", + "ocircumflextilde": "\u1ED7", + "ocyrillic": "\u043E", + "odblacute": "\u0151", + "odblgrave": "\u020D", + "odeva": "\u0913", + "odieresis": "\u00F6", + "odieresiscyrillic": "\u04E7", + "odotbelow": "\u1ECD", + "oe": "\u0153", + "oekorean": "\u315A", + "ogonek": "\u02DB", + "ogonekcmb": "\u0328", + "ograve": "\u00F2", + "ogujarati": 
"\u0A93", + "oharmenian": "\u0585", + "ohiragana": "\u304A", + "ohookabove": "\u1ECF", + "ohorn": "\u01A1", + "ohornacute": "\u1EDB", + "ohorndotbelow": "\u1EE3", + "ohorngrave": "\u1EDD", + "ohornhookabove": "\u1EDF", + "ohorntilde": "\u1EE1", + "ohungarumlaut": "\u0151", + "oi": "\u01A3", + "oinvertedbreve": "\u020F", + "okatakana": "\u30AA", + "okatakanahalfwidth": "\uFF75", + "okorean": "\u3157", + "olehebrew": "\u05AB", + "omacron": "\u014D", + "omacronacute": "\u1E53", + "omacrongrave": "\u1E51", + "omdeva": "\u0950", + "omega": "\u03C9", + "omega1": "\u03D6", + "omegacyrillic": "\u0461", + "omegalatinclosed": "\u0277", + "omegaroundcyrillic": "\u047B", + "omegatitlocyrillic": "\u047D", + "omegatonos": "\u03CE", + "omgujarati": "\u0AD0", + "omicron": "\u03BF", + "omicrontonos": "\u03CC", + "omonospace": "\uFF4F", + "one": "\u0031", + "onearabic": "\u0661", + "onebengali": "\u09E7", + "onecircle": "\u2460", + "onecircleinversesansserif": "\u278A", + "onedeva": "\u0967", + "onedotenleader": "\u2024", + "oneeighth": "\u215B", + "onefitted": "\uF6DC", + "onegujarati": "\u0AE7", + "onegurmukhi": "\u0A67", + "onehackarabic": "\u0661", + "onehalf": "\u00BD", + "onehangzhou": "\u3021", + "oneideographicparen": "\u3220", + "oneinferior": "\u2081", + "onemonospace": "\uFF11", + "onenumeratorbengali": "\u09F4", + "oneoldstyle": "\uF731", + "oneparen": "\u2474", + "oneperiod": "\u2488", + "onepersian": "\u06F1", + "onequarter": "\u00BC", + "oneroman": "\u2170", + "onesuperior": "\u00B9", + "onethai": "\u0E51", + "onethird": "\u2153", + "oogonek": "\u01EB", + "oogonekmacron": "\u01ED", + "oogurmukhi": "\u0A13", + "oomatragurmukhi": "\u0A4B", + "oopen": "\u0254", + "oparen": "\u24AA", + "openbullet": "\u25E6", + "option": "\u2325", + "ordfeminine": "\u00AA", + "ordmasculine": "\u00BA", + "orthogonal": "\u221F", + "oshortdeva": "\u0912", + "oshortvowelsigndeva": "\u094A", + "oslash": "\u00F8", + "oslashacute": "\u01FF", + "osmallhiragana": "\u3049", + "osmallkatakana": 
"\u30A9", + "osmallkatakanahalfwidth": "\uFF6B", + "ostrokeacute": "\u01FF", + "osuperior": "\uF6F0", + "otcyrillic": "\u047F", + "otilde": "\u00F5", + "otildeacute": "\u1E4D", + "otildedieresis": "\u1E4F", + "oubopomofo": "\u3121", + "overline": "\u203E", + "overlinecenterline": "\uFE4A", + "overlinecmb": "\u0305", + "overlinedashed": "\uFE49", + "overlinedblwavy": "\uFE4C", + "overlinewavy": "\uFE4B", + "overscore": "\u00AF", + "ovowelsignbengali": "\u09CB", + "ovowelsigndeva": "\u094B", + "ovowelsigngujarati": "\u0ACB", + "p": "\u0070", + "paampssquare": "\u3380", + "paasentosquare": "\u332B", + "pabengali": "\u09AA", + "pacute": "\u1E55", + "padeva": "\u092A", + "pagedown": "\u21DF", + "pageup": "\u21DE", + "pagujarati": "\u0AAA", + "pagurmukhi": "\u0A2A", + "pahiragana": "\u3071", + "paiyannoithai": "\u0E2F", + "pakatakana": "\u30D1", + "palatalizationcyrilliccmb": "\u0484", + "palochkacyrillic": "\u04C0", + "pansioskorean": "\u317F", + "paragraph": "\u00B6", + "parallel": "\u2225", + "parenleft": "\u0028", + "parenleftaltonearabic": "\uFD3E", + "parenleftbt": "\uF8ED", + "parenleftex": "\uF8EC", + "parenleftinferior": "\u208D", + "parenleftmonospace": "\uFF08", + "parenleftsmall": "\uFE59", + "parenleftsuperior": "\u207D", + "parenlefttp": "\uF8EB", + "parenleftvertical": "\uFE35", + "parenright": "\u0029", + "parenrightaltonearabic": "\uFD3F", + "parenrightbt": "\uF8F8", + "parenrightex": "\uF8F7", + "parenrightinferior": "\u208E", + "parenrightmonospace": "\uFF09", + "parenrightsmall": "\uFE5A", + "parenrightsuperior": "\u207E", + "parenrighttp": "\uF8F6", + "parenrightvertical": "\uFE36", + "partialdiff": "\u2202", + "paseqhebrew": "\u05C0", + "pashtahebrew": "\u0599", + "pasquare": "\u33A9", + "patah": "\u05B7", + "patah11": "\u05B7", + "patah1d": "\u05B7", + "patah2a": "\u05B7", + "patahhebrew": "\u05B7", + "patahnarrowhebrew": "\u05B7", + "patahquarterhebrew": "\u05B7", + "patahwidehebrew": "\u05B7", + "pazerhebrew": "\u05A1", + "pbopomofo": "\u3106", + 
"pcircle": "\u24DF", + "pdotaccent": "\u1E57", + "pe": "\u05E4", + "pecyrillic": "\u043F", + "pedagesh": "\uFB44", + "pedageshhebrew": "\uFB44", + "peezisquare": "\u333B", + "pefinaldageshhebrew": "\uFB43", + "peharabic": "\u067E", + "peharmenian": "\u057A", + "pehebrew": "\u05E4", + "pehfinalarabic": "\uFB57", + "pehinitialarabic": "\uFB58", + "pehiragana": "\u307A", + "pehmedialarabic": "\uFB59", + "pekatakana": "\u30DA", + "pemiddlehookcyrillic": "\u04A7", + "perafehebrew": "\uFB4E", + "percent": "\u0025", + "percentarabic": "\u066A", + "percentmonospace": "\uFF05", + "percentsmall": "\uFE6A", + "period": "\u002E", + "periodarmenian": "\u0589", + "periodcentered": "\u00B7", + "periodhalfwidth": "\uFF61", + "periodinferior": "\uF6E7", + "periodmonospace": "\uFF0E", + "periodsmall": "\uFE52", + "periodsuperior": "\uF6E8", + "perispomenigreekcmb": "\u0342", + "perpendicular": "\u22A5", + "perthousand": "\u2030", + "peseta": "\u20A7", + "pfsquare": "\u338A", + "phabengali": "\u09AB", + "phadeva": "\u092B", + "phagujarati": "\u0AAB", + "phagurmukhi": "\u0A2B", + "phi": "\u03C6", + "phi1": "\u03D5", + "phieuphacirclekorean": "\u327A", + "phieuphaparenkorean": "\u321A", + "phieuphcirclekorean": "\u326C", + "phieuphkorean": "\u314D", + "phieuphparenkorean": "\u320C", + "philatin": "\u0278", + "phinthuthai": "\u0E3A", + "phisymbolgreek": "\u03D5", + "phook": "\u01A5", + "phophanthai": "\u0E1E", + "phophungthai": "\u0E1C", + "phosamphaothai": "\u0E20", + "pi": "\u03C0", + "pieupacirclekorean": "\u3273", + "pieupaparenkorean": "\u3213", + "pieupcieuckorean": "\u3176", + "pieupcirclekorean": "\u3265", + "pieupkiyeokkorean": "\u3172", + "pieupkorean": "\u3142", + "pieupparenkorean": "\u3205", + "pieupsioskiyeokkorean": "\u3174", + "pieupsioskorean": "\u3144", + "pieupsiostikeutkorean": "\u3175", + "pieupthieuthkorean": "\u3177", + "pieuptikeutkorean": "\u3173", + "pihiragana": "\u3074", + "pikatakana": "\u30D4", + "pisymbolgreek": "\u03D6", + "piwrarmenian": "\u0583", + 
"plus": "\u002B", + "plusbelowcmb": "\u031F", + "pluscircle": "\u2295", + "plusminus": "\u00B1", + "plusmod": "\u02D6", + "plusmonospace": "\uFF0B", + "plussmall": "\uFE62", + "plussuperior": "\u207A", + "pmonospace": "\uFF50", + "pmsquare": "\u33D8", + "pohiragana": "\u307D", + "pointingindexdownwhite": "\u261F", + "pointingindexleftwhite": "\u261C", + "pointingindexrightwhite": "\u261E", + "pointingindexupwhite": "\u261D", + "pokatakana": "\u30DD", + "poplathai": "\u0E1B", + "postalmark": "\u3012", + "postalmarkface": "\u3020", + "pparen": "\u24AB", + "precedes": "\u227A", + "prescription": "\u211E", + "primemod": "\u02B9", + "primereversed": "\u2035", + "product": "\u220F", + "projective": "\u2305", + "prolongedkana": "\u30FC", + "propellor": "\u2318", + "propersubset": "\u2282", + "propersuperset": "\u2283", + "proportion": "\u2237", + "proportional": "\u221D", + "psi": "\u03C8", + "psicyrillic": "\u0471", + "psilipneumatacyrilliccmb": "\u0486", + "pssquare": "\u33B0", + "puhiragana": "\u3077", + "pukatakana": "\u30D7", + "pvsquare": "\u33B4", + "pwsquare": "\u33BA", + "q": "\u0071", + "qadeva": "\u0958", + "qadmahebrew": "\u05A8", + "qafarabic": "\u0642", + "qaffinalarabic": "\uFED6", + "qafinitialarabic": "\uFED7", + "qafmedialarabic": "\uFED8", + "qamats": "\u05B8", + "qamats10": "\u05B8", + "qamats1a": "\u05B8", + "qamats1c": "\u05B8", + "qamats27": "\u05B8", + "qamats29": "\u05B8", + "qamats33": "\u05B8", + "qamatsde": "\u05B8", + "qamatshebrew": "\u05B8", + "qamatsnarrowhebrew": "\u05B8", + "qamatsqatanhebrew": "\u05B8", + "qamatsqatannarrowhebrew": "\u05B8", + "qamatsqatanquarterhebrew": "\u05B8", + "qamatsqatanwidehebrew": "\u05B8", + "qamatsquarterhebrew": "\u05B8", + "qamatswidehebrew": "\u05B8", + "qarneyparahebrew": "\u059F", + "qbopomofo": "\u3111", + "qcircle": "\u24E0", + "qhook": "\u02A0", + "qmonospace": "\uFF51", + "qof": "\u05E7", + "qofdagesh": "\uFB47", + "qofdageshhebrew": "\uFB47", + "qofhatafpatah": "\u05E7\u05B2", + 
"qofhatafpatahhebrew": "\u05E7\u05B2", + "qofhatafsegol": "\u05E7\u05B1", + "qofhatafsegolhebrew": "\u05E7\u05B1", + "qofhebrew": "\u05E7", + "qofhiriq": "\u05E7\u05B4", + "qofhiriqhebrew": "\u05E7\u05B4", + "qofholam": "\u05E7\u05B9", + "qofholamhebrew": "\u05E7\u05B9", + "qofpatah": "\u05E7\u05B7", + "qofpatahhebrew": "\u05E7\u05B7", + "qofqamats": "\u05E7\u05B8", + "qofqamatshebrew": "\u05E7\u05B8", + "qofqubuts": "\u05E7\u05BB", + "qofqubutshebrew": "\u05E7\u05BB", + "qofsegol": "\u05E7\u05B6", + "qofsegolhebrew": "\u05E7\u05B6", + "qofsheva": "\u05E7\u05B0", + "qofshevahebrew": "\u05E7\u05B0", + "qoftsere": "\u05E7\u05B5", + "qoftserehebrew": "\u05E7\u05B5", + "qparen": "\u24AC", + "quarternote": "\u2669", + "qubuts": "\u05BB", + "qubuts18": "\u05BB", + "qubuts25": "\u05BB", + "qubuts31": "\u05BB", + "qubutshebrew": "\u05BB", + "qubutsnarrowhebrew": "\u05BB", + "qubutsquarterhebrew": "\u05BB", + "qubutswidehebrew": "\u05BB", + "question": "\u003F", + "questionarabic": "\u061F", + "questionarmenian": "\u055E", + "questiondown": "\u00BF", + "questiondownsmall": "\uF7BF", + "questiongreek": "\u037E", + "questionmonospace": "\uFF1F", + "questionsmall": "\uF73F", + "quotedbl": "\u0022", + "quotedblbase": "\u201E", + "quotedblleft": "\u201C", + "quotedblmonospace": "\uFF02", + "quotedblprime": "\u301E", + "quotedblprimereversed": "\u301D", + "quotedblright": "\u201D", + "quoteleft": "\u2018", + "quoteleftreversed": "\u201B", + "quotereversed": "\u201B", + "quoteright": "\u2019", + "quoterightn": "\u0149", + "quotesinglbase": "\u201A", + "quotesingle": "\u0027", + "quotesinglemonospace": "\uFF07", + "r": "\u0072", + "raarmenian": "\u057C", + "rabengali": "\u09B0", + "racute": "\u0155", + "radeva": "\u0930", + "radical": "\u221A", + "radicalex": "\uF8E5", + "radoverssquare": "\u33AE", + "radoverssquaredsquare": "\u33AF", + "radsquare": "\u33AD", + "rafe": "\u05BF", + "rafehebrew": "\u05BF", + "ragujarati": "\u0AB0", + "ragurmukhi": "\u0A30", + "rahiragana": "\u3089", 
+ "rakatakana": "\u30E9", + "rakatakanahalfwidth": "\uFF97", + "ralowerdiagonalbengali": "\u09F1", + "ramiddlediagonalbengali": "\u09F0", + "ramshorn": "\u0264", + "ratio": "\u2236", + "rbopomofo": "\u3116", + "rcaron": "\u0159", + "rcedilla": "\u0157", + "rcircle": "\u24E1", + "rcommaaccent": "\u0157", + "rdblgrave": "\u0211", + "rdotaccent": "\u1E59", + "rdotbelow": "\u1E5B", + "rdotbelowmacron": "\u1E5D", + "referencemark": "\u203B", + "reflexsubset": "\u2286", + "reflexsuperset": "\u2287", + "registered": "\u00AE", + "registersans": "\uF8E8", + "registerserif": "\uF6DA", + "reharabic": "\u0631", + "reharmenian": "\u0580", + "rehfinalarabic": "\uFEAE", + "rehiragana": "\u308C", + "rehyehaleflamarabic": "\u0631\uFEF3\uFE8E\u0644", + "rekatakana": "\u30EC", + "rekatakanahalfwidth": "\uFF9A", + "resh": "\u05E8", + "reshdageshhebrew": "\uFB48", + "reshhatafpatah": "\u05E8\u05B2", + "reshhatafpatahhebrew": "\u05E8\u05B2", + "reshhatafsegol": "\u05E8\u05B1", + "reshhatafsegolhebrew": "\u05E8\u05B1", + "reshhebrew": "\u05E8", + "reshhiriq": "\u05E8\u05B4", + "reshhiriqhebrew": "\u05E8\u05B4", + "reshholam": "\u05E8\u05B9", + "reshholamhebrew": "\u05E8\u05B9", + "reshpatah": "\u05E8\u05B7", + "reshpatahhebrew": "\u05E8\u05B7", + "reshqamats": "\u05E8\u05B8", + "reshqamatshebrew": "\u05E8\u05B8", + "reshqubuts": "\u05E8\u05BB", + "reshqubutshebrew": "\u05E8\u05BB", + "reshsegol": "\u05E8\u05B6", + "reshsegolhebrew": "\u05E8\u05B6", + "reshsheva": "\u05E8\u05B0", + "reshshevahebrew": "\u05E8\u05B0", + "reshtsere": "\u05E8\u05B5", + "reshtserehebrew": "\u05E8\u05B5", + "reversedtilde": "\u223D", + "reviahebrew": "\u0597", + "reviamugrashhebrew": "\u0597", + "revlogicalnot": "\u2310", + "rfishhook": "\u027E", + "rfishhookreversed": "\u027F", + "rhabengali": "\u09DD", + "rhadeva": "\u095D", + "rho": "\u03C1", + "rhook": "\u027D", + "rhookturned": "\u027B", + "rhookturnedsuperior": "\u02B5", + "rhosymbolgreek": "\u03F1", + "rhotichookmod": "\u02DE", + "rieulacirclekorean": 
"\u3271", + "rieulaparenkorean": "\u3211", + "rieulcirclekorean": "\u3263", + "rieulhieuhkorean": "\u3140", + "rieulkiyeokkorean": "\u313A", + "rieulkiyeoksioskorean": "\u3169", + "rieulkorean": "\u3139", + "rieulmieumkorean": "\u313B", + "rieulpansioskorean": "\u316C", + "rieulparenkorean": "\u3203", + "rieulphieuphkorean": "\u313F", + "rieulpieupkorean": "\u313C", + "rieulpieupsioskorean": "\u316B", + "rieulsioskorean": "\u313D", + "rieulthieuthkorean": "\u313E", + "rieultikeutkorean": "\u316A", + "rieulyeorinhieuhkorean": "\u316D", + "rightangle": "\u221F", + "righttackbelowcmb": "\u0319", + "righttriangle": "\u22BF", + "rihiragana": "\u308A", + "rikatakana": "\u30EA", + "rikatakanahalfwidth": "\uFF98", + "ring": "\u02DA", + "ringbelowcmb": "\u0325", + "ringcmb": "\u030A", + "ringhalfleft": "\u02BF", + "ringhalfleftarmenian": "\u0559", + "ringhalfleftbelowcmb": "\u031C", + "ringhalfleftcentered": "\u02D3", + "ringhalfright": "\u02BE", + "ringhalfrightbelowcmb": "\u0339", + "ringhalfrightcentered": "\u02D2", + "rinvertedbreve": "\u0213", + "rittorusquare": "\u3351", + "rlinebelow": "\u1E5F", + "rlongleg": "\u027C", + "rlonglegturned": "\u027A", + "rmonospace": "\uFF52", + "rohiragana": "\u308D", + "rokatakana": "\u30ED", + "rokatakanahalfwidth": "\uFF9B", + "roruathai": "\u0E23", + "rparen": "\u24AD", + "rrabengali": "\u09DC", + "rradeva": "\u0931", + "rragurmukhi": "\u0A5C", + "rreharabic": "\u0691", + "rrehfinalarabic": "\uFB8D", + "rrvocalicbengali": "\u09E0", + "rrvocalicdeva": "\u0960", + "rrvocalicgujarati": "\u0AE0", + "rrvocalicvowelsignbengali": "\u09C4", + "rrvocalicvowelsigndeva": "\u0944", + "rrvocalicvowelsigngujarati": "\u0AC4", + "rsuperior": "\uF6F1", + "rtblock": "\u2590", + "rturned": "\u0279", + "rturnedsuperior": "\u02B4", + "ruhiragana": "\u308B", + "rukatakana": "\u30EB", + "rukatakanahalfwidth": "\uFF99", + "rupeemarkbengali": "\u09F2", + "rupeesignbengali": "\u09F3", + "rupiah": "\uF6DD", + "ruthai": "\u0E24", + "rvocalicbengali": 
"\u098B", + "rvocalicdeva": "\u090B", + "rvocalicgujarati": "\u0A8B", + "rvocalicvowelsignbengali": "\u09C3", + "rvocalicvowelsigndeva": "\u0943", + "rvocalicvowelsigngujarati": "\u0AC3", + "s": "\u0073", + "sabengali": "\u09B8", + "sacute": "\u015B", + "sacutedotaccent": "\u1E65", + "sadarabic": "\u0635", + "sadeva": "\u0938", + "sadfinalarabic": "\uFEBA", + "sadinitialarabic": "\uFEBB", + "sadmedialarabic": "\uFEBC", + "sagujarati": "\u0AB8", + "sagurmukhi": "\u0A38", + "sahiragana": "\u3055", + "sakatakana": "\u30B5", + "sakatakanahalfwidth": "\uFF7B", + "sallallahoualayhewasallamarabic": "\uFDFA", + "samekh": "\u05E1", + "samekhdagesh": "\uFB41", + "samekhdageshhebrew": "\uFB41", + "samekhhebrew": "\u05E1", + "saraaathai": "\u0E32", + "saraaethai": "\u0E41", + "saraaimaimalaithai": "\u0E44", + "saraaimaimuanthai": "\u0E43", + "saraamthai": "\u0E33", + "saraathai": "\u0E30", + "saraethai": "\u0E40", + "saraiileftthai": "\uF886", + "saraiithai": "\u0E35", + "saraileftthai": "\uF885", + "saraithai": "\u0E34", + "saraothai": "\u0E42", + "saraueeleftthai": "\uF888", + "saraueethai": "\u0E37", + "saraueleftthai": "\uF887", + "sarauethai": "\u0E36", + "sarauthai": "\u0E38", + "sarauuthai": "\u0E39", + "sbopomofo": "\u3119", + "scaron": "\u0161", + "scarondotaccent": "\u1E67", + "scedilla": "\u015F", + "schwa": "\u0259", + "schwacyrillic": "\u04D9", + "schwadieresiscyrillic": "\u04DB", + "schwahook": "\u025A", + "scircle": "\u24E2", + "scircumflex": "\u015D", + "scommaaccent": "\u0219", + "sdotaccent": "\u1E61", + "sdotbelow": "\u1E63", + "sdotbelowdotaccent": "\u1E69", + "seagullbelowcmb": "\u033C", + "second": "\u2033", + "secondtonechinese": "\u02CA", + "section": "\u00A7", + "seenarabic": "\u0633", + "seenfinalarabic": "\uFEB2", + "seeninitialarabic": "\uFEB3", + "seenmedialarabic": "\uFEB4", + "segol": "\u05B6", + "segol13": "\u05B6", + "segol1f": "\u05B6", + "segol2c": "\u05B6", + "segolhebrew": "\u05B6", + "segolnarrowhebrew": "\u05B6", + "segolquarterhebrew": 
"\u05B6", + "segoltahebrew": "\u0592", + "segolwidehebrew": "\u05B6", + "seharmenian": "\u057D", + "sehiragana": "\u305B", + "sekatakana": "\u30BB", + "sekatakanahalfwidth": "\uFF7E", + "semicolon": "\u003B", + "semicolonarabic": "\u061B", + "semicolonmonospace": "\uFF1B", + "semicolonsmall": "\uFE54", + "semivoicedmarkkana": "\u309C", + "semivoicedmarkkanahalfwidth": "\uFF9F", + "sentisquare": "\u3322", + "sentosquare": "\u3323", + "seven": "\u0037", + "sevenarabic": "\u0667", + "sevenbengali": "\u09ED", + "sevencircle": "\u2466", + "sevencircleinversesansserif": "\u2790", + "sevendeva": "\u096D", + "seveneighths": "\u215E", + "sevengujarati": "\u0AED", + "sevengurmukhi": "\u0A6D", + "sevenhackarabic": "\u0667", + "sevenhangzhou": "\u3027", + "sevenideographicparen": "\u3226", + "seveninferior": "\u2087", + "sevenmonospace": "\uFF17", + "sevenoldstyle": "\uF737", + "sevenparen": "\u247A", + "sevenperiod": "\u248E", + "sevenpersian": "\u06F7", + "sevenroman": "\u2176", + "sevensuperior": "\u2077", + "seventeencircle": "\u2470", + "seventeenparen": "\u2484", + "seventeenperiod": "\u2498", + "seventhai": "\u0E57", + "sfthyphen": "\u00AD", + "shaarmenian": "\u0577", + "shabengali": "\u09B6", + "shacyrillic": "\u0448", + "shaddaarabic": "\u0651", + "shaddadammaarabic": "\uFC61", + "shaddadammatanarabic": "\uFC5E", + "shaddafathaarabic": "\uFC60", + "shaddafathatanarabic": "\u0651\u064B", + "shaddakasraarabic": "\uFC62", + "shaddakasratanarabic": "\uFC5F", + "shade": "\u2592", + "shadedark": "\u2593", + "shadelight": "\u2591", + "shademedium": "\u2592", + "shadeva": "\u0936", + "shagujarati": "\u0AB6", + "shagurmukhi": "\u0A36", + "shalshelethebrew": "\u0593", + "shbopomofo": "\u3115", + "shchacyrillic": "\u0449", + "sheenarabic": "\u0634", + "sheenfinalarabic": "\uFEB6", + "sheeninitialarabic": "\uFEB7", + "sheenmedialarabic": "\uFEB8", + "sheicoptic": "\u03E3", + "sheqel": "\u20AA", + "sheqelhebrew": "\u20AA", + "sheva": "\u05B0", + "sheva115": "\u05B0", + "sheva15": 
"\u05B0", + "sheva22": "\u05B0", + "sheva2e": "\u05B0", + "shevahebrew": "\u05B0", + "shevanarrowhebrew": "\u05B0", + "shevaquarterhebrew": "\u05B0", + "shevawidehebrew": "\u05B0", + "shhacyrillic": "\u04BB", + "shimacoptic": "\u03ED", + "shin": "\u05E9", + "shindagesh": "\uFB49", + "shindageshhebrew": "\uFB49", + "shindageshshindot": "\uFB2C", + "shindageshshindothebrew": "\uFB2C", + "shindageshsindot": "\uFB2D", + "shindageshsindothebrew": "\uFB2D", + "shindothebrew": "\u05C1", + "shinhebrew": "\u05E9", + "shinshindot": "\uFB2A", + "shinshindothebrew": "\uFB2A", + "shinsindot": "\uFB2B", + "shinsindothebrew": "\uFB2B", + "shook": "\u0282", + "sigma": "\u03C3", + "sigma1": "\u03C2", + "sigmafinal": "\u03C2", + "sigmalunatesymbolgreek": "\u03F2", + "sihiragana": "\u3057", + "sikatakana": "\u30B7", + "sikatakanahalfwidth": "\uFF7C", + "siluqhebrew": "\u05BD", + "siluqlefthebrew": "\u05BD", + "similar": "\u223C", + "sindothebrew": "\u05C2", + "siosacirclekorean": "\u3274", + "siosaparenkorean": "\u3214", + "sioscieuckorean": "\u317E", + "sioscirclekorean": "\u3266", + "sioskiyeokkorean": "\u317A", + "sioskorean": "\u3145", + "siosnieunkorean": "\u317B", + "siosparenkorean": "\u3206", + "siospieupkorean": "\u317D", + "siostikeutkorean": "\u317C", + "six": "\u0036", + "sixarabic": "\u0666", + "sixbengali": "\u09EC", + "sixcircle": "\u2465", + "sixcircleinversesansserif": "\u278F", + "sixdeva": "\u096C", + "sixgujarati": "\u0AEC", + "sixgurmukhi": "\u0A6C", + "sixhackarabic": "\u0666", + "sixhangzhou": "\u3026", + "sixideographicparen": "\u3225", + "sixinferior": "\u2086", + "sixmonospace": "\uFF16", + "sixoldstyle": "\uF736", + "sixparen": "\u2479", + "sixperiod": "\u248D", + "sixpersian": "\u06F6", + "sixroman": "\u2175", + "sixsuperior": "\u2076", + "sixteencircle": "\u246F", + "sixteencurrencydenominatorbengali": "\u09F9", + "sixteenparen": "\u2483", + "sixteenperiod": "\u2497", + "sixthai": "\u0E56", + "slash": "\u002F", + "slashmonospace": "\uFF0F", + "slong": 
"\u017F", + "slongdotaccent": "\u1E9B", + "smileface": "\u263A", + "smonospace": "\uFF53", + "sofpasuqhebrew": "\u05C3", + "softhyphen": "\u00AD", + "softsigncyrillic": "\u044C", + "sohiragana": "\u305D", + "sokatakana": "\u30BD", + "sokatakanahalfwidth": "\uFF7F", + "soliduslongoverlaycmb": "\u0338", + "solidusshortoverlaycmb": "\u0337", + "sorusithai": "\u0E29", + "sosalathai": "\u0E28", + "sosothai": "\u0E0B", + "sosuathai": "\u0E2A", + "space": "\u0020", + "spacehackarabic": "\u0020", + "spade": "\u2660", + "spadesuitblack": "\u2660", + "spadesuitwhite": "\u2664", + "sparen": "\u24AE", + "squarebelowcmb": "\u033B", + "squarecc": "\u33C4", + "squarecm": "\u339D", + "squarediagonalcrosshatchfill": "\u25A9", + "squarehorizontalfill": "\u25A4", + "squarekg": "\u338F", + "squarekm": "\u339E", + "squarekmcapital": "\u33CE", + "squareln": "\u33D1", + "squarelog": "\u33D2", + "squaremg": "\u338E", + "squaremil": "\u33D5", + "squaremm": "\u339C", + "squaremsquared": "\u33A1", + "squareorthogonalcrosshatchfill": "\u25A6", + "squareupperlefttolowerrightfill": "\u25A7", + "squareupperrighttolowerleftfill": "\u25A8", + "squareverticalfill": "\u25A5", + "squarewhitewithsmallblack": "\u25A3", + "srsquare": "\u33DB", + "ssabengali": "\u09B7", + "ssadeva": "\u0937", + "ssagujarati": "\u0AB7", + "ssangcieuckorean": "\u3149", + "ssanghieuhkorean": "\u3185", + "ssangieungkorean": "\u3180", + "ssangkiyeokkorean": "\u3132", + "ssangnieunkorean": "\u3165", + "ssangpieupkorean": "\u3143", + "ssangsioskorean": "\u3146", + "ssangtikeutkorean": "\u3138", + "ssuperior": "\uF6F2", + "sterling": "\u00A3", + "sterlingmonospace": "\uFFE1", + "strokelongoverlaycmb": "\u0336", + "strokeshortoverlaycmb": "\u0335", + "subset": "\u2282", + "subsetnotequal": "\u228A", + "subsetorequal": "\u2286", + "succeeds": "\u227B", + "suchthat": "\u220B", + "suhiragana": "\u3059", + "sukatakana": "\u30B9", + "sukatakanahalfwidth": "\uFF7D", + "sukunarabic": "\u0652", + "summation": "\u2211", + "sun": "\u263C", 
+ "superset": "\u2283", + "supersetnotequal": "\u228B", + "supersetorequal": "\u2287", + "svsquare": "\u33DC", + "syouwaerasquare": "\u337C", + "t": "\u0074", + "tabengali": "\u09A4", + "tackdown": "\u22A4", + "tackleft": "\u22A3", + "tadeva": "\u0924", + "tagujarati": "\u0AA4", + "tagurmukhi": "\u0A24", + "taharabic": "\u0637", + "tahfinalarabic": "\uFEC2", + "tahinitialarabic": "\uFEC3", + "tahiragana": "\u305F", + "tahmedialarabic": "\uFEC4", + "taisyouerasquare": "\u337D", + "takatakana": "\u30BF", + "takatakanahalfwidth": "\uFF80", + "tatweelarabic": "\u0640", + "tau": "\u03C4", + "tav": "\u05EA", + "tavdages": "\uFB4A", + "tavdagesh": "\uFB4A", + "tavdageshhebrew": "\uFB4A", + "tavhebrew": "\u05EA", + "tbar": "\u0167", + "tbopomofo": "\u310A", + "tcaron": "\u0165", + "tccurl": "\u02A8", + "tcedilla": "\u0163", + "tcheharabic": "\u0686", + "tchehfinalarabic": "\uFB7B", + "tchehinitialarabic": "\uFB7C", + "tchehmedialarabic": "\uFB7D", + "tchehmeeminitialarabic": "\uFB7C\uFEE4", + "tcircle": "\u24E3", + "tcircumflexbelow": "\u1E71", + "tcommaaccent": "\u0163", + "tdieresis": "\u1E97", + "tdotaccent": "\u1E6B", + "tdotbelow": "\u1E6D", + "tecyrillic": "\u0442", + "tedescendercyrillic": "\u04AD", + "teharabic": "\u062A", + "tehfinalarabic": "\uFE96", + "tehhahinitialarabic": "\uFCA2", + "tehhahisolatedarabic": "\uFC0C", + "tehinitialarabic": "\uFE97", + "tehiragana": "\u3066", + "tehjeeminitialarabic": "\uFCA1", + "tehjeemisolatedarabic": "\uFC0B", + "tehmarbutaarabic": "\u0629", + "tehmarbutafinalarabic": "\uFE94", + "tehmedialarabic": "\uFE98", + "tehmeeminitialarabic": "\uFCA4", + "tehmeemisolatedarabic": "\uFC0E", + "tehnoonfinalarabic": "\uFC73", + "tekatakana": "\u30C6", + "tekatakanahalfwidth": "\uFF83", + "telephone": "\u2121", + "telephoneblack": "\u260E", + "telishagedolahebrew": "\u05A0", + "telishaqetanahebrew": "\u05A9", + "tencircle": "\u2469", + "tenideographicparen": "\u3229", + "tenparen": "\u247D", + "tenperiod": "\u2491", + "tenroman": 
"\u2179", + "tesh": "\u02A7", + "tet": "\u05D8", + "tetdagesh": "\uFB38", + "tetdageshhebrew": "\uFB38", + "tethebrew": "\u05D8", + "tetsecyrillic": "\u04B5", + "tevirhebrew": "\u059B", + "tevirlefthebrew": "\u059B", + "thabengali": "\u09A5", + "thadeva": "\u0925", + "thagujarati": "\u0AA5", + "thagurmukhi": "\u0A25", + "thalarabic": "\u0630", + "thalfinalarabic": "\uFEAC", + "thanthakhatlowleftthai": "\uF898", + "thanthakhatlowrightthai": "\uF897", + "thanthakhatthai": "\u0E4C", + "thanthakhatupperleftthai": "\uF896", + "theharabic": "\u062B", + "thehfinalarabic": "\uFE9A", + "thehinitialarabic": "\uFE9B", + "thehmedialarabic": "\uFE9C", + "thereexists": "\u2203", + "therefore": "\u2234", + "theta": "\u03B8", + "theta1": "\u03D1", + "thetasymbolgreek": "\u03D1", + "thieuthacirclekorean": "\u3279", + "thieuthaparenkorean": "\u3219", + "thieuthcirclekorean": "\u326B", + "thieuthkorean": "\u314C", + "thieuthparenkorean": "\u320B", + "thirteencircle": "\u246C", + "thirteenparen": "\u2480", + "thirteenperiod": "\u2494", + "thonangmonthothai": "\u0E11", + "thook": "\u01AD", + "thophuthaothai": "\u0E12", + "thorn": "\u00FE", + "thothahanthai": "\u0E17", + "thothanthai": "\u0E10", + "thothongthai": "\u0E18", + "thothungthai": "\u0E16", + "thousandcyrillic": "\u0482", + "thousandsseparatorarabic": "\u066C", + "thousandsseparatorpersian": "\u066C", + "three": "\u0033", + "threearabic": "\u0663", + "threebengali": "\u09E9", + "threecircle": "\u2462", + "threecircleinversesansserif": "\u278C", + "threedeva": "\u0969", + "threeeighths": "\u215C", + "threegujarati": "\u0AE9", + "threegurmukhi": "\u0A69", + "threehackarabic": "\u0663", + "threehangzhou": "\u3023", + "threeideographicparen": "\u3222", + "threeinferior": "\u2083", + "threemonospace": "\uFF13", + "threenumeratorbengali": "\u09F6", + "threeoldstyle": "\uF733", + "threeparen": "\u2476", + "threeperiod": "\u248A", + "threepersian": "\u06F3", + "threequarters": "\u00BE", + "threequartersemdash": "\uF6DE", + 
"threeroman": "\u2172", + "threesuperior": "\u00B3", + "threethai": "\u0E53", + "thzsquare": "\u3394", + "tihiragana": "\u3061", + "tikatakana": "\u30C1", + "tikatakanahalfwidth": "\uFF81", + "tikeutacirclekorean": "\u3270", + "tikeutaparenkorean": "\u3210", + "tikeutcirclekorean": "\u3262", + "tikeutkorean": "\u3137", + "tikeutparenkorean": "\u3202", + "tilde": "\u02DC", + "tildebelowcmb": "\u0330", + "tildecmb": "\u0303", + "tildecomb": "\u0303", + "tildedoublecmb": "\u0360", + "tildeoperator": "\u223C", + "tildeoverlaycmb": "\u0334", + "tildeverticalcmb": "\u033E", + "timescircle": "\u2297", + "tipehahebrew": "\u0596", + "tipehalefthebrew": "\u0596", + "tippigurmukhi": "\u0A70", + "titlocyrilliccmb": "\u0483", + "tiwnarmenian": "\u057F", + "tlinebelow": "\u1E6F", + "tmonospace": "\uFF54", + "toarmenian": "\u0569", + "tohiragana": "\u3068", + "tokatakana": "\u30C8", + "tokatakanahalfwidth": "\uFF84", + "tonebarextrahighmod": "\u02E5", + "tonebarextralowmod": "\u02E9", + "tonebarhighmod": "\u02E6", + "tonebarlowmod": "\u02E8", + "tonebarmidmod": "\u02E7", + "tonefive": "\u01BD", + "tonesix": "\u0185", + "tonetwo": "\u01A8", + "tonos": "\u0384", + "tonsquare": "\u3327", + "topatakthai": "\u0E0F", + "tortoiseshellbracketleft": "\u3014", + "tortoiseshellbracketleftsmall": "\uFE5D", + "tortoiseshellbracketleftvertical": "\uFE39", + "tortoiseshellbracketright": "\u3015", + "tortoiseshellbracketrightsmall": "\uFE5E", + "tortoiseshellbracketrightvertical": "\uFE3A", + "totaothai": "\u0E15", + "tpalatalhook": "\u01AB", + "tparen": "\u24AF", + "trademark": "\u2122", + "trademarksans": "\uF8EA", + "trademarkserif": "\uF6DB", + "tretroflexhook": "\u0288", + "triagdn": "\u25BC", + "triaglf": "\u25C4", + "triagrt": "\u25BA", + "triagup": "\u25B2", + "ts": "\u02A6", + "tsadi": "\u05E6", + "tsadidagesh": "\uFB46", + "tsadidageshhebrew": "\uFB46", + "tsadihebrew": "\u05E6", + "tsecyrillic": "\u0446", + "tsere": "\u05B5", + "tsere12": "\u05B5", + "tsere1e": "\u05B5", + "tsere2b": 
"\u05B5", + "tserehebrew": "\u05B5", + "tserenarrowhebrew": "\u05B5", + "tserequarterhebrew": "\u05B5", + "tserewidehebrew": "\u05B5", + "tshecyrillic": "\u045B", + "tsuperior": "\uF6F3", + "ttabengali": "\u099F", + "ttadeva": "\u091F", + "ttagujarati": "\u0A9F", + "ttagurmukhi": "\u0A1F", + "tteharabic": "\u0679", + "ttehfinalarabic": "\uFB67", + "ttehinitialarabic": "\uFB68", + "ttehmedialarabic": "\uFB69", + "tthabengali": "\u09A0", + "tthadeva": "\u0920", + "tthagujarati": "\u0AA0", + "tthagurmukhi": "\u0A20", + "tturned": "\u0287", + "tuhiragana": "\u3064", + "tukatakana": "\u30C4", + "tukatakanahalfwidth": "\uFF82", + "tusmallhiragana": "\u3063", + "tusmallkatakana": "\u30C3", + "tusmallkatakanahalfwidth": "\uFF6F", + "twelvecircle": "\u246B", + "twelveparen": "\u247F", + "twelveperiod": "\u2493", + "twelveroman": "\u217B", + "twentycircle": "\u2473", + "twentyhangzhou": "\u5344", + "twentyparen": "\u2487", + "twentyperiod": "\u249B", + "two": "\u0032", + "twoarabic": "\u0662", + "twobengali": "\u09E8", + "twocircle": "\u2461", + "twocircleinversesansserif": "\u278B", + "twodeva": "\u0968", + "twodotenleader": "\u2025", + "twodotleader": "\u2025", + "twodotleadervertical": "\uFE30", + "twogujarati": "\u0AE8", + "twogurmukhi": "\u0A68", + "twohackarabic": "\u0662", + "twohangzhou": "\u3022", + "twoideographicparen": "\u3221", + "twoinferior": "\u2082", + "twomonospace": "\uFF12", + "twonumeratorbengali": "\u09F5", + "twooldstyle": "\uF732", + "twoparen": "\u2475", + "twoperiod": "\u2489", + "twopersian": "\u06F2", + "tworoman": "\u2171", + "twostroke": "\u01BB", + "twosuperior": "\u00B2", + "twothai": "\u0E52", + "twothirds": "\u2154", + "u": "\u0075", + "uacute": "\u00FA", + "ubar": "\u0289", + "ubengali": "\u0989", + "ubopomofo": "\u3128", + "ubreve": "\u016D", + "ucaron": "\u01D4", + "ucircle": "\u24E4", + "ucircumflex": "\u00FB", + "ucircumflexbelow": "\u1E77", + "ucyrillic": "\u0443", + "udattadeva": "\u0951", + "udblacute": "\u0171", + "udblgrave": 
"\u0215", + "udeva": "\u0909", + "udieresis": "\u00FC", + "udieresisacute": "\u01D8", + "udieresisbelow": "\u1E73", + "udieresiscaron": "\u01DA", + "udieresiscyrillic": "\u04F1", + "udieresisgrave": "\u01DC", + "udieresismacron": "\u01D6", + "udotbelow": "\u1EE5", + "ugrave": "\u00F9", + "ugujarati": "\u0A89", + "ugurmukhi": "\u0A09", + "uhiragana": "\u3046", + "uhookabove": "\u1EE7", + "uhorn": "\u01B0", + "uhornacute": "\u1EE9", + "uhorndotbelow": "\u1EF1", + "uhorngrave": "\u1EEB", + "uhornhookabove": "\u1EED", + "uhorntilde": "\u1EEF", + "uhungarumlaut": "\u0171", + "uhungarumlautcyrillic": "\u04F3", + "uinvertedbreve": "\u0217", + "ukatakana": "\u30A6", + "ukatakanahalfwidth": "\uFF73", + "ukcyrillic": "\u0479", + "ukorean": "\u315C", + "umacron": "\u016B", + "umacroncyrillic": "\u04EF", + "umacrondieresis": "\u1E7B", + "umatragurmukhi": "\u0A41", + "umonospace": "\uFF55", + "underscore": "\u005F", + "underscoredbl": "\u2017", + "underscoremonospace": "\uFF3F", + "underscorevertical": "\uFE33", + "underscorewavy": "\uFE4F", + "union": "\u222A", + "universal": "\u2200", + "uogonek": "\u0173", + "uparen": "\u24B0", + "upblock": "\u2580", + "upperdothebrew": "\u05C4", + "upsilon": "\u03C5", + "upsilondieresis": "\u03CB", + "upsilondieresistonos": "\u03B0", + "upsilonlatin": "\u028A", + "upsilontonos": "\u03CD", + "uptackbelowcmb": "\u031D", + "uptackmod": "\u02D4", + "uragurmukhi": "\u0A73", + "uring": "\u016F", + "ushortcyrillic": "\u045E", + "usmallhiragana": "\u3045", + "usmallkatakana": "\u30A5", + "usmallkatakanahalfwidth": "\uFF69", + "ustraightcyrillic": "\u04AF", + "ustraightstrokecyrillic": "\u04B1", + "utilde": "\u0169", + "utildeacute": "\u1E79", + "utildebelow": "\u1E75", + "uubengali": "\u098A", + "uudeva": "\u090A", + "uugujarati": "\u0A8A", + "uugurmukhi": "\u0A0A", + "uumatragurmukhi": "\u0A42", + "uuvowelsignbengali": "\u09C2", + "uuvowelsigndeva": "\u0942", + "uuvowelsigngujarati": "\u0AC2", + "uvowelsignbengali": "\u09C1", + "uvowelsigndeva": 
"\u0941", + "uvowelsigngujarati": "\u0AC1", + "v": "\u0076", + "vadeva": "\u0935", + "vagujarati": "\u0AB5", + "vagurmukhi": "\u0A35", + "vakatakana": "\u30F7", + "vav": "\u05D5", + "vavdagesh": "\uFB35", + "vavdagesh65": "\uFB35", + "vavdageshhebrew": "\uFB35", + "vavhebrew": "\u05D5", + "vavholam": "\uFB4B", + "vavholamhebrew": "\uFB4B", + "vavvavhebrew": "\u05F0", + "vavyodhebrew": "\u05F1", + "vcircle": "\u24E5", + "vdotbelow": "\u1E7F", + "vecyrillic": "\u0432", + "veharabic": "\u06A4", + "vehfinalarabic": "\uFB6B", + "vehinitialarabic": "\uFB6C", + "vehmedialarabic": "\uFB6D", + "vekatakana": "\u30F9", + "venus": "\u2640", + "verticalbar": "\u007C", + "verticallineabovecmb": "\u030D", + "verticallinebelowcmb": "\u0329", + "verticallinelowmod": "\u02CC", + "verticallinemod": "\u02C8", + "vewarmenian": "\u057E", + "vhook": "\u028B", + "vikatakana": "\u30F8", + "viramabengali": "\u09CD", + "viramadeva": "\u094D", + "viramagujarati": "\u0ACD", + "visargabengali": "\u0983", + "visargadeva": "\u0903", + "visargagujarati": "\u0A83", + "vmonospace": "\uFF56", + "voarmenian": "\u0578", + "voicediterationhiragana": "\u309E", + "voicediterationkatakana": "\u30FE", + "voicedmarkkana": "\u309B", + "voicedmarkkanahalfwidth": "\uFF9E", + "vokatakana": "\u30FA", + "vparen": "\u24B1", + "vtilde": "\u1E7D", + "vturned": "\u028C", + "vuhiragana": "\u3094", + "vukatakana": "\u30F4", + "w": "\u0077", + "wacute": "\u1E83", + "waekorean": "\u3159", + "wahiragana": "\u308F", + "wakatakana": "\u30EF", + "wakatakanahalfwidth": "\uFF9C", + "wakorean": "\u3158", + "wasmallhiragana": "\u308E", + "wasmallkatakana": "\u30EE", + "wattosquare": "\u3357", + "wavedash": "\u301C", + "wavyunderscorevertical": "\uFE34", + "wawarabic": "\u0648", + "wawfinalarabic": "\uFEEE", + "wawhamzaabovearabic": "\u0624", + "wawhamzaabovefinalarabic": "\uFE86", + "wbsquare": "\u33DD", + "wcircle": "\u24E6", + "wcircumflex": "\u0175", + "wdieresis": "\u1E85", + "wdotaccent": "\u1E87", + "wdotbelow": "\u1E89", + 
"wehiragana": "\u3091", + "weierstrass": "\u2118", + "wekatakana": "\u30F1", + "wekorean": "\u315E", + "weokorean": "\u315D", + "wgrave": "\u1E81", + "whitebullet": "\u25E6", + "whitecircle": "\u25CB", + "whitecircleinverse": "\u25D9", + "whitecornerbracketleft": "\u300E", + "whitecornerbracketleftvertical": "\uFE43", + "whitecornerbracketright": "\u300F", + "whitecornerbracketrightvertical": "\uFE44", + "whitediamond": "\u25C7", + "whitediamondcontainingblacksmalldiamond": "\u25C8", + "whitedownpointingsmalltriangle": "\u25BF", + "whitedownpointingtriangle": "\u25BD", + "whiteleftpointingsmalltriangle": "\u25C3", + "whiteleftpointingtriangle": "\u25C1", + "whitelenticularbracketleft": "\u3016", + "whitelenticularbracketright": "\u3017", + "whiterightpointingsmalltriangle": "\u25B9", + "whiterightpointingtriangle": "\u25B7", + "whitesmallsquare": "\u25AB", + "whitesmilingface": "\u263A", + "whitesquare": "\u25A1", + "whitestar": "\u2606", + "whitetelephone": "\u260F", + "whitetortoiseshellbracketleft": "\u3018", + "whitetortoiseshellbracketright": "\u3019", + "whiteuppointingsmalltriangle": "\u25B5", + "whiteuppointingtriangle": "\u25B3", + "wihiragana": "\u3090", + "wikatakana": "\u30F0", + "wikorean": "\u315F", + "wmonospace": "\uFF57", + "wohiragana": "\u3092", + "wokatakana": "\u30F2", + "wokatakanahalfwidth": "\uFF66", + "won": "\u20A9", + "wonmonospace": "\uFFE6", + "wowaenthai": "\u0E27", + "wparen": "\u24B2", + "wring": "\u1E98", + "wsuperior": "\u02B7", + "wturned": "\u028D", + "wynn": "\u01BF", + "x": "\u0078", + "xabovecmb": "\u033D", + "xbopomofo": "\u3112", + "xcircle": "\u24E7", + "xdieresis": "\u1E8D", + "xdotaccent": "\u1E8B", + "xeharmenian": "\u056D", + "xi": "\u03BE", + "xmonospace": "\uFF58", + "xparen": "\u24B3", + "xsuperior": "\u02E3", + "y": "\u0079", + "yaadosquare": "\u334E", + "yabengali": "\u09AF", + "yacute": "\u00FD", + "yadeva": "\u092F", + "yaekorean": "\u3152", + "yagujarati": "\u0AAF", + "yagurmukhi": "\u0A2F", + "yahiragana": 
"\u3084", + "yakatakana": "\u30E4", + "yakatakanahalfwidth": "\uFF94", + "yakorean": "\u3151", + "yamakkanthai": "\u0E4E", + "yasmallhiragana": "\u3083", + "yasmallkatakana": "\u30E3", + "yasmallkatakanahalfwidth": "\uFF6C", + "yatcyrillic": "\u0463", + "ycircle": "\u24E8", + "ycircumflex": "\u0177", + "ydieresis": "\u00FF", + "ydotaccent": "\u1E8F", + "ydotbelow": "\u1EF5", + "yeharabic": "\u064A", + "yehbarreearabic": "\u06D2", + "yehbarreefinalarabic": "\uFBAF", + "yehfinalarabic": "\uFEF2", + "yehhamzaabovearabic": "\u0626", + "yehhamzaabovefinalarabic": "\uFE8A", + "yehhamzaaboveinitialarabic": "\uFE8B", + "yehhamzaabovemedialarabic": "\uFE8C", + "yehinitialarabic": "\uFEF3", + "yehmedialarabic": "\uFEF4", + "yehmeeminitialarabic": "\uFCDD", + "yehmeemisolatedarabic": "\uFC58", + "yehnoonfinalarabic": "\uFC94", + "yehthreedotsbelowarabic": "\u06D1", + "yekorean": "\u3156", + "yen": "\u00A5", + "yenmonospace": "\uFFE5", + "yeokorean": "\u3155", + "yeorinhieuhkorean": "\u3186", + "yerahbenyomohebrew": "\u05AA", + "yerahbenyomolefthebrew": "\u05AA", + "yericyrillic": "\u044B", + "yerudieresiscyrillic": "\u04F9", + "yesieungkorean": "\u3181", + "yesieungpansioskorean": "\u3183", + "yesieungsioskorean": "\u3182", + "yetivhebrew": "\u059A", + "ygrave": "\u1EF3", + "yhook": "\u01B4", + "yhookabove": "\u1EF7", + "yiarmenian": "\u0575", + "yicyrillic": "\u0457", + "yikorean": "\u3162", + "yinyang": "\u262F", + "yiwnarmenian": "\u0582", + "ymonospace": "\uFF59", + "yod": "\u05D9", + "yoddagesh": "\uFB39", + "yoddageshhebrew": "\uFB39", + "yodhebrew": "\u05D9", + "yodyodhebrew": "\u05F2", + "yodyodpatahhebrew": "\uFB1F", + "yohiragana": "\u3088", + "yoikorean": "\u3189", + "yokatakana": "\u30E8", + "yokatakanahalfwidth": "\uFF96", + "yokorean": "\u315B", + "yosmallhiragana": "\u3087", + "yosmallkatakana": "\u30E7", + "yosmallkatakanahalfwidth": "\uFF6E", + "yotgreek": "\u03F3", + "yoyaekorean": "\u3188", + "yoyakorean": "\u3187", + "yoyakthai": "\u0E22", + "yoyingthai": 
"\u0E0D", + "yparen": "\u24B4", + "ypogegrammeni": "\u037A", + "ypogegrammenigreekcmb": "\u0345", + "yr": "\u01A6", + "yring": "\u1E99", + "ysuperior": "\u02B8", + "ytilde": "\u1EF9", + "yturned": "\u028E", + "yuhiragana": "\u3086", + "yuikorean": "\u318C", + "yukatakana": "\u30E6", + "yukatakanahalfwidth": "\uFF95", + "yukorean": "\u3160", + "yusbigcyrillic": "\u046B", + "yusbigiotifiedcyrillic": "\u046D", + "yuslittlecyrillic": "\u0467", + "yuslittleiotifiedcyrillic": "\u0469", + "yusmallhiragana": "\u3085", + "yusmallkatakana": "\u30E5", + "yusmallkatakanahalfwidth": "\uFF6D", + "yuyekorean": "\u318B", + "yuyeokorean": "\u318A", + "yyabengali": "\u09DF", + "yyadeva": "\u095F", + "z": "\u007A", + "zaarmenian": "\u0566", + "zacute": "\u017A", + "zadeva": "\u095B", + "zagurmukhi": "\u0A5B", + "zaharabic": "\u0638", + "zahfinalarabic": "\uFEC6", + "zahinitialarabic": "\uFEC7", + "zahiragana": "\u3056", + "zahmedialarabic": "\uFEC8", + "zainarabic": "\u0632", + "zainfinalarabic": "\uFEB0", + "zakatakana": "\u30B6", + "zaqefgadolhebrew": "\u0595", + "zaqefqatanhebrew": "\u0594", + "zarqahebrew": "\u0598", + "zayin": "\u05D6", + "zayindagesh": "\uFB36", + "zayindageshhebrew": "\uFB36", + "zayinhebrew": "\u05D6", + "zbopomofo": "\u3117", + "zcaron": "\u017E", + "zcircle": "\u24E9", + "zcircumflex": "\u1E91", + "zcurl": "\u0291", + "zdot": "\u017C", + "zdotaccent": "\u017C", + "zdotbelow": "\u1E93", + "zecyrillic": "\u0437", + "zedescendercyrillic": "\u0499", + "zedieresiscyrillic": "\u04DF", + "zehiragana": "\u305C", + "zekatakana": "\u30BC", + "zero": "\u0030", + "zeroarabic": "\u0660", + "zerobengali": "\u09E6", + "zerodeva": "\u0966", + "zerogujarati": "\u0AE6", + "zerogurmukhi": "\u0A66", + "zerohackarabic": "\u0660", + "zeroinferior": "\u2080", + "zeromonospace": "\uFF10", + "zerooldstyle": "\uF730", + "zeropersian": "\u06F0", + "zerosuperior": "\u2070", + "zerothai": "\u0E50", + "zerowidthjoiner": "\uFEFF", + "zerowidthnonjoiner": "\u200C", + "zerowidthspace": 
"\u200B", + "zeta": "\u03B6", + "zhbopomofo": "\u3113", + "zhearmenian": "\u056A", + "zhebrevecyrillic": "\u04C2", + "zhecyrillic": "\u0436", + "zhedescendercyrillic": "\u0497", + "zhedieresiscyrillic": "\u04DD", + "zihiragana": "\u3058", + "zikatakana": "\u30B8", + "zinorhebrew": "\u05AE", + "zlinebelow": "\u1E95", + "zmonospace": "\uFF5A", + "zohiragana": "\u305E", + "zokatakana": "\u30BE", + "zparen": "\u24B5", + "zretroflexhook": "\u0290", + "zstroke": "\u01B6", + "zuhiragana": "\u305A", + "zukatakana": "\u30BA", } # --end diff --git a/pdfminer/high_level.py b/pdfminer/high_level.py index f8c5ca4..29a985c 100644 --- a/pdfminer/high_level.py +++ b/pdfminer/high_level.py @@ -5,8 +5,7 @@ import sys from io import StringIO from typing import Any, BinaryIO, Container, Iterator, Optional, cast -from .converter import XMLConverter, HTMLConverter, TextConverter, \ - PDFPageAggregator +from .converter import XMLConverter, HTMLConverter, TextConverter, PDFPageAggregator from .image import ImageWriter from .layout import LAParams, LTPage from .pdfdevice import PDFDevice, TagExtractor @@ -18,20 +17,20 @@ from .utils import open_filename, FileOrName, AnyIO def extract_text_to_fp( inf: BinaryIO, outfp: AnyIO, - output_type: str = 'text', - codec: str = 'utf-8', + output_type: str = "text", + codec: str = "utf-8", laparams: Optional[LAParams] = None, maxpages: int = 0, page_numbers: Optional[Container[int]] = None, password: str = "", scale: float = 1.0, rotation: int = 0, - layoutmode: str = 'normal', + layoutmode: str = "normal", output_dir: Optional[str] = None, strip_control: bool = False, debug: bool = False, disable_caching: bool = False, - **kwargs: Any + **kwargs: Any, ) -> None: """Parses text from inf-file and writes to outfp file-like object. 
@@ -72,39 +71,52 @@ def extract_text_to_fp( rsrcmgr = PDFResourceManager(caching=not disable_caching) device: Optional[PDFDevice] = None - if output_type != 'text' and outfp == sys.stdout: + if output_type != "text" and outfp == sys.stdout: outfp = sys.stdout.buffer - if output_type == 'text': - device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams, - imagewriter=imagewriter) + if output_type == "text": + device = TextConverter( + rsrcmgr, outfp, codec=codec, laparams=laparams, imagewriter=imagewriter + ) - elif output_type == 'xml': - device = XMLConverter(rsrcmgr, outfp, codec=codec, laparams=laparams, - imagewriter=imagewriter, - stripcontrol=strip_control) + elif output_type == "xml": + device = XMLConverter( + rsrcmgr, + outfp, + codec=codec, + laparams=laparams, + imagewriter=imagewriter, + stripcontrol=strip_control, + ) - elif output_type == 'html': - device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale, - layoutmode=layoutmode, laparams=laparams, - imagewriter=imagewriter) + elif output_type == "html": + device = HTMLConverter( + rsrcmgr, + outfp, + codec=codec, + scale=scale, + layoutmode=layoutmode, + laparams=laparams, + imagewriter=imagewriter, + ) - elif output_type == 'tag': + elif output_type == "tag": # Binary I/O is required, but we have no good way to test it here. 
device = TagExtractor(rsrcmgr, cast(BinaryIO, outfp), codec=codec) else: - msg = f"Output type can be text, html, xml or tag but is " \ - f"{output_type}" + msg = f"Output type can be text, html, xml or tag but is " f"{output_type}" raise ValueError(msg) assert device is not None interpreter = PDFPageInterpreter(rsrcmgr, device) - for page in PDFPage.get_pages(inf, - page_numbers, - maxpages=maxpages, - password=password, - caching=not disable_caching): + for page in PDFPage.get_pages( + inf, + page_numbers, + maxpages=maxpages, + password=password, + caching=not disable_caching, + ): page.rotate = (page.rotate + rotation) % 360 interpreter.process_page(page) @@ -113,12 +125,12 @@ def extract_text_to_fp( def extract_text( pdf_file: FileOrName, - password: str = '', + password: str = "", page_numbers: Optional[Container[int]] = None, maxpages: int = 0, caching: bool = True, - codec: str = 'utf-8', - laparams: Optional[LAParams] = None + codec: str = "utf-8", + laparams: Optional[LAParams] = None, ) -> str: """Parse and return the text contained in a PDF file. 
@@ -139,16 +151,15 @@ def extract_text( with open_filename(pdf_file, "rb") as fp, StringIO() as output_string: fp = cast(BinaryIO, fp) # we opened in binary mode rsrcmgr = PDFResourceManager(caching=caching) - device = TextConverter(rsrcmgr, output_string, codec=codec, - laparams=laparams) + device = TextConverter(rsrcmgr, output_string, codec=codec, laparams=laparams) interpreter = PDFPageInterpreter(rsrcmgr, device) for page in PDFPage.get_pages( - fp, - page_numbers, - maxpages=maxpages, - password=password, - caching=caching, + fp, + page_numbers, + maxpages=maxpages, + password=password, + caching=caching, ): interpreter.process_page(page) @@ -157,11 +168,11 @@ def extract_text( def extract_pages( pdf_file: FileOrName, - password: str = '', + password: str = "", page_numbers: Optional[Container[int]] = None, maxpages: int = 0, caching: bool = True, - laparams: Optional[LAParams] = None + laparams: Optional[LAParams] = None, ) -> Iterator[LTPage]: """Extract and yield LTPage objects @@ -183,8 +194,9 @@ def extract_pages( resource_manager = PDFResourceManager(caching=caching) device = PDFPageAggregator(resource_manager, laparams=laparams) interpreter = PDFPageInterpreter(resource_manager, device) - for page in PDFPage.get_pages(fp, page_numbers, maxpages=maxpages, - password=password, caching=caching): + for page in PDFPage.get_pages( + fp, page_numbers, maxpages=maxpages, password=password, caching=caching + ): interpreter.process_page(page) layout = device.get_result() yield layout diff --git a/pdfminer/image.py b/pdfminer/image.py index d537d7c..3123326 100644 --- a/pdfminer/image.py +++ b/pdfminer/image.py @@ -9,22 +9,15 @@ from .layout import LTImage from .pdfcolor import LITERAL_DEVICE_CMYK from .pdfcolor import LITERAL_DEVICE_GRAY from .pdfcolor import LITERAL_DEVICE_RGB -from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE, \ - LITERALS_JPX_DECODE +from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE, LITERALS_JPX_DECODE def 
align32(x: int) -> int: - return ((x+3)//4)*4 + return ((x + 3) // 4) * 4 class BMPWriter: - def __init__( - self, - fp: BinaryIO, - bits: int, - width: int, - height: int - ) -> None: + def __init__(self, fp: BinaryIO, bits: int, width: int, height: int) -> None: self.fp = fp self.bits = bits self.width = width @@ -37,30 +30,43 @@ class BMPWriter: ncols = 0 else: raise ValueError(bits) - self.linesize = align32((self.width*self.bits+7)//8) + self.linesize = align32((self.width * self.bits + 7) // 8) self.datasize = self.linesize * self.height - headersize = 14+40+ncols*4 - info = struct.pack(' None: - self.fp.seek(self.pos1 - (y+1)*self.linesize) + self.fp.seek(self.pos1 - (y + 1) * self.linesize) self.fp.write(data) @@ -80,43 +86,46 @@ class ImageWriter: is_jbig2 = self.is_jbig2_image(image) ext = self._get_image_extension(image, width, height, is_jbig2) - name, path = self._create_unique_image_name(self.outdir, - image.name, ext) + name, path = self._create_unique_image_name(self.outdir, image.name, ext) - fp = open(path, 'wb') - if ext == '.jpg': + fp = open(path, "wb") + if ext == ".jpg": raw_data = image.stream.get_rawdata() assert raw_data is not None if LITERAL_DEVICE_CMYK in image.colorspace: from PIL import Image # type: ignore[import] from PIL import ImageChops + ifp = BytesIO(raw_data) i = Image.open(ifp) i = ImageChops.invert(i) - i = i.convert('RGB') - i.save(fp, 'JPEG') + i = i.convert("RGB") + i.save(fp, "JPEG") else: fp.write(raw_data) - elif ext == '.jp2': + elif ext == ".jp2": # if we just write the raw data, most image programs # that I have tried cannot open the file. 
However, # open and saving with PIL produces a file that # seems to be easily opened by other programs from PIL import Image + raw_data = image.stream.get_rawdata() assert raw_data is not None ifp = BytesIO(raw_data) i = Image.open(ifp) - i.save(fp, 'JPEG2000') + i.save(fp, "JPEG2000") elif is_jbig2: input_stream = BytesIO() global_streams = self.jbig2_global(image) if len(global_streams) > 1: - msg = 'There should never be more than one JBIG2Globals ' \ - 'associated with a JBIG2 embedded image' + msg = ( + "There should never be more than one JBIG2Globals " + "associated with a JBIG2 embedded image" + ) raise ValueError(msg) if len(global_streams) == 1: - input_stream.write(global_streams[0].get_data().rstrip(b'\n')) + input_stream.write(global_streams[0].get_data().rstrip(b"\n")) input_stream.write(image.stream.get_data()) input_stream.seek(0) reader = JBIG2StreamReader(input_stream) @@ -128,24 +137,24 @@ class ImageWriter: bmp = BMPWriter(fp, 1, width, height) data = image.stream.get_data() i = 0 - width = (width+7)//8 + width = (width + 7) // 8 for y in range(height): - bmp.write_line(y, data[i:i+width]) + bmp.write_line(y, data[i : i + width]) i += width elif image.bits == 8 and LITERAL_DEVICE_RGB in image.colorspace: bmp = BMPWriter(fp, 24, width, height) data = image.stream.get_data() i = 0 - width = width*3 + width = width * 3 for y in range(height): - bmp.write_line(y, data[i:i+width]) + bmp.write_line(y, data[i : i + width]) i += width elif image.bits == 8 and LITERAL_DEVICE_GRAY in image.colorspace: bmp = BMPWriter(fp, 8, width, height) data = image.stream.get_data() i = 0 for y in range(height): - bmp.write_line(y, data[i:i+width]) + bmp.write_line(y, data[i : i + width]) i += width else: fp.write(image.stream.get_data()) @@ -168,43 +177,42 @@ class ImageWriter: filters = image.stream.get_filters() for filter_name, params in filters: if filter_name in LITERALS_JBIG2_DECODE: - global_streams.append(params['JBIG2Globals'].resolve()) + 
global_streams.append(params["JBIG2Globals"].resolve()) return global_streams @staticmethod def _get_image_extension( - image: LTImage, - width: int, - height: int, - is_jbig2: bool + image: LTImage, width: int, height: int, is_jbig2: bool ) -> str: filters = image.stream.get_filters() if len(filters) == 1 and filters[0][0] in LITERALS_DCT_DECODE: - ext = '.jpg' + ext = ".jpg" elif len(filters) == 1 and filters[0][0] in LITERALS_JPX_DECODE: - ext = '.jp2' + ext = ".jp2" elif is_jbig2: - ext = '.jb2' - elif (image.bits == 1 or - image.bits == 8 and - (LITERAL_DEVICE_RGB in image.colorspace or - LITERAL_DEVICE_GRAY in image.colorspace)): - ext = '.%dx%d.bmp' % (width, height) + ext = ".jb2" + elif ( + image.bits == 1 + or image.bits == 8 + and ( + LITERAL_DEVICE_RGB in image.colorspace + or LITERAL_DEVICE_GRAY in image.colorspace + ) + ): + ext = ".%dx%d.bmp" % (width, height) else: - ext = '.%d.%dx%d.img' % (image.bits, width, height) + ext = ".%d.%dx%d.img" % (image.bits, width, height) return ext @staticmethod def _create_unique_image_name( - dirname: str, - image_name: str, - ext: str + dirname: str, image_name: str, ext: str ) -> Tuple[str, str]: name = image_name + ext path = os.path.join(dirname, name) img_index = 0 while os.path.exists(path): - name = '%s.%d%s' % (image_name, img_index, ext) + name = "%s.%d%s" % (image_name, img_index, ext) path = os.path.join(dirname, name) img_index += 1 return name, path diff --git a/pdfminer/jbig2.py b/pdfminer/jbig2.py index 269b028..113ca80 100644 --- a/pdfminer/jbig2.py +++ b/pdfminer/jbig2.py @@ -19,10 +19,10 @@ HEADER_FLAG_PAGE_ASSOC_LONG = 0b01000000 SEG_TYPE_MASK = 0b00111111 REF_COUNT_SHORT_MASK = 0b11100000 -REF_COUNT_LONG_MASK = 0x1fffffff +REF_COUNT_LONG_MASK = 0x1FFFFFFF REF_COUNT_LONG = 7 -DATA_LEN_UNKNOWN = 0xffffffff +DATA_LEN_UNKNOWN = 0xFFFFFFFF # segment types SEG_TYPE_IMMEDIATE_GEN_REGION = 38 @@ -30,7 +30,7 @@ SEG_TYPE_END_OF_PAGE = 49 SEG_TYPE_END_OF_FILE = 51 # file literals -FILE_HEADER_ID = 
b'\x97\x4A\x42\x32\x0D\x0A\x1A\x0A' +FILE_HEADER_ID = b"\x97\x4A\x42\x32\x0D\x0A\x1A\x0A" FILE_HEAD_FLAG_SEQUENTIAL = 0b00000001 @@ -66,12 +66,14 @@ def unpack_int(format: str, buffer: bytes) -> int: JBIG2SegmentFlags = Dict[str, Union[int, bool]] JBIG2RetentionFlags = Dict[str, Union[int, List[int], List[bool]]] -JBIG2Segment = Dict[str, Union[bool, int, bytes, JBIG2SegmentFlags, - JBIG2RetentionFlags]] +JBIG2Segment = Dict[ + str, Union[bool, int, bytes, JBIG2SegmentFlags, JBIG2RetentionFlags] +] class JBIG2StreamReader: """Read segments from a JBIG2 byte stream""" + def __init__(self, stream: BinaryIO) -> None: self.stream = stream @@ -96,29 +98,23 @@ class JBIG2StreamReader: return segments def is_eof(self) -> bool: - if self.stream.read(1) == b'': + if self.stream.read(1) == b"": return True else: self.stream.seek(-1, os.SEEK_CUR) return False def parse_flags( - self, - segment: JBIG2Segment, - flags: int, - field: bytes + self, segment: JBIG2Segment, flags: int, field: bytes ) -> JBIG2SegmentFlags: return { "deferred": check_flag(HEADER_FLAG_DEFERRED, flags), "page_assoc_long": check_flag(HEADER_FLAG_PAGE_ASSOC_LONG, flags), - "type": masked_value(SEG_TYPE_MASK, flags) + "type": masked_value(SEG_TYPE_MASK, flags), } def parse_retention_flags( - self, - segment: JBIG2Segment, - flags: int, - field: bytes + self, segment: JBIG2Segment, flags: int, field: bytes ) -> JBIG2RetentionFlags: ref_count = masked_value(REF_COUNT_SHORT_MASK, flags) retain_segments = [] @@ -159,31 +155,23 @@ class JBIG2StreamReader: "ref_segments": ref_segments, } - def parse_page_assoc( - self, - segment: JBIG2Segment, - page: int, - field: bytes - ) -> int: + def parse_page_assoc(self, segment: JBIG2Segment, page: int, field: bytes) -> int: if cast(JBIG2SegmentFlags, segment["flags"])["page_assoc_long"]: field += self.stream.read(3) page = unpack_int(">L", field) return page def parse_data_length( - self, - segment: JBIG2Segment, - length: int, - field: bytes + self, segment: 
JBIG2Segment, length: int, field: bytes ) -> int: if length: - if (cast(JBIG2SegmentFlags, segment["flags"])["type"] == - SEG_TYPE_IMMEDIATE_GEN_REGION) \ - and (length == DATA_LEN_UNKNOWN): + if ( + cast(JBIG2SegmentFlags, segment["flags"])["type"] + == SEG_TYPE_IMMEDIATE_GEN_REGION + ) and (length == DATA_LEN_UNKNOWN): raise NotImplementedError( - "Working with unknown segment length " - "is not implemented yet" + "Working with unknown segment length " "is not implemented yet" ) else: segment["raw_data"] = self.stream.read(length) @@ -195,18 +183,16 @@ class JBIG2StreamWriter: """Write JBIG2 segments to a file in JBIG2 format""" EMPTY_RETENTION_FLAGS: JBIG2RetentionFlags = { - 'ref_count': 0, - 'ref_segments': cast(List[int], []), - 'retain_segments': cast(List[bool], []) + "ref_count": 0, + "ref_segments": cast(List[int], []), + "retain_segments": cast(List[bool], []), } def __init__(self, stream: BinaryIO) -> None: self.stream = stream def write_segments( - self, - segments: Iterable[JBIG2Segment], - fix_last_page: bool = True + self, segments: Iterable[JBIG2Segment], fix_last_page: bool = True ) -> int: data_len = 0 current_page: Optional[int] = None @@ -222,8 +208,10 @@ class JBIG2StreamWriter: if fix_last_page: seg_page = cast(int, segment.get("page_assoc")) - if cast(JBIG2SegmentFlags, segment["flags"])["type"] == \ - SEG_TYPE_END_OF_PAGE: + if ( + cast(JBIG2SegmentFlags, segment["flags"])["type"] + == SEG_TYPE_END_OF_PAGE + ): current_page = None elif seg_page: current_page = seg_page @@ -237,9 +225,7 @@ class JBIG2StreamWriter: return data_len def write_file( - self, - segments: Iterable[JBIG2Segment], - fix_last_page: bool = True + self, segments: Iterable[JBIG2Segment], fix_last_page: bool = True ) -> int: header = FILE_HEADER_ID header_flags = FILE_HEAD_FLAG_SEQUENTIAL @@ -270,7 +256,7 @@ class JBIG2StreamWriter: return data_len def encode_segment(self, segment: JBIG2Segment) -> bytes: - data = b'' + data = b"" for field_format, name in SEG_STRUCT: 
value = segment.get(name) encoder = getattr(self, "encode_%s" % name, None) @@ -281,27 +267,26 @@ class JBIG2StreamWriter: data += field return data - def encode_flags(self, value: JBIG2SegmentFlags, segment: JBIG2Segment - ) -> bytes: + def encode_flags(self, value: JBIG2SegmentFlags, segment: JBIG2Segment) -> bytes: flags = 0 if value.get("deferred"): flags |= HEADER_FLAG_DEFERRED if "page_assoc_long" in value: - flags |= HEADER_FLAG_PAGE_ASSOC_LONG \ - if value["page_assoc_long"] else flags + flags |= HEADER_FLAG_PAGE_ASSOC_LONG if value["page_assoc_long"] else flags else: - flags |= HEADER_FLAG_PAGE_ASSOC_LONG \ - if cast(int, segment.get("page", 0)) > 255 else flags + flags |= ( + HEADER_FLAG_PAGE_ASSOC_LONG + if cast(int, segment.get("page", 0)) > 255 + else flags + ) flags |= mask_value(SEG_TYPE_MASK, value["type"]) return pack(">B", flags) def encode_retention_flags( - self, - value: JBIG2RetentionFlags, - segment: JBIG2Segment + self, value: JBIG2RetentionFlags, segment: JBIG2Segment ) -> bytes: flags = [] flags_format = ">B" @@ -318,15 +303,12 @@ class JBIG2StreamWriter: else: bytes_count = math.ceil((ref_count + 1) / 8) flags_format = ">L" + ("B" * bytes_count) - flags_dword = mask_value( - REF_COUNT_SHORT_MASK, - REF_COUNT_LONG - ) << 24 + flags_dword = mask_value(REF_COUNT_SHORT_MASK, REF_COUNT_LONG) << 24 flags.append(flags_dword) for byte_index in range(bytes_count): ret_byte = 0 - ret_part = retain_segments[byte_index * 8:byte_index * 8 + 8] + ret_part = retain_segments[byte_index * 8 : byte_index * 8 + 8] for bit_pos, ret_seg in enumerate(ret_part): ret_byte |= 1 << bit_pos if ret_seg else ret_byte @@ -353,26 +335,22 @@ class JBIG2StreamWriter: data += cast(bytes, segment["raw_data"]) return data - def get_eop_segment( - self, - seg_number: int, - page_number: int - ) -> JBIG2Segment: + def get_eop_segment(self, seg_number: int, page_number: int) -> JBIG2Segment: return { - 'data_length': 0, - 'flags': {'deferred': False, 'type': 
SEG_TYPE_END_OF_PAGE}, - 'number': seg_number, - 'page_assoc': page_number, - 'raw_data': b'', - 'retention_flags': JBIG2StreamWriter.EMPTY_RETENTION_FLAGS + "data_length": 0, + "flags": {"deferred": False, "type": SEG_TYPE_END_OF_PAGE}, + "number": seg_number, + "page_assoc": page_number, + "raw_data": b"", + "retention_flags": JBIG2StreamWriter.EMPTY_RETENTION_FLAGS, } def get_eof_segment(self, seg_number: int) -> JBIG2Segment: return { - 'data_length': 0, - 'flags': {'deferred': False, 'type': SEG_TYPE_END_OF_FILE}, - 'number': seg_number, - 'page_assoc': 0, - 'raw_data': b'', - 'retention_flags': JBIG2StreamWriter.EMPTY_RETENTION_FLAGS + "data_length": 0, + "flags": {"deferred": False, "type": SEG_TYPE_END_OF_FILE}, + "number": seg_number, + "page_assoc": 0, + "raw_data": b"", + "retention_flags": JBIG2StreamWriter.EMPTY_RETENTION_FLAGS, } diff --git a/pdfminer/latin_enc.py b/pdfminer/latin_enc.py index d579aea..6238745 100644 --- a/pdfminer/latin_enc.py +++ b/pdfminer/latin_enc.py @@ -7,241 +7,240 @@ This table is extracted from PDF Reference Manual 1.6, pp.925 from typing import List, Optional, Tuple -EncodingRow = \ - Tuple[str, Optional[int], Optional[int], Optional[int], Optional[int]] +EncodingRow = Tuple[str, Optional[int], Optional[int], Optional[int], Optional[int]] ENCODING: List[EncodingRow] = [ - # (name, std, mac, win, pdf) - ('A', 65, 65, 65, 65), - ('AE', 225, 174, 198, 198), - ('Aacute', None, 231, 193, 193), - ('Acircumflex', None, 229, 194, 194), - ('Adieresis', None, 128, 196, 196), - ('Agrave', None, 203, 192, 192), - ('Aring', None, 129, 197, 197), - ('Atilde', None, 204, 195, 195), - ('B', 66, 66, 66, 66), - ('C', 67, 67, 67, 67), - ('Ccedilla', None, 130, 199, 199), - ('D', 68, 68, 68, 68), - ('E', 69, 69, 69, 69), - ('Eacute', None, 131, 201, 201), - ('Ecircumflex', None, 230, 202, 202), - ('Edieresis', None, 232, 203, 203), - ('Egrave', None, 233, 200, 200), - ('Eth', None, None, 208, 208), - ('Euro', None, None, 128, 160), - ('F', 70, 
70, 70, 70), - ('G', 71, 71, 71, 71), - ('H', 72, 72, 72, 72), - ('I', 73, 73, 73, 73), - ('Iacute', None, 234, 205, 205), - ('Icircumflex', None, 235, 206, 206), - ('Idieresis', None, 236, 207, 207), - ('Igrave', None, 237, 204, 204), - ('J', 74, 74, 74, 74), - ('K', 75, 75, 75, 75), - ('L', 76, 76, 76, 76), - ('Lslash', 232, None, None, 149), - ('M', 77, 77, 77, 77), - ('N', 78, 78, 78, 78), - ('Ntilde', None, 132, 209, 209), - ('O', 79, 79, 79, 79), - ('OE', 234, 206, 140, 150), - ('Oacute', None, 238, 211, 211), - ('Ocircumflex', None, 239, 212, 212), - ('Odieresis', None, 133, 214, 214), - ('Ograve', None, 241, 210, 210), - ('Oslash', 233, 175, 216, 216), - ('Otilde', None, 205, 213, 213), - ('P', 80, 80, 80, 80), - ('Q', 81, 81, 81, 81), - ('R', 82, 82, 82, 82), - ('S', 83, 83, 83, 83), - ('Scaron', None, None, 138, 151), - ('T', 84, 84, 84, 84), - ('Thorn', None, None, 222, 222), - ('U', 85, 85, 85, 85), - ('Uacute', None, 242, 218, 218), - ('Ucircumflex', None, 243, 219, 219), - ('Udieresis', None, 134, 220, 220), - ('Ugrave', None, 244, 217, 217), - ('V', 86, 86, 86, 86), - ('W', 87, 87, 87, 87), - ('X', 88, 88, 88, 88), - ('Y', 89, 89, 89, 89), - ('Yacute', None, None, 221, 221), - ('Ydieresis', None, 217, 159, 152), - ('Z', 90, 90, 90, 90), - ('Zcaron', None, None, 142, 153), - ('a', 97, 97, 97, 97), - ('aacute', None, 135, 225, 225), - ('acircumflex', None, 137, 226, 226), - ('acute', 194, 171, 180, 180), - ('adieresis', None, 138, 228, 228), - ('ae', 241, 190, 230, 230), - ('agrave', None, 136, 224, 224), - ('ampersand', 38, 38, 38, 38), - ('aring', None, 140, 229, 229), - ('asciicircum', 94, 94, 94, 94), - ('asciitilde', 126, 126, 126, 126), - ('asterisk', 42, 42, 42, 42), - ('at', 64, 64, 64, 64), - ('atilde', None, 139, 227, 227), - ('b', 98, 98, 98, 98), - ('backslash', 92, 92, 92, 92), - ('bar', 124, 124, 124, 124), - ('braceleft', 123, 123, 123, 123), - ('braceright', 125, 125, 125, 125), - ('bracketleft', 91, 91, 91, 91), - ('bracketright', 93, 
93, 93, 93), - ('breve', 198, 249, None, 24), - ('brokenbar', None, None, 166, 166), - ('bullet', 183, 165, 149, 128), - ('c', 99, 99, 99, 99), - ('caron', 207, 255, None, 25), - ('ccedilla', None, 141, 231, 231), - ('cedilla', 203, 252, 184, 184), - ('cent', 162, 162, 162, 162), - ('circumflex', 195, 246, 136, 26), - ('colon', 58, 58, 58, 58), - ('comma', 44, 44, 44, 44), - ('copyright', None, 169, 169, 169), - ('currency', 168, 219, 164, 164), - ('d', 100, 100, 100, 100), - ('dagger', 178, 160, 134, 129), - ('daggerdbl', 179, 224, 135, 130), - ('degree', None, 161, 176, 176), - ('dieresis', 200, 172, 168, 168), - ('divide', None, 214, 247, 247), - ('dollar', 36, 36, 36, 36), - ('dotaccent', 199, 250, None, 27), - ('dotlessi', 245, 245, None, 154), - ('e', 101, 101, 101, 101), - ('eacute', None, 142, 233, 233), - ('ecircumflex', None, 144, 234, 234), - ('edieresis', None, 145, 235, 235), - ('egrave', None, 143, 232, 232), - ('eight', 56, 56, 56, 56), - ('ellipsis', 188, 201, 133, 131), - ('emdash', 208, 209, 151, 132), - ('endash', 177, 208, 150, 133), - ('equal', 61, 61, 61, 61), - ('eth', None, None, 240, 240), - ('exclam', 33, 33, 33, 33), - ('exclamdown', 161, 193, 161, 161), - ('f', 102, 102, 102, 102), - ('fi', 174, 222, None, 147), - ('five', 53, 53, 53, 53), - ('fl', 175, 223, None, 148), - ('florin', 166, 196, 131, 134), - ('four', 52, 52, 52, 52), - ('fraction', 164, 218, None, 135), - ('g', 103, 103, 103, 103), - ('germandbls', 251, 167, 223, 223), - ('grave', 193, 96, 96, 96), - ('greater', 62, 62, 62, 62), - ('guillemotleft', 171, 199, 171, 171), - ('guillemotright', 187, 200, 187, 187), - ('guilsinglleft', 172, 220, 139, 136), - ('guilsinglright', 173, 221, 155, 137), - ('h', 104, 104, 104, 104), - ('hungarumlaut', 205, 253, None, 28), - ('hyphen', 45, 45, 45, 45), - ('i', 105, 105, 105, 105), - ('iacute', None, 146, 237, 237), - ('icircumflex', None, 148, 238, 238), - ('idieresis', None, 149, 239, 239), - ('igrave', None, 147, 236, 236), - ('j', 
106, 106, 106, 106), - ('k', 107, 107, 107, 107), - ('l', 108, 108, 108, 108), - ('less', 60, 60, 60, 60), - ('logicalnot', None, 194, 172, 172), - ('lslash', 248, None, None, 155), - ('m', 109, 109, 109, 109), - ('macron', 197, 248, 175, 175), - ('minus', None, None, None, 138), - ('mu', None, 181, 181, 181), - ('multiply', None, None, 215, 215), - ('n', 110, 110, 110, 110), - ('nbspace', None, 202, 160, None), - ('nine', 57, 57, 57, 57), - ('ntilde', None, 150, 241, 241), - ('numbersign', 35, 35, 35, 35), - ('o', 111, 111, 111, 111), - ('oacute', None, 151, 243, 243), - ('ocircumflex', None, 153, 244, 244), - ('odieresis', None, 154, 246, 246), - ('oe', 250, 207, 156, 156), - ('ogonek', 206, 254, None, 29), - ('ograve', None, 152, 242, 242), - ('one', 49, 49, 49, 49), - ('onehalf', None, None, 189, 189), - ('onequarter', None, None, 188, 188), - ('onesuperior', None, None, 185, 185), - ('ordfeminine', 227, 187, 170, 170), - ('ordmasculine', 235, 188, 186, 186), - ('oslash', 249, 191, 248, 248), - ('otilde', None, 155, 245, 245), - ('p', 112, 112, 112, 112), - ('paragraph', 182, 166, 182, 182), - ('parenleft', 40, 40, 40, 40), - ('parenright', 41, 41, 41, 41), - ('percent', 37, 37, 37, 37), - ('period', 46, 46, 46, 46), - ('periodcentered', 180, 225, 183, 183), - ('perthousand', 189, 228, 137, 139), - ('plus', 43, 43, 43, 43), - ('plusminus', None, 177, 177, 177), - ('q', 113, 113, 113, 113), - ('question', 63, 63, 63, 63), - ('questiondown', 191, 192, 191, 191), - ('quotedbl', 34, 34, 34, 34), - ('quotedblbase', 185, 227, 132, 140), - ('quotedblleft', 170, 210, 147, 141), - ('quotedblright', 186, 211, 148, 142), - ('quoteleft', 96, 212, 145, 143), - ('quoteright', 39, 213, 146, 144), - ('quotesinglbase', 184, 226, 130, 145), - ('quotesingle', 169, 39, 39, 39), - ('r', 114, 114, 114, 114), - ('registered', None, 168, 174, 174), - ('ring', 202, 251, None, 30), - ('s', 115, 115, 115, 115), - ('scaron', None, None, 154, 157), - ('section', 167, 164, 167, 167), - 
('semicolon', 59, 59, 59, 59), - ('seven', 55, 55, 55, 55), - ('six', 54, 54, 54, 54), - ('slash', 47, 47, 47, 47), - ('space', 32, 32, 32, 32), - ('space', None, 202, 160, None), - ('space', None, 202, 173, None), - ('sterling', 163, 163, 163, 163), - ('t', 116, 116, 116, 116), - ('thorn', None, None, 254, 254), - ('three', 51, 51, 51, 51), - ('threequarters', None, None, 190, 190), - ('threesuperior', None, None, 179, 179), - ('tilde', 196, 247, 152, 31), - ('trademark', None, 170, 153, 146), - ('two', 50, 50, 50, 50), - ('twosuperior', None, None, 178, 178), - ('u', 117, 117, 117, 117), - ('uacute', None, 156, 250, 250), - ('ucircumflex', None, 158, 251, 251), - ('udieresis', None, 159, 252, 252), - ('ugrave', None, 157, 249, 249), - ('underscore', 95, 95, 95, 95), - ('v', 118, 118, 118, 118), - ('w', 119, 119, 119, 119), - ('x', 120, 120, 120, 120), - ('y', 121, 121, 121, 121), - ('yacute', None, None, 253, 253), - ('ydieresis', None, 216, 255, 255), - ('yen', 165, 180, 165, 165), - ('z', 122, 122, 122, 122), - ('zcaron', None, None, 158, 158), - ('zero', 48, 48, 48, 48), + # (name, std, mac, win, pdf) + ("A", 65, 65, 65, 65), + ("AE", 225, 174, 198, 198), + ("Aacute", None, 231, 193, 193), + ("Acircumflex", None, 229, 194, 194), + ("Adieresis", None, 128, 196, 196), + ("Agrave", None, 203, 192, 192), + ("Aring", None, 129, 197, 197), + ("Atilde", None, 204, 195, 195), + ("B", 66, 66, 66, 66), + ("C", 67, 67, 67, 67), + ("Ccedilla", None, 130, 199, 199), + ("D", 68, 68, 68, 68), + ("E", 69, 69, 69, 69), + ("Eacute", None, 131, 201, 201), + ("Ecircumflex", None, 230, 202, 202), + ("Edieresis", None, 232, 203, 203), + ("Egrave", None, 233, 200, 200), + ("Eth", None, None, 208, 208), + ("Euro", None, None, 128, 160), + ("F", 70, 70, 70, 70), + ("G", 71, 71, 71, 71), + ("H", 72, 72, 72, 72), + ("I", 73, 73, 73, 73), + ("Iacute", None, 234, 205, 205), + ("Icircumflex", None, 235, 206, 206), + ("Idieresis", None, 236, 207, 207), + ("Igrave", None, 237, 204, 204), + 
("J", 74, 74, 74, 74), + ("K", 75, 75, 75, 75), + ("L", 76, 76, 76, 76), + ("Lslash", 232, None, None, 149), + ("M", 77, 77, 77, 77), + ("N", 78, 78, 78, 78), + ("Ntilde", None, 132, 209, 209), + ("O", 79, 79, 79, 79), + ("OE", 234, 206, 140, 150), + ("Oacute", None, 238, 211, 211), + ("Ocircumflex", None, 239, 212, 212), + ("Odieresis", None, 133, 214, 214), + ("Ograve", None, 241, 210, 210), + ("Oslash", 233, 175, 216, 216), + ("Otilde", None, 205, 213, 213), + ("P", 80, 80, 80, 80), + ("Q", 81, 81, 81, 81), + ("R", 82, 82, 82, 82), + ("S", 83, 83, 83, 83), + ("Scaron", None, None, 138, 151), + ("T", 84, 84, 84, 84), + ("Thorn", None, None, 222, 222), + ("U", 85, 85, 85, 85), + ("Uacute", None, 242, 218, 218), + ("Ucircumflex", None, 243, 219, 219), + ("Udieresis", None, 134, 220, 220), + ("Ugrave", None, 244, 217, 217), + ("V", 86, 86, 86, 86), + ("W", 87, 87, 87, 87), + ("X", 88, 88, 88, 88), + ("Y", 89, 89, 89, 89), + ("Yacute", None, None, 221, 221), + ("Ydieresis", None, 217, 159, 152), + ("Z", 90, 90, 90, 90), + ("Zcaron", None, None, 142, 153), + ("a", 97, 97, 97, 97), + ("aacute", None, 135, 225, 225), + ("acircumflex", None, 137, 226, 226), + ("acute", 194, 171, 180, 180), + ("adieresis", None, 138, 228, 228), + ("ae", 241, 190, 230, 230), + ("agrave", None, 136, 224, 224), + ("ampersand", 38, 38, 38, 38), + ("aring", None, 140, 229, 229), + ("asciicircum", 94, 94, 94, 94), + ("asciitilde", 126, 126, 126, 126), + ("asterisk", 42, 42, 42, 42), + ("at", 64, 64, 64, 64), + ("atilde", None, 139, 227, 227), + ("b", 98, 98, 98, 98), + ("backslash", 92, 92, 92, 92), + ("bar", 124, 124, 124, 124), + ("braceleft", 123, 123, 123, 123), + ("braceright", 125, 125, 125, 125), + ("bracketleft", 91, 91, 91, 91), + ("bracketright", 93, 93, 93, 93), + ("breve", 198, 249, None, 24), + ("brokenbar", None, None, 166, 166), + ("bullet", 183, 165, 149, 128), + ("c", 99, 99, 99, 99), + ("caron", 207, 255, None, 25), + ("ccedilla", None, 141, 231, 231), + ("cedilla", 203, 252, 
184, 184), + ("cent", 162, 162, 162, 162), + ("circumflex", 195, 246, 136, 26), + ("colon", 58, 58, 58, 58), + ("comma", 44, 44, 44, 44), + ("copyright", None, 169, 169, 169), + ("currency", 168, 219, 164, 164), + ("d", 100, 100, 100, 100), + ("dagger", 178, 160, 134, 129), + ("daggerdbl", 179, 224, 135, 130), + ("degree", None, 161, 176, 176), + ("dieresis", 200, 172, 168, 168), + ("divide", None, 214, 247, 247), + ("dollar", 36, 36, 36, 36), + ("dotaccent", 199, 250, None, 27), + ("dotlessi", 245, 245, None, 154), + ("e", 101, 101, 101, 101), + ("eacute", None, 142, 233, 233), + ("ecircumflex", None, 144, 234, 234), + ("edieresis", None, 145, 235, 235), + ("egrave", None, 143, 232, 232), + ("eight", 56, 56, 56, 56), + ("ellipsis", 188, 201, 133, 131), + ("emdash", 208, 209, 151, 132), + ("endash", 177, 208, 150, 133), + ("equal", 61, 61, 61, 61), + ("eth", None, None, 240, 240), + ("exclam", 33, 33, 33, 33), + ("exclamdown", 161, 193, 161, 161), + ("f", 102, 102, 102, 102), + ("fi", 174, 222, None, 147), + ("five", 53, 53, 53, 53), + ("fl", 175, 223, None, 148), + ("florin", 166, 196, 131, 134), + ("four", 52, 52, 52, 52), + ("fraction", 164, 218, None, 135), + ("g", 103, 103, 103, 103), + ("germandbls", 251, 167, 223, 223), + ("grave", 193, 96, 96, 96), + ("greater", 62, 62, 62, 62), + ("guillemotleft", 171, 199, 171, 171), + ("guillemotright", 187, 200, 187, 187), + ("guilsinglleft", 172, 220, 139, 136), + ("guilsinglright", 173, 221, 155, 137), + ("h", 104, 104, 104, 104), + ("hungarumlaut", 205, 253, None, 28), + ("hyphen", 45, 45, 45, 45), + ("i", 105, 105, 105, 105), + ("iacute", None, 146, 237, 237), + ("icircumflex", None, 148, 238, 238), + ("idieresis", None, 149, 239, 239), + ("igrave", None, 147, 236, 236), + ("j", 106, 106, 106, 106), + ("k", 107, 107, 107, 107), + ("l", 108, 108, 108, 108), + ("less", 60, 60, 60, 60), + ("logicalnot", None, 194, 172, 172), + ("lslash", 248, None, None, 155), + ("m", 109, 109, 109, 109), + ("macron", 197, 248, 175, 
175), + ("minus", None, None, None, 138), + ("mu", None, 181, 181, 181), + ("multiply", None, None, 215, 215), + ("n", 110, 110, 110, 110), + ("nbspace", None, 202, 160, None), + ("nine", 57, 57, 57, 57), + ("ntilde", None, 150, 241, 241), + ("numbersign", 35, 35, 35, 35), + ("o", 111, 111, 111, 111), + ("oacute", None, 151, 243, 243), + ("ocircumflex", None, 153, 244, 244), + ("odieresis", None, 154, 246, 246), + ("oe", 250, 207, 156, 156), + ("ogonek", 206, 254, None, 29), + ("ograve", None, 152, 242, 242), + ("one", 49, 49, 49, 49), + ("onehalf", None, None, 189, 189), + ("onequarter", None, None, 188, 188), + ("onesuperior", None, None, 185, 185), + ("ordfeminine", 227, 187, 170, 170), + ("ordmasculine", 235, 188, 186, 186), + ("oslash", 249, 191, 248, 248), + ("otilde", None, 155, 245, 245), + ("p", 112, 112, 112, 112), + ("paragraph", 182, 166, 182, 182), + ("parenleft", 40, 40, 40, 40), + ("parenright", 41, 41, 41, 41), + ("percent", 37, 37, 37, 37), + ("period", 46, 46, 46, 46), + ("periodcentered", 180, 225, 183, 183), + ("perthousand", 189, 228, 137, 139), + ("plus", 43, 43, 43, 43), + ("plusminus", None, 177, 177, 177), + ("q", 113, 113, 113, 113), + ("question", 63, 63, 63, 63), + ("questiondown", 191, 192, 191, 191), + ("quotedbl", 34, 34, 34, 34), + ("quotedblbase", 185, 227, 132, 140), + ("quotedblleft", 170, 210, 147, 141), + ("quotedblright", 186, 211, 148, 142), + ("quoteleft", 96, 212, 145, 143), + ("quoteright", 39, 213, 146, 144), + ("quotesinglbase", 184, 226, 130, 145), + ("quotesingle", 169, 39, 39, 39), + ("r", 114, 114, 114, 114), + ("registered", None, 168, 174, 174), + ("ring", 202, 251, None, 30), + ("s", 115, 115, 115, 115), + ("scaron", None, None, 154, 157), + ("section", 167, 164, 167, 167), + ("semicolon", 59, 59, 59, 59), + ("seven", 55, 55, 55, 55), + ("six", 54, 54, 54, 54), + ("slash", 47, 47, 47, 47), + ("space", 32, 32, 32, 32), + ("space", None, 202, 160, None), + ("space", None, 202, 173, None), + ("sterling", 163, 163, 
163, 163), + ("t", 116, 116, 116, 116), + ("thorn", None, None, 254, 254), + ("three", 51, 51, 51, 51), + ("threequarters", None, None, 190, 190), + ("threesuperior", None, None, 179, 179), + ("tilde", 196, 247, 152, 31), + ("trademark", None, 170, 153, 146), + ("two", 50, 50, 50, 50), + ("twosuperior", None, None, 178, 178), + ("u", 117, 117, 117, 117), + ("uacute", None, 156, 250, 250), + ("ucircumflex", None, 158, 251, 251), + ("udieresis", None, 159, 252, 252), + ("ugrave", None, 157, 249, 249), + ("underscore", 95, 95, 95, 95), + ("v", 118, 118, 118, 118), + ("w", 119, 119, 119, 119), + ("x", 120, 120, 120, 120), + ("y", 121, 121, 121, 121), + ("yacute", None, None, 253, 253), + ("ydieresis", None, 216, 255, 255), + ("yen", 165, 180, 165, 165), + ("z", 122, 122, 122, 122), + ("zcaron", None, None, 158, 158), + ("zero", 48, 48, 48, 48), ] diff --git a/pdfminer/layout.py b/pdfminer/layout.py index 778d324..3b84ce6 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -1,7 +1,19 @@ import heapq import logging -from typing import (Dict, Generic, Iterable, Iterator, List, Optional, - Sequence, Set, Tuple, TypeVar, Union, cast) +from typing import ( + Dict, + Generic, + Iterable, + Iterator, + List, + Optional, + Sequence, + Set, + Tuple, + TypeVar, + Union, + cast, +) from .pdfcolor import PDFColorSpace from .pdffont import PDFFont @@ -25,7 +37,6 @@ logger = logging.getLogger(__name__) class IndexAssigner: - def __init__(self, index: int = 0) -> None: self.index = index @@ -74,7 +85,7 @@ class LAParams: word_margin: float = 0.1, boxes_flow: Optional[float] = 0.5, detect_vertical: bool = False, - all_texts: bool = False + all_texts: bool = False, ) -> None: self.line_overlap = line_overlap self.char_margin = char_margin @@ -88,19 +99,22 @@ class LAParams: def _validate(self) -> None: if self.boxes_flow is not None: - boxes_flow_err_msg = ("LAParam boxes_flow should be None, or a " - "number between -1 and +1") - if not (isinstance(self.boxes_flow, int) or - 
isinstance(self.boxes_flow, float)): + boxes_flow_err_msg = ( + "LAParam boxes_flow should be None, or a " "number between -1 and +1" + ) + if not ( + isinstance(self.boxes_flow, int) or isinstance(self.boxes_flow, float) + ): raise TypeError(boxes_flow_err_msg) if not -1 <= self.boxes_flow <= 1: raise ValueError(boxes_flow_err_msg) def __repr__(self) -> str: - return '' % \ - (self.char_margin, self.line_margin, self.word_margin, - self.all_texts) + return ( + "" + % (self.char_margin, self.line_margin, self.word_margin, self.all_texts) + ) class LTItem: @@ -115,8 +129,7 @@ class LTText: """Interface for things that have text""" def __repr__(self) -> str: - return ('<%s %r>' % - (self.__class__.__name__, self.get_text())) + return "<%s %r>" % (self.__class__.__name__, self.get_text()) def get_text(self) -> str: """Text contained in this object""" @@ -131,8 +144,7 @@ class LTComponent(LTItem): self.set_bbox(bbox) def __repr__(self) -> str: - return ('<%s %s>' % - (self.__class__.__name__, bbox2str(self.bbox))) + return "<%s %s>" % (self.__class__.__name__, bbox2str(self.bbox)) # Disable comparison. 
def __lt__(self, _: object) -> bool: @@ -153,8 +165,8 @@ class LTComponent(LTItem): self.y0 = y0 self.x1 = x1 self.y1 = y1 - self.width = x1-x0 - self.height = y1-y0 + self.width = x1 - x0 + self.height = y1 - y0 self.bbox = bbox def is_empty(self) -> bool: @@ -169,12 +181,12 @@ class LTComponent(LTItem): if self.is_hoverlap(obj): return 0 else: - return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0)) + return min(abs(self.x0 - obj.x1), abs(self.x1 - obj.x0)) def hoverlap(self, obj: "LTComponent") -> float: assert isinstance(obj, LTComponent), str(type(obj)) if self.is_hoverlap(obj): - return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0)) + return min(abs(self.x0 - obj.x1), abs(self.x1 - obj.x0)) else: return 0 @@ -187,12 +199,12 @@ class LTComponent(LTItem): if self.is_voverlap(obj): return 0 else: - return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0)) + return min(abs(self.y0 - obj.y1), abs(self.y1 - obj.y0)) def voverlap(self, obj: "LTComponent") -> float: assert isinstance(obj, LTComponent), str(type(obj)) if self.is_voverlap(obj): - return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0)) + return min(abs(self.y0 - obj.y1), abs(self.y1 - obj.y0)) else: return 0 @@ -208,7 +220,7 @@ class LTCurve(LTComponent): fill: bool = False, evenodd: bool = False, stroking_color: Optional[Color] = None, - non_stroking_color: Optional[Color] = None + non_stroking_color: Optional[Color] = None, ) -> None: LTComponent.__init__(self, get_bound(pts)) self.pts = pts @@ -220,7 +232,7 @@ class LTCurve(LTComponent): self.non_stroking_color = non_stroking_color def get_pts(self) -> str: - return ','.join('%.3f,%.3f' % p for p in self.pts) + return ",".join("%.3f,%.3f" % p for p in self.pts) class LTLine(LTCurve): @@ -238,10 +250,18 @@ class LTLine(LTCurve): fill: bool = False, evenodd: bool = False, stroking_color: Optional[Color] = None, - non_stroking_color: Optional[Color] = None + non_stroking_color: Optional[Color] = None, ) -> None: - LTCurve.__init__(self, linewidth, [p0, p1], 
stroke, fill, evenodd, - stroking_color, non_stroking_color) + LTCurve.__init__( + self, + linewidth, + [p0, p1], + stroke, + fill, + evenodd, + stroking_color, + non_stroking_color, + ) class LTRect(LTCurve): @@ -258,12 +278,19 @@ class LTRect(LTCurve): fill: bool = False, evenodd: bool = False, stroking_color: Optional[Color] = None, - non_stroking_color: Optional[Color] = None + non_stroking_color: Optional[Color] = None, ) -> None: (x0, y0, x1, y1) = bbox - LTCurve.__init__(self, linewidth, - [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke, - fill, evenodd, stroking_color, non_stroking_color) + LTCurve.__init__( + self, + linewidth, + [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], + stroke, + fill, + evenodd, + stroking_color, + non_stroking_color, + ) class LTImage(LTComponent): @@ -276,18 +303,20 @@ class LTImage(LTComponent): LTComponent.__init__(self, bbox) self.name = name self.stream = stream - self.srcsize = (stream.get_any(('W', 'Width')), - stream.get_any(('H', 'Height'))) - self.imagemask = stream.get_any(('IM', 'ImageMask')) - self.bits = stream.get_any(('BPC', 'BitsPerComponent'), 1) - self.colorspace = stream.get_any(('CS', 'ColorSpace')) + self.srcsize = (stream.get_any(("W", "Width")), stream.get_any(("H", "Height"))) + self.imagemask = stream.get_any(("IM", "ImageMask")) + self.bits = stream.get_any(("BPC", "BitsPerComponent"), 1) + self.colorspace = stream.get_any(("CS", "ColorSpace")) if not isinstance(self.colorspace, list): self.colorspace = [self.colorspace] def __repr__(self) -> str: - return ('<%s(%s) %s %r>' % - (self.__class__.__name__, self.name, - bbox2str(self.bbox), self.srcsize)) + return "<%s(%s) %s %r>" % ( + self.__class__.__name__, + self.name, + bbox2str(self.bbox), + self.srcsize, + ) class LTAnno(LTItem, LTText): @@ -320,7 +349,7 @@ class LTChar(LTComponent, LTText): textwidth: float, textdisp: Union[float, Tuple[Optional[float], float]], ncs: PDFColorSpace, - graphicstate: PDFGraphicState + graphicstate: PDFGraphicState, ) -> 
None: LTText.__init__(self) self._text = text @@ -337,8 +366,8 @@ class LTChar(LTComponent, LTText): if vx is None: vx = fontsize * 0.5 else: - vx = vx * fontsize * .001 - vy = (1000 - vy) * fontsize * .001 + vx = vx * fontsize * 0.001 + vy = (1000 - vy) * fontsize * 0.001 bbox_lower_left = (-vx, vy + rise + self.adv) bbox_upper_right = (-vx + fontsize, vy + rise) else: @@ -347,7 +376,7 @@ class LTChar(LTComponent, LTText): bbox_lower_left = (0, descent + rise) bbox_upper_right = (self.adv, descent + rise + fontsize) (a, b, c, d, e, f) = self.matrix - self.upright = (0 < a*d*scaling and b*c <= 0) + self.upright = 0 < a * d * scaling and b * c <= 0 (x0, y0) = apply_matrix_pt(self.matrix, bbox_lower_left) (x1, y1) = apply_matrix_pt(self.matrix, bbox_upper_right) if x1 < x0: @@ -362,10 +391,14 @@ class LTChar(LTComponent, LTText): return def __repr__(self) -> str: - return ('<%s %s matrix=%s font=%r adv=%s text=%r>' % - (self.__class__.__name__, bbox2str(self.bbox), - matrix2str(self.matrix), self.fontname, self.adv, - self.get_text())) + return "<%s %s matrix=%s font=%r adv=%s text=%r>" % ( + self.__class__.__name__, + bbox2str(self.bbox), + matrix2str(self.matrix), + self.fontname, + self.adv, + self.get_text(), + ) def get_text(self) -> str: return self._text @@ -375,7 +408,7 @@ class LTChar(LTComponent, LTText): return True -LTItemT = TypeVar('LTItemT', bound=LTItem) +LTItemT = TypeVar("LTItemT", bound=LTItem) class LTContainer(LTComponent, Generic[LTItemT]): @@ -416,8 +449,14 @@ class LTExpandableContainer(LTContainer[LTItemT]): # super() LTContainer only considers LTItem (no bounding box). 
def add(self, obj: LTComponent) -> None: # type: ignore[override] LTContainer.add(self, cast(LTItemT, obj)) - self.set_bbox((min(self.x0, obj.x0), min(self.y0, obj.y0), - max(self.x1, obj.x1), max(self.y1, obj.y1))) + self.set_bbox( + ( + min(self.x0, obj.x0), + min(self.y0, obj.y0), + max(self.x1, obj.x1), + max(self.y1, obj.y1), + ) + ) return @@ -428,8 +467,9 @@ class LTTextContainer(LTExpandableContainer[LTItemT], LTText): return def get_text(self) -> str: - return ''.join(cast(LTText, obj).get_text() for obj in self - if isinstance(obj, LTText)) + return "".join( + cast(LTText, obj).get_text() for obj in self if isinstance(obj, LTText) + ) TextLineElement = Union[LTChar, LTAnno] @@ -448,17 +488,20 @@ class LTTextLine(LTTextContainer[TextLineElement]): return def __repr__(self) -> str: - return ('<%s %s %r>' % - (self.__class__.__name__, bbox2str(self.bbox), - self.get_text())) + return "<%s %s %r>" % ( + self.__class__.__name__, + bbox2str(self.bbox), + self.get_text(), + ) def analyze(self, laparams: LAParams) -> None: LTTextContainer.analyze(self, laparams) - LTContainer.add(self, LTAnno('\n')) + LTContainer.add(self, LTAnno("\n")) return - def find_neighbors(self, plane: Plane[LTComponentT], ratio: float - ) -> List["LTTextLine"]: + def find_neighbors( + self, plane: Plane[LTComponentT], ratio: float + ) -> List["LTTextLine"]: raise NotImplementedError @@ -474,15 +517,13 @@ class LTTextLineHorizontal(LTTextLine): if isinstance(obj, LTChar) and self.word_margin: margin = self.word_margin * max(obj.width, obj.height) if self._x1 < obj.x0 - margin: - LTContainer.add(self, LTAnno(' ')) + LTContainer.add(self, LTAnno(" ")) self._x1 = obj.x1 super().add(obj) return def find_neighbors( - self, - plane: Plane[LTComponentT], - ratio: float + self, plane: Plane[LTComponentT], ratio: float ) -> List[LTTextLine]: """ Finds neighboring LTTextLineHorizontals in the plane. 
@@ -494,49 +535,41 @@ class LTTextLineHorizontal(LTTextLine): """ d = ratio * self.height objs = plane.find((self.x0, self.y0 - d, self.x1, self.y1 + d)) - return [obj for obj in objs - if (isinstance(obj, LTTextLineHorizontal) and - self._is_same_height_as(obj, tolerance=d) and - (self._is_left_aligned_with(obj, tolerance=d) or - self._is_right_aligned_with(obj, tolerance=d) or - self._is_centrally_aligned_with(obj, tolerance=d)))] + return [ + obj + for obj in objs + if ( + isinstance(obj, LTTextLineHorizontal) + and self._is_same_height_as(obj, tolerance=d) + and ( + self._is_left_aligned_with(obj, tolerance=d) + or self._is_right_aligned_with(obj, tolerance=d) + or self._is_centrally_aligned_with(obj, tolerance=d) + ) + ) + ] - def _is_left_aligned_with( - self, - other: LTComponent, - tolerance: float = 0 - ) -> bool: + def _is_left_aligned_with(self, other: LTComponent, tolerance: float = 0) -> bool: """ Whether the left-hand edge of `other` is within `tolerance`. """ return abs(other.x0 - self.x0) <= tolerance - def _is_right_aligned_with( - self, - other: LTComponent, - tolerance: float = 0 - ) -> bool: + def _is_right_aligned_with(self, other: LTComponent, tolerance: float = 0) -> bool: """ Whether the right-hand edge of `other` is within `tolerance`. """ return abs(other.x1 - self.x1) <= tolerance def _is_centrally_aligned_with( - self, - other: LTComponent, - tolerance: float = 0 + self, other: LTComponent, tolerance: float = 0 ) -> bool: """ Whether the horizontal center of `other` is within `tolerance`. 
""" - return abs( - (other.x0 + other.x1) / 2 - (self.x0 + self.x1) / 2) <= tolerance + return abs((other.x0 + other.x1) / 2 - (self.x0 + self.x1) / 2) <= tolerance - def _is_same_height_as( - self, - other: LTComponent, - tolerance: float = 0 - ) -> bool: + def _is_same_height_as(self, other: LTComponent, tolerance: float = 0) -> bool: return abs(other.height - self.height) <= tolerance @@ -552,15 +585,13 @@ class LTTextLineVertical(LTTextLine): if isinstance(obj, LTChar) and self.word_margin: margin = self.word_margin * max(obj.width, obj.height) if obj.y1 + margin < self._y0: - LTContainer.add(self, LTAnno(' ')) + LTContainer.add(self, LTAnno(" ")) self._y0 = obj.y0 super().add(obj) return def find_neighbors( - self, - plane: Plane[LTComponentT], - ratio: float + self, plane: Plane[LTComponentT], ratio: float ) -> List[LTTextLine]: """ Finds neighboring LTTextLineVerticals in the plane. @@ -572,43 +603,39 @@ class LTTextLineVertical(LTTextLine): """ d = ratio * self.width objs = plane.find((self.x0 - d, self.y0, self.x1 + d, self.y1)) - return [obj for obj in objs - if (isinstance(obj, LTTextLineVertical) and - self._is_same_width_as(obj, tolerance=d) and - (self._is_lower_aligned_with(obj, tolerance=d) or - self._is_upper_aligned_with(obj, tolerance=d) or - self._is_centrally_aligned_with(obj, tolerance=d)))] + return [ + obj + for obj in objs + if ( + isinstance(obj, LTTextLineVertical) + and self._is_same_width_as(obj, tolerance=d) + and ( + self._is_lower_aligned_with(obj, tolerance=d) + or self._is_upper_aligned_with(obj, tolerance=d) + or self._is_centrally_aligned_with(obj, tolerance=d) + ) + ) + ] - def _is_lower_aligned_with( - self, - other: LTComponent, - tolerance: float = 0 - ) -> bool: + def _is_lower_aligned_with(self, other: LTComponent, tolerance: float = 0) -> bool: """ Whether the lower edge of `other` is within `tolerance`. 
""" return abs(other.y0 - self.y0) <= tolerance - def _is_upper_aligned_with( - self, - other: LTComponent, - tolerance: float = 0 - ) -> bool: + def _is_upper_aligned_with(self, other: LTComponent, tolerance: float = 0) -> bool: """ Whether the upper edge of `other` is within `tolerance`. """ return abs(other.y1 - self.y1) <= tolerance def _is_centrally_aligned_with( - self, - other: LTComponent, - tolerance: float = 0 + self, other: LTComponent, tolerance: float = 0 ) -> bool: """ Whether the vertical center of `other` is within `tolerance`. """ - return abs( - (other.y0 + other.y1) / 2 - (self.y0 + self.y1) / 2) <= tolerance + return abs((other.y0 + other.y1) / 2 - (self.y0 + self.y1) / 2) <= tolerance def _is_same_width_as(self, other: LTComponent, tolerance: float) -> bool: return abs(other.width - self.width) <= tolerance @@ -628,9 +655,12 @@ class LTTextBox(LTTextContainer[LTTextLine]): return def __repr__(self) -> str: - return ('<%s(%s) %s %r>' % - (self.__class__.__name__, - self.index, bbox2str(self.bbox), self.get_text())) + return "<%s(%s) %s %r>" % ( + self.__class__.__name__, + self.index, + bbox2str(self.bbox), + self.get_text(), + ) def get_writing_mode(self) -> str: raise NotImplementedError @@ -643,7 +673,7 @@ class LTTextBoxHorizontal(LTTextBox): return def get_writing_mode(self) -> str: - return 'lr-tb' + return "lr-tb" class LTTextBoxVertical(LTTextBox): @@ -653,7 +683,7 @@ class LTTextBoxVertical(LTTextBox): return def get_writing_mode(self) -> str: - return 'tb-rl' + return "tb-rl" TextGroupElement = Union[LTTextBox, "LTTextGroup"] @@ -674,7 +704,8 @@ class LTTextGroupLRTB(LTTextGroup): # reorder the objects from top-left to bottom-right. self._objs.sort( key=lambda obj: (1 - boxes_flow) * obj.x0 - - (1 + boxes_flow) * (obj.y0 + obj.y1)) + - (1 + boxes_flow) * (obj.y0 + obj.y1) + ) return @@ -685,8 +716,9 @@ class LTTextGroupTBRL(LTTextGroup): boxes_flow = laparams.boxes_flow # reorder the objects from top-right to bottom-left. 
self._objs.sort( - key=lambda obj: - (1 + boxes_flow) * (obj.x0 + obj.x1) - - (1 - boxes_flow) * obj.y1) + key=lambda obj: -(1 + boxes_flow) * (obj.x0 + obj.x1) + - (1 - boxes_flow) * obj.y1 + ) return @@ -698,9 +730,7 @@ class LTLayoutContainer(LTContainer[LTComponent]): # group_objects: group text object to textlines. def group_objects( - self, - laparams: LAParams, - objs: Iterable[LTComponent] + self, laparams: LAParams, objs: Iterable[LTComponent] ) -> Iterator[LTTextLine]: obj0 = None line = None @@ -716,13 +746,14 @@ class LTLayoutContainer(LTContainer[LTComponent]): # # |<--->| # (char_margin) - halign = \ - obj0.is_compatible(obj1) \ - and obj0.is_voverlap(obj1) \ - and min(obj0.height, obj1.height) * laparams.line_overlap \ - < obj0.voverlap(obj1) \ - and obj0.hdistance(obj1) \ + halign = ( + obj0.is_compatible(obj1) + and obj0.is_voverlap(obj1) + and min(obj0.height, obj1.height) * laparams.line_overlap + < obj0.voverlap(obj1) + and obj0.hdistance(obj1) < max(obj0.width, obj1.width) * laparams.char_margin + ) # valign: obj0 and obj1 is vertically aligned. 
# @@ -738,17 +769,19 @@ class LTLayoutContainer(LTContainer[LTComponent]): # # |<-->| # (line_overlap) - valign = \ - laparams.detect_vertical \ - and obj0.is_compatible(obj1) \ - and obj0.is_hoverlap(obj1) \ - and min(obj0.width, obj1.width) * laparams.line_overlap \ - < obj0.hoverlap(obj1) \ - and obj0.vdistance(obj1) \ + valign = ( + laparams.detect_vertical + and obj0.is_compatible(obj1) + and obj0.is_hoverlap(obj1) + and min(obj0.width, obj1.width) * laparams.line_overlap + < obj0.hoverlap(obj1) + and obj0.vdistance(obj1) < max(obj0.height, obj1.height) * laparams.char_margin + ) - if ((halign and isinstance(line, LTTextLineHorizontal)) or - (valign and isinstance(line, LTTextLineVertical))): + if (halign and isinstance(line, LTTextLineHorizontal)) or ( + valign and isinstance(line, LTTextLineVertical) + ): line.add(obj1) elif line is not None: @@ -777,9 +810,7 @@ class LTLayoutContainer(LTContainer[LTComponent]): return def group_textlines( - self, - laparams: LAParams, - lines: Iterable[LTTextLine] + self, laparams: LAParams, lines: Iterable[LTTextLine] ) -> Iterator[LTTextBox]: """Group neighboring lines to textboxes""" plane: Plane[LTTextLine] = Plane(self.bbox) @@ -812,9 +843,7 @@ class LTLayoutContainer(LTContainer[LTComponent]): return def group_textboxes( - self, - laparams: LAParams, - boxes: Sequence[LTTextBox] + self, laparams: LAParams, boxes: Sequence[LTTextBox] ) -> List[LTTextGroup]: """Group textboxes hierarchically. 
@@ -853,8 +882,11 @@ class LTLayoutContainer(LTContainer[LTComponent]): y0 = min(obj1.y0, obj2.y0) x1 = max(obj1.x1, obj2.x1) y1 = max(obj1.y1, obj2.y1) - return (x1 - x0) * (y1 - y0) \ - - obj1.width*obj1.height - obj2.width*obj2.height + return ( + (x1 - x0) * (y1 - y0) + - obj1.width * obj1.height + - obj2.width * obj2.height + ) def isany(obj1: ElementT, obj2: ElementT) -> Set[ElementT]: """Check if there's any other object between obj1 and obj2.""" @@ -868,10 +900,9 @@ class LTLayoutContainer(LTContainer[LTComponent]): dists: List[Tuple[bool, float, int, int, ElementT, ElementT]] = [] for i in range(len(boxes)): box1 = boxes[i] - for j in range(i+1, len(boxes)): + for j in range(i + 1, len(boxes)): box2 = boxes[j] - dists.append((False, dist(box1, box2), id(box1), id(box2), - box1, box2)) + dists.append((False, dist(box1, box2), id(box1), id(box2), box1, box2)) heapq.heapify(dists) plane.extend(boxes) @@ -883,8 +914,9 @@ class LTLayoutContainer(LTContainer[LTComponent]): if not skip_isany and isany(obj1, obj2): heapq.heappush(dists, (True, d, id1, id2, obj1, obj2)) continue - if isinstance(obj1, (LTTextBoxVertical, LTTextGroupTBRL)) or \ - isinstance(obj2, (LTTextBoxVertical, LTTextGroupTBRL)): + if isinstance(obj1, (LTTextBoxVertical, LTTextGroupTBRL)) or isinstance( + obj2, (LTTextBoxVertical, LTTextGroupTBRL) + ): group: LTTextGroup = LTTextGroupTBRL([obj1, obj2]) else: group = LTTextGroupLRTB([obj1, obj2]) @@ -893,8 +925,10 @@ class LTLayoutContainer(LTContainer[LTComponent]): done.update([id1, id2]) for other in plane: - heapq.heappush(dists, (False, dist(group, other), - id(group), id(other), group, other)) + heapq.heappush( + dists, + (False, dist(group, other), id(group), id(other), group, other), + ) plane.add(group) # By now only groups are in the plane return list(cast(LTTextGroup, g) for g in plane) @@ -902,8 +936,7 @@ class LTLayoutContainer(LTContainer[LTComponent]): def analyze(self, laparams: LAParams) -> None: # textobjs is a list of LTChar 
objects, i.e. # it has all the individual characters in the page. - (textobjs, otherobjs) = fsplit(lambda obj: isinstance(obj, LTChar), - self) + (textobjs, otherobjs) = fsplit(lambda obj: isinstance(obj, LTChar), self) for obj in otherobjs: obj.analyze(laparams) if not textobjs: @@ -922,6 +955,7 @@ class LTLayoutContainer(LTContainer[LTComponent]): return (0, -box.x1, -box.y0) else: return (1, -box.y0, box.x0) + textboxes.sort(key=getkey) else: self.groups = self.group_textboxes(laparams, textboxes) @@ -930,8 +964,11 @@ class LTLayoutContainer(LTContainer[LTComponent]): group.analyze(laparams) assigner.run(group) textboxes.sort(key=lambda box: box.index) - self._objs = (cast(List[LTComponent], textboxes) + otherobjs - + cast(List[LTComponent], empties)) + self._objs = ( + cast(List[LTComponent], textboxes) + + otherobjs + + cast(List[LTComponent], empties) + ) return @@ -953,9 +990,12 @@ class LTFigure(LTLayoutContainer): return def __repr__(self) -> str: - return ('<%s(%s) %s matrix=%s>' % - (self.__class__.__name__, self.name, - bbox2str(self.bbox), matrix2str(self.matrix))) + return "<%s(%s) %s matrix=%s>" % ( + self.__class__.__name__, + self.name, + bbox2str(self.bbox), + matrix2str(self.matrix), + ) def analyze(self, laparams: LAParams) -> None: if not laparams.all_texts: @@ -978,6 +1018,9 @@ class LTPage(LTLayoutContainer): return def __repr__(self) -> str: - return ('<%s(%r) %s rotate=%r>' % - (self.__class__.__name__, self.pageid, - bbox2str(self.bbox), self.rotate)) + return "<%s(%r) %s rotate=%r>" % ( + self.__class__.__name__, + self.pageid, + bbox2str(self.bbox), + self.rotate, + ) diff --git a/pdfminer/lzw.py b/pdfminer/lzw.py index 3e59f6c..215e9ef 100644 --- a/pdfminer/lzw.py +++ b/pdfminer/lzw.py @@ -10,7 +10,6 @@ class CorruptDataError(Exception): class LZWDecoder: - def __init__(self, fp: BinaryIO) -> None: self.fp = fp self.buff = 0 @@ -24,19 +23,19 @@ class LZWDecoder: v = 0 while 1: # the number of remaining bits we can get from the current 
buffer. - r = 8-self.bpos + r = 8 - self.bpos if bits <= r: # |-----8-bits-----| # |-bpos-|-bits-| | # | |----r----| - v = (v << bits) | ((self.buff >> (r-bits)) & ((1 << bits)-1)) + v = (v << bits) | ((self.buff >> (r - bits)) & ((1 << bits) - 1)) self.bpos += bits break else: # |-----8-bits-----| # |-bpos-|---bits----... # | |----r----| - v = (v << r) | (self.buff & ((1 << r)-1)) + v = (v << r) | (self.buff & ((1 << r) - 1)) bits -= r x = self.fp.read(1) if not x: @@ -46,12 +45,12 @@ class LZWDecoder: return v def feed(self, code: int) -> bytes: - x = b'' + x = b"" if code == 256: self.table = [bytes((c,)) for c in range(256)] # 0-255 self.table.append(None) # 256 self.table.append(None) # 257 - self.prevbuf = b'' + self.prevbuf = b"" self.nbits = 9 elif code == 257: pass @@ -62,9 +61,9 @@ class LZWDecoder: assert self.table is not None if code < len(self.table): x = cast(bytes, self.table[code]) # assume not None - self.table.append(self.prevbuf+x[:1]) + self.table.append(self.prevbuf + x[:1]) elif code == len(self.table): - self.table.append(self.prevbuf+self.prevbuf[:1]) + self.table.append(self.prevbuf + self.prevbuf[:1]) x = cast(bytes, self.table[code]) else: raise CorruptDataError @@ -91,11 +90,13 @@ class LZWDecoder: break yield x assert self.table is not None - logger.debug('nbits=%d, code=%d, output=%r, table=%r' - % (self.nbits, code, x, self.table[258:])) + logger.debug( + "nbits=%d, code=%d, output=%r, table=%r" + % (self.nbits, code, x, self.table[258:]) + ) def lzwdecode(data: bytes) -> bytes: fp = BytesIO(data) s = LZWDecoder(fp).run() - return b''.join(s) + return b"".join(s) diff --git a/pdfminer/pdfcolor.py b/pdfminer/pdfcolor.py index 6059056..81319e3 100644 --- a/pdfminer/pdfcolor.py +++ b/pdfminer/pdfcolor.py @@ -3,33 +3,31 @@ from typing import Dict from .psparser import LIT -LITERAL_DEVICE_GRAY = LIT('DeviceGray') -LITERAL_DEVICE_RGB = LIT('DeviceRGB') -LITERAL_DEVICE_CMYK = LIT('DeviceCMYK') +LITERAL_DEVICE_GRAY = LIT("DeviceGray") 
+LITERAL_DEVICE_RGB = LIT("DeviceRGB") +LITERAL_DEVICE_CMYK = LIT("DeviceCMYK") class PDFColorSpace: - def __init__(self, name: str, ncomponents: int) -> None: self.name = name self.ncomponents = ncomponents def __repr__(self) -> str: - return '' % \ - (self.name, self.ncomponents) + return "" % (self.name, self.ncomponents) PREDEFINED_COLORSPACE: Dict[str, PDFColorSpace] = collections.OrderedDict() for (name, n) in [ - ('DeviceGray', 1), # default value first - ('CalRGB', 3), - ('CalGray', 1), - ('Lab', 3), - ('DeviceRGB', 3), - ('DeviceCMYK', 4), - ('Separation', 1), - ('Indexed', 1), - ('Pattern', 1), + ("DeviceGray", 1), # default value first + ("CalRGB", 3), + ("CalGray", 1), + ("Lab", 3), + ("DeviceRGB", 3), + ("DeviceCMYK", 4), + ("Separation", 1), + ("Indexed", 1), + ("Pattern", 1), ]: PREDEFINED_COLORSPACE[name] = PDFColorSpace(name, n) diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index 2053ccc..075585f 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -1,5 +1,13 @@ -from typing import (BinaryIO, Iterable, List, Optional, Sequence, - TYPE_CHECKING, Union, cast) +from typing import ( + BinaryIO, + Iterable, + List, + Optional, + Sequence, + TYPE_CHECKING, + Union, + cast, +) from pdfminer.psparser import PSLiteral from . 
import utils @@ -21,25 +29,19 @@ PDFTextSeq = Iterable[Union[int, float, bytes]] class PDFDevice: - """Translate the output of PDFPageInterpreter to the output that is needed - """ + """Translate the output of PDFPageInterpreter to the output that is needed""" def __init__(self, rsrcmgr: "PDFResourceManager") -> None: self.rsrcmgr = rsrcmgr self.ctm: Optional[Matrix] = None def __repr__(self) -> str: - return '' + return "" def __enter__(self) -> "PDFDevice": return self - def __exit__( - self, - exc_type: object, - exc_val: object, - exc_tb: object - ) -> None: + def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None: self.close() def close(self) -> None: @@ -48,21 +50,13 @@ class PDFDevice: def set_ctm(self, ctm: Matrix) -> None: self.ctm = ctm - def begin_tag( - self, - tag: PSLiteral, - props: Optional["PDFStackT"] = None - ) -> None: + def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None: pass def end_tag(self) -> None: pass - def do_tag( - self, - tag: PSLiteral, - props: Optional["PDFStackT"] = None - ) -> None: + def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None: pass def begin_page(self, page: PDFPage, ctm: Matrix) -> None: @@ -83,7 +77,7 @@ class PDFDevice: stroke: bool, fill: bool, evenodd: bool, - path: Sequence[PathSegment] + path: Sequence[PathSegment], ) -> None: pass @@ -95,42 +89,61 @@ class PDFDevice: textstate: "PDFTextState", seq: PDFTextSeq, ncs: PDFColorSpace, - graphicstate: "PDFGraphicState" + graphicstate: "PDFGraphicState", ) -> None: pass class PDFTextDevice(PDFDevice): - def render_string( self, textstate: "PDFTextState", seq: PDFTextSeq, ncs: PDFColorSpace, - graphicstate: "PDFGraphicState" + graphicstate: "PDFGraphicState", ) -> None: assert self.ctm is not None matrix = utils.mult_matrix(textstate.matrix, self.ctm) font = textstate.font fontsize = textstate.fontsize - scaling = textstate.scaling * .01 + scaling = textstate.scaling * 0.01 charspace = 
textstate.charspace * scaling wordspace = textstate.wordspace * scaling rise = textstate.rise assert font is not None if font.is_multibyte(): wordspace = 0 - dxscale = .001 * fontsize * scaling + dxscale = 0.001 * fontsize * scaling if font.is_vertical(): textstate.linematrix = self.render_string_vertical( - seq, matrix, textstate.linematrix, font, fontsize, - scaling, charspace, wordspace, rise, dxscale, ncs, - graphicstate) + seq, + matrix, + textstate.linematrix, + font, + fontsize, + scaling, + charspace, + wordspace, + rise, + dxscale, + ncs, + graphicstate, + ) else: textstate.linematrix = self.render_string_horizontal( - seq, matrix, textstate.linematrix, font, fontsize, - scaling, charspace, wordspace, rise, dxscale, ncs, - graphicstate) + seq, + matrix, + textstate.linematrix, + font, + fontsize, + scaling, + charspace, + wordspace, + rise, + dxscale, + ncs, + graphicstate, + ) def render_string_horizontal( self, @@ -145,21 +158,28 @@ class PDFTextDevice(PDFDevice): rise: float, dxscale: float, ncs: PDFColorSpace, - graphicstate: "PDFGraphicState" + graphicstate: "PDFGraphicState", ) -> Point: (x, y) = pos needcharspace = False for obj in seq: if isinstance(obj, (int, float)): - x -= obj*dxscale + x -= obj * dxscale needcharspace = True else: for cid in font.decode(obj): if needcharspace: x += charspace x += self.render_char( - utils.translate_matrix(matrix, (x, y)), font, - fontsize, scaling, rise, cid, ncs, graphicstate) + utils.translate_matrix(matrix, (x, y)), + font, + fontsize, + scaling, + rise, + cid, + ncs, + graphicstate, + ) if cid == 32 and wordspace: x += wordspace needcharspace = True @@ -178,21 +198,28 @@ class PDFTextDevice(PDFDevice): rise: float, dxscale: float, ncs: PDFColorSpace, - graphicstate: "PDFGraphicState" + graphicstate: "PDFGraphicState", ) -> Point: (x, y) = pos needcharspace = False for obj in seq: if isinstance(obj, (int, float)): - y -= obj*dxscale + y -= obj * dxscale needcharspace = True else: for cid in font.decode(obj): 
if needcharspace: y += charspace y += self.render_char( - utils.translate_matrix(matrix, (x, y)), font, fontsize, - scaling, rise, cid, ncs, graphicstate) + utils.translate_matrix(matrix, (x, y)), + font, + fontsize, + scaling, + rise, + cid, + ncs, + graphicstate, + ) if cid == 32 and wordspace: y += wordspace needcharspace = True @@ -207,18 +234,14 @@ class PDFTextDevice(PDFDevice): rise: float, cid: int, ncs: PDFColorSpace, - graphicstate: "PDFGraphicState" + graphicstate: "PDFGraphicState", ) -> float: return 0 class TagExtractor(PDFDevice): - def __init__( - self, - rsrcmgr: "PDFResourceManager", - outfp: BinaryIO, - codec: str = 'utf-8' + self, rsrcmgr: "PDFResourceManager", outfp: BinaryIO, codec: str = "utf-8" ) -> None: PDFDevice.__init__(self, rsrcmgr) self.outfp = outfp @@ -231,11 +254,11 @@ class TagExtractor(PDFDevice): textstate: "PDFTextState", seq: PDFTextSeq, ncs: PDFColorSpace, - graphicstate: "PDFGraphicState" + graphicstate: "PDFGraphicState", ) -> None: font = textstate.font assert font is not None - text = '' + text = "" for obj in seq: if isinstance(obj, str): obj = utils.make_compat_bytes(obj) @@ -251,25 +274,29 @@ class TagExtractor(PDFDevice): self._write(utils.enc(text)) def begin_page(self, page: PDFPage, ctm: Matrix) -> None: - output = '' %\ - (self.pageno, utils.bbox2str(page.mediabox), page.rotate) + output = '' % ( + self.pageno, + utils.bbox2str(page.mediabox), + page.rotate, + ) self._write(output) return def end_page(self, page: PDFPage) -> None: - self._write('\n') + self._write("\n") self.pageno += 1 return - def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None - ) -> None: - s = '' + def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None: + s = "" if isinstance(props, dict): - s = ''.join([ - ' {}="{}"'.format(utils.enc(k), utils.make_compat_str(v)) - for (k, v) in sorted(props.items()) - ]) - out_s = '<{}{}>'.format(utils.enc(cast(str, tag.name)), s) + s = "".join( + [ + ' 
{}="{}"'.format(utils.enc(k), utils.make_compat_str(v)) + for (k, v) in sorted(props.items()) + ] + ) + out_s = "<{}{}>".format(utils.enc(cast(str, tag.name)), s) self._write(out_s) self._stack.append(tag) return @@ -277,12 +304,11 @@ class TagExtractor(PDFDevice): def end_tag(self) -> None: assert self._stack, str(self.pageno) tag = self._stack.pop(-1) - out_s = '' % utils.enc(cast(str, tag.name)) + out_s = "" % utils.enc(cast(str, tag.name)) self._write(out_s) return - def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None - ) -> None: + def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None: self.begin_tag(tag, props) self._stack.pop(-1) return diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 6be37f4..258e947 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -3,8 +3,21 @@ import logging import re import struct from hashlib import sha256, md5, sha384, sha512 -from typing import (Any, Callable, Dict, Iterable, Iterator, KeysView, List, - Optional, Sequence, Tuple, Type, Union, cast) +from typing import ( + Any, + Callable, + Dict, + Iterable, + Iterator, + KeysView, + List, + Optional, + Sequence, + Tuple, + Type, + Union, + cast, +) from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes @@ -13,12 +26,22 @@ from . 
import settings from .arcfour import Arcfour from .data_structures import NumberTree from .pdfparser import PDFSyntaxError, PDFParser, PDFStreamParser -from .pdftypes import DecipherCallable, PDFException, PDFTypeError, \ - PDFStream, PDFObjectNotFound, decipher_all, int_value, str_value, \ - list_value, uint_value, dict_value, stream_value +from .pdftypes import ( + DecipherCallable, + PDFException, + PDFTypeError, + PDFStream, + PDFObjectNotFound, + decipher_all, + int_value, + str_value, + list_value, + uint_value, + dict_value, + stream_value, +) from .psparser import PSEOF, literal_name, LIT, KWD -from .utils import choplist, decode_text, nunpack, format_int_roman, \ - format_int_alpha +from .utils import choplist, decode_text, nunpack, format_int_roman, format_int_alpha log = logging.getLogger(__name__) @@ -32,6 +55,7 @@ class PDFNoValidXRefWarning(SyntaxWarning): Not used anymore because warnings.warn is replaced by logger.Logger.warn. """ + pass @@ -60,6 +84,7 @@ class PDFEncryptionWarning(UserWarning): Not used anymore because warnings.warn is replaced by logger.Logger.warn. """ + pass @@ -68,6 +93,7 @@ class PDFTextExtractionNotAllowedWarning(UserWarning): Not used anymore because warnings.warn is replaced by logger.Logger.warn. """ + pass @@ -78,15 +104,19 @@ class PDFTextExtractionNotAllowed(PDFEncryptionError): class PDFTextExtractionNotAllowedError(PDFTextExtractionNotAllowed): def __init__(self, *args: object) -> None: from warnings import warn - warn('PDFTextExtractionNotAllowedError will be removed in the future. ' - 'Use PDFTextExtractionNotAllowed instead.', DeprecationWarning) + + warn( + "PDFTextExtractionNotAllowedError will be removed in the future. " + "Use PDFTextExtractionNotAllowed instead.", + DeprecationWarning, + ) super().__init__(*args) # some predefined literals and keywords. 
-LITERAL_OBJSTM = LIT('ObjStm') -LITERAL_XREF = LIT('XRef') -LITERAL_CATALOG = LIT('Catalog') +LITERAL_OBJSTM = LIT("ObjStm") +LITERAL_XREF = LIT("XRef") +LITERAL_CATALOG = LIT("Catalog") class PDFBaseXRef: @@ -107,13 +137,12 @@ class PDFBaseXRef: class PDFXRef(PDFBaseXRef): - def __init__(self) -> None: self.offsets: Dict[int, Tuple[Optional[int], int, int]] = {} self.trailer: Dict[str, Any] = {} def __repr__(self) -> str: - return '' % (self.offsets.keys()) + return "" % (self.offsets.keys()) def load(self, parser: PDFParser) -> None: while True: @@ -123,51 +152,50 @@ class PDFXRef(PDFBaseXRef): if not line: continue except PSEOF: - raise PDFNoValidXRef('Unexpected EOF - file corrupted?') - if line.startswith(b'trailer'): + raise PDFNoValidXRef("Unexpected EOF - file corrupted?") + if line.startswith(b"trailer"): parser.seek(pos) break - f = line.split(b' ') + f = line.split(b" ") if len(f) != 2: - error_msg = 'Trailer not found: {!r}: line={!r}'\ - .format(parser, line) + error_msg = "Trailer not found: {!r}: line={!r}".format(parser, line) raise PDFNoValidXRef(error_msg) try: (start, nobjs) = map(int, f) except ValueError: - error_msg = 'Invalid line: {!r}: line={!r}'\ - .format(parser, line) + error_msg = "Invalid line: {!r}: line={!r}".format(parser, line) raise PDFNoValidXRef(error_msg) - for objid in range(start, start+nobjs): + for objid in range(start, start + nobjs): try: (_, line) = parser.nextline() line = line.strip() except PSEOF: - raise PDFNoValidXRef('Unexpected EOF - file corrupted?') - f = line.split(b' ') + raise PDFNoValidXRef("Unexpected EOF - file corrupted?") + f = line.split(b" ") if len(f) != 3: - error_msg = 'Invalid XRef format: {!r}, line={!r}'\ - .format(parser, line) + error_msg = "Invalid XRef format: {!r}, line={!r}".format( + parser, line + ) raise PDFNoValidXRef(error_msg) (pos_b, genno_b, use_b) = f - if use_b != b'n': + if use_b != b"n": continue self.offsets[objid] = (None, int(pos_b), int(genno_b)) - log.debug('xref objects: 
%r', self.offsets) + log.debug("xref objects: %r", self.offsets) self.load_trailer(parser) def load_trailer(self, parser: PDFParser) -> None: try: (_, kwd) = parser.nexttoken() - assert kwd is KWD(b'trailer'), str(kwd) + assert kwd is KWD(b"trailer"), str(kwd) (_, dic) = parser.nextobject() except PSEOF: x = parser.pop(1) if not x: - raise PDFNoValidXRef('Unexpected EOF - file corrupted') + raise PDFNoValidXRef("Unexpected EOF - file corrupted") (_, dic) = x[0] self.trailer.update(dict_value(dic)) - log.debug('trailer=%r', self.trailer) + log.debug("trailer=%r", self.trailer) def get_trailer(self) -> Dict[str, Any]: return self.trailer @@ -183,11 +211,10 @@ class PDFXRef(PDFBaseXRef): class PDFXRefFallback(PDFXRef): - def __repr__(self) -> str: - return '' % (self.offsets.keys()) + return "" % (self.offsets.keys()) - PDFOBJ_CUE = re.compile(r'^(\d+)\s+(\d+)\s+obj\b') + PDFOBJ_CUE = re.compile(r"^(\d+)\s+(\d+)\s+obj\b") def load(self, parser: PDFParser) -> None: parser.seek(0) @@ -196,12 +223,12 @@ class PDFXRefFallback(PDFXRef): (pos, line_bytes) = parser.nextline() except PSEOF: break - if line_bytes.startswith(b'trailer'): + if line_bytes.startswith(b"trailer"): parser.seek(pos) self.load_trailer(parser) - log.debug('trailer: %r', self.trailer) + log.debug("trailer: %r", self.trailer) break - line = line_bytes.decode('latin-1') # default pdf encoding + line = line_bytes.decode("latin-1") # default pdf encoding m = self.PDFOBJ_CUE.match(line) if not m: continue @@ -212,14 +239,13 @@ class PDFXRefFallback(PDFXRef): # expand ObjStm. 
parser.seek(pos) (_, obj) = parser.nextobject() - if isinstance(obj, PDFStream) \ - and obj.get('Type') is LITERAL_OBJSTM: + if isinstance(obj, PDFStream) and obj.get("Type") is LITERAL_OBJSTM: stream = stream_value(obj) try: - n = stream['N'] + n = stream["N"] except KeyError: if settings.STRICT: - raise PDFSyntaxError('N is not defined: %r' % stream) + raise PDFSyntaxError("N is not defined: %r" % stream) n = 0 parser1 = PDFStreamParser(stream.get_data()) objs: List[int] = [] @@ -229,14 +255,13 @@ class PDFXRefFallback(PDFXRef): objs.append(cast(int, obj)) except PSEOF: pass - n = min(n, len(objs)//2) + n = min(n, len(objs) // 2) for index in range(n): - objid1 = objs[index*2] + objid1 = objs[index * 2] self.offsets[objid1] = (objid, index, 0) class PDFXRefStream(PDFBaseXRef): - def __init__(self) -> None: self.data: Optional[bytes] = None self.entlen: Optional[int] = None @@ -246,31 +271,32 @@ class PDFXRefStream(PDFBaseXRef): self.ranges: List[Tuple[int, int]] = [] def __repr__(self) -> str: - return '' % (self.ranges) + return "" % (self.ranges) def load(self, parser: PDFParser) -> None: (_, objid) = parser.nexttoken() # ignored (_, genno) = parser.nexttoken() # ignored (_, kwd) = parser.nexttoken() (_, stream) = parser.nextobject() - if not isinstance(stream, PDFStream) \ - or stream.get('Type') is not LITERAL_XREF: - raise PDFNoValidXRef('Invalid PDF stream spec.') - size = stream['Size'] - index_array = stream.get('Index', (0, size)) + if not isinstance(stream, PDFStream) or stream.get("Type") is not LITERAL_XREF: + raise PDFNoValidXRef("Invalid PDF stream spec.") + size = stream["Size"] + index_array = stream.get("Index", (0, size)) if len(index_array) % 2 != 0: - raise PDFSyntaxError('Invalid index number') - self.ranges.extend(cast(Iterator[Tuple[int, int]], - choplist(2, index_array))) - (self.fl1, self.fl2, self.fl3) = stream['W'] - assert (self.fl1 is not None and self.fl2 is not None - and self.fl3 is not None) + raise PDFSyntaxError("Invalid index 
number") + self.ranges.extend(cast(Iterator[Tuple[int, int]], choplist(2, index_array))) + (self.fl1, self.fl2, self.fl3) = stream["W"] + assert self.fl1 is not None and self.fl2 is not None and self.fl3 is not None self.data = stream.get_data() - self.entlen = self.fl1+self.fl2+self.fl3 + self.entlen = self.fl1 + self.fl2 + self.fl3 self.trailer = stream.attrs - log.debug('xref stream: objid=%s, fields=%d,%d,%d', - ', '.join(map(repr, self.ranges)), - self.fl1, self.fl2, self.fl3) + log.debug( + "xref stream: objid=%s, fields=%d,%d,%d", + ", ".join(map(repr, self.ranges)), + self.fl1, + self.fl2, + self.fl3, + ) return def get_trailer(self) -> Dict[str, Any]: @@ -282,16 +308,16 @@ class PDFXRefStream(PDFBaseXRef): assert self.entlen is not None assert self.data is not None offset = self.entlen * i - ent = self.data[offset:offset+self.entlen] - f1 = nunpack(ent[:self.fl1], 1) + ent = self.data[offset : offset + self.entlen] + f1 = nunpack(ent[: self.fl1], 1) if f1 == 1 or f1 == 2: - yield start+i + yield start + i return def get_pos(self, objid: int) -> Tuple[Optional[int], int, int]: index = 0 for (start, nobjs) in self.ranges: - if start <= objid and objid < start+nobjs: + if start <= objid and objid < start + nobjs: index += objid - start break else: @@ -300,13 +326,12 @@ class PDFXRefStream(PDFBaseXRef): raise KeyError(objid) assert self.entlen is not None assert self.data is not None - assert (self.fl1 is not None and self.fl2 is not None - and self.fl3 is not None) + assert self.fl1 is not None and self.fl2 is not None and self.fl3 is not None offset = self.entlen * index - ent = self.data[offset:offset+self.entlen] - f1 = nunpack(ent[:self.fl1], 1) - f2 = nunpack(ent[self.fl1:self.fl1+self.fl2]) - f3 = nunpack(ent[self.fl1+self.fl2:]) + ent = self.data[offset : offset + self.entlen] + f1 = nunpack(ent[: self.fl1], 1) + f2 = nunpack(ent[self.fl1 : self.fl1 + self.fl2]) + f3 = nunpack(ent[self.fl1 + self.fl2 :]) if f1 == 1: return (None, f2, f3) elif f1 == 2: 
@@ -318,15 +343,14 @@ class PDFXRefStream(PDFBaseXRef): class PDFStandardSecurityHandler: - PASSWORD_PADDING = (b'(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08' - b'..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz') + PASSWORD_PADDING = ( + b"(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08" + b"..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz" + ) supported_revisions: Tuple[int, ...] = (2, 3) def __init__( - self, - docid: Sequence[bytes], - param: Dict[str, Any], - password: str = '' + self, docid: Sequence[bytes], param: Dict[str, Any], password: str = "" ) -> None: self.docid = docid self.param = param @@ -337,18 +361,18 @@ class PDFStandardSecurityHandler: def init(self) -> None: self.init_params() if self.r not in self.supported_revisions: - error_msg = 'Unsupported revision: param=%r' % self.param + error_msg = "Unsupported revision: param=%r" % self.param raise PDFEncryptionError(error_msg) self.init_key() return def init_params(self) -> None: - self.v = int_value(self.param.get('V', 0)) - self.r = int_value(self.param['R']) - self.p = uint_value(self.param['P'], 32) - self.o = str_value(self.param['O']) - self.u = str_value(self.param['U']) - self.length = int_value(self.param.get('Length', 40)) + self.v = int_value(self.param.get("V", 0)) + self.r = int_value(self.param["R"]) + self.p = uint_value(self.param["P"], 32) + self.o = str_value(self.param["O"]) + self.u = str_value(self.param["U"]) + self.length = int_value(self.param.get("Length", 40)) return def init_key(self) -> None: @@ -376,7 +400,7 @@ class PDFStandardSecurityHandler: hash.update(self.docid[0]) # 3 result = Arcfour(key).encrypt(hash.digest()) # 4 for i in range(1, 20): # 5 - k = b''.join(bytes((c ^ i,)) for c in iter(key)) + k = b"".join(bytes((c ^ i,)) for c in iter(key)) result = Arcfour(k).encrypt(result) result += result # 6 return result @@ -387,11 +411,11 @@ class PDFStandardSecurityHandler: hash = md5(password) # 2 hash.update(self.o) # 3 # See https://github.com/pdfminer/pdfminer.six/issues/186 - 
hash.update(struct.pack('= 4: if not cast(PDFStandardSecurityHandlerV4, self).encrypt_metadata: - hash.update(b'\xff\xff\xff\xff') + hash.update(b"\xff\xff\xff\xff") result = hash.digest() n = 5 if self.r >= 3: @@ -437,7 +461,7 @@ class PDFStandardSecurityHandler: else: user_password = self.o for i in range(19, -1, -1): - k = b''.join(bytes((c ^ i,)) for c in iter(key)) + k = b"".join(bytes((c ^ i,)) for c in iter(key)) user_password = Arcfour(k).decrypt(user_password) return self.authenticate_user_password(user_password) @@ -446,16 +470,15 @@ class PDFStandardSecurityHandler: objid: int, genno: int, data: bytes, - attrs: Optional[Dict[str, Any]] = None + attrs: Optional[Dict[str, Any]] = None, ) -> bytes: return self.decrypt_rc4(objid, genno, data) def decrypt_rc4(self, objid: int, genno: int, data: bytes) -> bytes: assert self.key is not None - key = self.key + struct.pack(' None: super().init_params() self.length = 128 - self.cf = dict_value(self.param.get('CF')) - self.stmf = literal_name(self.param['StmF']) - self.strf = literal_name(self.param['StrF']) - self.encrypt_metadata = bool(self.param.get('EncryptMetadata', True)) + self.cf = dict_value(self.param.get("CF")) + self.stmf = literal_name(self.param["StmF"]) + self.strf = literal_name(self.param["StrF"]) + self.encrypt_metadata = bool(self.param.get("EncryptMetadata", True)) if self.stmf != self.strf: - error_msg = 'Unsupported crypt filter: param=%r' % self.param + error_msg = "Unsupported crypt filter: param=%r" % self.param raise PDFEncryptionError(error_msg) self.cfm = {} for k, v in self.cf.items(): - f = self.get_cfm(literal_name(v['CFM'])) + f = self.get_cfm(literal_name(v["CFM"])) if f is None: - error_msg = 'Unknown crypt filter method: param=%r' \ - % self.param + error_msg = "Unknown crypt filter method: param=%r" % self.param raise PDFEncryptionError(error_msg) self.cfm[k] = f - self.cfm['Identity'] = self.decrypt_identity + self.cfm["Identity"] = self.decrypt_identity if self.strf not in 
self.cfm: - error_msg = 'Undefined crypt filter: param=%r' % self.param + error_msg = "Undefined crypt filter: param=%r" % self.param raise PDFEncryptionError(error_msg) return - def get_cfm( - self, - name: str - ) -> Optional[Callable[[int, int, bytes], bytes]]: - if name == 'V2': + def get_cfm(self, name: str) -> Optional[Callable[[int, int, bytes], bytes]]: + if name == "V2": return self.decrypt_rc4 - elif name == 'AESV2': + elif name == "AESV2": return self.decrypt_aes128 else: return None @@ -504,11 +523,11 @@ class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler): genno: int, data: bytes, attrs: Optional[Dict[str, Any]] = None, - name: Optional[str] = None + name: Optional[str] = None, ) -> bytes: if not self.encrypt_metadata and attrs is not None: - t = attrs.get('Type') - if t is not None and literal_name(t) == 'Metadata': + t = attrs.get("Type") + if t is not None and literal_name(t) == "Metadata": return data if name is None: name = self.strf @@ -519,15 +538,21 @@ class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler): def decrypt_aes128(self, objid: int, genno: int, data: bytes) -> bytes: assert self.key is not None - key = self.key + struct.pack(' None: super().init_params() self.length = 256 - self.oe = str_value(self.param['OE']) - self.ue = str_value(self.param['UE']) + self.oe = str_value(self.param["OE"]) + self.ue = str_value(self.param["UE"]) self.o_hash = self.o[:32] self.o_validation_salt = self.o[32:40] self.o_key_salt = self.o[40:] @@ -548,11 +573,8 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4): self.u_key_salt = self.u[40:] return - def get_cfm( - self, - name: str - ) -> Optional[Callable[[int, int, bytes], bytes]]: - if name == 'AESV3': + def get_cfm(self, name: str) -> Optional[Callable[[int, int, bytes], bytes]]: + if name == "AESV3": return self.decrypt_aes256 else: return None @@ -562,16 +584,16 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4): hash = 
self._password_hash(password_b, self.o_validation_salt, self.u) if hash == self.o_hash: hash = self._password_hash(password_b, self.o_key_salt, self.u) - cipher = Cipher(algorithms.AES(hash), - modes.CBC(b'\0' * 16), - backend=default_backend()) # type: ignore + cipher = Cipher( + algorithms.AES(hash), modes.CBC(b"\0" * 16), backend=default_backend() + ) # type: ignore return cipher.decryptor().update(self.oe) # type: ignore hash = self._password_hash(password_b, self.u_validation_salt) if hash == self.u_hash: hash = self._password_hash(password_b, self.u_key_salt) - cipher = Cipher(algorithms.AES(hash), - modes.CBC(b'\0' * 16), - backend=default_backend()) # type: ignore + cipher = Cipher( + algorithms.AES(hash), modes.CBC(b"\0" * 16), backend=default_backend() + ) # type: ignore return cipher.decryptor().update(self.ue) # type: ignore return None @@ -579,16 +601,14 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4): if self.r == 6: # saslprep expects non-empty strings, apparently if not password: - return b'' + return b"" from ._saslprep import saslprep + password = saslprep(password) - return password.encode('utf-8')[:127] + return password.encode("utf-8")[:127] def _password_hash( - self, - password: bytes, - salt: bytes, - vector: Optional[bytes] = None + self, password: bytes, salt: bytes, vector: Optional[bytes] = None ) -> bytes: """ Compute password hash depending on revision number @@ -598,10 +618,7 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4): return self._r6_password(password, salt[0:8], vector) def _r5_password( - self, - password: bytes, - salt: bytes, - vector: Optional[bytes] = None + self, password: bytes, salt: bytes, vector: Optional[bytes] = None ) -> bytes: """ Compute the password for revision 5 @@ -613,10 +630,7 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4): return hash.digest() def _r6_password( - self, - password: bytes, - salt: bytes, - vector: Optional[bytes] = None + self, 
password: bytes, salt: bytes, vector: Optional[bytes] = None ) -> bytes: """ Compute the password for revision 6 @@ -629,10 +643,8 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4): hashes = (sha256, sha384, sha512) round_no = last_byte_val = 0 while round_no < 64 or last_byte_val > round_no - 32: - k1 = (password + k + (vector or b'')) * 64 - e = self._aes_cbc_encrypt( - key=k[:16], iv=k[16:32], data=k1 - ) + k1 = (password + k + (vector or b"")) * 64 + e = self._aes_cbc_encrypt(key=k[:16], iv=k[16:32], data=k1) # compute the first 16 bytes of e, # interpreted as an unsigned integer mod 3 next_hash = hashes[self._bytes_mod_3(e[:16])] @@ -646,12 +658,7 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4): # 256 is 1 mod 3, so we can just sum 'em return sum(b % 3 for b in input_bytes) % 3 - def _aes_cbc_encrypt( - self, - key: bytes, - iv: bytes, - data: bytes - ) -> bytes: + def _aes_cbc_encrypt(self, key: bytes, iv: bytes, data: bytes) -> bytes: cipher = Cipher(algorithms.AES(key), modes.CBC(iv)) encryptor = cipher.encryptor() # type: ignore return encryptor.update(data) + encryptor.finalize() # type: ignore @@ -660,9 +667,11 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4): initialization_vector = data[:16] ciphertext = data[16:] assert self.key is not None - cipher = Cipher(algorithms.AES(self.key), - modes.CBC(initialization_vector), - backend=default_backend()) # type: ignore + cipher = Cipher( + algorithms.AES(self.key), + modes.CBC(initialization_vector), + backend=default_backend(), + ) # type: ignore return cipher.decryptor().update(ciphertext) # type: ignore @@ -689,9 +698,9 @@ class PDFDocument: def __init__( self, parser: PDFParser, - password: str = '', + password: str = "", caching: bool = True, - fallback: bool = True + fallback: bool = True, ) -> None: "Set the document to use a given PDFParser object." 
self.caching = caching @@ -723,43 +732,42 @@ class PDFDocument: if not trailer: continue # If there's an encryption info, remember it. - if 'Encrypt' in trailer: - if 'ID' in trailer: - id_value = list_value(trailer['ID']) + if "Encrypt" in trailer: + if "ID" in trailer: + id_value = list_value(trailer["ID"]) else: # Some documents may not have a /ID, use two empty # byte strings instead. Solves # https://github.com/pdfminer/pdfminer.six/issues/594 - id_value = (b'', b'') - self.encryption = (id_value, - dict_value(trailer['Encrypt'])) + id_value = (b"", b"") + self.encryption = (id_value, dict_value(trailer["Encrypt"])) self._initialize_password(password) - if 'Info' in trailer: - self.info.append(dict_value(trailer['Info'])) - if 'Root' in trailer: + if "Info" in trailer: + self.info.append(dict_value(trailer["Info"])) + if "Root" in trailer: # Every PDF file must have exactly one /Root dictionary. - self.catalog = dict_value(trailer['Root']) + self.catalog = dict_value(trailer["Root"]) break else: - raise PDFSyntaxError('No /Root object! - Is this really a PDF?') - if self.catalog.get('Type') is not LITERAL_CATALOG: + raise PDFSyntaxError("No /Root object! - Is this really a PDF?") + if self.catalog.get("Type") is not LITERAL_CATALOG: if settings.STRICT: - raise PDFSyntaxError('Catalog not found!') + raise PDFSyntaxError("Catalog not found!") return - KEYWORD_OBJ = KWD(b'obj') + KEYWORD_OBJ = KWD(b"obj") # _initialize_password(password=b'') # Perform the initialization with a given password. 
- def _initialize_password(self, password: str = '') -> None: + def _initialize_password(self, password: str = "") -> None: assert self.encryption is not None (docid, param) = self.encryption - if literal_name(param.get('Filter')) != 'Standard': - raise PDFEncryptionError('Unknown filter: param=%r' % param) - v = int_value(param.get('V', 0)) + if literal_name(param.get("Filter")) != "Standard": + raise PDFEncryptionError("Unknown filter: param=%r" % param) + v = int_value(param.get("V", 0)) factory = self.security_handler_registry.get(v) if factory is None: - raise PDFEncryptionError('Unknown algorithm: param=%r' % param) + raise PDFEncryptionError("Unknown algorithm: param=%r" % param) handler = factory(docid, param, password) self.decipher = handler.decrypt self.is_printable = handler.is_printable() @@ -769,12 +777,7 @@ class PDFDocument: self._parser.fallback = False # need to read streams with exact length return - def _getobj_objstm( - self, - stream: PDFStream, - index: int, - objid: int - ) -> object: + def _getobj_objstm(self, stream: PDFStream, index: int, objid: int) -> object: if stream.objid in self._parsed_objs: (objs, n) = self._parsed_objs[stream.objid] else: @@ -782,22 +785,22 @@ class PDFDocument: if self.caching: assert stream.objid is not None self._parsed_objs[stream.objid] = (objs, n) - i = n*2+index + i = n * 2 + index try: obj = objs[i] except IndexError: - raise PDFSyntaxError('index too big: %r' % index) + raise PDFSyntaxError("index too big: %r" % index) return obj def _get_objects(self, stream: PDFStream) -> Tuple[List[object], int]: - if stream.get('Type') is not LITERAL_OBJSTM: + if stream.get("Type") is not LITERAL_OBJSTM: if settings.STRICT: - raise PDFSyntaxError('Not a stream object: %r' % stream) + raise PDFSyntaxError("Not a stream object: %r" % stream) try: - n = cast(int, stream['N']) + n = cast(int, stream["N"]) except KeyError: if settings.STRICT: - raise PDFSyntaxError('N is not defined: %r' % stream) + raise 
PDFSyntaxError("N is not defined: %r" % stream) n = 0 parser = PDFStreamParser(stream.get_data()) parser.set_document(self) @@ -830,11 +833,10 @@ class PDFDocument: objid1 = x[-2] # #### end hack around malformed pdf files if objid1 != objid: - raise PDFSyntaxError('objid mismatch: {!r}={!r}' - .format(objid1, objid)) + raise PDFSyntaxError("objid mismatch: {!r}={!r}".format(objid1, objid)) - if kwd != KWD(b'obj'): - raise PDFSyntaxError('Invalid object spec: offset=%r' % pos) + if kwd != KWD(b"obj"): + raise PDFSyntaxError("Invalid object spec: offset=%r" % pos) (_, obj) = self._parser.nextobject() return obj @@ -846,8 +848,8 @@ class PDFDocument: :raises PDFObjectNotFound if objid does not exist in PDF """ if not self.xrefs: - raise PDFException('PDFDocument is not initialized') - log.debug('getobj: objid=%r', objid) + raise PDFException("PDFDocument is not initialized") + log.debug("getobj: objid=%r", objid) if objid in self._cached_objs: (obj, genno) = self._cached_objs[objid] else: @@ -863,8 +865,7 @@ class PDFDocument: else: obj = self._getobj_parse(index, objid) if self.decipher: - obj = decipher_all(self.decipher, objid, genno, - obj) + obj = decipher_all(self.decipher, objid, genno, obj) if isinstance(obj, PDFStream): obj.set_objid(objid, genno) @@ -873,7 +874,7 @@ class PDFDocument: continue else: raise PDFObjectNotFound(objid) - log.debug('register: objid=%r: %r', objid, obj) + log.debug("register: objid=%r: %r", objid, obj) if self.caching: self._cached_objs[objid] = (obj, genno) return obj @@ -881,25 +882,25 @@ class PDFDocument: OutlineType = Tuple[Any, Any, Any, Any, Any] def get_outlines(self) -> Iterator[OutlineType]: - if 'Outlines' not in self.catalog: + if "Outlines" not in self.catalog: raise PDFNoOutlines - def search(entry: object, level: int - ) -> Iterator[PDFDocument.OutlineType]: + def search(entry: object, level: int) -> Iterator[PDFDocument.OutlineType]: entry = dict_value(entry) - if 'Title' in entry: - if 'A' in entry or 'Dest' in 
entry: - title = decode_text(str_value(entry['Title'])) - dest = entry.get('Dest') - action = entry.get('A') - se = entry.get('SE') + if "Title" in entry: + if "A" in entry or "Dest" in entry: + title = decode_text(str_value(entry["Title"])) + dest = entry.get("Dest") + action = entry.get("A") + se = entry.get("SE") yield (level, title, dest, action, se) - if 'First' in entry and 'Last' in entry: - yield from search(entry['First'], level+1) - if 'Next' in entry: - yield from search(entry['Next'], level) + if "First" in entry and "Last" in entry: + yield from search(entry["First"], level + 1) + if "Next" in entry: + yield from search(entry["Next"], level) return - return search(self.catalog['Outlines'], 0) + + return search(self.catalog["Outlines"], 0) def get_page_labels(self) -> Iterator[str]: """ @@ -913,51 +914,49 @@ class PDFDocument: assert self.catalog is not None try: - page_labels = PageLabels(self.catalog['PageLabels']) + page_labels = PageLabels(self.catalog["PageLabels"]) except (PDFTypeError, KeyError): raise PDFNoPageLabels return page_labels.labels - def lookup_name( - self, - cat: str, - key: Union[str, bytes] - ) -> Any: + def lookup_name(self, cat: str, key: Union[str, bytes]) -> Any: try: - names = dict_value(self.catalog['Names']) + names = dict_value(self.catalog["Names"]) except (PDFTypeError, KeyError): raise KeyError((cat, key)) # may raise KeyError d0 = dict_value(names[cat]) def lookup(d: Dict[str, Any]) -> Any: - if 'Limits' in d: - (k1, k2) = list_value(d['Limits']) + if "Limits" in d: + (k1, k2) = list_value(d["Limits"]) if key < k1 or k2 < key: return None - if 'Names' in d: - objs = list_value(d['Names']) - names = dict(cast(Iterator[Tuple[Union[str, bytes], Any]], - choplist(2, objs))) + if "Names" in d: + objs = list_value(d["Names"]) + names = dict( + cast(Iterator[Tuple[Union[str, bytes], Any]], choplist(2, objs)) + ) return names[key] - if 'Kids' in d: - for c in list_value(d['Kids']): + if "Kids" in d: + for c in 
list_value(d["Kids"]): v = lookup(dict_value(c)) if v: return v raise KeyError((cat, key)) + return lookup(d0) def get_dest(self, name: Union[str, bytes]) -> Any: try: # PDF-1.2 or later - obj = self.lookup_name('Dests', name) + obj = self.lookup_name("Dests", name) except KeyError: # PDF-1.1 or prior - if 'Dests' not in self.catalog: + if "Dests" not in self.catalog: raise PDFDestinationNotFound(name) - d0 = dict_value(self.catalog['Dests']) + d0 = dict_value(self.catalog["Dests"]) if name not in d0: raise PDFDestinationNotFound(name) obj = d0[name] @@ -970,23 +969,20 @@ class PDFDocument: prev = None for line in parser.revreadlines(): line = line.strip() - log.debug('find_xref: %r', line) - if line == b'startxref': + log.debug("find_xref: %r", line) + if line == b"startxref": break if line: prev = line else: - raise PDFNoValidXRef('Unexpected EOF') - log.debug('xref found: pos=%r', prev) + raise PDFNoValidXRef("Unexpected EOF") + log.debug("xref found: pos=%r", prev) assert prev is not None return int(prev) # read xref table def read_xref_from( - self, - parser: PDFParser, - start: int, - xrefs: List[PDFBaseXRef] + self, parser: PDFParser, start: int, xrefs: List[PDFBaseXRef] ) -> None: """Reads XRefs from the given location.""" parser.seek(start) @@ -994,8 +990,8 @@ class PDFDocument: try: (pos, token) = parser.nexttoken() except PSEOF: - raise PDFNoValidXRef('Unexpected EOF') - log.debug('read_xref_from: start=%d, token=%r', start, token) + raise PDFNoValidXRef("Unexpected EOF") + log.debug("read_xref_from: start=%d, token=%r", start, token) if isinstance(token, int): # XRefStream: PDF-1.5 parser.seek(pos) @@ -1009,13 +1005,13 @@ class PDFDocument: xref.load(parser) xrefs.append(xref) trailer = xref.get_trailer() - log.debug('trailer: %r', trailer) - if 'XRefStm' in trailer: - pos = int_value(trailer['XRefStm']) + log.debug("trailer: %r", trailer) + if "XRefStm" in trailer: + pos = int_value(trailer["XRefStm"]) self.read_xref_from(parser, pos, xrefs) - if 
'Prev' in trailer: + if "Prev" in trailer: # find previous xref - pos = int_value(trailer['Prev']) + pos = int_value(trailer["Prev"]) self.read_xref_from(parser, pos, xrefs) return @@ -1033,16 +1029,16 @@ class PageLabels(NumberTree): # The tree must begin with page index 0 if len(ranges) == 0 or ranges[0][0] != 0: if settings.STRICT: - raise PDFSyntaxError('PageLabels is missing page index 0') + raise PDFSyntaxError("PageLabels is missing page index 0") else: # Try to cope, by assuming empty labels for the initial pages ranges.insert(0, (0, {})) for (next, (start, label_dict_unchecked)) in enumerate(ranges, 1): label_dict = dict_value(label_dict_unchecked) - style = label_dict.get('S') - prefix = decode_text(str_value(label_dict.get('P', b''))) - first_value = int_value(label_dict.get('St', 1)) + style = label_dict.get("S") + prefix = decode_text(str_value(label_dict.get("P", b""))) + first_value = int_value(label_dict.get("St", 1)) if next == len(ranges): # This is the last specified range. It continues until the end @@ -1061,18 +1057,18 @@ class PageLabels(NumberTree): def _format_page_label(value: int, style: Any) -> str: """Format page label value in a specific style""" if style is None: - label = '' - elif style is LIT('D'): # Decimal arabic numerals + label = "" + elif style is LIT("D"): # Decimal arabic numerals label = str(value) - elif style is LIT('R'): # Uppercase roman numerals + elif style is LIT("R"): # Uppercase roman numerals label = format_int_roman(value).upper() - elif style is LIT('r'): # Lowercase roman numerals + elif style is LIT("r"): # Lowercase roman numerals label = format_int_roman(value) - elif style is LIT('A'): # Uppercase letters A-Z, AA-ZZ... + elif style is LIT("A"): # Uppercase letters A-Z, AA-ZZ... label = format_int_alpha(value).upper() - elif style is LIT('a'): # Lowercase letters a-z, aa-zz... + elif style is LIT("a"): # Lowercase letters a-z, aa-zz... 
label = format_int_alpha(value) else: - log.warning('Unknown page label style: %r', style) - label = '' + log.warning("Unknown page label style: %r", style) + label = "" return label diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 00e325e..0b3e00a 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -2,8 +2,20 @@ import logging import struct import sys from io import BytesIO -from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List, Mapping, - Optional, Tuple, Union, cast, TYPE_CHECKING) +from typing import ( + Any, + BinaryIO, + Dict, + Iterable, + Iterator, + List, + Mapping, + Optional, + Tuple, + Union, + cast, + TYPE_CHECKING, +) from . import settings from .cmapdb import CMap @@ -86,24 +98,22 @@ def get_widths2(seq: Iterable[object]) -> Dict[int, Tuple[float, Point]]: class FontMetricsDB: - @classmethod - def get_metrics(cls, fontname: str - ) -> Tuple[Dict[str, object], Dict[str, int]]: + def get_metrics(cls, fontname: str) -> Tuple[Dict[str, object], Dict[str, int]]: return FONT_METRICS[fontname] # int here means that we're not extending PSStackParser with additional types. 
class Type1FontHeaderParser(PSStackParser[int]): - KEYWORD_BEGIN = KWD(b'begin') - KEYWORD_END = KWD(b'end') - KEYWORD_DEF = KWD(b'def') - KEYWORD_PUT = KWD(b'put') - KEYWORD_DICT = KWD(b'dict') - KEYWORD_ARRAY = KWD(b'array') - KEYWORD_READONLY = KWD(b'readonly') - KEYWORD_FOR = KWD(b'for') + KEYWORD_BEGIN = KWD(b"begin") + KEYWORD_END = KWD(b"end") + KEYWORD_DEF = KWD(b"def") + KEYWORD_PUT = KWD(b"put") + KEYWORD_DICT = KWD(b"dict") + KEYWORD_ARRAY = KWD(b"array") + KEYWORD_READONLY = KWD(b"readonly") + KEYWORD_FOR = KWD(b"for") def __init__(self, data: BinaryIO) -> None: PSStackParser.__init__(self, data) @@ -138,19 +148,18 @@ class Type1FontHeaderParser(PSStackParser[int]): def do_keyword(self, pos: int, token: PSKeyword) -> None: if token is self.KEYWORD_PUT: ((_, key), (_, value)) = self.pop(2) - if (isinstance(key, int) and isinstance(value, PSLiteral)): + if isinstance(key, int) and isinstance(value, PSLiteral): self.add_results((key, literal_name(value))) return -NIBBLES = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', 'e', 'e-', - None, '-') +NIBBLES = ("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ".", "e", "e-", None, "-") # Mapping of cmap names. Original cmap name is kept if not in the mapping. 
# (missing reference for why DLIdent is mapped to Identity) IDENTITY_ENCODER = { - 'DLIdent-H': 'Identity-H', - 'DLIdent-V': 'Identity-V', + "DLIdent-H": "Identity-H", + "DLIdent-V": "Identity-V", } @@ -168,7 +177,7 @@ def getdict(data: bytes) -> Dict[int, List[Union[float, int]]]: stack = [] continue if b0 == 30: - s = '' + s = "" loop = True while loop: b = ord(fp.read(1)) @@ -181,13 +190,13 @@ def getdict(data: bytes) -> Dict[int, List[Union[float, int]]]: s += nibble value = float(s) elif 32 <= b0 and b0 <= 246: - value = b0-139 + value = b0 - 139 else: b1 = ord(fp.read(1)) if 247 <= b0 and b0 <= 250: - value = ((b0-247) << 8)+b1+108 + value = ((b0 - 247) << 8) + b1 + 108 elif 251 <= b0 and b0 <= 254: - value = -((b0-251) << 8)-b1-108 + value = -((b0 - 251) << 8) - b1 - 108 else: b2 = ord(fp.read(1)) if 128 <= b1: @@ -195,8 +204,7 @@ def getdict(data: bytes) -> Dict[int, List[Union[float, int]]]: if b0 == 28: value = b1 << 8 | b2 else: - value = b1 << 24 | b2 << 16 | \ - struct.unpack('>H', fp.read(2))[0] + value = b1 << 24 | b2 << 16 | struct.unpack(">H", fp.read(2))[0] stack.append(value) return d @@ -204,107 +212,419 @@ def getdict(data: bytes) -> Dict[int, List[Union[float, int]]]: class CFFFont: STANDARD_STRINGS = ( - '.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', - 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft', - 'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period', - 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', - 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal', - 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', - 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', - 'bracketright', 'asciicircum', 'underscore', 'quoteleft', 'a', - 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', - 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - 'braceleft', 'bar', 'braceright', 'asciitilde', 
'exclamdown', - 'cent', 'sterling', 'fraction', 'yen', 'florin', 'section', - 'currency', 'quotesingle', 'quotedblleft', 'guillemotleft', - 'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'endash', - 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet', - 'quotesinglbase', 'quotedblbase', 'quotedblright', - 'guillemotright', 'ellipsis', 'perthousand', 'questiondown', - 'grave', 'acute', 'circumflex', 'tilde', 'macron', 'breve', - 'dotaccent', 'dieresis', 'ring', 'cedilla', 'hungarumlaut', - 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash', - 'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', - 'oslash', 'oe', 'germandbls', 'onesuperior', 'logicalnot', 'mu', - 'trademark', 'Eth', 'onehalf', 'plusminus', 'Thorn', - 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn', - 'threequarters', 'twosuperior', 'registered', 'minus', 'eth', - 'multiply', 'threesuperior', 'copyright', 'Aacute', - 'Acircumflex', 'Adieresis', 'Agrave', 'Aring', 'Atilde', - 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave', - 'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', - 'Oacute', 'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', - 'Scaron', 'Uacute', 'Ucircumflex', 'Udieresis', 'Ugrave', - 'Yacute', 'Ydieresis', 'Zcaron', 'aacute', 'acircumflex', - 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla', 'eacute', - 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex', - 'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', - 'odieresis', 'ograve', 'otilde', 'scaron', 'uacute', - 'ucircumflex', 'udieresis', 'ugrave', 'yacute', 'ydieresis', - 'zcaron', 'exclamsmall', 'Hungarumlautsmall', 'dollaroldstyle', - 'dollarsuperior', 'ampersandsmall', 'Acutesmall', - 'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', - 'onedotenleader', 'zerooldstyle', 'oneoldstyle', 'twooldstyle', - 'threeoldstyle', 'fouroldstyle', 'fiveoldstyle', 'sixoldstyle', - 'sevenoldstyle', 'eightoldstyle', 'nineoldstyle', - 'commasuperior', 
'threequartersemdash', 'periodsuperior', - 'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', - 'dsuperior', 'esuperior', 'isuperior', 'lsuperior', 'msuperior', - 'nsuperior', 'osuperior', 'rsuperior', 'ssuperior', 'tsuperior', - 'ff', 'ffi', 'ffl', 'parenleftinferior', 'parenrightinferior', - 'Circumflexsmall', 'hyphensuperior', 'Gravesmall', 'Asmall', - 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', - 'Hsmall', 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', - 'Nsmall', 'Osmall', 'Psmall', 'Qsmall', 'Rsmall', 'Ssmall', - 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall', 'Ysmall', - 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall', - 'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', - 'Zcaronsmall', 'Dieresissmall', 'Brevesmall', 'Caronsmall', - 'Dotaccentsmall', 'Macronsmall', 'figuredash', 'hypheninferior', - 'Ogoneksmall', 'Ringsmall', 'Cedillasmall', 'questiondownsmall', - 'oneeighth', 'threeeighths', 'fiveeighths', 'seveneighths', - 'onethird', 'twothirds', 'zerosuperior', 'foursuperior', - 'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior', - 'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', - 'threeinferior', 'fourinferior', 'fiveinferior', 'sixinferior', - 'seveninferior', 'eightinferior', 'nineinferior', - 'centinferior', 'dollarinferior', 'periodinferior', - 'commainferior', 'Agravesmall', 'Aacutesmall', - 'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', - 'Aringsmall', 'AEsmall', 'Ccedillasmall', 'Egravesmall', - 'Eacutesmall', 'Ecircumflexsmall', 'Edieresissmall', - 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall', - 'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', - 'Oacutesmall', 'Ocircumflexsmall', 'Otildesmall', - 'Odieresissmall', 'OEsmall', 'Oslashsmall', 'Ugravesmall', - 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall', - 'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', - '001.001', '001.002', '001.003', 'Black', 'Bold', 'Book', - 'Light', 
'Medium', 'Regular', 'Roman', 'Semibold', + ".notdef", + "space", + "exclam", + "quotedbl", + "numbersign", + "dollar", + "percent", + "ampersand", + "quoteright", + "parenleft", + "parenright", + "asterisk", + "plus", + "comma", + "hyphen", + "period", + "slash", + "zero", + "one", + "two", + "three", + "four", + "five", + "six", + "seven", + "eight", + "nine", + "colon", + "semicolon", + "less", + "equal", + "greater", + "question", + "at", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "bracketleft", + "backslash", + "bracketright", + "asciicircum", + "underscore", + "quoteleft", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "braceleft", + "bar", + "braceright", + "asciitilde", + "exclamdown", + "cent", + "sterling", + "fraction", + "yen", + "florin", + "section", + "currency", + "quotesingle", + "quotedblleft", + "guillemotleft", + "guilsinglleft", + "guilsinglright", + "fi", + "fl", + "endash", + "dagger", + "daggerdbl", + "periodcentered", + "paragraph", + "bullet", + "quotesinglbase", + "quotedblbase", + "quotedblright", + "guillemotright", + "ellipsis", + "perthousand", + "questiondown", + "grave", + "acute", + "circumflex", + "tilde", + "macron", + "breve", + "dotaccent", + "dieresis", + "ring", + "cedilla", + "hungarumlaut", + "ogonek", + "caron", + "emdash", + "AE", + "ordfeminine", + "Lslash", + "Oslash", + "OE", + "ordmasculine", + "ae", + "dotlessi", + "lslash", + "oslash", + "oe", + "germandbls", + "onesuperior", + "logicalnot", + "mu", + "trademark", + "Eth", + "onehalf", + "plusminus", + "Thorn", + "onequarter", + "divide", + "brokenbar", + "degree", + "thorn", + "threequarters", + "twosuperior", + "registered", + "minus", + "eth", + "multiply", + "threesuperior", + "copyright", 
+ "Aacute", + "Acircumflex", + "Adieresis", + "Agrave", + "Aring", + "Atilde", + "Ccedilla", + "Eacute", + "Ecircumflex", + "Edieresis", + "Egrave", + "Iacute", + "Icircumflex", + "Idieresis", + "Igrave", + "Ntilde", + "Oacute", + "Ocircumflex", + "Odieresis", + "Ograve", + "Otilde", + "Scaron", + "Uacute", + "Ucircumflex", + "Udieresis", + "Ugrave", + "Yacute", + "Ydieresis", + "Zcaron", + "aacute", + "acircumflex", + "adieresis", + "agrave", + "aring", + "atilde", + "ccedilla", + "eacute", + "ecircumflex", + "edieresis", + "egrave", + "iacute", + "icircumflex", + "idieresis", + "igrave", + "ntilde", + "oacute", + "ocircumflex", + "odieresis", + "ograve", + "otilde", + "scaron", + "uacute", + "ucircumflex", + "udieresis", + "ugrave", + "yacute", + "ydieresis", + "zcaron", + "exclamsmall", + "Hungarumlautsmall", + "dollaroldstyle", + "dollarsuperior", + "ampersandsmall", + "Acutesmall", + "parenleftsuperior", + "parenrightsuperior", + "twodotenleader", + "onedotenleader", + "zerooldstyle", + "oneoldstyle", + "twooldstyle", + "threeoldstyle", + "fouroldstyle", + "fiveoldstyle", + "sixoldstyle", + "sevenoldstyle", + "eightoldstyle", + "nineoldstyle", + "commasuperior", + "threequartersemdash", + "periodsuperior", + "questionsmall", + "asuperior", + "bsuperior", + "centsuperior", + "dsuperior", + "esuperior", + "isuperior", + "lsuperior", + "msuperior", + "nsuperior", + "osuperior", + "rsuperior", + "ssuperior", + "tsuperior", + "ff", + "ffi", + "ffl", + "parenleftinferior", + "parenrightinferior", + "Circumflexsmall", + "hyphensuperior", + "Gravesmall", + "Asmall", + "Bsmall", + "Csmall", + "Dsmall", + "Esmall", + "Fsmall", + "Gsmall", + "Hsmall", + "Ismall", + "Jsmall", + "Ksmall", + "Lsmall", + "Msmall", + "Nsmall", + "Osmall", + "Psmall", + "Qsmall", + "Rsmall", + "Ssmall", + "Tsmall", + "Usmall", + "Vsmall", + "Wsmall", + "Xsmall", + "Ysmall", + "Zsmall", + "colonmonetary", + "onefitted", + "rupiah", + "Tildesmall", + "exclamdownsmall", + "centoldstyle", + 
"Lslashsmall", + "Scaronsmall", + "Zcaronsmall", + "Dieresissmall", + "Brevesmall", + "Caronsmall", + "Dotaccentsmall", + "Macronsmall", + "figuredash", + "hypheninferior", + "Ogoneksmall", + "Ringsmall", + "Cedillasmall", + "questiondownsmall", + "oneeighth", + "threeeighths", + "fiveeighths", + "seveneighths", + "onethird", + "twothirds", + "zerosuperior", + "foursuperior", + "fivesuperior", + "sixsuperior", + "sevensuperior", + "eightsuperior", + "ninesuperior", + "zeroinferior", + "oneinferior", + "twoinferior", + "threeinferior", + "fourinferior", + "fiveinferior", + "sixinferior", + "seveninferior", + "eightinferior", + "nineinferior", + "centinferior", + "dollarinferior", + "periodinferior", + "commainferior", + "Agravesmall", + "Aacutesmall", + "Acircumflexsmall", + "Atildesmall", + "Adieresissmall", + "Aringsmall", + "AEsmall", + "Ccedillasmall", + "Egravesmall", + "Eacutesmall", + "Ecircumflexsmall", + "Edieresissmall", + "Igravesmall", + "Iacutesmall", + "Icircumflexsmall", + "Idieresissmall", + "Ethsmall", + "Ntildesmall", + "Ogravesmall", + "Oacutesmall", + "Ocircumflexsmall", + "Otildesmall", + "Odieresissmall", + "OEsmall", + "Oslashsmall", + "Ugravesmall", + "Uacutesmall", + "Ucircumflexsmall", + "Udieresissmall", + "Yacutesmall", + "Thornsmall", + "Ydieresissmall", + "001.000", + "001.001", + "001.002", + "001.003", + "Black", + "Bold", + "Book", + "Light", + "Medium", + "Regular", + "Roman", + "Semibold", ) class INDEX: - def __init__(self, fp: BinaryIO) -> None: self.fp = fp self.offsets: List[int] = [] - (count, offsize) = struct.unpack('>HB', self.fp.read(3)) - for i in range(count+1): + (count, offsize) = struct.unpack(">HB", self.fp.read(3)) + for i in range(count + 1): self.offsets.append(nunpack(self.fp.read(offsize))) - self.base = self.fp.tell()-1 - self.fp.seek(self.base+self.offsets[-1]) + self.base = self.fp.tell() - 1 + self.fp.seek(self.base + self.offsets[-1]) return def __repr__(self) -> str: - return '' % len(self) + return "" % 
len(self) def __len__(self) -> int: - return len(self.offsets)-1 + return len(self.offsets) - 1 def __getitem__(self, i: int) -> bytes: - self.fp.seek(self.base+self.offsets[i]) - return self.fp.read(self.offsets[i+1]-self.offsets[i]) + self.fp.seek(self.base + self.offsets[i]) + return self.fp.read(self.offsets[i + 1] - self.offsets[i]) def __iter__(self) -> Iterator[bytes]: return iter(self[i] for i in range(len(self))) @@ -313,9 +633,8 @@ class CFFFont: self.name = name self.fp = fp # Header - (_major, _minor, hdrsize, offsize) = struct.unpack('BBBB', - self.fp.read(4)) - self.fp.read(hdrsize-4) + (_major, _minor, hdrsize, offsize) = struct.unpack("BBBB", self.fp.read(4)) + self.fp.read(hdrsize - 4) # Name INDEX self.name_index = self.INDEX(self.fp) # Top DICT INDEX @@ -338,56 +657,55 @@ class CFFFont: self.gid2code = {} self.fp.seek(cast(int, encoding_pos)) format = self.fp.read(1) - if format == b'\x00': + if format == b"\x00": # Format 0 - (n,) = struct.unpack('B', self.fp.read(1)) - for (code, gid) in enumerate(struct.unpack('B'*n, - self.fp.read(n))): + (n,) = struct.unpack("B", self.fp.read(1)) + for (code, gid) in enumerate(struct.unpack("B" * n, self.fp.read(n))): self.code2gid[code] = gid self.gid2code[gid] = code - elif format == b'\x01': + elif format == b"\x01": # Format 1 - (n,) = struct.unpack('B', self.fp.read(1)) + (n,) = struct.unpack("B", self.fp.read(1)) code = 0 for i in range(n): - (first, nleft) = struct.unpack('BB', self.fp.read(2)) - for gid in range(first, first+nleft+1): + (first, nleft) = struct.unpack("BB", self.fp.read(2)) + for gid in range(first, first + nleft + 1): self.code2gid[code] = gid self.gid2code[gid] = code code += 1 else: - raise ValueError('unsupported encoding format: %r' % format) + raise ValueError("unsupported encoding format: %r" % format) # Charsets self.name2gid = {} self.gid2name = {} self.fp.seek(cast(int, charset_pos)) format = self.fp.read(1) - if format == b'\x00': + if format == b"\x00": # Format 0 - n = 
self.nglyphs-1 + n = self.nglyphs - 1 for (gid, sid) in enumerate( - cast(Tuple[int, ...], - struct.unpack('>' + 'H' * n, self.fp.read(2 * n)))): + cast(Tuple[int, ...], struct.unpack(">" + "H" * n, self.fp.read(2 * n))) + ): gid += 1 sidname = self.getstr(sid) self.name2gid[sidname] = gid self.gid2name[gid] = sidname - elif format == b'\x01': + elif format == b"\x01": # Format 1 - (n,) = struct.unpack('B', self.fp.read(1)) + (n,) = struct.unpack("B", self.fp.read(1)) sid = 0 for i in range(n): - (first, nleft) = struct.unpack('BB', self.fp.read(2)) - for gid in range(first, first+nleft+1): + (first, nleft) = struct.unpack("BB", self.fp.read(2)) + for gid in range(first, first + nleft + 1): sidname = self.getstr(sid) self.name2gid[sidname] = gid self.gid2name[gid] = sidname sid += 1 - elif format == b'\x02': + elif format == b"\x02": # Format 2 - assert False, str(('Unhandled', format)) + assert False, str(("Unhandled", format)) else: - raise ValueError('unsupported charset format: %r' % format) + raise ValueError("unsupported charset format: %r" % format) return def getstr(self, sid: int) -> Union[str, bytes]: @@ -395,11 +713,10 @@ class CFFFont: # and appears to be a needless source of type complexity. 
if sid < len(self.STANDARD_STRINGS): return self.STANDARD_STRINGS[sid] - return self.string_index[sid-len(self.STANDARD_STRINGS)] + return self.string_index[sid - len(self.STANDARD_STRINGS)] class TrueTypeFont: - class CMapNotFound(Exception): pass @@ -409,12 +726,13 @@ class TrueTypeFont: self.tables: Dict[bytes, Tuple[int, int]] = {} self.fonttype = fp.read(4) try: - (ntables, _1, _2, _3) = cast(Tuple[int, int, int, int], - struct.unpack('>HHHH', fp.read(8))) + (ntables, _1, _2, _3) = cast( + Tuple[int, int, int, int], struct.unpack(">HHHH", fp.read(8)) + ) for _ in range(ntables): - (name_bytes, tsum, offset, length) = \ - cast(Tuple[bytes, int, int, int], - struct.unpack('>4sLLL', fp.read(16))) + (name_bytes, tsum, offset, length) = cast( + Tuple[bytes, int, int, int], struct.unpack(">4sLLL", fp.read(16)) + ) self.tables[name_bytes] = (offset, length) except struct.error: # Do not fail if there are not enough bytes to read. Even for @@ -424,83 +742,88 @@ class TrueTypeFont: return def create_unicode_map(self) -> FileUnicodeMap: - if b'cmap' not in self.tables: + if b"cmap" not in self.tables: raise TrueTypeFont.CMapNotFound - (base_offset, length) = self.tables[b'cmap'] + (base_offset, length) = self.tables[b"cmap"] fp = self.fp fp.seek(base_offset) - (version, nsubtables) = \ - cast(Tuple[int, int], struct.unpack('>HH', fp.read(4))) + (version, nsubtables) = cast(Tuple[int, int], struct.unpack(">HH", fp.read(4))) subtables: List[Tuple[int, int, int]] = [] for i in range(nsubtables): subtables.append( - cast(Tuple[int, int, int], struct.unpack('>HHL', fp.read(8)))) + cast(Tuple[int, int, int], struct.unpack(">HHL", fp.read(8))) + ) char2gid: Dict[int, int] = {} # Only supports subtable type 0, 2 and 4. 
for (_1, _2, st_offset) in subtables: - fp.seek(base_offset+st_offset) - (fmttype, fmtlen, fmtlang) = \ - cast(Tuple[int, int, int], struct.unpack('>HHH', fp.read(6))) + fp.seek(base_offset + st_offset) + (fmttype, fmtlen, fmtlang) = cast( + Tuple[int, int, int], struct.unpack(">HHH", fp.read(6)) + ) if fmttype == 0: - char2gid.update(enumerate( - cast(Tuple[int, ...], - struct.unpack('>256B', fp.read(256))))) + char2gid.update( + enumerate( + cast(Tuple[int, ...], struct.unpack(">256B", fp.read(256))) + ) + ) elif fmttype == 2: - subheaderkeys = cast(Tuple[int, ...], - struct.unpack('>256H', fp.read(512))) - firstbytes = [0]*8192 + subheaderkeys = cast( + Tuple[int, ...], struct.unpack(">256H", fp.read(512)) + ) + firstbytes = [0] * 8192 for (i, k) in enumerate(subheaderkeys): - firstbytes[k//8] = i - nhdrs = max(subheaderkeys)//8 + 1 + firstbytes[k // 8] = i + nhdrs = max(subheaderkeys) // 8 + 1 hdrs: List[Tuple[int, int, int, int, int]] = [] for i in range(nhdrs): - (firstcode, entcount, delta, offset) = \ - cast(Tuple[int, int, int, int], - struct.unpack('>HHhH', fp.read(8))) - hdrs.append((i, firstcode, entcount, delta, - fp.tell()-2+offset)) + (firstcode, entcount, delta, offset) = cast( + Tuple[int, int, int, int], struct.unpack(">HHhH", fp.read(8)) + ) + hdrs.append((i, firstcode, entcount, delta, fp.tell() - 2 + offset)) for (i, firstcode, entcount, delta, pos) in hdrs: if not entcount: continue first = firstcode + (firstbytes[i] << 8) fp.seek(pos) for c in range(entcount): - gid = cast(Tuple[int], - struct.unpack('>H', fp.read(2)))[0] + gid = cast(Tuple[int], struct.unpack(">H", fp.read(2)))[0] if gid: gid += delta - char2gid[first+c] = gid + char2gid[first + c] = gid elif fmttype == 4: - (segcount, _1, _2, _3) = \ - cast(Tuple[int, int, int, int], - struct.unpack('>HHHH', fp.read(8))) + (segcount, _1, _2, _3) = cast( + Tuple[int, int, int, int], struct.unpack(">HHHH", fp.read(8)) + ) segcount //= 2 - ecs = cast(Tuple[int, ...], - struct.unpack('>%dH' % 
segcount, - fp.read(2*segcount))) + ecs = cast( + Tuple[int, ...], + struct.unpack(">%dH" % segcount, fp.read(2 * segcount)), + ) fp.read(2) - scs = cast(Tuple[int, ...], - struct.unpack('>%dH' % segcount, - fp.read(2*segcount))) - idds = cast(Tuple[int, ...], - struct.unpack('>%dh' % segcount, - fp.read(2*segcount))) + scs = cast( + Tuple[int, ...], + struct.unpack(">%dH" % segcount, fp.read(2 * segcount)), + ) + idds = cast( + Tuple[int, ...], + struct.unpack(">%dh" % segcount, fp.read(2 * segcount)), + ) pos = fp.tell() - idrs = cast(Tuple[int, ...], - struct.unpack('>%dH' % segcount, - fp.read(2*segcount))) + idrs = cast( + Tuple[int, ...], + struct.unpack(">%dH" % segcount, fp.read(2 * segcount)), + ) for (ec, sc, idd, idr) in zip(ecs, scs, idds, idrs): if idr: - fp.seek(pos+idr) - for c in range(sc, ec+1): - b = cast(Tuple[int], - struct.unpack('>H', fp.read(2)))[0] - char2gid[c] = (b + idd) & 0xffff + fp.seek(pos + idr) + for c in range(sc, ec + 1): + b = cast(Tuple[int], struct.unpack(">H", fp.read(2)))[0] + char2gid[c] = (b + idd) & 0xFFFF else: - for c in range(sc, ec+1): - char2gid[c] = (c + idd) & 0xffff + for c in range(sc, ec + 1): + char2gid[c] = (c + idd) & 0xFFFF else: - assert False, str(('Unhandled', fmttype)) + assert False, str(("Unhandled", fmttype)) # create unicode map unicode_map = FileUnicodeMap() for (char, gid) in char2gid.items(): @@ -516,8 +839,8 @@ class PDFUnicodeNotDefined(PDFFontError): pass -LITERAL_STANDARD_ENCODING = LIT('StandardEncoding') -LITERAL_TYPE1C = LIT('Type1C') +LITERAL_STANDARD_ENCODING = LIT("StandardEncoding") +LITERAL_TYPE1C = LIT("Type1C") # Font widths are maintained in a dict type that maps from *either* unicode # chars or integer character IDs. 
@@ -525,30 +848,30 @@ FontWidthDict = Union[Dict[int, float], Dict[str, float]] class PDFFont: - def __init__( self, descriptor: Mapping[str, Any], widths: FontWidthDict, - default_width: Optional[float] = None + default_width: Optional[float] = None, ) -> None: self.descriptor = descriptor self.widths: FontWidthDict = resolve_all(widths) - self.fontname = resolve1(descriptor.get('FontName', 'unknown')) + self.fontname = resolve1(descriptor.get("FontName", "unknown")) if isinstance(self.fontname, PSLiteral): self.fontname = literal_name(self.fontname) - self.flags = int_value(descriptor.get('Flags', 0)) - self.ascent = num_value(descriptor.get('Ascent', 0)) - self.descent = num_value(descriptor.get('Descent', 0)) - self.italic_angle = num_value(descriptor.get('ItalicAngle', 0)) + self.flags = int_value(descriptor.get("Flags", 0)) + self.ascent = num_value(descriptor.get("Ascent", 0)) + self.descent = num_value(descriptor.get("Descent", 0)) + self.italic_angle = num_value(descriptor.get("ItalicAngle", 0)) if default_width is None: - self.default_width = num_value(descriptor.get('MissingWidth', 0)) + self.default_width = num_value(descriptor.get("MissingWidth", 0)) else: self.default_width = default_width - self.leading = num_value(descriptor.get('Leading', 0)) - self.bbox = cast(Rect, list_value( - resolve_all(descriptor.get('FontBBox', (0, 0, 0, 0))))) - self.hscale = self.vscale = .001 + self.leading = num_value(descriptor.get("Leading", 0)) + self.bbox = cast( + Rect, list_value(resolve_all(descriptor.get("FontBBox", (0, 0, 0, 0)))) + ) + self.hscale = self.vscale = 0.001 # PDF RM 9.8.1 specifies /Descent should always be a negative number. 
# PScript5.dll seems to produce Descent with a positive number, but @@ -559,7 +882,7 @@ class PDFFont: return def __repr__(self) -> str: - return '' + return "" def is_vertical(self) -> bool: return False @@ -579,13 +902,13 @@ class PDFFont: return self.descent * self.vscale def get_width(self) -> float: - w = self.bbox[2]-self.bbox[0] + w = self.bbox[2] - self.bbox[0] if w == 0: w = -self.default_width return w * self.hscale def get_height(self) -> float: - h = self.bbox[3]-self.bbox[1] + h = self.bbox[3] - self.bbox[1] if h == 0: h = self.ascent - self.descent return h * self.vscale @@ -602,10 +925,7 @@ class PDFFont: except (KeyError, PDFUnicodeNotDefined): return self.default_width * self.hscale - def char_disp( - self, - cid: int - ) -> Union[float, Tuple[Optional[float], float]]: + def char_disp(self, cid: int) -> Union[float, Tuple[Optional[float], float]]: "Returns an integer for horizontal fonts, a tuple for vertical fonts." return 0 @@ -617,30 +937,28 @@ class PDFFont: class PDFSimpleFont(PDFFont): - def __init__( self, descriptor: Mapping[str, Any], widths: FontWidthDict, - spec: Mapping[str, Any] + spec: Mapping[str, Any], ) -> None: # Font encoding is specified either by a name of # built-in encoding or a dictionary that describes # the differences. 
- if 'Encoding' in spec: - encoding = resolve1(spec['Encoding']) + if "Encoding" in spec: + encoding = resolve1(spec["Encoding"]) else: encoding = LITERAL_STANDARD_ENCODING if isinstance(encoding, dict): - name = literal_name(encoding.get('BaseEncoding', - LITERAL_STANDARD_ENCODING)) - diff = list_value(encoding.get('Differences', [])) + name = literal_name(encoding.get("BaseEncoding", LITERAL_STANDARD_ENCODING)) + diff = list_value(encoding.get("Differences", [])) self.cid2unicode = EncodingDB.get_encoding(name, diff) else: self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding)) self.unicode_map: Optional[UnicodeMap] = None - if 'ToUnicode' in spec: - strm = stream_value(spec['ToUnicode']) + if "ToUnicode" in spec: + strm = stream_value(spec["ToUnicode"]) self.unicode_map = FileUnicodeMap() CMapParser(self.unicode_map, BytesIO(strm.get_data())).run() PDFFont.__init__(self, descriptor, widths) @@ -659,73 +977,61 @@ class PDFSimpleFont(PDFFont): class PDFType1Font(PDFSimpleFont): - - def __init__( - self, - rsrcmgr: "PDFResourceManager", - spec: Mapping[str, Any] - ) -> None: + def __init__(self, rsrcmgr: "PDFResourceManager", spec: Mapping[str, Any]) -> None: try: - self.basefont = literal_name(spec['BaseFont']) + self.basefont = literal_name(spec["BaseFont"]) except KeyError: if settings.STRICT: - raise PDFFontError('BaseFont is missing') - self.basefont = 'unknown' + raise PDFFontError("BaseFont is missing") + self.basefont = "unknown" widths: FontWidthDict try: (descriptor, int_widths) = FontMetricsDB.get_metrics(self.basefont) widths = cast(Dict[str, float], int_widths) # implicit int->float except KeyError: - descriptor = dict_value(spec.get('FontDescriptor', {})) - firstchar = int_value(spec.get('FirstChar', 0)) + descriptor = dict_value(spec.get("FontDescriptor", {})) + firstchar = int_value(spec.get("FirstChar", 0)) # lastchar = int_value(spec.get('LastChar', 255)) - width_list = list_value(spec.get('Widths', [0]*256)) - widths = {i+firstchar: w 
for (i, w) in enumerate(width_list)} + width_list = list_value(spec.get("Widths", [0] * 256)) + widths = {i + firstchar: w for (i, w) in enumerate(width_list)} PDFSimpleFont.__init__(self, descriptor, widths, spec) - if 'Encoding' not in spec and 'FontFile' in descriptor: + if "Encoding" not in spec and "FontFile" in descriptor: # try to recover the missing encoding info from the font file. - self.fontfile = stream_value(descriptor.get('FontFile')) - length1 = int_value(self.fontfile['Length1']) + self.fontfile = stream_value(descriptor.get("FontFile")) + length1 = int_value(self.fontfile["Length1"]) data = self.fontfile.get_data()[:length1] parser = Type1FontHeaderParser(BytesIO(data)) self.cid2unicode = parser.get_encoding() return def __repr__(self) -> str: - return '' % self.basefont + return "" % self.basefont class PDFTrueTypeFont(PDFType1Font): - def __repr__(self) -> str: - return '' % self.basefont + return "" % self.basefont class PDFType3Font(PDFSimpleFont): - - def __init__( - self, - rsrcmgr: "PDFResourceManager", - spec: Mapping[str, Any] - ) -> None: - firstchar = int_value(spec.get('FirstChar', 0)) + def __init__(self, rsrcmgr: "PDFResourceManager", spec: Mapping[str, Any]) -> None: + firstchar = int_value(spec.get("FirstChar", 0)) # lastchar = int_value(spec.get('LastChar', 0)) - width_list = list_value(spec.get('Widths', [0]*256)) - widths = {i+firstchar: w for (i, w) in enumerate(width_list)} - if 'FontDescriptor' in spec: - descriptor = dict_value(spec['FontDescriptor']) + width_list = list_value(spec.get("Widths", [0] * 256)) + widths = {i + firstchar: w for (i, w) in enumerate(width_list)} + if "FontDescriptor" in spec: + descriptor = dict_value(spec["FontDescriptor"]) else: - descriptor = {'Ascent': 0, 'Descent': 0, - 'FontBBox': spec['FontBBox']} + descriptor = {"Ascent": 0, "Descent": 0, "FontBBox": spec["FontBBox"]} PDFSimpleFont.__init__(self, descriptor, widths, spec) - self.matrix = cast(Matrix, 
tuple(list_value(spec.get('FontMatrix')))) + self.matrix = cast(Matrix, tuple(list_value(spec.get("FontMatrix")))) (_, self.descent, _, self.ascent) = self.bbox (self.hscale, self.vscale) = apply_matrix_norm(self.matrix, (1, 1)) return def __repr__(self) -> str: - return '' + return "" class PDFCIDFont(PDFFont): @@ -735,47 +1041,50 @@ class PDFCIDFont(PDFFont): self, rsrcmgr: "PDFResourceManager", spec: Mapping[str, Any], - strict: bool = settings.STRICT + strict: bool = settings.STRICT, ) -> None: try: - self.basefont = literal_name(spec['BaseFont']) + self.basefont = literal_name(spec["BaseFont"]) except KeyError: if strict: - raise PDFFontError('BaseFont is missing') - self.basefont = 'unknown' - self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {})) - cid_registry = resolve1( - self.cidsysteminfo.get('Registry', b'unknown')).decode("latin1") - cid_ordering = resolve1( - self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1") - self.cidcoding = '{}-{}'.format(cid_registry, cid_ordering) + raise PDFFontError("BaseFont is missing") + self.basefont = "unknown" + self.cidsysteminfo = dict_value(spec.get("CIDSystemInfo", {})) + cid_registry = resolve1(self.cidsysteminfo.get("Registry", b"unknown")).decode( + "latin1" + ) + cid_ordering = resolve1(self.cidsysteminfo.get("Ordering", b"unknown")).decode( + "latin1" + ) + self.cidcoding = "{}-{}".format(cid_registry, cid_ordering) self.cmap: CMapBase = self.get_cmap_from_spec(spec, strict) try: - descriptor = dict_value(spec['FontDescriptor']) + descriptor = dict_value(spec["FontDescriptor"]) except KeyError: if strict: - raise PDFFontError('FontDescriptor is missing') + raise PDFFontError("FontDescriptor is missing") descriptor = {} ttf = None - if 'FontFile2' in descriptor: - self.fontfile = stream_value(descriptor.get('FontFile2')) - ttf = TrueTypeFont(self.basefont, - BytesIO(self.fontfile.get_data())) + if "FontFile2" in descriptor: + self.fontfile = stream_value(descriptor.get("FontFile2")) + ttf = 
TrueTypeFont(self.basefont, BytesIO(self.fontfile.get_data())) self.unicode_map: Optional[UnicodeMap] = None - if 'ToUnicode' in spec: - if isinstance(spec['ToUnicode'], PDFStream): - strm = stream_value(spec['ToUnicode']) + if "ToUnicode" in spec: + if isinstance(spec["ToUnicode"], PDFStream): + strm = stream_value(spec["ToUnicode"]) self.unicode_map = FileUnicodeMap() CMapParser(self.unicode_map, BytesIO(strm.get_data())).run() else: - cmap_name = literal_name(spec['ToUnicode']) - encoding = literal_name(spec['Encoding']) - if 'Identity' in cid_ordering \ - or 'Identity' in cmap_name \ - or 'Identity' in encoding: + cmap_name = literal_name(spec["ToUnicode"]) + encoding = literal_name(spec["Encoding"]) + if ( + "Identity" in cid_ordering + or "Identity" in cmap_name + or "Identity" in encoding + ): self.unicode_map = IdentityUnicodeMap() - elif self.cidcoding in ('Adobe-Identity', 'Adobe-UCS'): + elif self.cidcoding in ("Adobe-Identity", "Adobe-UCS"): if ttf: try: self.unicode_map = ttf.create_unicode_map() @@ -784,17 +1093,17 @@ class PDFCIDFont(PDFFont): else: try: self.unicode_map = CMapDB.get_unicode_map( - self.cidcoding, self.cmap.is_vertical()) + self.cidcoding, self.cmap.is_vertical() + ) except CMapDB.CMapNotFound: pass self.vertical = self.cmap.is_vertical() if self.vertical: # writing mode: vertical - widths2 = get_widths2(list_value(spec.get('W2', []))) - self.disps = {cid: (vx, vy) - for (cid, (_, (vx, vy))) in widths2.items()} - (vy, w) = resolve1(spec.get('DW2', [880, -1000])) + widths2 = get_widths2(list_value(spec.get("W2", []))) + self.disps = {cid: (vx, vy) for (cid, (_, (vx, vy))) in widths2.items()} + (vy, w) = resolve1(spec.get("DW2", [880, -1000])) self.default_disp = (None, vy) widths = {cid: w for (cid, (w, _)) in widths2.items()} default_width = w @@ -802,16 +1111,12 @@ class PDFCIDFont(PDFFont): # writing mode: horizontal self.disps = {} self.default_disp = 0 - widths = get_widths(list_value(spec.get('W', []))) - default_width = 
spec.get('DW', 1000) + widths = get_widths(list_value(spec.get("W", []))) + default_width = spec.get("DW", 1000) PDFFont.__init__(self, descriptor, widths, default_width=default_width) return - def get_cmap_from_spec( - self, - spec: Mapping[str, Any], - strict: bool - ) -> CMapBase: + def get_cmap_from_spec(self, spec: Mapping[str, Any], strict: bool) -> CMapBase: """Get cmap from font specification For certain PDFs, Encoding Type isn't mentioned as an attribute of @@ -832,31 +1137,32 @@ class PDFCIDFont(PDFFont): @staticmethod def _get_cmap_name(spec: Mapping[str, Any], strict: bool) -> str: """Get cmap name from font specification""" - cmap_name = 'unknown' # default value + cmap_name = "unknown" # default value try: - spec_encoding = spec['Encoding'] - if hasattr(spec_encoding, 'name'): - cmap_name = literal_name(spec['Encoding']) + spec_encoding = spec["Encoding"] + if hasattr(spec_encoding, "name"): + cmap_name = literal_name(spec["Encoding"]) else: - cmap_name = literal_name(spec_encoding['CMapName']) + cmap_name = literal_name(spec_encoding["CMapName"]) except KeyError: if strict: - raise PDFFontError('Encoding is unspecified') + raise PDFFontError("Encoding is unspecified") if type(cmap_name) is PDFStream: # type: ignore[comparison-overlap] cmap_name_stream: PDFStream = cast(PDFStream, cmap_name) - if 'CMapName' in cmap_name_stream: - cmap_name = cmap_name_stream.get('CMapName').name + if "CMapName" in cmap_name_stream: + cmap_name = cmap_name_stream.get("CMapName").name else: if strict: - raise PDFFontError('CMapName unspecified for encoding') + raise PDFFontError("CMapName unspecified for encoding") return IDENTITY_ENCODER.get(cmap_name, cmap_name) def __repr__(self) -> str: - return ''\ - .format(self.basefont, self.cidcoding) + return "".format( + self.basefont, self.cidcoding + ) def is_vertical(self) -> bool: return self.vertical @@ -867,10 +1173,7 @@ class PDFCIDFont(PDFFont): def decode(self, bytes: bytes) -> Iterable[int]: return 
self.cmap.decode(bytes) - def char_disp( - self, - cid: int - ) -> Union[float, Tuple[Optional[float], float]]: + def char_disp(self, cid: int) -> Union[float, Tuple[Optional[float], float]]: "Returns an integer for horizontal fonts, a tuple for vertical fonts." return self.disps.get(cid, self.default_disp) @@ -885,12 +1188,12 @@ class PDFCIDFont(PDFFont): def main(argv: List[str]) -> None: for fname in argv[1:]: - fp = open(fname, 'rb') + fp = open(fname, "rb") font = CFFFont(fname, fp) print(font) fp.close() return -if __name__ == '__main__': +if __name__ == "__main__": main(sys.argv) diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 7f60ec3..bc049b3 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -50,11 +50,11 @@ class PDFInterpreterError(PDFException): pass -LITERAL_PDF = LIT('PDF') -LITERAL_TEXT = LIT('Text') -LITERAL_FONT = LIT('Font') -LITERAL_FORM = LIT('Form') -LITERAL_IMAGE = LIT('Image') +LITERAL_PDF = LIT("PDF") +LITERAL_TEXT = LIT("Text") +LITERAL_FONT = LIT("Font") +LITERAL_FORM = LIT("Form") +LITERAL_IMAGE = LIT("Image") class PDFTextState: @@ -75,12 +75,23 @@ class PDFTextState: # self.linematrix is set def __repr__(self) -> str: - return '' \ - % (self.font, self.fontsize, self.charspace, self.wordspace, - self.scaling, self.leading, self.render, self.rise, - self.matrix, self.linematrix) + return ( + "" + % ( + self.font, + self.fontsize, + self.charspace, + self.wordspace, + self.scaling, + self.leading, + self.render, + self.rise, + self.matrix, + self.linematrix, + ) + ) def copy(self) -> "PDFTextState": obj = PDFTextState() @@ -102,13 +113,13 @@ class PDFTextState: Color = Union[ - float, # Greyscale - Tuple[float, float, float], # R, G, B - Tuple[float, float, float, float]] # C, M, Y, K + float, # Greyscale + Tuple[float, float, float], # R, G, B + Tuple[float, float, float, float], +] # C, M, Y, K class PDFGraphicState: - def __init__(self) -> None: self.linewidth: float = 0 self.linecap: Optional[object] = 
None @@ -138,12 +149,22 @@ class PDFGraphicState: return obj def __repr__(self) -> str: - return ('' % - (self.linewidth, self.linecap, self.linejoin, - self.miterlimit, self.dash, self.intent, self.flatness, - self.scolor, self.ncolor)) + return ( + "" + % ( + self.linewidth, + self.linecap, + self.linejoin, + self.miterlimit, + self.dash, + self.intent, + self.flatness, + self.scolor, + self.ncolor, + ) + ) class PDFResourceManager: @@ -179,41 +200,41 @@ class PDFResourceManager: if objid and objid in self._cached_fonts: font = self._cached_fonts[objid] else: - log.debug('get_font: create: objid=%r, spec=%r', objid, spec) + log.debug("get_font: create: objid=%r, spec=%r", objid, spec) if settings.STRICT: - if spec['Type'] is not LITERAL_FONT: - raise PDFFontError('Type is not /Font') + if spec["Type"] is not LITERAL_FONT: + raise PDFFontError("Type is not /Font") # Create a Font object. - if 'Subtype' in spec: - subtype = literal_name(spec['Subtype']) + if "Subtype" in spec: + subtype = literal_name(spec["Subtype"]) else: if settings.STRICT: - raise PDFFontError('Font Subtype is not specified.') - subtype = 'Type1' - if subtype in ('Type1', 'MMType1'): + raise PDFFontError("Font Subtype is not specified.") + subtype = "Type1" + if subtype in ("Type1", "MMType1"): # Type1 Font font = PDFType1Font(self, spec) - elif subtype == 'TrueType': + elif subtype == "TrueType": # TrueType Font font = PDFTrueTypeFont(self, spec) - elif subtype == 'Type3': + elif subtype == "Type3": # Type3 Font font = PDFType3Font(self, spec) - elif subtype in ('CIDFontType0', 'CIDFontType2'): + elif subtype in ("CIDFontType0", "CIDFontType2"): # CID Font font = PDFCIDFont(self, spec) - elif subtype == 'Type0': + elif subtype == "Type0": # Type0 Font - dfonts = list_value(spec['DescendantFonts']) + dfonts = list_value(spec["DescendantFonts"]) assert dfonts subspec = dict_value(dfonts[0]).copy() - for k in ('Encoding', 'ToUnicode'): + for k in ("Encoding", "ToUnicode"): if k in spec: 
subspec[k] = resolve1(spec[k]) font = self.get_font(None, subspec) else: if settings.STRICT: - raise PDFFontError('Invalid Font spec: %r' % spec) + raise PDFFontError("Invalid Font spec: %r" % spec) font = PDFType1Font(self, spec) # this is so wrong! if objid and self.caching: self._cached_fonts[objid] = font @@ -221,7 +242,6 @@ class PDFResourceManager: class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): - def __init__(self, streams: Sequence[object]) -> None: self.streams = streams self.istream = 0 @@ -236,7 +256,7 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): strm = stream_value(self.streams[self.istream]) self.istream += 1 else: - raise PSEOF('Unexpected EOF, file truncated?') + raise PSEOF("Unexpected EOF, file truncated?") self.fp = BytesIO(strm.get_data()) def seek(self, pos: int) -> None: @@ -255,14 +275,10 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): self.fp = None # type: ignore[assignment] self.charpos = 0 - def get_inline_data( - self, - pos: int, - target: bytes = b'EI' - ) -> Tuple[int, bytes]: + def get_inline_data(self, pos: int, target: bytes = b"EI") -> Tuple[int, bytes]: self.seek(pos) i = 0 - data = b'' + data = b"" while i <= len(target): self.fillbuf() if i: @@ -279,36 +295,35 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): else: try: j = self.buf.index(target[0], self.charpos) - data += self.buf[self.charpos:j+1] - self.charpos = j+1 + data += self.buf[self.charpos : j + 1] + self.charpos = j + 1 i = 1 except ValueError: - data += self.buf[self.charpos:] + data += self.buf[self.charpos :] self.charpos = len(self.buf) - data = data[:-(len(target)+1)] # strip the last part - data = re.sub(br'(\x0d\x0a|[\x0d\x0a])$', b'', data) + data = data[: -(len(target) + 1)] # strip the last part + data = re.sub(rb"(\x0d\x0a|[\x0d\x0a])$", b"", data) return (pos, data) def flush(self) -> None: self.add_results(*self.popall()) - KEYWORD_BI = KWD(b'BI') - KEYWORD_ID = 
KWD(b'ID') - KEYWORD_EI = KWD(b'EI') + KEYWORD_BI = KWD(b"BI") + KEYWORD_ID = KWD(b"ID") + KEYWORD_EI = KWD(b"EI") def do_keyword(self, pos: int, token: PSKeyword) -> None: if token is self.KEYWORD_BI: # inline image within a content stream - self.start_type(pos, 'inline') + self.start_type(pos, "inline") elif token is self.KEYWORD_ID: try: - (_, objs) = self.end_type('inline') + (_, objs) = self.end_type("inline") if len(objs) % 2 != 0: - error_msg = 'Invalid dictionary construct: {!r}' \ - .format(objs) + error_msg = "Invalid dictionary construct: {!r}".format(objs) raise PSTypeError(error_msg) d = {literal_name(k): v for (k, v) in choplist(2, objs)} - (pos, data) = self.get_inline_data(pos+len(b'ID ')) + (pos, data) = self.get_inline_data(pos + len(b"ID ")) obj = PDFStream(d, data) self.push((pos, obj)) self.push((pos, self.KEYWORD_EI)) @@ -351,32 +366,30 @@ class PDFPageInterpreter: name = literal_name(spec[0]) else: name = literal_name(spec) - if name == 'ICCBased' and isinstance(spec, list) \ - and 2 <= len(spec): - return PDFColorSpace(name, stream_value(spec[1])['N']) - elif name == 'DeviceN' and isinstance(spec, list) \ - and 2 <= len(spec): + if name == "ICCBased" and isinstance(spec, list) and 2 <= len(spec): + return PDFColorSpace(name, stream_value(spec[1])["N"]) + elif name == "DeviceN" and isinstance(spec, list) and 2 <= len(spec): return PDFColorSpace(name, len(list_value(spec[1]))) else: return PREDEFINED_COLORSPACE.get(name) for (k, v) in dict_value(resources).items(): - log.debug('Resource: %r: %r', k, v) - if k == 'Font': + log.debug("Resource: %r: %r", k, v) + if k == "Font": for (fontid, spec) in dict_value(v).items(): objid = None if isinstance(spec, PDFObjRef): objid = spec.objid spec = dict_value(spec) self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec) - elif k == 'ColorSpace': + elif k == "ColorSpace": for (csid, spec) in dict_value(v).items(): colorspace = get_colorspace(resolve1(spec)) if colorspace is not None: self.csmap[csid] = 
colorspace - elif k == 'ProcSet': + elif k == "ProcSet": self.rsrcmgr.get_procset(list_value(v)) - elif k == 'XObject': + elif k == "XObject": for (xobjid, xobjstrm) in dict_value(v).items(): self.xobjmap[xobjid] = xobjstrm return @@ -410,14 +423,11 @@ class PDFPageInterpreter: self.argstack = self.argstack[:-n] return x - def get_current_state( - self - ) -> Tuple[Matrix, PDFTextState, PDFGraphicState]: + def get_current_state(self) -> Tuple[Matrix, PDFTextState, PDFGraphicState]: return (self.ctm, self.textstate.copy(), self.graphicstate.copy()) def set_current_state( - self, - state: Tuple[Matrix, PDFTextState, PDFGraphicState] + self, state: Tuple[Matrix, PDFTextState, PDFGraphicState] ) -> None: (self.ctm, self.textstate, self.graphicstate) = state self.device.set_ctm(self.ctm) @@ -441,11 +451,10 @@ class PDFPageInterpreter: c1: PDFStackT, d1: PDFStackT, e1: PDFStackT, - f1: PDFStackT + f1: PDFStackT, ) -> None: """Concatenate matrix to current transformation matrix""" - self.ctm = \ - mult_matrix(cast(Matrix, (a1, b1, c1, d1, e1, f1)), self.ctm) + self.ctm = mult_matrix(cast(Matrix, (a1, b1, c1, d1, e1, f1)), self.ctm) self.device.set_ctm(self.ctm) return @@ -491,12 +500,12 @@ class PDFPageInterpreter: def do_m(self, x: PDFStackT, y: PDFStackT) -> None: """Begin new subpath""" - self.curpath.append(('m', cast(float, x), cast(float, y))) + self.curpath.append(("m", cast(float, x), cast(float, y))) return def do_l(self, x: PDFStackT, y: PDFStackT) -> None: """Append straight line segment to path""" - self.curpath.append(('l', cast(float, x), cast(float, y))) + self.curpath.append(("l", cast(float, x), cast(float, y))) return def do_c( @@ -506,66 +515,57 @@ class PDFPageInterpreter: x2: PDFStackT, y2: PDFStackT, x3: PDFStackT, - y3: PDFStackT + y3: PDFStackT, ) -> None: """Append curved segment to path (three control points)""" - self.curpath.append(('c', cast(float, x1), cast(float, y1), - cast(float, x2), cast(float, y2), - cast(float, x3), cast(float, y3))) + 
self.curpath.append( + ( + "c", + cast(float, x1), + cast(float, y1), + cast(float, x2), + cast(float, y2), + cast(float, x3), + cast(float, y3), + ) + ) return - def do_v( - self, - x2: PDFStackT, - y2: PDFStackT, - x3: PDFStackT, - y3: PDFStackT - ) -> None: + def do_v(self, x2: PDFStackT, y2: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None: """Append curved segment to path (initial point replicated)""" - self.curpath.append(('v', cast(float, x2), cast(float, y2), - cast(float, x3), cast(float, y3))) + self.curpath.append( + ("v", cast(float, x2), cast(float, y2), cast(float, x3), cast(float, y3)) + ) return - def do_y( - self, - x1: PDFStackT, - y1: PDFStackT, - x3: PDFStackT, - y3: PDFStackT - ) -> None: + def do_y(self, x1: PDFStackT, y1: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None: """Append curved segment to path (final point replicated)""" - self.curpath.append(('y', cast(float, x1), cast(float, y1), - cast(float, x3), cast(float, y3))) + self.curpath.append( + ("y", cast(float, x1), cast(float, y1), cast(float, x3), cast(float, y3)) + ) return def do_h(self) -> None: """Close subpath""" - self.curpath.append(('h',)) + self.curpath.append(("h",)) return - def do_re( - self, - x: PDFStackT, - y: PDFStackT, - w: PDFStackT, - h: PDFStackT - ) -> None: + def do_re(self, x: PDFStackT, y: PDFStackT, w: PDFStackT, h: PDFStackT) -> None: """Append rectangle to path""" x = cast(float, x) y = cast(float, y) w = cast(float, w) h = cast(float, h) - self.curpath.append(('m', x, y)) - self.curpath.append(('l', x+w, y)) - self.curpath.append(('l', x+w, y+h)) - self.curpath.append(('l', x, y+h)) - self.curpath.append(('h',)) + self.curpath.append(("m", x, y)) + self.curpath.append(("l", x + w, y)) + self.curpath.append(("l", x + w, y + h)) + self.curpath.append(("l", x, y + h)) + self.curpath.append(("h",)) return def do_S(self) -> None: """Stroke path""" - self.device.paint_path(self.graphicstate, True, False, False, - self.curpath) + 
self.device.paint_path(self.graphicstate, True, False, False, self.curpath) self.curpath = [] return @@ -577,8 +577,7 @@ class PDFPageInterpreter: def do_f(self) -> None: """Fill path using nonzero winding number rule""" - self.device.paint_path(self.graphicstate, False, True, False, - self.curpath) + self.device.paint_path(self.graphicstate, False, True, False, self.curpath) self.curpath = [] return @@ -588,22 +587,19 @@ class PDFPageInterpreter: def do_f_a(self) -> None: """Fill path using even-odd rule""" - self.device.paint_path(self.graphicstate, False, True, True, - self.curpath) + self.device.paint_path(self.graphicstate, False, True, True, self.curpath) self.curpath = [] return def do_B(self) -> None: """Fill and stroke path using nonzero winding number rule""" - self.device.paint_path(self.graphicstate, True, True, False, - self.curpath) + self.device.paint_path(self.graphicstate, True, True, False, self.curpath) self.curpath = [] return def do_B_a(self) -> None: """Fill and stroke path using even-odd rule""" - self.device.paint_path(self.graphicstate, True, True, True, - self.curpath) + self.device.paint_path(self.graphicstate, True, True, True, self.curpath) self.curpath = [] return @@ -641,7 +637,7 @@ class PDFPageInterpreter: self.scs = self.csmap[literal_name(name)] except KeyError: if settings.STRICT: - raise PDFInterpreterError('Undefined ColorSpace: %r' % name) + raise PDFInterpreterError("Undefined ColorSpace: %r" % name) return def do_cs(self, name: PDFStackT) -> None: @@ -650,7 +646,7 @@ class PDFPageInterpreter: self.ncs = self.csmap[literal_name(name)] except KeyError: if settings.STRICT: - raise PDFInterpreterError('Undefined ColorSpace: %r' % name) + raise PDFInterpreterError("Undefined ColorSpace: %r" % name) return def do_G(self, gray: PDFStackT) -> None: @@ -665,38 +661,32 @@ class PDFPageInterpreter: def do_RG(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None: """Set RGB color for stroking operations""" - self.graphicstate.scolor = 
\ - (cast(float, r), cast(float, g), cast(float, b)) + self.graphicstate.scolor = (cast(float, r), cast(float, g), cast(float, b)) return def do_rg(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None: """Set RGB color for nonstroking operations""" - self.graphicstate.ncolor = \ - (cast(float, r), cast(float, g), cast(float, b)) + self.graphicstate.ncolor = (cast(float, r), cast(float, g), cast(float, b)) return - def do_K( - self, - c: PDFStackT, - m: PDFStackT, - y: PDFStackT, - k: PDFStackT - ) -> None: + def do_K(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None: """Set CMYK color for stroking operations""" - self.graphicstate.scolor = \ - (cast(float, c), cast(float, m), cast(float, y), cast(float, k)) + self.graphicstate.scolor = ( + cast(float, c), + cast(float, m), + cast(float, y), + cast(float, k), + ) return - def do_k( - self, - c: PDFStackT, - m: PDFStackT, - y: PDFStackT, - k: PDFStackT - ) -> None: + def do_k(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None: """Set CMYK color for nonstroking operations""" - self.graphicstate.ncolor = \ - (cast(float, c), cast(float, m), cast(float, y), cast(float, k)) + self.graphicstate.ncolor = ( + cast(float, c), + cast(float, m), + cast(float, y), + cast(float, k), + ) return def do_SCN(self) -> None: @@ -705,7 +695,7 @@ class PDFPageInterpreter: n = self.scs.ncomponents else: if settings.STRICT: - raise PDFInterpreterError('No colorspace specified!') + raise PDFInterpreterError("No colorspace specified!") n = 1 self.graphicstate.scolor = cast(Color, self.pop(n)) return @@ -716,7 +706,7 @@ class PDFPageInterpreter: n = self.ncs.ncomponents else: if settings.STRICT: - raise PDFInterpreterError('No colorspace specified!') + raise PDFInterpreterError("No colorspace specified!") n = 1 self.graphicstate.ncolor = cast(Color, self.pop(n)) return @@ -831,7 +821,7 @@ class PDFPageInterpreter: self.textstate.font = self.fontmap[literal_name(fontid)] except KeyError: if 
settings.STRICT: - raise PDFInterpreterError('Undefined Font id: %r' % fontid) + raise PDFInterpreterError("Undefined Font id: %r" % fontid) self.textstate.font = self.rsrcmgr.get_font(None, {}) self.textstate.fontsize = cast(float, fontsize) return @@ -854,7 +844,7 @@ class PDFPageInterpreter: tx = cast(float, tx) ty = cast(float, ty) (a, b, c, d, e, f) = self.textstate.matrix - self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f) + self.textstate.matrix = (a, b, c, d, tx * a + ty * c + e, tx * b + ty * d + f) self.textstate.linematrix = (0, 0) return @@ -863,7 +853,7 @@ class PDFPageInterpreter: tx = cast(float, tx) ty = cast(float, ty) (a, b, c, d, e, f) = self.textstate.matrix - self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f) + self.textstate.matrix = (a, b, c, d, tx * a + ty * c + e, tx * b + ty * d + f) self.textstate.leading = ty self.textstate.linematrix = (0, 0) return @@ -875,7 +865,7 @@ class PDFPageInterpreter: c: PDFStackT, d: PDFStackT, e: PDFStackT, - f: PDFStackT + f: PDFStackT, ) -> None: """Set text matrix and text line matrix""" self.textstate.matrix = cast(Matrix, (a, b, c, d, e, f)) @@ -885,8 +875,14 @@ class PDFPageInterpreter: def do_T_a(self) -> None: """Move to start of next text line""" (a, b, c, d, e, f) = self.textstate.matrix - self.textstate.matrix = (a, b, c, d, self.textstate.leading*c+e, - self.textstate.leading*d+f) + self.textstate.matrix = ( + a, + b, + c, + d, + self.textstate.leading * c + e, + self.textstate.leading * d + f, + ) self.textstate.linematrix = (0, 0) return @@ -894,11 +890,12 @@ class PDFPageInterpreter: """Show text, allowing individual glyph positioning""" if self.textstate.font is None: if settings.STRICT: - raise PDFInterpreterError('No font specified!') + raise PDFInterpreterError("No font specified!") return assert self.ncs is not None - self.device.render_string(self.textstate, cast(PDFTextSeq, seq), - self.ncs, self.graphicstate.copy()) + self.device.render_string( + 
self.textstate, cast(PDFTextSeq, seq), self.ncs, self.graphicstate.copy() + ) return def do_Tj(self, s: PDFStackT) -> None: @@ -935,7 +932,7 @@ class PDFPageInterpreter: def do_EI(self, obj: PDFStackT) -> None: """End inline image object""" - if isinstance(obj, PDFStream) and 'W' in obj and 'H' in obj: + if isinstance(obj, PDFStream) and "W" in obj and "H" in obj: iobjid = str(id(obj)) self.device.begin_figure(iobjid, (0, 0, 1, 1), MATRIX_IDENTITY) self.device.render_image(iobjid, obj) @@ -949,28 +946,28 @@ class PDFPageInterpreter: xobj = stream_value(self.xobjmap[xobjid]) except KeyError: if settings.STRICT: - raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) + raise PDFInterpreterError("Undefined xobject id: %r" % xobjid) return - log.debug('Processing xobj: %r', xobj) - subtype = xobj.get('Subtype') - if subtype is LITERAL_FORM and 'BBox' in xobj: + log.debug("Processing xobj: %r", xobj) + subtype = xobj.get("Subtype") + if subtype is LITERAL_FORM and "BBox" in xobj: interpreter = self.dup() - bbox = cast(Rect, list_value(xobj['BBox'])) - matrix = cast(Matrix, list_value( - xobj.get('Matrix', MATRIX_IDENTITY))) + bbox = cast(Rect, list_value(xobj["BBox"])) + matrix = cast(Matrix, list_value(xobj.get("Matrix", MATRIX_IDENTITY))) # According to PDF reference 1.7 section 4.9.1, XObjects in # earlier PDFs (prior to v1.2) use the page's Resources entry # instead of having their own Resources entry. 
- xobjres = xobj.get('Resources') + xobjres = xobj.get("Resources") if xobjres: resources = dict_value(xobjres) else: resources = self.resources.copy() self.device.begin_figure(xobjid, bbox, matrix) - interpreter.render_contents(resources, [xobj], - ctm=mult_matrix(matrix, self.ctm)) + interpreter.render_contents( + resources, [xobj], ctm=mult_matrix(matrix, self.ctm) + ) self.device.end_figure(xobjid) - elif subtype is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj: + elif subtype is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj: self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY) self.device.render_image(xobjid, xobj) self.device.end_figure(xobjid) @@ -980,7 +977,7 @@ class PDFPageInterpreter: return def process_page(self, page: PDFPage) -> None: - log.debug('Processing page: %r', page) + log.debug("Processing page: %r", page) (x0, y0, x1, y1) = page.mediabox if page.rotate == 90: ctm = (0, -1, 1, 0, -y0, x1) @@ -999,14 +996,15 @@ class PDFPageInterpreter: self, resources: Dict[object, object], streams: Sequence[object], - ctm: Matrix = MATRIX_IDENTITY + ctm: Matrix = MATRIX_IDENTITY, ) -> None: """Render the content streams. This method may be called recursively. 
""" - log.debug('render_contents: resources=%r, streams=%r, ctm=%r', - resources, streams, ctm) + log.debug( + "render_contents: resources=%r, streams=%r, ctm=%r", resources, streams, ctm + ) self.init_resources(resources) self.init_state(ctm) self.execute(list_value(streams)) @@ -1025,22 +1023,23 @@ class PDFPageInterpreter: break if isinstance(obj, PSKeyword): name = keyword_name(obj) - method = 'do_%s' % name.replace('*', '_a').replace('"', '_w')\ - .replace("'", '_q') + method = "do_%s" % name.replace("*", "_a").replace('"', "_w").replace( + "'", "_q" + ) if hasattr(self, method): func = getattr(self, method) - nargs = func.__code__.co_argcount-1 + nargs = func.__code__.co_argcount - 1 if nargs: args = self.pop(nargs) - log.debug('exec: %s %r', name, args) + log.debug("exec: %s %r", name, args) if len(args) == nargs: func(*args) else: - log.debug('exec: %s', name) + log.debug("exec: %s", name) func() else: if settings.STRICT: - error_msg = 'Unknown operator: %r' % name + error_msg = "Unknown operator: %r" % name raise PDFInterpreterError(error_msg) else: self.push(obj) diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py index 5eb8709..13bbb14 100644 --- a/pdfminer/pdfpage.py +++ b/pdfminer/pdfpage.py @@ -4,8 +4,7 @@ from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple from pdfminer.utils import Rect from . import settings -from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed, \ - PDFNoPageLabels +from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed, PDFNoPageLabels from .pdfparser import PDFParser from .pdftypes import PDFObjectNotFound from .pdftypes import dict_value @@ -17,8 +16,8 @@ from .psparser import LIT log = logging.getLogger(__name__) # some predefined literals and keywords. 
-LITERAL_PAGE = LIT('Page') -LITERAL_PAGES = LIT('Pages') +LITERAL_PAGE = LIT("Page") +LITERAL_PAGES = LIT("Pages") class PDFPage: @@ -44,11 +43,7 @@ class PDFPage: """ def __init__( - self, - doc: PDFDocument, - pageid: object, - attrs: object, - label: Optional[str] + self, doc: PDFDocument, pageid: object, attrs: object, label: Optional[str] ) -> None: """Initialize a page object. @@ -61,19 +56,20 @@ class PDFPage: self.pageid = pageid self.attrs = dict_value(attrs) self.label = label - self.lastmod = resolve1(self.attrs.get('LastModified')) - self.resources: Dict[object, object] = \ - resolve1(self.attrs.get('Resources', dict())) - self.mediabox: Rect = resolve1(self.attrs['MediaBox']) - if 'CropBox' in self.attrs: - self.cropbox: Rect = resolve1(self.attrs['CropBox']) + self.lastmod = resolve1(self.attrs.get("LastModified")) + self.resources: Dict[object, object] = resolve1( + self.attrs.get("Resources", dict()) + ) + self.mediabox: Rect = resolve1(self.attrs["MediaBox"]) + if "CropBox" in self.attrs: + self.cropbox: Rect = resolve1(self.attrs["CropBox"]) else: self.cropbox = self.mediabox - self.rotate = (int_value(self.attrs.get('Rotate', 0))+360) % 360 - self.annots = self.attrs.get('Annots') - self.beads = self.attrs.get('B') - if 'Contents' in self.attrs: - contents = resolve1(self.attrs['Contents']) + self.rotate = (int_value(self.attrs.get("Rotate", 0)) + 360) % 360 + self.annots = self.attrs.get("Annots") + self.beads = self.attrs.get("B") + if "Contents" in self.attrs: + contents = resolve1(self.attrs["Contents"]) else: contents = [] if not isinstance(contents, list): @@ -81,16 +77,16 @@ class PDFPage: self.contents: List[object] = contents def __repr__(self) -> str: - return ''\ - .format(self.resources, self.mediabox) + return "".format( + self.resources, self.mediabox + ) - INHERITABLE_ATTRS = {'Resources', 'MediaBox', 'CropBox', 'Rotate'} + INHERITABLE_ATTRS = {"Resources", "MediaBox", "CropBox", "Rotate"} @classmethod def create_pages(cls, 
document: PDFDocument) -> Iterator["PDFPage"]: def search( - obj: object, - parent: Dict[str, object] + obj: object, parent: Dict[str, object] ) -> Iterator[Tuple[int, Dict[object, Dict[object, object]]]]: if isinstance(obj, int): objid = obj @@ -104,16 +100,16 @@ class PDFPage: if k in cls.INHERITABLE_ATTRS and k not in tree: tree[k] = v - tree_type = tree.get('Type') + tree_type = tree.get("Type") if tree_type is None and not settings.STRICT: # See #64 - tree_type = tree.get('type') + tree_type = tree.get("type") - if tree_type is LITERAL_PAGES and 'Kids' in tree: - log.debug('Pages: Kids=%r', tree['Kids']) - for c in list_value(tree['Kids']): + if tree_type is LITERAL_PAGES and "Kids" in tree: + log.debug("Pages: Kids=%r", tree["Kids"]) + for c in list_value(tree["Kids"]): yield from search(c, tree) elif tree_type is LITERAL_PAGE: - log.debug('Page: %r', tree) + log.debug("Page: %r", tree) yield (objid, tree) try: @@ -122,8 +118,8 @@ class PDFPage: page_labels = itertools.repeat(None) pages = False - if 'Pages' in document.catalog: - objects = search(document.catalog['Pages'], document.catalog) + if "Pages" in document.catalog: + objects = search(document.catalog["Pages"], document.catalog) for (objid, tree) in objects: yield cls(document, objid, tree, next(page_labels)) pages = True @@ -133,8 +129,7 @@ class PDFPage: for objid in xref.get_objids(): try: obj = document.getobj(objid) - if isinstance(obj, dict) \ - and obj.get('Type') is LITERAL_PAGE: + if isinstance(obj, dict) and obj.get("Type") is LITERAL_PAGE: yield cls(document, objid, obj, next(page_labels)) except PDFObjectNotFound: pass @@ -146,9 +141,9 @@ class PDFPage: fp: BinaryIO, pagenos: Optional[Container[int]] = None, maxpages: int = 0, - password: str = '', + password: str = "", caching: bool = True, - check_extractable: bool = False + check_extractable: bool = False, ) -> Iterator["PDFPage"]: # Create a PDF parser object associated with the file object. 
parser = PDFParser(fp) @@ -158,20 +153,22 @@ class PDFPage: # If not, warn the user and proceed. if not doc.is_extractable: if check_extractable: - error_msg = 'Text extraction is not allowed: %r' % fp + error_msg = "Text extraction is not allowed: %r" % fp raise PDFTextExtractionNotAllowed(error_msg) else: - warning_msg = 'The PDF %r contains a metadata field '\ - 'indicating that it should not allow ' \ - 'text extraction. Ignoring this field ' \ - 'and proceeding. Use the check_extractable ' \ - 'if you want to raise an error in this case' % fp + warning_msg = ( + "The PDF %r contains a metadata field " + "indicating that it should not allow " + "text extraction. Ignoring this field " + "and proceeding. Use the check_extractable " + "if you want to raise an error in this case" % fp + ) log.warning(warning_msg) # Process each page contained in the document. for (pageno, page) in enumerate(cls.create_pages(doc)): if pagenos and (pageno not in pagenos): continue yield page - if maxpages and maxpages <= pageno+1: + if maxpages and maxpages <= pageno + 1: break return diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index e0a5a61..992d884 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -51,12 +51,12 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]): """Associates the parser with a PDFDocument object.""" self.doc = doc - KEYWORD_R = KWD(b'R') - KEYWORD_NULL = KWD(b'null') - KEYWORD_ENDOBJ = KWD(b'endobj') - KEYWORD_STREAM = KWD(b'stream') - KEYWORD_XREF = KWD(b'xref') - KEYWORD_STARTXREF = KWD(b'startxref') + KEYWORD_R = KWD(b"R") + KEYWORD_NULL = KWD(b"null") + KEYWORD_ENDOBJ = KWD(b"endobj") + KEYWORD_STREAM = KWD(b"stream") + KEYWORD_XREF = KWD(b"xref") + KEYWORD_STARTXREF = KWD(b"startxref") def do_keyword(self, pos: int, token: PSKeyword) -> None: """Handles PDF-related keywords.""" @@ -76,8 +76,7 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]): if len(self.curstack) >= 2: 
try: ((_, objid), (_, genno)) = self.pop(2) - (objid, genno) = ( - int(objid), int(genno)) # type: ignore[arg-type] + (objid, genno) = (int(objid), int(genno)) # type: ignore[arg-type] assert self.doc is not None obj = PDFObjRef(self.doc, objid, genno) self.push((pos, obj)) @@ -90,30 +89,30 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]): objlen = 0 if not self.fallback: try: - objlen = int_value(dic['Length']) + objlen = int_value(dic["Length"]) except KeyError: if settings.STRICT: - raise PDFSyntaxError('/Length is undefined: %r' % dic) + raise PDFSyntaxError("/Length is undefined: %r" % dic) self.seek(pos) try: (_, line) = self.nextline() # 'stream' except PSEOF: if settings.STRICT: - raise PDFSyntaxError('Unexpected EOF') + raise PDFSyntaxError("Unexpected EOF") return pos += len(line) self.fp.seek(pos) data = bytearray(self.fp.read(objlen)) - self.seek(pos+objlen) + self.seek(pos + objlen) while 1: try: (linepos, line) = self.nextline() except PSEOF: if settings.STRICT: - raise PDFSyntaxError('Unexpected EOF') + raise PDFSyntaxError("Unexpected EOF") break - if b'endstream' in line: - i = line.index(b'endstream') + if b"endstream" in line: + i = line.index(b"endstream") objlen += i if self.fallback: data += line[:i] @@ -121,10 +120,15 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]): objlen += len(line) if self.fallback: data += line - self.seek(pos+objlen) + self.seek(pos + objlen) # XXX limit objlen not to exceed object boundary - log.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos, - objlen, dic, data[:10]) + log.debug( + "Stream: pos=%d, objlen=%d, dic=%r, data=%r...", + pos, + objlen, + dic, + data[:10], + ) assert self.doc is not None stream = PDFStream(dic, bytes(data), self.doc.decipher) self.push((pos, stream)) @@ -149,15 +153,14 @@ class PDFStreamParser(PDFParser): def flush(self) -> None: self.add_results(*self.popall()) - KEYWORD_OBJ = KWD(b'obj') + KEYWORD_OBJ = KWD(b"obj") 
def do_keyword(self, pos: int, token: PSKeyword) -> None: if token is self.KEYWORD_R: # reference to indirect object try: ((_, objid), (_, genno)) = self.pop(2) - (objid, genno) = ( - int(objid), int(genno)) # type: ignore[arg-type] + (objid, genno) = (int(objid), int(genno)) # type: ignore[arg-type] obj = PDFObjRef(self.doc, objid, genno) self.push((pos, obj)) except PSSyntaxError: @@ -167,7 +170,7 @@ class PDFStreamParser(PDFParser): if settings.STRICT: # See PDF Spec 3.4.6: Only the object values are stored in the # stream; the obj and endobj keywords are not used. - raise PDFSyntaxError('Keyword endobj found in stream') + raise PDFSyntaxError("Keyword endobj found in stream") return # others self.push((pos, token)) diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index e10af5b..f4543b9 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -2,8 +2,17 @@ import io import logging import sys import zlib -from typing import (TYPE_CHECKING, Any, Dict, Iterable, Optional, Union, List, - Tuple, cast) +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + Optional, + Union, + List, + Tuple, + cast, +) from . import settings from .ascii85 import ascii85decode @@ -21,18 +30,18 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -LITERAL_CRYPT = LIT('Crypt') +LITERAL_CRYPT = LIT("Crypt") # Abbreviation of Filter names in PDF 4.8.6. 
"Inline Images" -LITERALS_FLATE_DECODE = (LIT('FlateDecode'), LIT('Fl')) -LITERALS_LZW_DECODE = (LIT('LZWDecode'), LIT('LZW')) -LITERALS_ASCII85_DECODE = (LIT('ASCII85Decode'), LIT('A85')) -LITERALS_ASCIIHEX_DECODE = (LIT('ASCIIHexDecode'), LIT('AHx')) -LITERALS_RUNLENGTH_DECODE = (LIT('RunLengthDecode'), LIT('RL')) -LITERALS_CCITTFAX_DECODE = (LIT('CCITTFaxDecode'), LIT('CCF')) -LITERALS_DCT_DECODE = (LIT('DCTDecode'), LIT('DCT')) -LITERALS_JBIG2_DECODE = (LIT('JBIG2Decode'),) -LITERALS_JPX_DECODE = (LIT('JPXDecode'),) +LITERALS_FLATE_DECODE = (LIT("FlateDecode"), LIT("Fl")) +LITERALS_LZW_DECODE = (LIT("LZWDecode"), LIT("LZW")) +LITERALS_ASCII85_DECODE = (LIT("ASCII85Decode"), LIT("A85")) +LITERALS_ASCIIHEX_DECODE = (LIT("ASCIIHexDecode"), LIT("AHx")) +LITERALS_RUNLENGTH_DECODE = (LIT("RunLengthDecode"), LIT("RL")) +LITERALS_CCITTFAX_DECODE = (LIT("CCITTFaxDecode"), LIT("CCF")) +LITERALS_DCT_DECODE = (LIT("DCTDecode"), LIT("DCT")) +LITERALS_JBIG2_DECODE = (LIT("JBIG2Decode"),) +LITERALS_JPX_DECODE = (LIT("JPXDecode"),) if sys.version_info >= (3, 8): @@ -40,8 +49,14 @@ if sys.version_info >= (3, 8): class DecipherCallable(Protocol): """Fully typed a decipher callback, with optional parameter.""" - def __call__(self, objid: int, genno: int, data: bytes, - attrs: Optional[Dict[str, Any]] = None) -> bytes: + + def __call__( + self, + objid: int, + genno: int, + data: bytes, + attrs: Optional[Dict[str, Any]] = None, + ) -> bytes: raise NotImplementedError else: # Fallback for older Python @@ -75,21 +90,15 @@ class PDFNotImplementedError(PDFException): class PDFObjRef(PDFObject): - - def __init__( - self, - doc: Optional["PDFDocument"], - objid: int, - _: object - ) -> None: + def __init__(self, doc: Optional["PDFDocument"], objid: int, _: object) -> None: if objid == 0: if settings.STRICT: - raise PDFValueError('PDF object id cannot be 0.') + raise PDFValueError("PDF object id cannot be 0.") self.doc = doc self.objid = objid def __repr__(self) -> str: - return '' % 
(self.objid) + return "" % (self.objid) def resolve(self, default: object = None) -> Any: assert self.doc is not None @@ -126,14 +135,8 @@ def resolve_all(x: object, default: object = None) -> Any: return x -def decipher_all( - decipher: DecipherCallable, - objid: int, - genno: int, - x: object -) -> Any: - """Recursively deciphers the given object. - """ +def decipher_all(decipher: DecipherCallable, objid: int, genno: int, x: object) -> Any: + """Recursively deciphers the given object.""" if isinstance(x, bytes): return decipher(objid, genno, x) if isinstance(x, list): @@ -148,7 +151,7 @@ def int_value(x: object) -> int: x = resolve1(x) if not isinstance(x, int): if settings.STRICT: - raise PDFTypeError('Integer required: %r' % x) + raise PDFTypeError("Integer required: %r" % x) return 0 return x @@ -157,7 +160,7 @@ def float_value(x: object) -> float: x = resolve1(x) if not isinstance(x, float): if settings.STRICT: - raise PDFTypeError('Float required: %r' % x) + raise PDFTypeError("Float required: %r" % x) return 0.0 return x @@ -166,7 +169,7 @@ def num_value(x: object) -> float: x = resolve1(x) if not isinstance(x, (int, float)): # == utils.isnumber(x) if settings.STRICT: - raise PDFTypeError('Int or Float required: %r' % x) + raise PDFTypeError("Int or Float required: %r" % x) return 0 return x @@ -184,8 +187,8 @@ def str_value(x: object) -> bytes: x = resolve1(x) if not isinstance(x, bytes): if settings.STRICT: - raise PDFTypeError('String required: %r' % x) - return b'' + raise PDFTypeError("String required: %r" % x) + return b"" return x @@ -193,7 +196,7 @@ def list_value(x: object) -> Union[List[Any], Tuple[Any, ...]]: x = resolve1(x) if not isinstance(x, (list, tuple)): if settings.STRICT: - raise PDFTypeError('List required: %r' % x) + raise PDFTypeError("List required: %r" % x) return [] return x @@ -202,8 +205,8 @@ def dict_value(x: object) -> Dict[Any, Any]: x = resolve1(x) if not isinstance(x, dict): if settings.STRICT: - logger.error('PDFTypeError : 
Dict required: %r', x) - raise PDFTypeError('Dict required: %r' % x) + logger.error("PDFTypeError : Dict required: %r", x) + raise PDFTypeError("Dict required: %r" % x) return {} return x @@ -212,8 +215,8 @@ def stream_value(x: object) -> "PDFStream": x = resolve1(x) if not isinstance(x, PDFStream): if settings.STRICT: - raise PDFTypeError('PDFStream required: %r' % x) - return PDFStream({}, b'') + raise PDFTypeError("PDFStream required: %r" % x) + return PDFStream({}, b"") return x @@ -223,7 +226,7 @@ def decompress_corrupted(data: bytes) -> bytes: """ d = zlib.decompressobj() f = io.BytesIO(data) - result_str = b'' + result_str = b"" buffer = f.read(1) i = 0 try: @@ -239,12 +242,11 @@ def decompress_corrupted(data: bytes) -> bytes: class PDFStream(PDFObject): - def __init__( self, attrs: Dict[str, Any], rawdata: bytes, - decipher: Optional[DecipherCallable] = None + decipher: Optional[DecipherCallable] = None, ) -> None: assert isinstance(attrs, dict), str(type(attrs)) self.attrs = attrs @@ -261,12 +263,18 @@ class PDFStream(PDFObject): def __repr__(self) -> str: if self.data is None: assert self.rawdata is not None - return '' % \ - (self.objid, len(self.rawdata), self.attrs) + return "" % ( + self.objid, + len(self.rawdata), + self.attrs, + ) else: assert self.data is not None - return '' % \ - (self.objid, len(self.data), self.attrs) + return "" % ( + self.objid, + len(self.data), + self.attrs, + ) def __contains__(self, name: object) -> bool: return name in self.attrs @@ -284,8 +292,8 @@ class PDFStream(PDFObject): return default def get_filters(self) -> List[Tuple[Any, Any]]: - filters = self.get_any(('F', 'Filter')) - params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {}) + filters = self.get_any(("F", "Filter")) + params = self.get_any(("DP", "DecodeParms", "FDecodeParms"), {}) if not filters: return [] if not isinstance(filters, list): @@ -298,15 +306,16 @@ class PDFStream(PDFObject): # resolve filter if possible _filters = [] for fltr in 
filters: - if hasattr(fltr, 'resolve'): + if hasattr(fltr, "resolve"): fltr = fltr.resolve()[0] _filters.append(fltr) # return list solves https://github.com/pdfminer/pdfminer.six/issues/15 return list(zip(_filters, params)) def decode(self) -> None: - assert self.data is None \ - and self.rawdata is not None, str((self.data, self.rawdata)) + assert self.data is None and self.rawdata is not None, str( + (self.data, self.rawdata) + ) data = self.rawdata if self.decipher: # Handle encryption @@ -326,14 +335,13 @@ class PDFStream(PDFObject): except zlib.error as e: if settings.STRICT: - error_msg = 'Invalid zlib bytes: {!r}, {!r}'\ - .format(e, data) + error_msg = "Invalid zlib bytes: {!r}, {!r}".format(e, data) raise PDFException(error_msg) try: data = decompress_corrupted(data) except zlib.error: - data = b'' + data = b"" elif f in LITERALS_LZW_DECODE: data = lzwdecode(data) @@ -356,25 +364,26 @@ class PDFStream(PDFObject): pass elif f == LITERAL_CRYPT: # not yet.. - raise PDFNotImplementedError('/Crypt filter is unsupported') + raise PDFNotImplementedError("/Crypt filter is unsupported") else: - raise PDFNotImplementedError('Unsupported filter: %r' % f) + raise PDFNotImplementedError("Unsupported filter: %r" % f) # apply predictors - if params and 'Predictor' in params: - pred = int_value(params['Predictor']) + if params and "Predictor" in params: + pred = int_value(params["Predictor"]) if pred == 1: # no predictor pass elif 10 <= pred: # PNG predictor - colors = int_value(params.get('Colors', 1)) - columns = int_value(params.get('Columns', 1)) - raw_bits_per_component = params.get('BitsPerComponent', 8) + colors = int_value(params.get("Colors", 1)) + columns = int_value(params.get("Columns", 1)) + raw_bits_per_component = params.get("BitsPerComponent", 8) bitspercomponent = int_value(raw_bits_per_component) - data = apply_png_predictor(pred, colors, columns, - bitspercomponent, data) + data = apply_png_predictor( + pred, colors, columns, bitspercomponent, data + ) 
else: - error_msg = 'Unsupported predictor: %r' % pred + error_msg = "Unsupported predictor: %r" % pred raise PDFNotImplementedError(error_msg) self.data = data self.rawdata = None diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index e691f40..c7f8a17 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -4,8 +4,19 @@ import logging import re -from typing import (Any, BinaryIO, Dict, Generic, Iterator, List, - Optional, Tuple, Type, TypeVar, Union) +from typing import ( + Any, + BinaryIO, + Dict, + Generic, + Iterator, + List, + Optional, + Tuple, + Type, + TypeVar, + Union, +) from . import settings from .utils import choplist @@ -59,7 +70,7 @@ class PSLiteral(PSObject): def __repr__(self) -> str: name = self.name - return '/%r' % name + return "/%r" % name class PSKeyword(PSObject): @@ -79,10 +90,10 @@ class PSKeyword(PSObject): def __repr__(self) -> str: name = self.name - return '/%r' % name + return "/%r" % name -_SymbolT = TypeVar('_SymbolT', PSLiteral, PSKeyword) +_SymbolT = TypeVar("_SymbolT", PSLiteral, PSKeyword) class PSSymbolTable(Generic[_SymbolT]): @@ -110,25 +121,25 @@ PSLiteralTable = PSSymbolTable(PSLiteral) PSKeywordTable = PSSymbolTable(PSKeyword) LIT = PSLiteralTable.intern KWD = PSKeywordTable.intern -KEYWORD_PROC_BEGIN = KWD(b'{') -KEYWORD_PROC_END = KWD(b'}') -KEYWORD_ARRAY_BEGIN = KWD(b'[') -KEYWORD_ARRAY_END = KWD(b']') -KEYWORD_DICT_BEGIN = KWD(b'<<') -KEYWORD_DICT_END = KWD(b'>>') +KEYWORD_PROC_BEGIN = KWD(b"{") +KEYWORD_PROC_END = KWD(b"}") +KEYWORD_ARRAY_BEGIN = KWD(b"[") +KEYWORD_ARRAY_END = KWD(b"]") +KEYWORD_DICT_BEGIN = KWD(b"<<") +KEYWORD_DICT_END = KWD(b">>") def literal_name(x: object) -> Any: if not isinstance(x, PSLiteral): if settings.STRICT: - raise PSTypeError('Literal required: {!r}'.format(x)) + raise PSTypeError("Literal required: {!r}".format(x)) else: name = x else: name = x.name if not isinstance(name, str): try: - name = str(name, 'utf-8') + name = str(name, "utf-8") except Exception: pass return 
name @@ -137,34 +148,34 @@ def literal_name(x: object) -> Any: def keyword_name(x: object) -> Any: if not isinstance(x, PSKeyword): if settings.STRICT: - raise PSTypeError('Keyword required: %r' % x) + raise PSTypeError("Keyword required: %r" % x) else: name = x else: - name = str(x.name, 'utf-8', 'ignore') + name = str(x.name, "utf-8", "ignore") return name -EOL = re.compile(br'[\r\n]') -SPC = re.compile(br'\s') -NONSPC = re.compile(br'\S') -HEX = re.compile(br'[0-9a-fA-F]') -END_LITERAL = re.compile(br'[#/%\[\]()<>{}\s]') -END_HEX_STRING = re.compile(br'[^\s0-9a-fA-F]') -HEX_PAIR = re.compile(br'[0-9a-fA-F]{2}|.') -END_NUMBER = re.compile(br'[^0-9]') -END_KEYWORD = re.compile(br'[#/%\[\]()<>{}\s]') -END_STRING = re.compile(br'[()\134]') -OCT_STRING = re.compile(br'[0-7]') +EOL = re.compile(rb"[\r\n]") +SPC = re.compile(rb"\s") +NONSPC = re.compile(rb"\S") +HEX = re.compile(rb"[0-9a-fA-F]") +END_LITERAL = re.compile(rb"[#/%\[\]()<>{}\s]") +END_HEX_STRING = re.compile(rb"[^\s0-9a-fA-F]") +HEX_PAIR = re.compile(rb"[0-9a-fA-F]{2}|.") +END_NUMBER = re.compile(rb"[^0-9]") +END_KEYWORD = re.compile(rb"[#/%\[\]()<>{}\s]") +END_STRING = re.compile(rb"[()\134]") +OCT_STRING = re.compile(rb"[0-7]") ESC_STRING = { - b'b': 8, - b't': 9, - b'n': 10, - b'f': 12, - b'r': 13, - b'(': 40, - b')': 41, - b'\\': 92 + b"b": 8, + b"t": 9, + b"n": 10, + b"f": 12, + b"r": 13, + b"(": 40, + b")": 41, + b"\\": 92, } @@ -173,8 +184,8 @@ PSBaseParserToken = Union[float, bool, PSLiteral, PSKeyword, bytes] class PSBaseParser: - """Most basic PostScript parser that performs only tokenization. 
- """ + """Most basic PostScript parser that performs only tokenization.""" + BUFSIZ = 4096 def __init__(self, fp: BinaryIO) -> None: @@ -182,8 +193,7 @@ class PSBaseParser: self.seek(0) def __repr__(self) -> str: - return '<%s: %r, bufpos=%d>' % (self.__class__.__name__, self.fp, - self.bufpos) + return "<%s: %r, bufpos=%d>" % (self.__class__.__name__, self.fp, self.bufpos) def flush(self) -> None: return @@ -193,29 +203,28 @@ class PSBaseParser: return def tell(self) -> int: - return self.bufpos+self.charpos + return self.bufpos + self.charpos def poll(self, pos: Optional[int] = None, n: int = 80) -> None: pos0 = self.fp.tell() if not pos: - pos = self.bufpos+self.charpos + pos = self.bufpos + self.charpos self.fp.seek(pos) - log.debug('poll(%d): %r', pos, self.fp.read(n)) + log.debug("poll(%d): %r", pos, self.fp.read(n)) self.fp.seek(pos0) return def seek(self, pos: int) -> None: - """Seeks the parser to the given position. - """ - log.debug('seek: %r', pos) + """Seeks the parser to the given position.""" + log.debug("seek: %r", pos) self.fp.seek(pos) # reset the status for nextline() self.bufpos = pos - self.buf = b'' + self.buf = b"" self.charpos = 0 # reset the status for nexttoken() self._parse1 = self._parse_main - self._curtoken = b'' + self._curtoken = b"" self._curtokenpos = 0 self._tokens: List[Tuple[int, PSBaseParserToken]] = [] return @@ -227,37 +236,36 @@ class PSBaseParser: self.bufpos = self.fp.tell() self.buf = self.fp.read(self.BUFSIZ) if not self.buf: - raise PSEOF('Unexpected EOF') + raise PSEOF("Unexpected EOF") self.charpos = 0 return def nextline(self) -> Tuple[int, bytes]: - """Fetches a next line that ends either with \\r or \\n. 
- """ - linebuf = b'' + """Fetches a next line that ends either with \\r or \\n.""" + linebuf = b"" linepos = self.bufpos + self.charpos eol = False while 1: self.fillbuf() if eol: - c = self.buf[self.charpos:self.charpos+1] + c = self.buf[self.charpos : self.charpos + 1] # handle b'\r\n' - if c == b'\n': + if c == b"\n": linebuf += c self.charpos += 1 break m = EOL.search(self.buf, self.charpos) if m: - linebuf += self.buf[self.charpos:m.end(0)] + linebuf += self.buf[self.charpos : m.end(0)] self.charpos = m.end(0) - if linebuf[-1:] == b'\r': + if linebuf[-1:] == b"\r": eol = True else: break else: - linebuf += self.buf[self.charpos:] + linebuf += self.buf[self.charpos :] self.charpos = len(self.buf) - log.debug('nextline: %r, %r', linepos, linebuf) + log.debug("nextline: %r, %r", linepos, linebuf) return (linepos, linebuf) @@ -268,22 +276,22 @@ class PSBaseParser: """ self.fp.seek(0, 2) pos = self.fp.tell() - buf = b'' + buf = b"" while 0 < pos: prevpos = pos - pos = max(0, pos-self.BUFSIZ) + pos = max(0, pos - self.BUFSIZ) self.fp.seek(pos) - s = self.fp.read(prevpos-pos) + s = self.fp.read(prevpos - pos) if not s: break while 1: - n = max(s.rfind(b'\r'), s.rfind(b'\n')) + n = max(s.rfind(b"\r"), s.rfind(b"\n")) if n == -1: buf = s + buf break yield s[n:] + buf s = s[:n] - buf = b'' + buf = b"" return def _parse_main(self, s: bytes, i: int) -> int: @@ -291,44 +299,44 @@ class PSBaseParser: if not m: return len(s) j = m.start(0) - c = s[j:j+1] - self._curtokenpos = self.bufpos+j - if c == b'%': - self._curtoken = b'%' + c = s[j : j + 1] + self._curtokenpos = self.bufpos + j + if c == b"%": + self._curtoken = b"%" self._parse1 = self._parse_comment - return j+1 - elif c == b'/': - self._curtoken = b'' + return j + 1 + elif c == b"/": + self._curtoken = b"" self._parse1 = self._parse_literal - return j+1 - elif c in b'-+' or c.isdigit(): + return j + 1 + elif c in b"-+" or c.isdigit(): self._curtoken = c self._parse1 = self._parse_number - return j+1 - elif c == 
b'.': + return j + 1 + elif c == b".": self._curtoken = c self._parse1 = self._parse_float - return j+1 + return j + 1 elif c.isalpha(): self._curtoken = c self._parse1 = self._parse_keyword - return j+1 - elif c == b'(': - self._curtoken = b'' + return j + 1 + elif c == b"(": + self._curtoken = b"" self.paren = 1 self._parse1 = self._parse_string - return j+1 - elif c == b'<': - self._curtoken = b'' + return j + 1 + elif c == b"<": + self._curtoken = b"" self._parse1 = self._parse_wopen - return j+1 - elif c == b'>': - self._curtoken = b'' + return j + 1 + elif c == b">": + self._curtoken = b"" self._parse1 = self._parse_wclose - return j+1 + return j + 1 else: self._add_token(KWD(c)) - return j+1 + return j + 1 def _add_token(self, obj: PSBaseParserToken) -> None: self._tokens.append((self._curtokenpos, obj)) @@ -353,13 +361,13 @@ class PSBaseParser: return len(s) j = m.start(0) self._curtoken += s[i:j] - c = s[j:j+1] - if c == b'#': - self.hex = b'' + c = s[j : j + 1] + if c == b"#": + self.hex = b"" self._parse1 = self._parse_literal_hex - return j+1 + return j + 1 try: - name: Union[str, bytes] = str(self._curtoken, 'utf-8') + name: Union[str, bytes] = str(self._curtoken, "utf-8") except Exception: name = self._curtoken self._add_token(LIT(name)) @@ -367,10 +375,10 @@ class PSBaseParser: return j def _parse_literal_hex(self, s: bytes, i: int) -> int: - c = s[i:i+1] + c = s[i : i + 1] if HEX.match(c) and len(self.hex) < 2: self.hex += c - return i+1 + return i + 1 if self.hex: self._curtoken += bytes((int(self.hex, 16),)) self._parse1 = self._parse_literal @@ -383,11 +391,11 @@ class PSBaseParser: return len(s) j = m.start(0) self._curtoken += s[i:j] - c = s[j:j+1] - if c == b'.': + c = s[j : j + 1] + if c == b".": self._curtoken += c self._parse1 = self._parse_float - return j+1 + return j + 1 try: self._add_token(int(self._curtoken)) except ValueError: @@ -416,9 +424,9 @@ class PSBaseParser: return len(s) j = m.start(0) self._curtoken += s[i:j] - if 
self._curtoken == b'true': + if self._curtoken == b"true": token: Union[bool, PSKeyword] = True - elif self._curtoken == b'false': + elif self._curtoken == b"false": token = False else: token = KWD(self._curtoken) @@ -433,34 +441,34 @@ class PSBaseParser: return len(s) j = m.start(0) self._curtoken += s[i:j] - c = s[j:j+1] - if c == b'\\': - self.oct = b'' + c = s[j : j + 1] + if c == b"\\": + self.oct = b"" self._parse1 = self._parse_string_1 - return j+1 - if c == b'(': + return j + 1 + if c == b"(": self.paren += 1 self._curtoken += c - return j+1 - if c == b')': + return j + 1 + if c == b")": self.paren -= 1 if self.paren: # WTF, they said balanced parens need no special treatment. self._curtoken += c - return j+1 + return j + 1 self._add_token(self._curtoken) self._parse1 = self._parse_main - return j+1 + return j + 1 def _parse_string_1(self, s: bytes, i: int) -> int: """Parse literal strings PDF Reference 3.2.3 """ - c = s[i:i+1] + c = s[i : i + 1] if OCT_STRING.match(c) and len(self.oct) < 3: self.oct += c - return i+1 + return i + 1 elif self.oct: self._curtoken += bytes((int(self.oct, 8),)) @@ -470,18 +478,18 @@ class PSBaseParser: elif c in ESC_STRING: self._curtoken += bytes((ESC_STRING[c],)) - elif c == b'\r' and len(s) > i+1 and s[i+1:i+2] == b'\n': + elif c == b"\r" and len(s) > i + 1 and s[i + 1 : i + 2] == b"\n": # If current and next character is \r\n skip both because enters # after a \ are ignored i += 1 # default action self._parse1 = self._parse_string - return i+1 + return i + 1 def _parse_wopen(self, s: bytes, i: int) -> int: - c = s[i:i+1] - if c == b'<': + c = s[i : i + 1] + if c == b"<": self._add_token(KEYWORD_DICT_BEGIN) self._parse1 = self._parse_main i += 1 @@ -490,8 +498,8 @@ class PSBaseParser: return i def _parse_wclose(self, s: bytes, i: int) -> int: - c = s[i:i+1] - if c == b'>': + c = s[i : i + 1] + if c == b">": self._add_token(KEYWORD_DICT_END) i += 1 self._parse1 = self._parse_main @@ -504,8 +512,9 @@ class PSBaseParser: 
return len(s) j = m.start(0) self._curtoken += s[i:j] - token = HEX_PAIR.sub(lambda m: bytes((int(m.group(0), 16),)), - SPC.sub(b'', self._curtoken)) + token = HEX_PAIR.sub( + lambda m: bytes((int(m.group(0), 16),)), SPC.sub(b"", self._curtoken) + ) self._add_token(token) self._parse1 = self._parse_main return j @@ -515,7 +524,7 @@ class PSBaseParser: self.fillbuf() self.charpos = self._parse1(self.buf, self.charpos) token = self._tokens.pop(0) - log.debug('nexttoken: %r', token) + log.debug("nexttoken: %r", token) return token @@ -530,15 +539,13 @@ PSStackEntry = Tuple[int, PSStackType[ExtraT]] class PSStackParser(PSBaseParser, Generic[ExtraT]): - def __init__(self, fp: BinaryIO) -> None: PSBaseParser.__init__(self, fp) self.reset() return def reset(self) -> None: - self.context: List[Tuple[int, Optional[str], - List[PSStackEntry[ExtraT]]]] = [] + self.context: List[Tuple[int, Optional[str], List[PSStackEntry[ExtraT]]]] = [] self.curtype: Optional[str] = None self.curstack: List[PSStackEntry[ExtraT]] = [] self.results: List[PSStackEntry[ExtraT]] = [] @@ -565,25 +572,24 @@ class PSStackParser(PSBaseParser, Generic[ExtraT]): def add_results(self, *objs: PSStackEntry[ExtraT]) -> None: try: - log.debug('add_results: %r', objs) + log.debug("add_results: %r", objs) except Exception: - log.debug('add_results: (unprintable object)') + log.debug("add_results: (unprintable object)") self.results.extend(objs) return def start_type(self, pos: int, type: str) -> None: self.context.append((pos, self.curtype, self.curstack)) (self.curtype, self.curstack) = (type, []) - log.debug('start_type: pos=%r, type=%r', pos, type) + log.debug("start_type: pos=%r, type=%r", pos, type) return def end_type(self, type: str) -> Tuple[int, List[PSStackType[ExtraT]]]: if self.curtype != type: - raise PSTypeError('Type mismatch: {!r} != {!r}' - .format(self.curtype, type)) + raise PSTypeError("Type mismatch: {!r} != {!r}".format(self.curtype, type)) objs = [obj for (_, obj) in self.curstack] (pos, 
self.curtype, self.curstack) = self.context.pop() - log.debug('end_type: pos=%r, type=%r, objs=%r', pos, type, objs) + log.debug("end_type: pos=%r, type=%r, objs=%r", pos, type, objs) return (pos, objs) def do_keyword(self, pos: int, token: PSKeyword) -> None: @@ -604,47 +610,55 @@ class PSStackParser(PSBaseParser, Generic[ExtraT]): self.push((pos, token)) elif token == KEYWORD_ARRAY_BEGIN: # begin array - self.start_type(pos, 'a') + self.start_type(pos, "a") elif token == KEYWORD_ARRAY_END: # end array try: - self.push(self.end_type('a')) + self.push(self.end_type("a")) except PSTypeError: if settings.STRICT: raise elif token == KEYWORD_DICT_BEGIN: # begin dictionary - self.start_type(pos, 'd') + self.start_type(pos, "d") elif token == KEYWORD_DICT_END: # end dictionary try: - (pos, objs) = self.end_type('d') + (pos, objs) = self.end_type("d") if len(objs) % 2 != 0: - error_msg = 'Invalid dictionary construct: %r' % objs + error_msg = "Invalid dictionary construct: %r" % objs raise PSSyntaxError(error_msg) - d = {literal_name(k): v - for (k, v) in choplist(2, objs) if v is not None} + d = { + literal_name(k): v + for (k, v) in choplist(2, objs) + if v is not None + } self.push((pos, d)) except PSTypeError: if settings.STRICT: raise elif token == KEYWORD_PROC_BEGIN: # begin proc - self.start_type(pos, 'p') + self.start_type(pos, "p") elif token == KEYWORD_PROC_END: # end proc try: - self.push(self.end_type('p')) + self.push(self.end_type("p")) except PSTypeError: if settings.STRICT: raise elif isinstance(token, PSKeyword): - log.debug('do_keyword: pos=%r, token=%r, stack=%r', pos, - token, self.curstack) + log.debug( + "do_keyword: pos=%r, token=%r, stack=%r", pos, token, self.curstack + ) self.do_keyword(pos, token) else: - log.error('unknown token: pos=%r, token=%r, stack=%r', pos, - token, self.curstack) + log.error( + "unknown token: pos=%r, token=%r, stack=%r", + pos, + token, + self.curstack, + ) self.do_keyword(pos, token) raise if self.context: @@ -653,7 
+667,7 @@ class PSStackParser(PSBaseParser, Generic[ExtraT]): self.flush() obj = self.results.pop(0) try: - log.debug('nextobject: %r', obj) + log.debug("nextobject: %r", obj) except Exception: - log.debug('nextobject: (unprintable object)') + log.debug("nextobject: (unprintable object)") return obj diff --git a/pdfminer/runlength.py b/pdfminer/runlength.py index b79e18e..7209660 100644 --- a/pdfminer/runlength.py +++ b/pdfminer/runlength.py @@ -20,7 +20,7 @@ def rldecode(data: bytes) -> bytes: (2 to 128) times during decompression. A length value of 128 denotes EOD. """ - decoded = b'' + decoded = b"" i = 0 while i < len(data): length = data[i] @@ -28,13 +28,13 @@ def rldecode(data: bytes) -> bytes: break if length >= 0 and length < 128: - for j in range(i+1, (i+1)+(length+1)): + for j in range(i + 1, (i + 1) + (length + 1)): decoded += bytes((data[j],)) - i = (i+1) + (length+1) + i = (i + 1) + (length + 1) if length > 128: - run = bytes((data[i+1],))*(257-length) + run = bytes((data[i + 1],)) * (257 - length) decoded += run - i = (i+1) + 1 + i = (i + 1) + 1 return decoded diff --git a/pdfminer/utils.py b/pdfminer/utils.py index 77d5f9b..6a35d34 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -6,9 +6,24 @@ import pathlib import string import struct from html import escape -from typing import (Any, BinaryIO, Callable, Dict, Generic, Iterable, Iterator, - List, Optional, Set, TextIO, Tuple, TypeVar, Union, - TYPE_CHECKING, cast) +from typing import ( + Any, + BinaryIO, + Callable, + Dict, + Generic, + Iterable, + Iterator, + List, + Optional, + Set, + TextIO, + Tuple, + TypeVar, + Union, + TYPE_CHECKING, + cast, +) if TYPE_CHECKING: from .layout import LTComponent @@ -30,12 +45,8 @@ class open_filename(object): (str or pathlib.PurePath type is supported) and closes it on exit, (just like `open`), but does nothing for file-like objects. 
""" - def __init__( - self, - filename: FileOrName, - *args: Any, - **kwargs: Any - ) -> None: + + def __init__(self, filename: FileOrName, *args: Any, **kwargs: Any) -> None: if isinstance(filename, pathlib.PurePath): filename = str(filename) if isinstance(filename, str): @@ -45,17 +56,12 @@ class open_filename(object): self.file_handler = cast(AnyIO, filename) self.closing = False else: - raise TypeError('Unsupported input type: %s' % type(filename)) + raise TypeError("Unsupported input type: %s" % type(filename)) def __enter__(self) -> AnyIO: return self.file_handler - def __exit__( - self, - exc_type: object, - exc_val: object, - exc_tb: object - ) -> None: + def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None: if self.closing: self.file_handler.close() @@ -70,7 +76,7 @@ def make_compat_str(o: object) -> str: """Converts everything to string, if bytes guessing the encoding.""" if isinstance(o, bytes): enc = chardet.detect(o) - return o.decode(enc['encoding']) + return o.decode(enc["encoding"]) else: return str(o) @@ -80,20 +86,18 @@ def shorten_str(s: str, size: int) -> str: return s[:size] if len(s) > size: length = (size - 5) // 2 - return '{} ... {}'.format(s[:length], s[-length:]) + return "{} ... {}".format(s[:length], s[-length:]) else: return s def compatible_encode_method( - bytesorstring: Union[bytes, str], - encoding: str = 'utf-8', - erraction: str = 'ignore' + bytesorstring: Union[bytes, str], encoding: str = "utf-8", erraction: str = "ignore" ) -> str: """When Py2 str.encode is called, it often means bytes.encode in Py3. - This does either. - """ + This does either. 
+ """ if isinstance(bytesorstring, str): return bytesorstring assert isinstance(bytesorstring, bytes), str(type(bytesorstring)) @@ -119,11 +123,7 @@ def paeth_predictor(left: int, above: int, upper_left: int) -> int: def apply_png_predictor( - pred: int, - colors: int, - columns: int, - bitspercomponent: int, - data: bytes + pred: int, colors: int, columns: int, bitspercomponent: int, data: bytes ) -> bytes: """Reverse the effect of the PNG predictor @@ -135,12 +135,12 @@ def apply_png_predictor( nbytes = colors * columns * bitspercomponent // 8 bpp = colors * bitspercomponent // 8 # number of bytes per complete pixel - buf = b'' - line_above = b'\x00' * columns + buf = b"" + line_above = b"\x00" * columns for scanline_i in range(0, len(data), nbytes + 1): filter_type = data[scanline_i] - line_encoded = data[scanline_i + 1:scanline_i + 1 + nbytes] - raw = b'' + line_encoded = data[scanline_i + 1 : scanline_i + 1 + nbytes] + raw = b"" if filter_type == 0: # Filter type 0: None @@ -223,10 +223,11 @@ Point = Tuple[float, float] Rect = Tuple[float, float, float, float] Matrix = Tuple[float, float, float, float, float, float] PathSegment = Union[ - Tuple[str], # Literal['h'] - Tuple[str, float, float], # Literal['m', 'l'] - Tuple[str, float, float, float, float], # Literal['v', 'y'] - Tuple[str, float, float, float, float, float, float]] # Literal['c'] + Tuple[str], # Literal['h'] + Tuple[str, float, float], # Literal['m', 'l'] + Tuple[str, float, float, float, float], # Literal['v', 'y'] + Tuple[str, float, float, float, float, float, float], +] # Literal['c'] # Matrix operations MATRIX_IDENTITY: Matrix = (1, 0, 0, 1, 0, 0) @@ -236,9 +237,14 @@ def mult_matrix(m1: Matrix, m0: Matrix) -> Matrix: (a1, b1, c1, d1, e1, f1) = m1 (a0, b0, c0, d0, e0, f0) = m0 """Returns the multiplication of two matrices.""" - return (a0 * a1 + c0 * b1, b0 * a1 + d0 * b1, - a0 * c1 + c0 * d1, b0 * c1 + d0 * d1, - a0 * e1 + c0 * f1 + e0, b0 * e1 + d0 * f1 + f0) + return ( + a0 * a1 + c0 * b1, 
+ b0 * a1 + d0 * b1, + a0 * c1 + c0 * d1, + b0 * c1 + d0 * d1, + a0 * e1 + c0 * f1 + e0, + b0 * e1 + d0 * f1 + f0, + ) def translate_matrix(m: Matrix, v: Point) -> Matrix: @@ -264,11 +270,12 @@ def apply_matrix_norm(m: Matrix, v: Point) -> Point: # Utility functions + def isnumber(x: object) -> bool: return isinstance(x, (int, float)) -_T = TypeVar('_T') +_T = TypeVar("_T") def uniq(objs: Iterable[_T]) -> Iterator[_T]: @@ -282,10 +289,7 @@ def uniq(objs: Iterable[_T]) -> Iterator[_T]: return -def fsplit( - pred: Callable[[_T], bool], - objs: Iterable[_T] -) -> Tuple[List[_T], List[_T]]: +def fsplit(pred: Callable[[_T], bool], objs: Iterable[_T]) -> Tuple[List[_T], List[_T]]: """Split a list into two classes according to the predicate.""" t = [] f = [] @@ -315,9 +319,7 @@ def get_bound(pts: Iterable[Point]) -> Rect: def pick( - seq: Iterable[_T], - func: Callable[[_T], float], - maxobj: Optional[_T] = None + seq: Iterable[_T], func: Callable[[_T], float], maxobj: Optional[_T] = None ) -> Optional[_T]: """Picks the object obj where func(obj) has the highest value.""" maxscore = None @@ -347,77 +349,303 @@ def nunpack(s: bytes, default: int = 0) -> int: elif length == 1: return ord(s) elif length == 2: - return cast(int, struct.unpack('>H', s)[0]) + return cast(int, struct.unpack(">H", s)[0]) elif length == 3: - return cast(int, struct.unpack('>L', b'\x00' + s)[0]) + return cast(int, struct.unpack(">L", b"\x00" + s)[0]) elif length == 4: - return cast(int, struct.unpack('>L', s)[0]) + return cast(int, struct.unpack(">L", s)[0]) elif length == 8: - return cast(int, struct.unpack('>Q', s)[0]) + return cast(int, struct.unpack(">Q", s)[0]) else: - raise TypeError('invalid length: %d' % length) + raise TypeError("invalid length: %d" % length) -PDFDocEncoding = ''.join(chr(x) for x in ( - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0017, 
0x0017, - 0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc, - 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, - 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, - 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, - 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, - 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, - 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, - 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000, - 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044, - 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018, - 0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160, - 0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000, - 0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, - 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af, - 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, - 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, - 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, - 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, - 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, - 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, - 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, - 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, - 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, - 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, -)) +PDFDocEncoding = "".join( + chr(x) + for x in ( + 0x0000, + 0x0001, + 
0x0002, + 0x0003, + 0x0004, + 0x0005, + 0x0006, + 0x0007, + 0x0008, + 0x0009, + 0x000A, + 0x000B, + 0x000C, + 0x000D, + 0x000E, + 0x000F, + 0x0010, + 0x0011, + 0x0012, + 0x0013, + 0x0014, + 0x0015, + 0x0017, + 0x0017, + 0x02D8, + 0x02C7, + 0x02C6, + 0x02D9, + 0x02DD, + 0x02DB, + 0x02DA, + 0x02DC, + 0x0020, + 0x0021, + 0x0022, + 0x0023, + 0x0024, + 0x0025, + 0x0026, + 0x0027, + 0x0028, + 0x0029, + 0x002A, + 0x002B, + 0x002C, + 0x002D, + 0x002E, + 0x002F, + 0x0030, + 0x0031, + 0x0032, + 0x0033, + 0x0034, + 0x0035, + 0x0036, + 0x0037, + 0x0038, + 0x0039, + 0x003A, + 0x003B, + 0x003C, + 0x003D, + 0x003E, + 0x003F, + 0x0040, + 0x0041, + 0x0042, + 0x0043, + 0x0044, + 0x0045, + 0x0046, + 0x0047, + 0x0048, + 0x0049, + 0x004A, + 0x004B, + 0x004C, + 0x004D, + 0x004E, + 0x004F, + 0x0050, + 0x0051, + 0x0052, + 0x0053, + 0x0054, + 0x0055, + 0x0056, + 0x0057, + 0x0058, + 0x0059, + 0x005A, + 0x005B, + 0x005C, + 0x005D, + 0x005E, + 0x005F, + 0x0060, + 0x0061, + 0x0062, + 0x0063, + 0x0064, + 0x0065, + 0x0066, + 0x0067, + 0x0068, + 0x0069, + 0x006A, + 0x006B, + 0x006C, + 0x006D, + 0x006E, + 0x006F, + 0x0070, + 0x0071, + 0x0072, + 0x0073, + 0x0074, + 0x0075, + 0x0076, + 0x0077, + 0x0078, + 0x0079, + 0x007A, + 0x007B, + 0x007C, + 0x007D, + 0x007E, + 0x0000, + 0x2022, + 0x2020, + 0x2021, + 0x2026, + 0x2014, + 0x2013, + 0x0192, + 0x2044, + 0x2039, + 0x203A, + 0x2212, + 0x2030, + 0x201E, + 0x201C, + 0x201D, + 0x2018, + 0x2019, + 0x201A, + 0x2122, + 0xFB01, + 0xFB02, + 0x0141, + 0x0152, + 0x0160, + 0x0178, + 0x017D, + 0x0131, + 0x0142, + 0x0153, + 0x0161, + 0x017E, + 0x0000, + 0x20AC, + 0x00A1, + 0x00A2, + 0x00A3, + 0x00A4, + 0x00A5, + 0x00A6, + 0x00A7, + 0x00A8, + 0x00A9, + 0x00AA, + 0x00AB, + 0x00AC, + 0x0000, + 0x00AE, + 0x00AF, + 0x00B0, + 0x00B1, + 0x00B2, + 0x00B3, + 0x00B4, + 0x00B5, + 0x00B6, + 0x00B7, + 0x00B8, + 0x00B9, + 0x00BA, + 0x00BB, + 0x00BC, + 0x00BD, + 0x00BE, + 0x00BF, + 0x00C0, + 0x00C1, + 0x00C2, + 0x00C3, + 0x00C4, + 0x00C5, + 0x00C6, + 0x00C7, + 0x00C8, + 0x00C9, + 
0x00CA, + 0x00CB, + 0x00CC, + 0x00CD, + 0x00CE, + 0x00CF, + 0x00D0, + 0x00D1, + 0x00D2, + 0x00D3, + 0x00D4, + 0x00D5, + 0x00D6, + 0x00D7, + 0x00D8, + 0x00D9, + 0x00DA, + 0x00DB, + 0x00DC, + 0x00DD, + 0x00DE, + 0x00DF, + 0x00E0, + 0x00E1, + 0x00E2, + 0x00E3, + 0x00E4, + 0x00E5, + 0x00E6, + 0x00E7, + 0x00E8, + 0x00E9, + 0x00EA, + 0x00EB, + 0x00EC, + 0x00ED, + 0x00EE, + 0x00EF, + 0x00F0, + 0x00F1, + 0x00F2, + 0x00F3, + 0x00F4, + 0x00F5, + 0x00F6, + 0x00F7, + 0x00F8, + 0x00F9, + 0x00FA, + 0x00FB, + 0x00FC, + 0x00FD, + 0x00FE, + 0x00FF, + ) +) def decode_text(s: bytes) -> str: """Decodes a PDFDocEncoding string to Unicode.""" - if s.startswith(b'\xfe\xff'): - return str(s[2:], 'utf-16be', 'ignore') + if s.startswith(b"\xfe\xff"): + return str(s[2:], "utf-16be", "ignore") else: - return ''.join(PDFDocEncoding[c] for c in s) + return "".join(PDFDocEncoding[c] for c in s) def enc(x: str) -> str: """Encodes a string for SGML/XML/HTML""" if isinstance(x, bytes): - return '' + return "" return escape(x) def bbox2str(bbox: Rect) -> str: (x0, y0, x1, y1) = bbox - return '{:.3f},{:.3f},{:.3f},{:.3f}'.format(x0, y0, x1, y1) + return "{:.3f},{:.3f},{:.3f},{:.3f}".format(x0, y0, x1, y1) def matrix2str(m: Matrix) -> str: (a, b, c, d, e, f) = m - return '[{:.2f},{:.2f},{:.2f},{:.2f}, ({:.2f},{:.2f})]'\ - .format(a, b, c, d, e, f) + return "[{:.2f},{:.2f},{:.2f},{:.2f}, ({:.2f},{:.2f})]".format(a, b, c, d, e, f) def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point: @@ -446,7 +674,7 @@ def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point: return max(0, iw), max(0, ih) -LTComponentT = TypeVar('LTComponentT', bound='LTComponent') +LTComponentT = TypeVar("LTComponentT", bound="LTComponent") class Plane(Generic[LTComponentT]): @@ -465,7 +693,7 @@ class Plane(Generic[LTComponentT]): (self.x0, self.y0, self.x1, self.y1) = bbox def __repr__(self) -> str: - return '' % list(self) + return "" % list(self) def __iter__(self) -> Iterator[LTComponentT]: return 
(obj for obj in self._seq if obj in self._objs) @@ -524,14 +752,13 @@ class Plane(Generic[LTComponentT]): if obj in done: continue done.add(obj) - if obj.x1 <= x0 or x1 <= obj.x0 or obj.y1 <= y0 \ - or y1 <= obj.y0: + if obj.x1 <= x0 or x1 <= obj.x0 or obj.y1 <= y0 or y1 <= obj.y0: continue yield obj -ROMAN_ONES = ['i', 'x', 'c', 'm'] -ROMAN_FIVES = ['v', 'l', 'd'] +ROMAN_ONES = ["i", "x", "c", "m"] +ROMAN_FIVES = ["v", "l", "d"] def format_int_roman(value: int) -> str: @@ -557,7 +784,7 @@ def format_int_roman(value: int) -> str: result.insert(1 if over_five else 0, ROMAN_ONES[index] * remainder) index += 1 - return ''.join(result) + return "".join(result) def format_int_alpha(value: int) -> str: @@ -571,4 +798,4 @@ def format_int_alpha(value: int) -> str: result.append(string.ascii_lowercase[remainder]) result.reverse() - return ''.join(result) + return "".join(result) diff --git a/setup.py b/setup.py index 66ca485..db2e512 100644 --- a/setup.py +++ b/setup.py @@ -8,52 +8,52 @@ sys.path.append(str(Path(__file__).parent)) import pdfminer as package -with open(path.join(path.abspath(path.dirname(__file__)), 'README.md')) as f: +with open(path.join(path.abspath(path.dirname(__file__)), "README.md")) as f: readme = f.read() setup( - name='pdfminer.six', + name="pdfminer.six", version=package.__version__, - packages=['pdfminer'], - package_data={'pdfminer': ['cmap/*.pickle.gz', 'py.typed']}, + packages=["pdfminer"], + package_data={"pdfminer": ["cmap/*.pickle.gz", "py.typed"]}, install_requires=[ 'chardet ; python_version > "3.0"', - 'cryptography', + "cryptography", ], extras_require={ - "dev": ["pytest", "nox", "mypy == 0.931"], + "dev": ["pytest", "nox", "black", "mypy == 0.931"], "docs": ["sphinx", "sphinx-argparse"], }, - description='PDF parser and analyzer', + description="PDF parser and analyzer", long_description=readme, - long_description_content_type='text/markdown', - license='MIT/X', - author='Yusuke Shinyama + Philippe Guglielmetti', - 
author_email='pdfminer@goulu.net', - url='https://github.com/pdfminer/pdfminer.six', + long_description_content_type="text/markdown", + license="MIT/X", + author="Yusuke Shinyama + Philippe Guglielmetti", + author_email="pdfminer@goulu.net", + url="https://github.com/pdfminer/pdfminer.six", scripts=[ - 'tools/pdf2txt.py', - 'tools/dumppdf.py', + "tools/pdf2txt.py", + "tools/dumppdf.py", ], keywords=[ - 'pdf parser', - 'pdf converter', - 'layout analysis', - 'text mining', + "pdf parser", + "pdf converter", + "layout analysis", + "text mining", ], - python_requires='>=3.6', + python_requires=">=3.6", classifiers=[ - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3 :: Only', - 'Development Status :: 5 - Production/Stable', - 'Environment :: Console', - 'Intended Audience :: Developers', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', - 'Topic :: Text Processing', + "Programming Language :: Python", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3 :: Only", + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Topic :: Text Processing", ], ) diff --git a/tests/helpers.py b/tests/helpers.py index a623fe0..dc35430 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -2,7 +2,6 @@ import os def absolute_sample_path(relative_sample_path): - sample_dir = os.path.abspath( - os.path.join(os.path.dirname(__file__), '../samples')) + sample_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../samples")) sample_file = 
os.path.join(sample_dir, relative_sample_path) return sample_file diff --git a/tests/tempfilepath.py b/tests/tempfilepath.py index 39fb816..f847d73 100644 --- a/tests/tempfilepath.py +++ b/tests/tempfilepath.py @@ -4,7 +4,7 @@ import tempfile import os -class TemporaryFilePath(): +class TemporaryFilePath: """Context manager class, which generates temporary file name Coonroraly to standard tempfile.NamedTemporaryFile(), it does not @@ -40,9 +40,9 @@ class TemporaryFilePath(): `tempfile.NamedTemporaryFile` will create and delete a file, and this method only returns the filepath of the non-existing file. """ - with tempfile.NamedTemporaryFile(suffix=self.suffix, - prefix=self.prefix, - dir=self.dir) as file: + with tempfile.NamedTemporaryFile( + suffix=self.suffix, prefix=self.prefix, dir=self.dir + ) as file: self.temp_file_name = file.name return self.temp_file_name diff --git a/tests/test_converter.py b/tests/test_converter.py index da2496f..e9d18e8 100644 --- a/tests/test_converter.py +++ b/tests/test_converter.py @@ -9,14 +9,14 @@ from pdfminer.pdfinterp import PDFGraphicState class TestPaintPath: def test_paint_path(self): - path = [('m', 6, 7), ('l', 7, 7)] + path = [("m", 6, 7), ("l", 7, 7)] analyzer = self._get_analyzer() analyzer.cur_item = LTContainer([0, 100, 0, 100]) analyzer.paint_path(PDFGraphicState(), False, False, False, path) assert len(analyzer.cur_item._objs) == 1 def test_paint_path_mlllh(self): - path = [('m', 6, 7), ('l', 7, 7), ('l', 7, 91), ('l', 6, 91), ('h',)] + path = [("m", 6, 7), ("l", 7, 7), ("l", 7, 91), ("l", 6, 91), ("h",)] analyzer = self._get_analyzer() analyzer.cur_item = LTContainer([0, 100, 0, 100]) analyzer.paint_path(PDFGraphicState(), False, False, False, path) @@ -25,9 +25,21 @@ class TestPaintPath: def test_paint_path_multiple_mlllh(self): """Path from samples/contrib/issue-00369-excel.pdf""" path = [ - ('m', 6, 7), ('l', 7, 7), ('l', 7, 91), ('l', 6, 91), ('h',), - ('m', 4, 7), ('l', 6, 7), ('l', 6, 91), ('l', 4, 91), 
('h',), - ('m', 67, 2), ('l', 68, 2), ('l', 68, 3), ('l', 67, 3), ('h',) + ("m", 6, 7), + ("l", 7, 7), + ("l", 7, 91), + ("l", 6, 91), + ("h",), + ("m", 4, 7), + ("l", 6, 7), + ("l", 6, 91), + ("l", 4, 91), + ("h",), + ("m", 67, 2), + ("l", 68, 2), + ("l", 68, 3), + ("l", 67, 3), + ("h",), ] analyzer = self._get_analyzer() analyzer.cur_item = LTContainer([0, 100, 0, 100]) @@ -177,34 +189,34 @@ class TestPaintPath: return analyzer.cur_item._objs # "c" operator - assert parse([ - ("m", 72.41, 433.89), - ("c", 72.41, 434.45, 71.96, 434.89, 71.41, 434.89), - ])[0].pts == [ + assert parse( + [ + ("m", 72.41, 433.89), + ("c", 72.41, 434.45, 71.96, 434.89, 71.41, 434.89), + ] + )[0].pts == [ (72.41, 433.89), (71.41, 434.89), ] # "v" operator - assert parse([ - ("m", 72.41, 433.89), - ("v", 71.96, 434.89, 71.41, 434.89), - ])[0].pts == [ + assert parse([("m", 72.41, 433.89), ("v", 71.96, 434.89, 71.41, 434.89)])[ + 0 + ].pts == [ (72.41, 433.89), (71.41, 434.89), ] # "y" operator - assert parse([ - ("m", 72.41, 433.89), - ("y", 72.41, 434.45, 71.41, 434.89), - ])[0].pts == [ + assert parse([("m", 72.41, 433.89), ("y", 72.41, 434.45, 71.41, 434.89)])[ + 0 + ].pts == [ (72.41, 433.89), (71.41, 434.89), ] -class TestBinaryDetector(): +class TestBinaryDetector: def test_stringio(self): assert not PDFConverter._is_binary_stream(io.StringIO()) @@ -212,11 +224,11 @@ class TestBinaryDetector(): assert PDFConverter._is_binary_stream(io.BytesIO()) def test_tmpfile(self): - with TemporaryFile(mode='w') as f: + with TemporaryFile(mode="w") as f: assert not PDFConverter._is_binary_stream(f) def test_binary_tmpfile(self): - with TemporaryFile(mode='wb') as f: + with TemporaryFile(mode="wb") as f: assert PDFConverter._is_binary_stream(f) def test_non_file_like_object_defaults_to_binary(self): diff --git a/tests/test_encodingdb.py b/tests/test_encodingdb.py index 455d437..1166492 100644 --- a/tests/test_encodingdb.py +++ b/tests/test_encodingdb.py @@ -13,31 +13,31 @@ from 
pdfminer.psparser import PSLiteral def test_name2unicode_name_in_agl(): """The name "Lcommaaccent" has a single component, which is mapped to the string U+013B by AGL""" - assert '\u013B' == name2unicode('Lcommaaccent') + assert "\u013B" == name2unicode("Lcommaaccent") def test_name2unicode_uni(): """The components "Lcommaaccent," "uni013B," and "u013B" all map to the string U+013B""" - assert '\u013B' == name2unicode('uni013B') + assert "\u013B" == name2unicode("uni013B") def test_name2unicode_uni_lowercase(): """The components "Lcommaaccent," "uni013B," and "u013B" all map to the string U+013B""" - assert '\u013B' == name2unicode('uni013b') + assert "\u013B" == name2unicode("uni013b") def test_name2unicode_uni_with_sequence_of_digits(): """The name "uni20AC0308" has a single component, which is mapped to the string U+20AC U+0308""" - assert '\u20AC\u0308' == name2unicode('uni20AC0308') + assert "\u20AC\u0308" == name2unicode("uni20AC0308") def test_name2unicode_uni_with_sequence_of_digits_lowercase(): """The name "uni20AC0308" has a single component, which is mapped to the string U+20AC U+0308""" - assert '\u20AC\u0308' == name2unicode('uni20ac0308') + assert "\u20AC\u0308" == name2unicode("uni20ac0308") def test_name2unicode_uni_empty_string(): @@ -46,7 +46,7 @@ def test_name2unicode_uni_empty_string(): According to the specification this should be mapped to an empty string, but we also want to support lowercase hexadecimals""" - assert '\u20ac' == name2unicode('uni20ac') + assert "\u20ac" == name2unicode("uni20ac") def test_name2unicode_uni_empty_string_long(): @@ -60,7 +60,7 @@ def test_name2unicode_uni_empty_string_long(): glyph name "u1040C. 
""" with pytest.raises(KeyError): - name2unicode('uniD801DC0C') + name2unicode("uniD801DC0C") def test_name2unicode_uni_empty_string_long_lowercase(): @@ -73,57 +73,59 @@ def test_name2unicode_uni_empty_string_long_lowercase(): This character can be correctly mapped by using the glyph name "u1040C.""" with pytest.raises(KeyError): - name2unicode('uniD801DC0C') + name2unicode("uniD801DC0C") def test_name2unicode_uni_pua(): - """"Ogoneksmall" and "uniF6FB" both map to the string that corresponds to - U+F6FB.""" - assert '\uF6FB' == name2unicode('uniF6FB') + """ "Ogoneksmall" and "uniF6FB" both map to the string that corresponds to + U+F6FB.""" + assert "\uF6FB" == name2unicode("uniF6FB") def test_name2unicode_uni_pua_lowercase(): - """"Ogoneksmall" and "uniF6FB" both map to the string that corresponds to - U+F6FB.""" - assert '\uF6FB' == name2unicode('unif6fb') + """ "Ogoneksmall" and "uniF6FB" both map to the string that corresponds to + U+F6FB.""" + assert "\uF6FB" == name2unicode("unif6fb") def test_name2unicode_u_with_4_digits(): """The components "Lcommaaccent," "uni013B," and "u013B" all map to the string U+013B""" - assert '\u013B' == name2unicode('u013B') + assert "\u013B" == name2unicode("u013B") def test_name2unicode_u_with_4_digits_lowercase(): """The components "Lcommaaccent," "uni013B," and "u013B" all map to the string U+013B""" - assert '\u013B' == name2unicode('u013b') + assert "\u013B" == name2unicode("u013b") def test_name2unicode_u_with_5_digits(): """The name "u1040C" has a single component, which is mapped to the string - U+1040C""" - assert '\U0001040C' == name2unicode('u1040C') + U+1040C""" + assert "\U0001040C" == name2unicode("u1040C") def test_name2unicode_u_with_5_digits_lowercase(): """The name "u1040C" has a single component, which is mapped to the string - U+1040C""" - assert '\U0001040C' == name2unicode('u1040c') + U+1040C""" + assert "\U0001040C" == name2unicode("u1040c") def test_name2unicode_multiple_components(): """The name 
"Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the string U+013B U+20AC U+0308 U+1040C""" - assert '\u013B\u20AC\u0308\U0001040C' == \ - name2unicode('Lcommaaccent_uni20AC0308_u1040C.alternate') + assert "\u013B\u20AC\u0308\U0001040C" == name2unicode( + "Lcommaaccent_uni20AC0308_u1040C.alternate" + ) def test_name2unicode_multiple_components_lowercase(): """The name "Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the - string U+013B U+20AC U+0308 U+1040C""" - assert '\u013B\u20AC\u0308\U0001040C' == \ - name2unicode('Lcommaaccent_uni20ac0308_u1040c.alternate') + string U+013B U+20AC U+0308 U+1040C""" + assert "\u013B\u20AC\u0308\U0001040C" == name2unicode( + "Lcommaaccent_uni20ac0308_u1040c.alternate" + ) def test_name2unicode_foo(): @@ -131,26 +133,26 @@ def test_name2unicode_foo(): because 'foo' is not in AGL, and because it does not start with a 'u.'""" with pytest.raises(KeyError): - name2unicode('foo') + name2unicode("foo") def test_name2unicode_notdef(): """The name ".notdef" is reduced to an empty string (step 1) and mapped to an empty string (step 3)""" with pytest.raises(KeyError): - name2unicode('.notdef') + name2unicode(".notdef") def test_name2unicode_pua_ogoneksmall(): - """" + """ " Ogoneksmall" and "uniF6FB" both map to the string that corresponds to U+F6FB.""" - assert '\uF6FB' == name2unicode('Ogoneksmall') + assert "\uF6FB" == name2unicode("Ogoneksmall") def test_name2unicode_overflow_error(): with pytest.raises(KeyError): - name2unicode('226215240241240240240240') + name2unicode("226215240241240240240240") def test_get_encoding_with_invalid_differences(): @@ -158,5 +160,5 @@ def test_get_encoding_with_invalid_differences(): Regression test for https://github.com/pdfminer/pdfminer.six/issues/385 """ - invalid_differences = [PSLiteral('ubuntu'), PSLiteral('1234')] - EncodingDB.get_encoding('StandardEncoding', invalid_differences) + invalid_differences = [PSLiteral("ubuntu"), PSLiteral("1234")] + 
EncodingDB.get_encoding("StandardEncoding", invalid_differences) diff --git a/tests/test_font_size.py b/tests/test_font_size.py index 1c388a6..fca808c 100644 --- a/tests/test_font_size.py +++ b/tests/test_font_size.py @@ -4,7 +4,7 @@ from pdfminer.layout import LTChar, LTTextBox def test_font_size(): - path = absolute_sample_path('font-size-test.pdf') + path = absolute_sample_path("font-size-test.pdf") for page in extract_pages(path): for text_box in page: if isinstance(text_box, LTTextBox): diff --git a/tests/test_highlevel_extracttext.py b/tests/test_highlevel_extracttext.py index 0fa1c17..b7733c0 100644 --- a/tests/test_highlevel_extracttext.py +++ b/tests/test_highlevel_extracttext.py @@ -22,19 +22,19 @@ def run_with_file(sample_path): test_strings = { "simple1.pdf": "Hello \n\nWorld\n\nHello \n\nWorld\n\n" - "H e l l o \n\nW o r l d\n\n" - "H e l l o \n\nW o r l d\n\n\f", + "H e l l o \n\nW o r l d\n\n" + "H e l l o \n\nW o r l d\n\n\f", "simple1.pdf_no_boxes_flow": "Hello \n\nWorld\n\nHello \n\nWorld\n\n" - "H e l l o \n\nW o r l d\n\n" - "H e l l o \n\nW o r l d\n\n\f", + "H e l l o \n\nW o r l d\n\n" + "H e l l o \n\nW o r l d\n\n\f", "simple2.pdf": "\f", "simple3.pdf": "Hello\n\nHello\nあ\nい\nう\nえ\nお\nあ\nい\nう\nえ\nお\n" - "World\n\nWorld\n\n\f", + "World\n\nWorld\n\n\f", "simple4.pdf": "Text1\nText2\nText3\n\n\f", "simple5.pdf": "Heading\n\n" - "Link to heading that is working with vim-pandoc.\n\n" - "Link to heading “that is” not working with vim-pandoc.\n\n" - "Subheading\n\nSome “more text”\n\n1\n\n\f", + "Link to heading that is working with vim-pandoc.\n\n" + "Link to heading “that is” not working with vim-pandoc.\n\n" + "Subheading\n\nSome “more text”\n\n1\n\n\f", "zen_of_python_corrupted.pdf": "Mai 30, 18 13:27\n\nzen_of_python.txt", "contrib/issue_566_test_1.pdf": "ISSUE Date:2019-4-25 Buyer:黎荣", "contrib/issue_566_test_2.pdf": "甲方:中国饮料有限公司(盖章)", @@ -102,7 +102,7 @@ class TestExtractText(unittest.TestCase): test_file = "zen_of_python_corrupted.pdf" s 
= run_with_file(test_file) expected = test_strings[test_file] - self.assertEqual(s[:len(expected)], expected) + self.assertEqual(s[: len(expected)], expected) def test_issue_566_cmap_bytes(self): test_file = "contrib/issue_566_test_1.pdf" @@ -129,37 +129,43 @@ class TestExtractPages(unittest.TestCase): def test_line_margin(self): # The lines have margin 0.2 relative to the height. # Extract with line_margin 0.19 should break into 3 separate textboxes. - pages = list(extract_pages( - self._get_test_file_path(), laparams=LAParams(line_margin=0.19))) + pages = list( + extract_pages( + self._get_test_file_path(), laparams=LAParams(line_margin=0.19) + ) + ) self.assertEqual(len(pages), 1) page = pages[0] - elements = [element for element in page - if isinstance(element, LTTextContainer)] + elements = [element for element in page if isinstance(element, LTTextContainer)] self.assertEqual(len(elements), 3) self.assertEqual(elements[0].get_text(), "Text1\n") self.assertEqual(elements[1].get_text(), "Text2\n") self.assertEqual(elements[2].get_text(), "Text3\n") # Extract with line_margin 0.21 should merge into one textbox. 
- pages = list(extract_pages( - self._get_test_file_path(), laparams=LAParams(line_margin=0.21))) + pages = list( + extract_pages( + self._get_test_file_path(), laparams=LAParams(line_margin=0.21) + ) + ) self.assertEqual(len(pages), 1) page = pages[0] - elements = [element for element in page - if isinstance(element, LTTextContainer)] + elements = [element for element in page if isinstance(element, LTTextContainer)] self.assertEqual(len(elements), 1) self.assertEqual(elements[0].get_text(), "Text1\nText2\nText3\n") def test_no_boxes_flow(self): - pages = list(extract_pages( - self._get_test_file_path(), laparams=LAParams(boxes_flow=None))) + pages = list( + extract_pages( + self._get_test_file_path(), laparams=LAParams(boxes_flow=None) + ) + ) self.assertEqual(len(pages), 1) page = pages[0] - elements = [element for element in page - if isinstance(element, LTTextContainer)] + elements = [element for element in page if isinstance(element, LTTextContainer)] self.assertEqual(len(elements), 1) self.assertEqual(elements[0].get_text(), "Text1\nText2\nText3\n") diff --git a/tests/test_layout.py b/tests/test_layout.py index 1d06231..ce13fc9 100644 --- a/tests/test_layout.py +++ b/tests/test_layout.py @@ -46,8 +46,7 @@ class TestFindNeigbors(unittest.TestCase): right_aligned_below.set_bbox((15, 2, 20, 4)) plane.add(right_aligned_below) - centrally_aligned_overlapping = LTTextLineHorizontal( - laparams.word_margin) + centrally_aligned_overlapping = LTTextLineHorizontal(laparams.word_margin) centrally_aligned_overlapping.set_bbox((13, 5, 17, 7)) plane.add(centrally_aligned_overlapping) @@ -86,8 +85,7 @@ class TestFindNeigbors(unittest.TestCase): top_aligned_left.set_bbox((2, 15, 4, 20)) plane.add(top_aligned_left) - centrally_aligned_overlapping = LTTextLineVertical( - laparams.word_margin) + centrally_aligned_overlapping = LTTextLineVertical(laparams.word_margin) centrally_aligned_overlapping.set_bbox((5, 13, 7, 17)) plane.add(centrally_aligned_overlapping) diff --git 
a/tests/test_pdfdocument.py b/tests/test_pdfdocument.py index 8530b0b..3c1f243 100644 --- a/tests/test_pdfdocument.py +++ b/tests/test_pdfdocument.py @@ -9,9 +9,8 @@ from pdfminer.pdftypes import PDFObjectNotFound, dict_value, int_value class TestPdfDocument(object): - def test_get_zero_objid_raises_pdfobjectnotfound(self): - with open(absolute_sample_path('simple1.pdf'), 'rb') as in_file: + with open(absolute_sample_path("simple1.pdf"), "rb") as in_file: parser = PDFParser(in_file) doc = PDFDocument(parser) with pytest.raises(PDFObjectNotFound): @@ -21,24 +20,29 @@ class TestPdfDocument(object): # Some documents may be encrypted but not have an /ID key in # their trailer. Tests # https://github.com/pdfminer/pdfminer.six/issues/594 - path = absolute_sample_path('encryption/encrypted_doc_no_id.pdf') - with open(path, 'rb') as fp: + path = absolute_sample_path("encryption/encrypted_doc_no_id.pdf") + with open(path, "rb") as fp: parser = PDFParser(fp) doc = PDFDocument(parser) - assert doc.info == [{'Producer': b'European Patent Office'}] + assert doc.info == [{"Producer": b"European Patent Office"}] def test_page_labels(self): - path = absolute_sample_path('contrib/pagelabels.pdf') - with open(path, 'rb') as fp: + path = absolute_sample_path("contrib/pagelabels.pdf") + with open(path, "rb") as fp: parser = PDFParser(fp) doc = PDFDocument(parser) - total_pages = int_value(dict_value(doc.catalog['Pages'])['Count']) - assert list(itertools.islice(doc.get_page_labels(), total_pages)) \ - == ['iii', 'iv', '1', '2', '1'] + total_pages = int_value(dict_value(doc.catalog["Pages"])["Count"]) + assert list(itertools.islice(doc.get_page_labels(), total_pages)) == [ + "iii", + "iv", + "1", + "2", + "1", + ] def test_no_page_labels(self): - path = absolute_sample_path('simple1.pdf') - with open(path, 'rb') as fp: + path = absolute_sample_path("simple1.pdf") + with open(path, "rb") as fp: parser = PDFParser(fp) doc = PDFDocument(parser) diff --git a/tests/test_pdfencoding.py 
b/tests/test_pdfencoding.py index bbbe887..ebf6592 100644 --- a/tests/test_pdfencoding.py +++ b/tests/test_pdfencoding.py @@ -9,96 +9,95 @@ from pdfminer.psparser import PSLiteral class TestPDFEncoding: - def test_cmapname_onebyteidentityV(self): - stream = PDFStream({'CMapName': PSLiteral('OneByteIdentityV')}, '') - spec = {'Encoding': stream} + stream = PDFStream({"CMapName": PSLiteral("OneByteIdentityV")}, "") + spec = {"Encoding": stream} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMapByte) def test_cmapname_onebyteidentityH(self): - stream = PDFStream({'CMapName': PSLiteral('OneByteIdentityH')}, '') - spec = {'Encoding': stream} + stream = PDFStream({"CMapName": PSLiteral("OneByteIdentityH")}, "") + spec = {"Encoding": stream} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMapByte) def test_cmapname_V(self): - stream = PDFStream({'CMapName': PSLiteral('V')}, '') - spec = {'Encoding': stream} + stream = PDFStream({"CMapName": PSLiteral("V")}, "") + spec = {"Encoding": stream} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, CMap) def test_cmapname_H(self): - stream = PDFStream({'CMapName': PSLiteral('H')}, '') - spec = {'Encoding': stream} + stream = PDFStream({"CMapName": PSLiteral("H")}, "") + spec = {"Encoding": stream} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, CMap) def test_encoding_identityH(self): - spec = {'Encoding': PSLiteral('Identity-H')} + spec = {"Encoding": PSLiteral("Identity-H")} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMap) def test_encoding_identityV(self): - spec = {'Encoding': PSLiteral('Identity-V')} + spec = {"Encoding": PSLiteral("Identity-V")} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMap) def test_encoding_identityH_as_PSLiteral_stream(self): - stream = PDFStream({'CMapName': PSLiteral('Identity-H')}, '') - spec = {'Encoding': stream} + stream = PDFStream({"CMapName": PSLiteral("Identity-H")}, "") + spec = 
{"Encoding": stream} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMap) def test_encoding_identityV_as_PSLiteral_stream(self): - stream = PDFStream({'CMapName': PSLiteral('Identity-V')}, '') - spec = {'Encoding': stream} + stream = PDFStream({"CMapName": PSLiteral("Identity-V")}, "") + spec = {"Encoding": stream} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMap) def test_encoding_identityH_as_stream(self): - stream = PDFStream({'CMapName': 'Identity-H'}, '') - spec = {'Encoding': stream} + stream = PDFStream({"CMapName": "Identity-H"}, "") + spec = {"Encoding": stream} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMap) def test_encoding_identityV_as_stream(self): - stream = PDFStream({'CMapName': 'Identity-V'}, '') - spec = {'Encoding': stream} + stream = PDFStream({"CMapName": "Identity-V"}, "") + spec = {"Encoding": stream} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMap) def test_encoding_DLIdentH(self): - spec = {'Encoding': PSLiteral('DLIdent-H')} + spec = {"Encoding": PSLiteral("DLIdent-H")} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMap) def test_encoding_DLIdentV(self): - spec = {'Encoding': PSLiteral('DLIdent-V')} + spec = {"Encoding": PSLiteral("DLIdent-V")} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMap) def test_encoding_DLIdentH_as_PSLiteral_stream(self): - stream = PDFStream({'CMapName': PSLiteral('DLIdent-H')}, '') - spec = {'Encoding': stream} + stream = PDFStream({"CMapName": PSLiteral("DLIdent-H")}, "") + spec = {"Encoding": stream} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMap) def test_encoding_DLIdentV_as_PSLiteral_stream(self): - stream = PDFStream({'CMapName': PSLiteral('DLIdent-V')}, '') - spec = {'Encoding': stream} + stream = PDFStream({"CMapName": PSLiteral("DLIdent-V")}, "") + spec = {"Encoding": stream} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, 
IdentityCMap) def test_encoding_DLIdentH_as_stream(self): - stream = PDFStream({'CMapName': 'DLIdent-H'}, '') - spec = {'Encoding': stream} + stream = PDFStream({"CMapName": "DLIdent-H"}, "") + spec = {"Encoding": stream} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMap) def test_encoding_DLIdentV_as_stream(self): - stream = PDFStream({'CMapName': 'DLIdent-V'}, '') - spec = {'Encoding': stream} + stream = PDFStream({"CMapName": "DLIdent-V"}, "") + spec = {"Encoding": stream} font = PDFCIDFont(None, spec) assert isinstance(font.cmap, IdentityCMap) diff --git a/tests/test_pdffont.py b/tests/test_pdffont.py index e880b49..6633255 100644 --- a/tests/test_pdffont.py +++ b/tests/test_pdffont.py @@ -8,12 +8,12 @@ def test_get_cmap_from_pickle(): Regression test for https://github.com/pdfminer/pdfminer.six/issues/391 """ - cmap_name = 'UniGB-UCS2-H' - spec = {'Encoding': PSLiteral(cmap_name)} + cmap_name = "UniGB-UCS2-H" + spec = {"Encoding": PSLiteral(cmap_name)} resource_manager = PDFResourceManager() font = PDFCIDFont(resource_manager, spec) cmap = font.get_cmap_from_spec(spec, False) - assert cmap.attrs.get('CMapName') == cmap_name + assert cmap.attrs.get("CMapName") == cmap_name assert len(cmap.code2cid) > 0 diff --git a/tests/test_pdfminer_ccitt.py b/tests/test_pdfminer_ccitt.py index 785ad88..5f2b707 100644 --- a/tests/test_pdfminer_ccitt.py +++ b/tests/test_pdfminer_ccitt.py @@ -1,7 +1,7 @@ from pdfminer.ccitt import CCITTG4Parser, CCITTFaxDecoder -class TestCCITTG4Parser(): +class TestCCITTG4Parser: def get_parser(self, bits): parser = CCITTG4Parser(len(bits)) parser._curline = [int(c) for c in bits] @@ -9,60 +9,60 @@ class TestCCITTG4Parser(): return parser def test_b1(self): - parser = self.get_parser('00000') + parser = self.get_parser("00000") parser._do_vertical(0) assert parser._curpos == 0 return def test_b2(self): - parser = self.get_parser('10000') + parser = self.get_parser("10000") parser._do_vertical(-1) assert parser._curpos == 
0 return def test_b3(self): - parser = self.get_parser('000111') + parser = self.get_parser("000111") parser._do_pass() assert parser._curpos == 3 - assert parser._get_bits() == '111' + assert parser._get_bits() == "111" return def test_b4(self): - parser = self.get_parser('00000') + parser = self.get_parser("00000") parser._do_vertical(+2) assert parser._curpos == 2 - assert parser._get_bits() == '11' + assert parser._get_bits() == "11" return def test_b5(self): - parser = self.get_parser('11111111100') + parser = self.get_parser("11111111100") parser._do_horizontal(0, 3) assert parser._curpos == 3 parser._do_vertical(1) assert parser._curpos == 10 - assert parser._get_bits() == '0001111111' + assert parser._get_bits() == "0001111111" return def test_e1(self): - parser = self.get_parser('10000') + parser = self.get_parser("10000") parser._do_vertical(0) assert parser._curpos == 1 parser._do_vertical(0) assert parser._curpos == 5 - assert parser._get_bits() == '10000' + assert parser._get_bits() == "10000" return def test_e2(self): - parser = self.get_parser('10011') + parser = self.get_parser("10011") parser._do_vertical(0) assert parser._curpos == 1 parser._do_vertical(2) assert parser._curpos == 5 - assert parser._get_bits() == '10000' + assert parser._get_bits() == "10000" return def test_e3(self): - parser = self.get_parser('011111') + parser = self.get_parser("011111") parser._color = 0 parser._do_vertical(0) assert parser._color == 1 @@ -72,90 +72,90 @@ class TestCCITTG4Parser(): assert parser._curpos == 4 parser._do_vertical(0) assert parser._curpos == 6 - assert parser._get_bits() == '011100' + assert parser._get_bits() == "011100" return def test_e4(self): - parser = self.get_parser('10000') + parser = self.get_parser("10000") parser._do_vertical(0) assert parser._curpos == 1 parser._do_vertical(-2) assert parser._curpos == 3 parser._do_vertical(0) assert parser._curpos == 5 - assert parser._get_bits() == '10011' + assert parser._get_bits() == "10011" 
return def test_e5(self): - parser = self.get_parser('011000') + parser = self.get_parser("011000") parser._color = 0 parser._do_vertical(0) assert parser._curpos == 1 parser._do_vertical(3) assert parser._curpos == 6 - assert parser._get_bits() == '011111' + assert parser._get_bits() == "011111" return def test_e6(self): - parser = self.get_parser('11001') + parser = self.get_parser("11001") parser._do_pass() assert parser._curpos == 4 parser._do_vertical(0) assert parser._curpos == 5 - assert parser._get_bits() == '11111' + assert parser._get_bits() == "11111" return def test_e7(self): - parser = self.get_parser('0000000000') + parser = self.get_parser("0000000000") parser._curpos = 2 parser._color = 1 parser._do_horizontal(2, 6) assert parser._curpos == 10 - assert parser._get_bits() == '1111000000' + assert parser._get_bits() == "1111000000" return def test_e8(self): - parser = self.get_parser('001100000') + parser = self.get_parser("001100000") parser._curpos = 1 parser._color = 0 parser._do_vertical(0) assert parser._curpos == 2 parser._do_horizontal(7, 0) assert parser._curpos == 9 - assert parser._get_bits() == '101111111' + assert parser._get_bits() == "101111111" return def test_m1(self): - parser = self.get_parser('10101') + parser = self.get_parser("10101") parser._do_pass() assert parser._curpos == 2 parser._do_pass() assert parser._curpos == 4 - assert parser._get_bits() == '1111' + assert parser._get_bits() == "1111" return def test_m2(self): - parser = self.get_parser('101011') + parser = self.get_parser("101011") parser._do_vertical(-1) parser._do_vertical(-1) parser._do_vertical(1) parser._do_horizontal(1, 1) - assert parser._get_bits() == '011101' + assert parser._get_bits() == "011101" return def test_m3(self): - parser = self.get_parser('10111011') + parser = self.get_parser("10111011") parser._do_vertical(-1) parser._do_pass() parser._do_vertical(1) parser._do_vertical(1) - assert parser._get_bits() == '00000001' + assert parser._get_bits() == 
"00000001" return class TestCCITTFaxDecoder: def test_b1(self): decoder = CCITTFaxDecoder(5) - decoder.output_line(0, b'0') - assert decoder.close() == b'\x80' + decoder.output_line(0, b"0") + assert decoder.close() == b"\x80" return diff --git a/tests/test_pdfminer_crypto.py b/tests/test_pdfminer_crypto.py index cb7f733..022d516 100644 --- a/tests/test_pdfminer_crypto.py +++ b/tests/test_pdfminer_crypto.py @@ -18,36 +18,37 @@ def dehex(b): return binascii.unhexlify(b) -class TestAscii85(): +class TestAscii85: def test_ascii85decode(self): """The sample string is taken from: http://en.wikipedia.org/w/index.php?title=Ascii85""" - assert ascii85decode(b'9jqo^BlbD-BleB1DJ+*+F(f,q') \ - == b'Man is distinguished' - assert ascii85decode(b'E,9)oF*2M7/c~>') == b'pleasure.' + assert ascii85decode(b"9jqo^BlbD-BleB1DJ+*+F(f,q") == b"Man is distinguished" + assert ascii85decode(b"E,9)oF*2M7/c~>") == b"pleasure." def test_asciihexdecode(self): - assert asciihexdecode(b'61 62 2e6364 65') == b'ab.cde' - assert asciihexdecode(b'61 62 2e6364 657>') == b'ab.cdep' - assert asciihexdecode(b'7>') == b'p' + assert asciihexdecode(b"61 62 2e6364 65") == b"ab.cde" + assert asciihexdecode(b"61 62 2e6364 657>") == b"ab.cdep" + assert asciihexdecode(b"7>") == b"p" -class TestArcfour(): +class TestArcfour: def test(self): - assert hex(Arcfour(b'Key').process(b'Plaintext')) \ - == b'bbf316e8d940af0ad3' - assert hex(Arcfour(b'Wiki').process(b'pedia')) == b'1021bf0420' - assert hex(Arcfour(b'Secret').process(b'Attack at dawn')) \ - == b'45a01f645fc35b383552544b9bf5' + assert hex(Arcfour(b"Key").process(b"Plaintext")) == b"bbf316e8d940af0ad3" + assert hex(Arcfour(b"Wiki").process(b"pedia")) == b"1021bf0420" + assert ( + hex(Arcfour(b"Secret").process(b"Attack at dawn")) + == b"45a01f645fc35b383552544b9bf5" + ) -class TestLzw(): +class TestLzw: def test_lzwdecode(self): - assert lzwdecode(b'\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01') \ - == b'\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42' + assert ( + 
lzwdecode(b"\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01") + == b"\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42" + ) -class TestRunlength(): +class TestRunlength: def test_rldecode(self): - assert rldecode(b'\x05123456\xfa7\x04abcde\x80junk') \ - == b'1234567777777abcde' + assert rldecode(b"\x05123456\xfa7\x04abcde\x80junk") == b"1234567777777abcde" diff --git a/tests/test_pdfminer_psparser.py b/tests/test_pdfminer_psparser.py index 12f1d70..4024fc7 100644 --- a/tests/test_pdfminer_psparser.py +++ b/tests/test_pdfminer_psparser.py @@ -8,7 +8,7 @@ logger = logging.getLogger(__name__) class TestPSBaseParser: """Simplistic Test cases""" - TESTDATA = br'''%!PS + TESTDATA = rb"""%!PS begin end " @ # /a/BCD /Some_Name /foo#5f#xbaa @@ -26,33 +26,83 @@ baa) func/a/b{(c)do*}def [ 1 (z) ! ] << /foo (bar) >> -''' +""" TOKENS = [ - (5, KWD(b'begin')), (11, KWD(b'end')), (16, KWD(b'"')), - (19, KWD(b'@')), (21, KWD(b'#')), (23, LIT('a')), (25, LIT('BCD')), - (30, LIT('Some_Name')), (41, LIT('foo_xbaa')), (54, 0), (56, 1), - (59, -2), (62, 0.5), (65, 1.234), (71, b'abc'), (77, b''), - (80, b'abc ( def ) ghi'), (98, b'def \x00 4ghi'), - (118, b'bach\\slask'), (132, b'foo\nbaa'), - (143, b'this % is not a comment.'), (170, b'foo\nbaa'), - (180, b'foobaa'), (191, b''), (194, b' '), (199, b'@@ '), - (211, b'\xab\xcd\x00\x124\x05'), (226, KWD(b'func')), (230, LIT('a')), - (232, LIT('b')), (234, KWD(b'{')), (235, b'c'), (238, KWD(b'do*')), - (241, KWD(b'}')), (242, KWD(b'def')), (246, KWD(b'[')), (248, 1), - (250, b'z'), (254, KWD(b'!')), (256, KWD(b']')), (258, KWD(b'<<')), - (261, LIT('foo')), (266, b'bar'), (272, KWD(b'>>')) + (5, KWD(b"begin")), + (11, KWD(b"end")), + (16, KWD(b'"')), + (19, KWD(b"@")), + (21, KWD(b"#")), + (23, LIT("a")), + (25, LIT("BCD")), + (30, LIT("Some_Name")), + (41, LIT("foo_xbaa")), + (54, 0), + (56, 1), + (59, -2), + (62, 0.5), + (65, 1.234), + (71, b"abc"), + (77, b""), + (80, b"abc ( def ) ghi"), + (98, b"def \x00 4ghi"), + (118, b"bach\\slask"), + (132, 
b"foo\nbaa"), + (143, b"this % is not a comment."), + (170, b"foo\nbaa"), + (180, b"foobaa"), + (191, b""), + (194, b" "), + (199, b"@@ "), + (211, b"\xab\xcd\x00\x124\x05"), + (226, KWD(b"func")), + (230, LIT("a")), + (232, LIT("b")), + (234, KWD(b"{")), + (235, b"c"), + (238, KWD(b"do*")), + (241, KWD(b"}")), + (242, KWD(b"def")), + (246, KWD(b"[")), + (248, 1), + (250, b"z"), + (254, KWD(b"!")), + (256, KWD(b"]")), + (258, KWD(b"<<")), + (261, LIT("foo")), + (266, b"bar"), + (272, KWD(b">>")), ] OBJS = [ - (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')), - (41, LIT('foo_xbaa')), (54, 0), (56, 1), (59, -2), (62, 0.5), - (65, 1.234), (71, b'abc'), (77, b''), (80, b'abc ( def ) ghi'), - (98, b'def \x00 4ghi'), (118, b'bach\\slask'), (132, b'foo\nbaa'), - (143, b'this % is not a comment.'), (170, b'foo\nbaa'), - (180, b'foobaa'), (191, b''), (194, b' '), (199, b'@@ '), - (211, b'\xab\xcd\x00\x124\x05'), (230, LIT('a')), (232, LIT('b')), - (234, [b'c']), (246, [1, b'z']), (258, {'foo': b'bar'}), + (23, LIT("a")), + (25, LIT("BCD")), + (30, LIT("Some_Name")), + (41, LIT("foo_xbaa")), + (54, 0), + (56, 1), + (59, -2), + (62, 0.5), + (65, 1.234), + (71, b"abc"), + (77, b""), + (80, b"abc ( def ) ghi"), + (98, b"def \x00 4ghi"), + (118, b"bach\\slask"), + (132, b"foo\nbaa"), + (143, b"this % is not a comment."), + (170, b"foo\nbaa"), + (180, b"foobaa"), + (191, b""), + (194, b" "), + (199, b"@@ "), + (211, b"\xab\xcd\x00\x124\x05"), + (230, LIT("a")), + (232, LIT("b")), + (234, [b"c"]), + (246, [1, b"z"]), + (258, {"foo": b"bar"}), ] def get_tokens(self, s): diff --git a/tests/test_pdfpage.py b/tests/test_pdfpage.py index 0c0c6a6..0d3109f 100644 --- a/tests/test_pdfpage.py +++ b/tests/test_pdfpage.py @@ -6,10 +6,10 @@ from pdfminer.pdfparser import PDFParser class TestPdfPage(object): def test_page_labels(self): - path = absolute_sample_path('contrib/pagelabels.pdf') - expected_labels = ['iii', 'iv', '1', '2', '1'] + path = 
absolute_sample_path("contrib/pagelabels.pdf") + expected_labels = ["iii", "iv", "1", "2", "1"] - with open(path, 'rb') as fp: + with open(path, "rb") as fp: parser = PDFParser(fp) doc = PDFDocument(parser) for (i, page) in enumerate(PDFPage.create_pages(doc)): diff --git a/tests/test_tools_dumppdf.py b/tests/test_tools_dumppdf.py index 9c71782..84e3111 100644 --- a/tests/test_tools_dumppdf.py +++ b/tests/test_tools_dumppdf.py @@ -11,48 +11,47 @@ def run(filename, options=None): absolute_path = absolute_sample_path(filename) with TemporaryFilePath() as output_file_name: if options: - s = 'dumppdf -o %s %s %s' % (output_file_name, - options, absolute_path) + s = "dumppdf -o %s %s %s" % (output_file_name, options, absolute_path) else: - s = 'dumppdf -o %s %s' % (output_file_name, absolute_path) + s = "dumppdf -o %s %s" % (output_file_name, absolute_path) - dumppdf.main(s.split(' ')[1:]) + dumppdf.main(s.split(" ")[1:]) class TestDumpPDF(unittest.TestCase): def test_simple1(self): - run('simple1.pdf', '-t -a') + run("simple1.pdf", "-t -a") def test_simple2(self): - run('simple2.pdf', '-t -a') + run("simple2.pdf", "-t -a") def test_jo(self): - run('jo.pdf', '-t -a') + run("jo.pdf", "-t -a") def test_simple3(self): - run('simple3.pdf', '-t -a') + run("simple3.pdf", "-t -a") def test_2(self): - run('nonfree/dmca.pdf', '-t -a') + run("nonfree/dmca.pdf", "-t -a") def test_3(self): - run('nonfree/f1040nr.pdf') + run("nonfree/f1040nr.pdf") def test_4(self): - run('nonfree/i1040nr.pdf') + run("nonfree/i1040nr.pdf") def test_5(self): - run('nonfree/kampo.pdf', '-t -a') + run("nonfree/kampo.pdf", "-t -a") def test_6(self): - run('nonfree/naacl06-shinyama.pdf', '-t -a') + run("nonfree/naacl06-shinyama.pdf", "-t -a") def test_simple1_raw(self): """Known issue: crash in dumpxml writing binary to text stream.""" with pytest.raises(TypeError): - run('simple1.pdf', '-r -a') + run("simple1.pdf", "-r -a") def test_simple1_binary(self): """Known issue: crash in dumpxml writing binary to 
text stream.""" with pytest.raises(TypeError): - run('simple1.pdf', '-b -a') + run("simple1.pdf", "-b -a") diff --git a/tests/test_tools_pdf2txt.py b/tests/test_tools_pdf2txt.py index f951a32..5e2b75a 100644 --- a/tests/test_tools_pdf2txt.py +++ b/tests/test_tools_pdf2txt.py @@ -12,115 +12,119 @@ def run(sample_path, options=None): absolute_path = absolute_sample_path(sample_path) with TemporaryFilePath() as output_file_name: if options: - s = 'pdf2txt -o{} {} {}' \ - .format(output_file_name, options, absolute_path) + s = "pdf2txt -o{} {} {}".format(output_file_name, options, absolute_path) else: - s = 'pdf2txt -o{} {}'.format(output_file_name, absolute_path) + s = "pdf2txt -o{} {}".format(output_file_name, absolute_path) - pdf2txt.main(s.split(' ')[1:]) + pdf2txt.main(s.split(" ")[1:]) -class TestPdf2Txt(): +class TestPdf2Txt: def test_jo(self): - run('jo.pdf') + run("jo.pdf") def test_simple1(self): - run('simple1.pdf') + run("simple1.pdf") def test_simple2(self): - run('simple2.pdf') + run("simple2.pdf") def test_simple3(self): - run('simple3.pdf') + run("simple3.pdf") def test_sample_one_byte_identity_encode(self): - run('sampleOneByteIdentityEncode.pdf') + run("sampleOneByteIdentityEncode.pdf") def test_nonfree_175(self): """Regression test for: https://github.com/pdfminer/pdfminer.six/issues/65 """ - run('nonfree/175.pdf') + run("nonfree/175.pdf") def test_nonfree_dmca(self): - run('nonfree/dmca.pdf') + run("nonfree/dmca.pdf") def test_nonfree_f1040nr(self): - run('nonfree/f1040nr.pdf', '-p 1') + run("nonfree/f1040nr.pdf", "-p 1") def test_nonfree_i1040nr(self): - run('nonfree/i1040nr.pdf', '-p 1') + run("nonfree/i1040nr.pdf", "-p 1") def test_nonfree_kampo(self): - run('nonfree/kampo.pdf') + run("nonfree/kampo.pdf") def test_nonfree_naacl06_shinyama(self): - run('nonfree/naacl06-shinyama.pdf') + run("nonfree/naacl06-shinyama.pdf") def test_nlp2004slides(self): - run('nonfree/nlp2004slides.pdf', '-p 1') + run("nonfree/nlp2004slides.pdf", "-p 1") def 
test_contrib_2b(self): - run('contrib/2b.pdf', '-A -t xml') + run("contrib/2b.pdf", "-A -t xml") def test_contrib_issue_350(self): """Regression test for https://github.com/pdfminer/pdfminer.six/issues/350""" - run('contrib/issue-00352-asw-oct96-p41.pdf') + run("contrib/issue-00352-asw-oct96-p41.pdf") def test_scancode_patchelf(self): """Regression test for https://github.com/euske/pdfminer/issues/96""" - run('scancode/patchelf.pdf') + run("scancode/patchelf.pdf") def test_contrib_hash_two_complement(self): """Check that unsigned integer is added correctly to encryption hash.et See https://github.com/pdfminer/pdfminer.six/issues/186 """ - run('contrib/issue-00352-hash-twos-complement.pdf') + run("contrib/issue-00352-hash-twos-complement.pdf") def test_contrib_excel(self): """Regression test for - https://github.com/pdfminer/pdfminer.six/issues/369 - """ - run('contrib/issue-00369-excel.pdf', '-t html') + https://github.com/pdfminer/pdfminer.six/issues/369 + """ + run("contrib/issue-00369-excel.pdf", "-t html") def test_encryption_aes128(self): - run('encryption/aes-128.pdf', '-P foo') + run("encryption/aes-128.pdf", "-P foo") def test_encryption_aes128m(self): - run('encryption/aes-128-m.pdf', '-P foo') + run("encryption/aes-128-m.pdf", "-P foo") def test_encryption_aes256(self): - run('encryption/aes-256.pdf', '-P foo') + run("encryption/aes-256.pdf", "-P foo") def test_encryption_aes256m(self): - run('encryption/aes-256-m.pdf', '-P foo') + run("encryption/aes-256-m.pdf", "-P foo") def test_encryption_aes256_r6_user(self): - run('encryption/aes-256-r6.pdf', '-P usersecret') + run("encryption/aes-256-r6.pdf", "-P usersecret") def test_encryption_aes256_r6_owner(self): - run('encryption/aes-256-r6.pdf', '-P ownersecret') + run("encryption/aes-256-r6.pdf", "-P ownersecret") def test_encryption_base(self): - run('encryption/base.pdf', '-P foo') + run("encryption/base.pdf", "-P foo") def test_encryption_rc4_40(self): - run('encryption/rc4-40.pdf', '-P foo') + 
run("encryption/rc4-40.pdf", "-P foo") def test_encryption_rc4_128(self): - run('encryption/rc4-128.pdf', '-P foo') + run("encryption/rc4-128.pdf", "-P foo") class TestDumpImages: - @staticmethod def extract_images(input_file, *args): output_dir = mkdtemp() with TemporaryFilePath() as output_file_name: - commands = ['-o', output_file_name, '--output-dir', - output_dir, input_file, *args] + commands = [ + "-o", + output_file_name, + "--output-dir", + output_dir, + input_file, + *args, + ] pdf2txt.main(commands) image_files = os.listdir(output_dir) rmtree(output_dir) @@ -132,39 +136,38 @@ class TestDumpImages: Regression test for: https://github.com/pdfminer/pdfminer.six/issues/131 """ - filepath = absolute_sample_path('../samples/nonfree/dmca.pdf') - image_files = self.extract_images(filepath, '-p', '1') - assert image_files[0].endswith('bmp') + filepath = absolute_sample_path("../samples/nonfree/dmca.pdf") + image_files = self.extract_images(filepath, "-p", "1") + assert image_files[0].endswith("bmp") def test_nonfree_175(self): """Extract images of pdf containing jpg images""" - self.extract_images(absolute_sample_path('../samples/nonfree/175.pdf')) + self.extract_images(absolute_sample_path("../samples/nonfree/175.pdf")) def test_jbig2_image_export(self): """Extract images of pdf containing jbig2 images Feature test for: https://github.com/pdfminer/pdfminer.six/pull/46 """ - input_file = absolute_sample_path( - '../samples/contrib/pdf-with-jbig2.pdf') + input_file = absolute_sample_path("../samples/contrib/pdf-with-jbig2.pdf") output_dir = mkdtemp() with TemporaryFilePath() as output_file_name: - commands = ['-o', output_file_name, '--output-dir', - output_dir, input_file] + commands = ["-o", output_file_name, "--output-dir", output_dir, input_file] pdf2txt.main(commands) image_files = os.listdir(output_dir) try: - assert image_files[0].endswith('.jb2') - assert filecmp.cmp(output_dir + '/' + image_files[0], - absolute_sample_path( - 
'../samples/contrib/XIPLAYER0.jb2')) + assert image_files[0].endswith(".jb2") + assert filecmp.cmp( + output_dir + "/" + image_files[0], + absolute_sample_path("../samples/contrib/XIPLAYER0.jb2"), + ) finally: rmtree(output_dir) def test_contrib_matplotlib(self): """Test a pdf with Type3 font""" - run('contrib/matplotlib.pdf') + run("contrib/matplotlib.pdf") def test_nonfree_cmp_itext_logo(self): """Test a pdf with Type3 font""" - run('nonfree/cmp_itext_logo.pdf') + run("nonfree/cmp_itext_logo.pdf") diff --git a/tests/test_utils.py b/tests/test_utils.py index d745256..062a973 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,8 +4,13 @@ import pytest from helpers import absolute_sample_path from pdfminer.layout import LTComponent -from pdfminer.utils import open_filename, Plane, shorten_str, \ - format_int_roman, format_int_alpha +from pdfminer.utils import ( + open_filename, + Plane, + shorten_str, + format_int_roman, + format_int_alpha, +) class TestOpenFilename: @@ -48,14 +53,12 @@ class TestPlane: assert result == [obj] def test_find_if_object_is_smaller_than_gridsize(self): - plane, obj = self.given_plane_with_one_object(object_size=1, - gridsize=100) + plane, obj = self.given_plane_with_one_object(object_size=1, gridsize=100) result = list(plane.find((0, 0, 100, 100))) assert result == [obj] def test_find_object_if_much_larger_than_gridsize(self): - plane, obj = self.given_plane_with_one_object(object_size=100, - gridsize=10) + plane, obj = self.given_plane_with_one_object(object_size=100, gridsize=10) result = list(plane.find((0, 0, 100, 100))) assert result == [obj] @@ -70,43 +73,43 @@ class TestPlane: class TestFunctions(object): def test_shorten_str(self): - s = shorten_str('Hello there World', 15) - assert s == 'Hello ... World' + s = shorten_str("Hello there World", 15) + assert s == "Hello ... 
World" def test_shorten_short_str_is_same(self): - s = 'Hello World' + s = "Hello World" assert shorten_str(s, 50) == s def test_shorten_to_really_short(self): - assert shorten_str('Hello World', 5) == 'Hello' + assert shorten_str("Hello World", 5) == "Hello" def test_format_int_alpha(self): - assert format_int_alpha(1) == 'a' - assert format_int_alpha(2) == 'b' - assert format_int_alpha(26) == 'z' - assert format_int_alpha(27) == 'aa' - assert format_int_alpha(28) == 'ab' - assert format_int_alpha(26 * 2) == 'az' - assert format_int_alpha(26 * 2 + 1) == 'ba' - assert format_int_alpha(26 * 27) == 'zz' - assert format_int_alpha(26 * 27 + 1) == 'aaa' + assert format_int_alpha(1) == "a" + assert format_int_alpha(2) == "b" + assert format_int_alpha(26) == "z" + assert format_int_alpha(27) == "aa" + assert format_int_alpha(28) == "ab" + assert format_int_alpha(26 * 2) == "az" + assert format_int_alpha(26 * 2 + 1) == "ba" + assert format_int_alpha(26 * 27) == "zz" + assert format_int_alpha(26 * 27 + 1) == "aaa" def test_format_int_roman(self): - assert format_int_roman(1) == 'i' - assert format_int_roman(2) == 'ii' - assert format_int_roman(3) == 'iii' - assert format_int_roman(4) == 'iv' - assert format_int_roman(5) == 'v' - assert format_int_roman(6) == 'vi' - assert format_int_roman(7) == 'vii' - assert format_int_roman(8) == 'viii' - assert format_int_roman(9) == 'ix' - assert format_int_roman(10) == 'x' - assert format_int_roman(11) == 'xi' - assert format_int_roman(20) == 'xx' - assert format_int_roman(40) == 'xl' - assert format_int_roman(45) == 'xlv' - assert format_int_roman(50) == 'l' - assert format_int_roman(90) == 'xc' - assert format_int_roman(91) == 'xci' - assert format_int_roman(100) == 'c' + assert format_int_roman(1) == "i" + assert format_int_roman(2) == "ii" + assert format_int_roman(3) == "iii" + assert format_int_roman(4) == "iv" + assert format_int_roman(5) == "v" + assert format_int_roman(6) == "vi" + assert format_int_roman(7) == "vii" + assert 
format_int_roman(8) == "viii" + assert format_int_roman(9) == "ix" + assert format_int_roman(10) == "x" + assert format_int_roman(11) == "xi" + assert format_int_roman(20) == "xx" + assert format_int_roman(40) == "xl" + assert format_int_roman(45) == "xlv" + assert format_int_roman(50) == "l" + assert format_int_roman(90) == "xc" + assert format_int_roman(91) == "xci" + assert format_int_roman(100) == "c" diff --git a/tools/conv_afm.py b/tools/conv_afm.py index 07f7ebf..cb91baa 100755 --- a/tools/conv_afm.py +++ b/tools/conv_afm.py @@ -7,39 +7,38 @@ import fileinput def main(argv): fonts = {} for line in fileinput.input(): - f = line.strip().split(' ') + f = line.strip().split(" ") if not f: continue k = f[0] - if k == 'FontName': + if k == "FontName": fontname = f[1] - props = {'FontName': fontname, 'Flags': 0} + props = {"FontName": fontname, "Flags": 0} chars = {} fonts[fontname] = (props, chars) - elif k == 'C': + elif k == "C": cid = int(f[1]) if 0 <= cid and cid <= 255: width = int(f[4]) chars[cid] = width - elif k in ('CapHeight', 'XHeight', 'ItalicAngle', - 'Ascender', 'Descender'): - k = {'Ascender': 'Ascent', 'Descender': 'Descent'}.get(k, k) + elif k in ("CapHeight", "XHeight", "ItalicAngle", "Ascender", "Descender"): + k = {"Ascender": "Ascent", "Descender": "Descent"}.get(k, k) props[k] = float(f[1]) - elif k in ('FontName', 'FamilyName', 'Weight'): - k = {'FamilyName': 'FontFamily', 'Weight': 'FontWeight'}.get(k, k) + elif k in ("FontName", "FamilyName", "Weight"): + k = {"FamilyName": "FontFamily", "Weight": "FontWeight"}.get(k, k) props[k] = f[1] - elif k == 'IsFixedPitch': - if f[1].lower() == 'true': - props['Flags'] = 64 - elif k == 'FontBBox': + elif k == "IsFixedPitch": + if f[1].lower() == "true": + props["Flags"] = 64 + elif k == "FontBBox": props[k] = tuple(map(float, f[1:5])) - print('# -*- python -*-') - print('FONT_METRICS = {') + print("# -*- python -*-") + print("FONT_METRICS = {") for (fontname, (props, chars)) in fonts.items(): - 
print(' {!r}: {!r},'.format(fontname, (props, chars))) - print('}') + print(" {!r}: {!r},".format(fontname, (props, chars))) + print("}") return 0 -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main(sys.argv)) # type: ignore[no-untyped-call] diff --git a/tools/conv_cmap.py b/tools/conv_cmap.py index 7ce0aef..e39c17e 100755 --- a/tools/conv_cmap.py +++ b/tools/conv_cmap.py @@ -6,7 +6,6 @@ import codecs class CMapConverter: - def __init__(self, enc2codec={}): self.enc2codec = enc2codec self.code2cid = {} # {'cmapname': ...} @@ -19,12 +18,12 @@ class CMapConverter: return self.code2cid.keys() def get_maps(self, enc): - if enc.endswith('-H'): + if enc.endswith("-H"): (hmapenc, vmapenc) = (enc, None) - elif enc == 'H': - (hmapenc, vmapenc) = ('H', 'V') + elif enc == "H": + (hmapenc, vmapenc) = ("H", "V") else: - (hmapenc, vmapenc) = (enc+'-H', enc+'-V') + (hmapenc, vmapenc) = (enc + "-H", enc + "-V") if hmapenc in self.code2cid: hmap = self.code2cid[hmapenc] else: @@ -43,12 +42,12 @@ class CMapConverter: def load(self, fp): encs = None for line in fp: - (line, _, _) = line.strip().partition('#') + (line, _, _) = line.strip().partition("#") if not line: continue - values = line.split('\t') + values = line.split("\t") if encs is None: - assert values[0] == 'CID', str(values) + assert values[0] == "CID", str(values) encs = values continue @@ -68,7 +67,7 @@ class CMapConverter: def add(unimap, enc, code): try: codec = self.enc2codec[enc] - c = code.decode(codec, 'strict') + c = code.decode(codec, "strict") if len(c) == 1: if c not in unimap: unimap[c] = 0 @@ -89,20 +88,20 @@ class CMapConverter: unimap_h = {} unimap_v = {} for (enc, value) in zip(encs, values): - if enc == 'CID': + if enc == "CID": continue - if value == '*': + if value == "*": continue # hcodes, vcodes: encoded bytes for each writing mode. 
hcodes = [] vcodes = [] - for code in value.split(','): - vertical = code.endswith('v') + for code in value.split(","): + vertical = code.endswith("v") if vertical: code = code[:-1] try: - code = codecs.decode(code, 'hex_codec') + code = codecs.decode(code, "hex_codec") except Exception: code = chr(int(code, 16)) if vertical: @@ -155,17 +154,19 @@ def main(argv): import os.path def usage(): - print('usage: %s [-c enc=codec] output_dir regname [cid2code.txt ...]' - % argv[0]) + print( + "usage: %s [-c enc=codec] output_dir regname [cid2code.txt ...]" % argv[0] + ) return 100 + try: - (opts, args) = getopt.getopt(argv[1:], 'c:') + (opts, args) = getopt.getopt(argv[1:], "c:") except getopt.GetoptError: return usage() enc2codec = {} for (k, v) in opts: - if k == '-c': - (enc, _, codec) = v.partition('=') + if k == "-c": + (enc, _, codec) = v.partition("=") enc2codec[enc] = codec if not args: return usage() @@ -176,27 +177,27 @@ def main(argv): converter = CMapConverter(enc2codec) for path in args: - print('reading: %r...' % path) + print("reading: %r..." % path) fp = open(path) converter.load(fp) fp.close() for enc in converter.get_encs(): - fname = '%s.pickle.gz' % enc + fname = "%s.pickle.gz" % enc path = os.path.join(outdir, fname) - print('writing: %r...' % path) - fp = gzip.open(path, 'wb') + print("writing: %r..." % path) + fp = gzip.open(path, "wb") converter.dump_cmap(fp, enc) fp.close() - fname = 'to-unicode-%s.pickle.gz' % regname + fname = "to-unicode-%s.pickle.gz" % regname path = os.path.join(outdir, fname) - print('writing: %r...' % path) - fp = gzip.open(path, 'wb') + print("writing: %r..." 
% path) + fp = gzip.open(path, "wb") converter.dump_unicodemap(fp) fp.close() return -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main(sys.argv)) # type: ignore[no-untyped-call] diff --git a/tools/conv_glyphlist.py b/tools/conv_glyphlist.py index dc65f50..7a1183f 100755 --- a/tools/conv_glyphlist.py +++ b/tools/conv_glyphlist.py @@ -8,20 +8,19 @@ def main(argv): state = 0 for line in fileinput.input(): line = line.strip() - if not line or line.startswith('#'): + if not line or line.startswith("#"): if state == 1: state = 2 - print('}\n') + print("}\n") print(line) continue if state == 0: - print('\nglyphname2unicode = {') + print("\nglyphname2unicode = {") state = 1 - (name, x) = line.split(';') - codes = x.split(' ') - print(' {!r}: u\'{}\',' - .format(name, ''.join('\\u%s' % code for code in codes))) + (name, x) = line.split(";") + codes = x.split(" ") + print(" {!r}: u'{}',".format(name, "".join("\\u%s" % code for code in codes))) -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main(sys.argv)) # type: ignore[no-untyped-call] diff --git a/tools/dumppdf.py b/tools/dumppdf.py index 2199b9d..cc8c455 100755 --- a/tools/dumppdf.py +++ b/tools/dumppdf.py @@ -4,8 +4,7 @@ import logging import os.path import re import sys -from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, \ - Union, cast +from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, Union, cast from argparse import ArgumentParser import pdfminer @@ -25,33 +24,33 @@ ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]') def escape(s: Union[str, bytes]) -> str: if isinstance(s, bytes): - us = str(s, 'latin-1') + us = str(s, "latin-1") else: us = s - return ESC_PAT.sub(lambda m: '&#%d;' % ord(m.group(0)), us) + return ESC_PAT.sub(lambda m: "&#%d;" % ord(m.group(0)), us) def dumpxml(out: TextIO, obj: object, codec: Optional[str] = None) -> None: if obj is None: - out.write('') + out.write("") return if isinstance(obj, dict): 
out.write('\n' % len(obj)) for (k, v) in obj.items(): - out.write('%s\n' % k) - out.write('') + out.write("%s\n" % k) + out.write("") dumpxml(out, v) - out.write('\n') - out.write('') + out.write("\n") + out.write("") return if isinstance(obj, list): out.write('\n' % len(obj)) for v in obj: dumpxml(out, v) - out.write('\n') - out.write('') + out.write("\n") + out.write("") return if isinstance(obj, (str, bytes)): @@ -59,21 +58,20 @@ def dumpxml(out: TextIO, obj: object, codec: Optional[str] = None) -> None: return if isinstance(obj, PDFStream): - if codec == 'raw': + if codec == "raw": # Bug: writing bytes to text I/O. This will raise TypeError. out.write(obj.get_rawdata()) # type: ignore [arg-type] - elif codec == 'binary': + elif codec == "binary": # Bug: writing bytes to text I/O. This will raise TypeError. out.write(obj.get_data()) # type: ignore [arg-type] else: - out.write('\n\n') + out.write("\n\n") dumpxml(out, obj.attrs) - out.write('\n\n') - if codec == 'text': + out.write("\n\n") + if codec == "text": data = obj.get_data() - out.write('%s\n' - % (len(data), escape(data))) - out.write('') + out.write('%s\n' % (len(data), escape(data))) + out.write("") return if isinstance(obj, PDFObjRef): @@ -82,38 +80,36 @@ def dumpxml(out: TextIO, obj: object, codec: Optional[str] = None) -> None: if isinstance(obj, PSKeyword): # Likely bug: obj.name is bytes, not str - out.write('%s' - % obj.name) # type: ignore [str-bytes-safe] + out.write("%s" % obj.name) # type: ignore [str-bytes-safe] return if isinstance(obj, PSLiteral): # Likely bug: obj.name may be bytes, not str - out.write('%s' - % obj.name) # type: ignore [str-bytes-safe] + out.write("%s" % obj.name) # type: ignore [str-bytes-safe] return if isnumber(obj): - out.write('%s' % obj) + out.write("%s" % obj) return raise TypeError(obj) def dumptrailers( - out: TextIO, - doc: PDFDocument, - show_fallback_xref: bool = False + out: TextIO, doc: PDFDocument, show_fallback_xref: bool = False ) -> None: for xref in 
doc.xrefs: if not isinstance(xref, PDFXRefFallback) or show_fallback_xref: - out.write('\n') + out.write("\n") dumpxml(out, xref.get_trailer()) - out.write('\n\n\n') + out.write("\n\n\n") no_xrefs = all(isinstance(xref, PDFXRefFallback) for xref in doc.xrefs) if no_xrefs and not show_fallback_xref: - msg = 'This PDF does not have an xref. Use --show-fallback-xref if ' \ - 'you want to display the content of a fallback xref that ' \ - 'contains all objects.' + msg = ( + "This PDF does not have an xref. Use --show-fallback-xref if " + "you want to display the content of a fallback xref that " + "contains all objects." + ) logger.warning(msg) return @@ -122,10 +118,10 @@ def dumpallobjs( out: TextIO, doc: PDFDocument, codec: Optional[str] = None, - show_fallback_xref: bool = False + show_fallback_xref: bool = False, ) -> None: visited = set() - out.write('') + out.write("") for xref in doc.xrefs: for objid in xref.get_objids(): if objid in visited: @@ -137,11 +133,11 @@ def dumpallobjs( continue out.write('\n' % objid) dumpxml(out, obj, codec=codec) - out.write('\n\n\n') + out.write("\n\n\n") except PDFObjectNotFound as e: - print('not found: %r' % e) + print("not found: %r" % e) dumptrailers(out, doc, show_fallback_xref) - out.write('') + out.write("") return @@ -150,16 +146,18 @@ def dumpoutline( fname: str, objids: Any, pagenos: Container[int], - password: str = '', + password: str = "", dumpall: bool = False, codec: Optional[str] = None, - extractdir: Optional[str] = None + extractdir: Optional[str] = None, ) -> None: - fp = open(fname, 'rb') + fp = open(fname, "rb") parser = PDFParser(fp) doc = PDFDocument(parser, password) - pages = {page.pageid: pageno for (pageno, page) - in enumerate(PDFPage.create_pages(doc), 1)} + pages = { + page.pageid: pageno + for (pageno, page) in enumerate(PDFPage.create_pages(doc), 1) + } def resolve_dest(dest: object) -> Any: if isinstance(dest, (str, bytes)): @@ -167,14 +165,14 @@ def dumpoutline( elif isinstance(dest, PSLiteral): 
dest = resolve1(doc.get_dest(dest.name)) if isinstance(dest, dict): - dest = dest['D'] + dest = dest["D"] if isinstance(dest, PDFObjRef): dest = dest.resolve() return dest try: outlines = doc.get_outlines() - outfp.write('\n') + outfp.write("\n") for (level, title, dest, a, se) in outlines: pageno = None if dest: @@ -183,21 +181,20 @@ def dumpoutline( elif a: action = a if isinstance(action, dict): - subtype = action.get('S') - if subtype and repr(subtype) == '/\'GoTo\'' and action.get( - 'D'): - dest = resolve_dest(action['D']) + subtype = action.get("S") + if subtype and repr(subtype) == "/'GoTo'" and action.get("D"): + dest = resolve_dest(action["D"]) pageno = pages[dest[0].objid] s = escape(title) outfp.write('\n'.format(level, s)) if dest is not None: - outfp.write('') + outfp.write("") dumpxml(outfp, dest) - outfp.write('\n') + outfp.write("\n") if pageno is not None: - outfp.write('%r\n' % pageno) - outfp.write('\n') - outfp.write('\n') + outfp.write("%r\n" % pageno) + outfp.write("\n") + outfp.write("\n") except PDFNoOutlines: pass parser.close() @@ -205,43 +202,48 @@ def dumpoutline( return -LITERAL_FILESPEC = LIT('Filespec') -LITERAL_EMBEDDEDFILE = LIT('EmbeddedFile') +LITERAL_FILESPEC = LIT("Filespec") +LITERAL_EMBEDDEDFILE = LIT("EmbeddedFile") def extractembedded(fname: str, password: str, extractdir: str) -> None: def extract1(objid: int, obj: Dict[str, Any]) -> None: - filename = os.path.basename(obj.get('UF') or - cast(bytes, obj.get('F')).decode()) - fileref = obj['EF'].get('UF') or obj['EF'].get('F') + filename = os.path.basename(obj.get("UF") or cast(bytes, obj.get("F")).decode()) + fileref = obj["EF"].get("UF") or obj["EF"].get("F") fileobj = doc.getobj(fileref.objid) if not isinstance(fileobj, PDFStream): - error_msg = 'unable to process PDF: reference for %r is not a ' \ - 'PDFStream' % filename + error_msg = ( + "unable to process PDF: reference for %r is not a " + "PDFStream" % filename + ) raise PDFValueError(error_msg) - if 
fileobj.get('Type') is not LITERAL_EMBEDDEDFILE: + if fileobj.get("Type") is not LITERAL_EMBEDDEDFILE: raise PDFValueError( - 'unable to process PDF: reference for %r ' - 'is not an EmbeddedFile' % (filename)) - path = os.path.join(extractdir, '%.6d-%s' % (objid, filename)) + "unable to process PDF: reference for %r " + "is not an EmbeddedFile" % (filename) + ) + path = os.path.join(extractdir, "%.6d-%s" % (objid, filename)) if os.path.exists(path): - raise IOError('file exists: %r' % path) - print('extracting: %r' % path) + raise IOError("file exists: %r" % path) + print("extracting: %r" % path) os.makedirs(os.path.dirname(path), exist_ok=True) - out = open(path, 'wb') + out = open(path, "wb") out.write(fileobj.get_data()) out.close() return - with open(fname, 'rb') as fp: + with open(fname, "rb") as fp: parser = PDFParser(fp) doc = PDFDocument(parser, password) extracted_objids = set() for xref in doc.xrefs: for objid in xref.get_objids(): obj = doc.getobj(objid) - if objid not in extracted_objids and isinstance(obj, dict) \ - and obj.get('Type') is LITERAL_FILESPEC: + if ( + objid not in extracted_objids + and isinstance(obj, dict) + and obj.get("Type") is LITERAL_FILESPEC + ): extracted_objids.add(objid) extract1(objid, obj) return @@ -252,13 +254,13 @@ def dumppdf( fname: str, objids: Iterable[int], pagenos: Container[int], - password: str = '', + password: str = "", dumpall: bool = False, codec: Optional[str] = None, extractdir: Optional[str] = None, - show_fallback_xref: bool = False + show_fallback_xref: bool = False, ) -> None: - fp = open(fname, 'rb') + fp = open(fname, "rb") parser = PDFParser(fp) doc = PDFDocument(parser, password) if objids: @@ -279,71 +281,125 @@ def dumppdf( if (not objids) and (not pagenos) and (not dumpall): dumptrailers(outfp, doc, show_fallback_xref) fp.close() - if codec not in ('raw', 'binary'): - outfp.write('\n') + if codec not in ("raw", "binary"): + outfp.write("\n") return def create_parser() -> ArgumentParser: parser = 
ArgumentParser(description=__doc__, add_help=True) - parser.add_argument('files', type=str, default=None, nargs='+', - help='One or more paths to PDF files.') + parser.add_argument( + "files", + type=str, + default=None, + nargs="+", + help="One or more paths to PDF files.", + ) parser.add_argument( - "--version", "-v", action="version", - version="pdfminer.six v{}".format(pdfminer.__version__)) + "--version", + "-v", + action="version", + version="pdfminer.six v{}".format(pdfminer.__version__), + ) parser.add_argument( - '--debug', '-d', default=False, action='store_true', - help='Use debug logging level.') + "--debug", + "-d", + default=False, + action="store_true", + help="Use debug logging level.", + ) procedure_parser = parser.add_mutually_exclusive_group() procedure_parser.add_argument( - '--extract-toc', '-T', default=False, action='store_true', - help='Extract structure of outline') + "--extract-toc", + "-T", + default=False, + action="store_true", + help="Extract structure of outline", + ) procedure_parser.add_argument( - '--extract-embedded', '-E', type=str, - help='Extract embedded files') + "--extract-embedded", "-E", type=str, help="Extract embedded files" + ) parse_params = parser.add_argument_group( - 'Parser', description='Used during PDF parsing') + "Parser", description="Used during PDF parsing" + ) parse_params.add_argument( - '--page-numbers', type=int, default=None, nargs='+', - help='A space-seperated list of page numbers to parse.') + "--page-numbers", + type=int, + default=None, + nargs="+", + help="A space-seperated list of page numbers to parse.", + ) parse_params.add_argument( - '--pagenos', '-p', type=str, - help='A comma-separated list of page numbers to parse. Included for ' - 'legacy applications, use --page-numbers for more idiomatic ' - 'argument entry.') + "--pagenos", + "-p", + type=str, + help="A comma-separated list of page numbers to parse. 
Included for " + "legacy applications, use --page-numbers for more idiomatic " + "argument entry.", + ) parse_params.add_argument( - '--objects', '-i', type=str, - help='Comma separated list of object numbers to extract') + "--objects", + "-i", + type=str, + help="Comma separated list of object numbers to extract", + ) parse_params.add_argument( - '--all', '-a', default=False, action='store_true', - help='If the structure of all objects should be extracted') + "--all", + "-a", + default=False, + action="store_true", + help="If the structure of all objects should be extracted", + ) parse_params.add_argument( - '--show-fallback-xref', action='store_true', - help='Additionally show the fallback xref. Use this if the PDF ' - 'has zero or only invalid xref\'s. This setting is ignored if ' - '--extract-toc or --extract-embedded is used.') + "--show-fallback-xref", + action="store_true", + help="Additionally show the fallback xref. Use this if the PDF " + "has zero or only invalid xref's. This setting is ignored if " + "--extract-toc or --extract-embedded is used.", + ) parse_params.add_argument( - '--password', '-P', type=str, default='', - help='The password to use for decrypting PDF file.') + "--password", + "-P", + type=str, + default="", + help="The password to use for decrypting PDF file.", + ) output_params = parser.add_argument_group( - 'Output', description='Used during output generation.') + "Output", description="Used during output generation." + ) output_params.add_argument( - '--outfile', '-o', type=str, default='-', + "--outfile", + "-o", + type=str, + default="-", help='Path to file where output is written. 
Or "-" (default) to ' - 'write to stdout.') + "write to stdout.", + ) codec_parser = output_params.add_mutually_exclusive_group() codec_parser.add_argument( - '--raw-stream', '-r', default=False, action='store_true', - help='Write stream objects without encoding') + "--raw-stream", + "-r", + default=False, + action="store_true", + help="Write stream objects without encoding", + ) codec_parser.add_argument( - '--binary-stream', '-b', default=False, action='store_true', - help='Write stream objects with binary encoding') + "--binary-stream", + "-b", + default=False, + action="store_true", + help="Write stream objects with binary encoding", + ) codec_parser.add_argument( - '--text-stream', '-t', default=False, action='store_true', - help='Write stream objects as plain text') + "--text-stream", + "-t", + default=False, + action="store_true", + help="Write stream objects as plain text", + ) return parser @@ -355,53 +411,63 @@ def main(argv: Optional[List[str]] = None) -> None: if args.debug: logging.getLogger().setLevel(logging.DEBUG) - if args.outfile == '-': + if args.outfile == "-": outfp = sys.stdout else: - outfp = open(args.outfile, 'w') + outfp = open(args.outfile, "w") if args.objects: - objids = [int(x) for x in args.objects.split(',')] + objids = [int(x) for x in args.objects.split(",")] else: objids = [] if args.page_numbers: pagenos = {x - 1 for x in args.page_numbers} elif args.pagenos: - pagenos = {int(x) - 1 for x in args.pagenos.split(',')} + pagenos = {int(x) - 1 for x in args.pagenos.split(",")} else: pagenos = set() password = args.password if args.raw_stream: - codec: Optional[str] = 'raw' + codec: Optional[str] = "raw" elif args.binary_stream: - codec = 'binary' + codec = "binary" elif args.text_stream: - codec = 'text' + codec = "text" else: codec = None for fname in args.files: if args.extract_toc: dumpoutline( - outfp, fname, objids, pagenos, password=password, - dumpall=args.all, codec=codec, extractdir=None + outfp, + fname, + objids, + 
pagenos, + password=password, + dumpall=args.all, + codec=codec, + extractdir=None, ) elif args.extract_embedded: - extractembedded( - fname, password=password, extractdir=args.extract_embedded - ) + extractembedded(fname, password=password, extractdir=args.extract_embedded) else: dumppdf( - outfp, fname, objids, pagenos, password=password, - dumpall=args.all, codec=codec, extractdir=None, - show_fallback_xref=args.show_fallback_xref + outfp, + fname, + objids, + pagenos, + password=password, + dumpall=args.all, + codec=codec, + extractdir=None, + show_fallback_xref=args.show_fallback_xref, ) outfp.close() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py index b5760c8..e872f5f 100755 --- a/tools/pdf2txt.py +++ b/tools/pdf2txt.py @@ -12,10 +12,7 @@ from pdfminer.utils import AnyIO logging.basicConfig() -OUTPUT_TYPES = ((".htm", "html"), - (".html", "html"), - (".xml", "xml"), - (".tag", "tag")) +OUTPUT_TYPES = ((".htm", "html"), (".html", "html"), (".xml", "xml"), (".tag", "tag")) def float_or_disabled(x: str) -> Optional[float]: @@ -29,17 +26,17 @@ def float_or_disabled(x: str) -> Optional[float]: def extract_text( files: Iterable[str] = [], - outfile: str = '-', + outfile: str = "-", laparams: Optional[LAParams] = None, - output_type: str = 'text', - codec: str = 'utf-8', + output_type: str = "text", + codec: str = "utf-8", strip_control: bool = False, maxpages: int = 0, page_numbers: Optional[Container[int]] = None, password: str = "", scale: float = 1.0, rotation: int = 0, - layoutmode: str = 'normal', + layoutmode: str = "normal", output_dir: Optional[str] = None, debug: bool = False, disable_caching: bool = False, @@ -56,7 +53,7 @@ def extract_text( if outfile == "-": outfp: AnyIO = sys.stdout if sys.stdout.encoding is not None: - codec = 'utf-8' + codec = "utf-8" else: outfp = open(outfile, "wb") @@ -69,118 +66,211 @@ def extract_text( def parse_args(args: Optional[List[str]]) -> 
argparse.Namespace: parser = argparse.ArgumentParser(description=__doc__, add_help=True) parser.add_argument( - "files", type=str, default=None, nargs="+", - help="One or more paths to PDF files.") + "files", + type=str, + default=None, + nargs="+", + help="One or more paths to PDF files.", + ) parser.add_argument( - "--version", "-v", action="version", - version="pdfminer.six v{}".format(pdfminer.__version__)) + "--version", + "-v", + action="version", + version="pdfminer.six v{}".format(pdfminer.__version__), + ) parser.add_argument( - "--debug", "-d", default=False, action="store_true", - help="Use debug logging level.") + "--debug", + "-d", + default=False, + action="store_true", + help="Use debug logging level.", + ) parser.add_argument( - "--disable-caching", "-C", default=False, action="store_true", - help="If caching or resources, such as fonts, should be disabled.") + "--disable-caching", + "-C", + default=False, + action="store_true", + help="If caching or resources, such as fonts, should be disabled.", + ) parse_params = parser.add_argument_group( - 'Parser', description='Used during PDF parsing') + "Parser", description="Used during PDF parsing" + ) parse_params.add_argument( - "--page-numbers", type=int, default=None, nargs="+", - help="A space-seperated list of page numbers to parse.") + "--page-numbers", + type=int, + default=None, + nargs="+", + help="A space-seperated list of page numbers to parse.", + ) parse_params.add_argument( - "--pagenos", "-p", type=str, + "--pagenos", + "-p", + type=str, help="A comma-separated list of page numbers to parse. 
" - "Included for legacy applications, use --page-numbers " - "for more idiomatic argument entry.") + "Included for legacy applications, use --page-numbers " + "for more idiomatic argument entry.", + ) parse_params.add_argument( - "--maxpages", "-m", type=int, default=0, - help="The maximum number of pages to parse.") + "--maxpages", + "-m", + type=int, + default=0, + help="The maximum number of pages to parse.", + ) parse_params.add_argument( - "--password", "-P", type=str, default="", - help="The password to use for decrypting PDF file.") + "--password", + "-P", + type=str, + default="", + help="The password to use for decrypting PDF file.", + ) parse_params.add_argument( - "--rotation", "-R", default=0, type=int, + "--rotation", + "-R", + default=0, + type=int, help="The number of degrees to rotate the PDF " - "before other types of processing.") + "before other types of processing.", + ) la_params = LAParams() # will be used for defaults la_param_group = parser.add_argument_group( - 'Layout analysis', description='Used during layout analysis.') + "Layout analysis", description="Used during layout analysis." + ) la_param_group.add_argument( - "--no-laparams", "-n", default=False, action="store_true", - help="If layout analysis parameters should be ignored.") - la_param_group.add_argument( - "--detect-vertical", "-V", default=la_params.detect_vertical, + "--no-laparams", + "-n", + default=False, action="store_true", - help="If vertical text should be considered during layout analysis") + help="If layout analysis parameters should be ignored.", + ) la_param_group.add_argument( - "--line-overlap", type=float, default=la_params.line_overlap, - help='If two characters have more overlap than this they ' - 'are considered to be on the same line. 
The overlap is specified ' - 'relative to the minimum height of both characters.') + "--detect-vertical", + "-V", + default=la_params.detect_vertical, + action="store_true", + help="If vertical text should be considered during layout analysis", + ) la_param_group.add_argument( - "--char-margin", "-M", type=float, default=la_params.char_margin, + "--line-overlap", + type=float, + default=la_params.line_overlap, + help="If two characters have more overlap than this they " + "are considered to be on the same line. The overlap is specified " + "relative to the minimum height of both characters.", + ) + la_param_group.add_argument( + "--char-margin", + "-M", + type=float, + default=la_params.char_margin, help="If two characters are closer together than this margin they " - "are considered to be part of the same line. The margin is " - "specified relative to the width of the character.") + "are considered to be part of the same line. The margin is " + "specified relative to the width of the character.", + ) la_param_group.add_argument( - "--word-margin", "-W", type=float, default=la_params.word_margin, + "--word-margin", + "-W", + type=float, + default=la_params.word_margin, help="If two characters on the same line are further apart than this " - "margin then they are considered to be two separate words, and " - "an intermediate space will be added for readability. The margin " - "is specified relative to the width of the character.") + "margin then they are considered to be two separate words, and " + "an intermediate space will be added for readability. The margin " + "is specified relative to the width of the character.", + ) la_param_group.add_argument( - "--line-margin", "-L", type=float, default=la_params.line_margin, + "--line-margin", + "-L", + type=float, + default=la_params.line_margin, help="If two lines are close together they are considered to " - "be part of the same paragraph. 
The margin is specified " - "relative to the height of a line.") + "be part of the same paragraph. The margin is specified " + "relative to the height of a line.", + ) la_param_group.add_argument( - "--boxes-flow", "-F", type=float_or_disabled, + "--boxes-flow", + "-F", + type=float_or_disabled, default=la_params.boxes_flow, help="Specifies how much a horizontal and vertical position of a " - "text matters when determining the order of lines. The value " - "should be within the range of -1.0 (only horizontal position " - "matters) to +1.0 (only vertical position matters). You can also " - "pass `disabled` to disable advanced layout analysis, and " - "instead return text based on the position of the bottom left " - "corner of the text box.") + "text matters when determining the order of lines. The value " + "should be within the range of -1.0 (only horizontal position " + "matters) to +1.0 (only vertical position matters). You can also " + "pass `disabled` to disable advanced layout analysis, and " + "instead return text based on the position of the bottom left " + "corner of the text box.", + ) la_param_group.add_argument( - "--all-texts", "-A", default=la_params.all_texts, action="store_true", - help="If layout analysis should be performed on text in figures.") + "--all-texts", + "-A", + default=la_params.all_texts, + action="store_true", + help="If layout analysis should be performed on text in figures.", + ) output_params = parser.add_argument_group( - 'Output', description='Used during output generation.') + "Output", description="Used during output generation." + ) output_params.add_argument( - "--outfile", "-o", type=str, default="-", + "--outfile", + "-o", + type=str, + default="-", help="Path to file where output is written. 
" - "Or \"-\" (default) to write to stdout.") + 'Or "-" (default) to write to stdout.', + ) output_params.add_argument( - "--output_type", "-t", type=str, default="text", - help="Type of output to generate {text,html,xml,tag}.") + "--output_type", + "-t", + type=str, + default="text", + help="Type of output to generate {text,html,xml,tag}.", + ) output_params.add_argument( - "--codec", "-c", type=str, default="utf-8", - help="Text encoding to use in output file.") + "--codec", + "-c", + type=str, + default="utf-8", + help="Text encoding to use in output file.", + ) output_params.add_argument( - "--output-dir", "-O", default=None, + "--output-dir", + "-O", + default=None, help="The output directory to put extracted images in. If not given, " - "images are not extracted.") + "images are not extracted.", + ) output_params.add_argument( - "--layoutmode", "-Y", default="normal", - type=str, help="Type of layout to use when generating html " - "{normal,exact,loose}. If normal,each line is" - " positioned separately in the html. If exact" - ", each character is positioned separately in" - " the html. If loose, same result as normal " - "but with an additional newline after each " - "text line. Only used when output_type is html.") + "--layoutmode", + "-Y", + default="normal", + type=str, + help="Type of layout to use when generating html " + "{normal,exact,loose}. If normal,each line is" + " positioned separately in the html. If exact" + ", each character is positioned separately in" + " the html. If loose, same result as normal " + "but with an additional newline after each " + "text line. Only used when output_type is html.", + ) output_params.add_argument( - "--scale", "-s", type=float, default=1.0, + "--scale", + "-s", + type=float, + default=1.0, help="The amount of zoom to use when generating html file. 
" - "Only used when output_type is html.") + "Only used when output_type is html.", + ) output_params.add_argument( - "--strip-control", "-S", default=False, action="store_true", + "--strip-control", + "-S", + default=False, + action="store_true", help="Remove control statement from text. " - "Only used when output_type is xml.") + "Only used when output_type is xml.", + ) parsed_args = parser.parse_args(args=args) @@ -199,13 +289,10 @@ def parse_args(args: Optional[List[str]]) -> argparse.Namespace: ) if parsed_args.page_numbers: - parsed_args.page_numbers = {x-1 for x in parsed_args.page_numbers} + parsed_args.page_numbers = {x - 1 for x in parsed_args.page_numbers} if parsed_args.pagenos: - parsed_args.page_numbers = { - int(x) - 1 - for x in parsed_args.pagenos.split(",") - } + parsed_args.page_numbers = {int(x) - 1 for x in parsed_args.pagenos.split(",")} if parsed_args.output_type == "text" and parsed_args.outfile != "-": for override, alttype in OUTPUT_TYPES: @@ -222,5 +309,5 @@ def main(args: Optional[List[str]] = None) -> int: return 0 -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/tools/pdfdiff.py b/tools/pdfdiff.py index 1be0723..43156e8 100644 --- a/tools/pdfdiff.py +++ b/tools/pdfdiff.py @@ -21,14 +21,20 @@ def compare(file1: str, file2: str, **kwargs: Any) -> Iterable[str]: # If any LAParams group arguments were passed, # create an LAParams object and # populate with given args. Otherwise, set it to None. 
- if kwargs.get('laparams', None) is None: + if kwargs.get("laparams", None) is None: laparams = layout.LAParams() - for param in ("all_texts", "detect_vertical", "word_margin", - "char_margin", "line_margin", "boxes_flow"): + for param in ( + "all_texts", + "detect_vertical", + "word_margin", + "char_margin", + "line_margin", + "boxes_flow", + ): paramv = kwargs.get(param, None) if paramv is not None: setattr(laparams, param, paramv) - kwargs['laparams'] = laparams + kwargs["laparams"] = laparams s1 = io.StringIO() with open(file1, "rb") as fp: @@ -39,81 +45,140 @@ def compare(file1: str, file2: str, **kwargs: Any) -> Iterable[str]: high_level.extract_text_to_fp(fp, s2, **kwargs) import difflib + s1.seek(0) s2.seek(0) s1_lines, s2_lines = s1.readlines(), s2.readlines() import os.path + try: - extension = os.path.splitext(kwargs['outfile'])[1][1:4] - if extension.lower() == 'htm': + extension = os.path.splitext(kwargs["outfile"])[1][1:4] + if extension.lower() == "htm": return difflib.HtmlDiff().make_file(s1_lines, s2_lines) except KeyError: pass - return difflib.unified_diff(s1_lines, s2_lines, n=kwargs['context_lines']) + return difflib.unified_diff(s1_lines, s2_lines, n=kwargs["context_lines"]) # main def main(args: Optional[List[str]] = None) -> int: import argparse + P = argparse.ArgumentParser(description=__doc__) P.add_argument("file1", type=str, default=None, help="File 1 to compare.") P.add_argument("file2", type=str, default=None, help="File 2 to compare.") - P.add_argument("-o", "--outfile", type=str, default="-", - help="Output file(default/'-' is stdout) if .htm or .html," - " create an HTML table (or a complete HTML file " - "containing the table) showing a side by side, " - "line by line comparison of text with inter-line and " - "intra-line change highlights. 
The table can be " - "generated in either full or " - "contextual difference mode.") - P.add_argument("-N", "--context-lines", default=3, type=int, - help="context lines shown") - P.add_argument("-d", "--debug", default=False, action="store_true", - help="Debug output.") + P.add_argument( + "-o", + "--outfile", + type=str, + default="-", + help="Output file(default/'-' is stdout) if .htm or .html," + " create an HTML table (or a complete HTML file " + "containing the table) showing a side by side, " + "line by line comparison of text with inter-line and " + "intra-line change highlights. The table can be " + "generated in either full or " + "contextual difference mode.", + ) + P.add_argument( + "-N", "--context-lines", default=3, type=int, help="context lines shown" + ) + P.add_argument( + "-d", "--debug", default=False, action="store_true", help="Debug output." + ) # params for pdf2txt - P.add_argument("-p", "--pagenos", type=str, - help="Comma-separated list of page numbers to parse. " - "Included for legacy applications, " - "use --page-numbers for more " - "idiomatic argument entry.") - P.add_argument("--page-numbers", type=int, default=None, nargs="+", - help="Alternative to --pagenos with space-separated " - "numbers; supercedes --pagenos where it is used.") - P.add_argument("-m", "--maxpages", type=int, default=0, - help="Maximum pages to parse") - P.add_argument("-P", "--password", type=str, default="", - help="Decryption password for both PDFs") - P.add_argument("-t", "--output_type", type=str, default="text", - help="pdf2txt type: text|html|xml|tag (default is text)") - P.add_argument("-c", "--codec", type=str, default="utf-8", - help="Text encoding") + P.add_argument( + "-p", + "--pagenos", + type=str, + help="Comma-separated list of page numbers to parse. 
" + "Included for legacy applications, " + "use --page-numbers for more " + "idiomatic argument entry.", + ) + P.add_argument( + "--page-numbers", + type=int, + default=None, + nargs="+", + help="Alternative to --pagenos with space-separated " + "numbers; supercedes --pagenos where it is used.", + ) + P.add_argument( + "-m", "--maxpages", type=int, default=0, help="Maximum pages to parse" + ) + P.add_argument( + "-P", + "--password", + type=str, + default="", + help="Decryption password for both PDFs", + ) + P.add_argument( + "-t", + "--output_type", + type=str, + default="text", + help="pdf2txt type: text|html|xml|tag (default is text)", + ) + P.add_argument("-c", "--codec", type=str, default="utf-8", help="Text encoding") P.add_argument("-s", "--scale", type=float, default=1.0, help="Scale") - P.add_argument("-A", "--all-texts", default=None, action="store_true", - help="LAParams all texts") - P.add_argument("-V", "--detect-vertical", default=None, - action="store_true", help="LAParams detect vertical") - P.add_argument("-W", "--word-margin", type=float, default=None, - help="LAParams word margin") - P.add_argument("-M", "--char-margin", type=float, default=None, - help="LAParams char margin") - P.add_argument("-L", "--line-margin", type=float, default=None, - help="LAParams line margin") - P.add_argument("-F", "--boxes-flow", type=float, default=None, - help="LAParams boxes flow") - P.add_argument("-Y", "--layoutmode", default="normal", type=str, - help="HTML Layout Mode") - P.add_argument("-n", "--no-laparams", default=False, - action="store_true", help="Pass None as LAParams") - P.add_argument("-R", "--rotation", default=0, type=int, - help="Rotation") - P.add_argument("-O", "--output-dir", default=None, - help="Output directory for images") - P.add_argument("-C", "--disable-caching", default=False, - action="store_true", help="Disable caching") - P.add_argument("-S", "--strip-control", default=False, - action="store_true", help="Strip control in XML mode") + 
P.add_argument( + "-A", + "--all-texts", + default=None, + action="store_true", + help="LAParams all texts", + ) + P.add_argument( + "-V", + "--detect-vertical", + default=None, + action="store_true", + help="LAParams detect vertical", + ) + P.add_argument( + "-W", "--word-margin", type=float, default=None, help="LAParams word margin" + ) + P.add_argument( + "-M", "--char-margin", type=float, default=None, help="LAParams char margin" + ) + P.add_argument( + "-L", "--line-margin", type=float, default=None, help="LAParams line margin" + ) + P.add_argument( + "-F", "--boxes-flow", type=float, default=None, help="LAParams boxes flow" + ) + P.add_argument( + "-Y", "--layoutmode", default="normal", type=str, help="HTML Layout Mode" + ) + P.add_argument( + "-n", + "--no-laparams", + default=False, + action="store_true", + help="Pass None as LAParams", + ) + P.add_argument("-R", "--rotation", default=0, type=int, help="Rotation") + P.add_argument( + "-O", "--output-dir", default=None, help="Output directory for images" + ) + P.add_argument( + "-C", + "--disable-caching", + default=False, + action="store_true", + help="Disable caching", + ) + P.add_argument( + "-S", + "--strip-control", + default=False, + action="store_true", + help="Strip control in XML mode", + ) A = P.parse_args(args=args) @@ -121,26 +186,28 @@ def main(args: Optional[List[str]] = None) -> int: logging.getLogger().setLevel(logging.DEBUG) if A.page_numbers: - A.page_numbers = {x-1 for x in A.page_numbers} + A.page_numbers = {x - 1 for x in A.page_numbers} if A.pagenos: - A.page_numbers = {int(x)-1 for x in A.pagenos.split(",")} + A.page_numbers = {int(x) - 1 for x in A.pagenos.split(",")} if A.output_type == "text" and A.outfile != "-": - for override, alttype in ((".htm", "html"), - (".html", "html"), - (".xml", "xml"), - (".tag", "tag")): + for override, alttype in ( + (".htm", "html"), + (".html", "html"), + (".xml", "xml"), + (".tag", "tag"), + ): if A.outfile.endswith(override): A.output_type = 
alttype if A.outfile == "-": outfp = sys.stdout else: - outfp = open(A.outfile, "w", encoding='utf-8') + outfp = open(A.outfile, "w", encoding="utf-8") outfp.writelines(compare(**vars(A))) outfp.close() return 0 -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/tools/pdfstats.py b/tools/pdfstats.py index 9bf3472..1b57b80 100755 --- a/tools/pdfstats.py +++ b/tools/pdfstats.py @@ -21,7 +21,7 @@ _, SCRIPT = os.path.split(__file__) def msg(*args: object, **kwargs: Any) -> None: - print(' '.join(map(str, args)), **kwargs) # noqa E999 + print(" ".join(map(str, args)), **kwargs) # noqa E999 def flat_iter(obj: object) -> Iterator[object]: @@ -35,22 +35,22 @@ def main(args: List[str]) -> int: msg(SCRIPT, args) if len(args) != 1: - msg('Parse a PDF file and print some pdfminer-specific stats') - msg('Usage:', SCRIPT, '') + msg("Parse a PDF file and print some pdfminer-specific stats") + msg("Usage:", SCRIPT, "") return 1 - infilename, = args + (infilename,) = args lt_types: Counter[str] = collections.Counter() - with open(infilename, 'rb') as pdf_file: + with open(infilename, "rb") as pdf_file: # Create a PDF parser object associated with the file object. parser = PDFParser(pdf_file) # Create a PDF document object that stores the document structure. # Supply the password for initialization. - password = '' + password = "" document = PDFDocument(parser, password) # Check if the document allows text extraction. 
if not document.is_extractable: @@ -63,7 +63,7 @@ def main(args: List[str]) -> int: laparams = LAParams( detect_vertical=True, all_texts=True, - ) + ) device = PDFPageAggregator(rsrcmgr, laparams=laparams) interpreter = PDFPageInterpreter(rsrcmgr, device) @@ -75,11 +75,11 @@ def main(args: List[str]) -> int: lt_types.update(type(item).__name__ for item in flat_iter(layout)) - msg('page_count', page_count) - msg('lt_types:', ' '.join('{}:{}'.format(*tc) for tc in lt_types.items())) + msg("page_count", page_count) + msg("lt_types:", " ".join("{}:{}".format(*tc) for tc in lt_types.items())) return 0 -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main(sys.argv[1:])) diff --git a/tools/prof.py b/tools/prof.py index 18803a7..0477fd9 100644 --- a/tools/prof.py +++ b/tools/prof.py @@ -7,15 +7,16 @@ def prof_main(argv: List[str]) -> int: import hotshot.stats # type: ignore[import] def usage() -> int: - print('usage: %s module.function [args ...]' % argv[0]) + print("usage: %s module.function [args ...]" % argv[0]) return 100 + args = argv[1:] if len(args) < 1: return usage() name = args.pop(0) - prof = name+'.prof' - i = name.rindex('.') - (modname, funcname) = (name[:i], name[i+1:]) + prof = name + ".prof" + i = name.rindex(".") + (modname, funcname) = (name[:i], name[i + 1 :]) # Type error: fromlist expects sequence of strings; presumably the intent # is to retrieve the named module rather than a top-level package (as in @@ -31,10 +32,10 @@ def prof_main(argv: List[str]) -> int: else: stats = hotshot.stats.load(prof) stats.strip_dirs() - stats.sort_stats('time', 'calls') + stats.sort_stats("time", "calls") stats.print_stats(1000) return 0 -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(prof_main(sys.argv))