Check blackness in github actions (#711)

* Check blackness in github actions * Blacken code * Update github action names * Add contributing guidelines on using black * Add to checklist for PR
2022-02-11 22:46:51 +01:00 · 2022-02-11 22:46:51 +01:00 · b9a8920cdf
parent 830acff94c
commit b9a8920cdf
60 changed files with 12836 additions and 7435 deletions
--- a/.flake8
+++ b/.flake8
@ -0,0 +1,5 @@
 [flake8]
 max-line-length = 88
 extend-ignore = 
    # See https://github.com/PyCQA/pycodestyle/issues/373
    E203,
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@ -1,22 +1,17 @@
 **Pull request**
-Thanks for improving pdfminer.six! Please include the following information to
+Please remove this paragraph and replace it with a description of your PR.
-help us discuss and merge this PR:
+Also include links to the issues that it fixes. 
 - A description of why this PR is needed. What does it fix? What does it 
  improve?
 - A summary of the things that this PR changes.
 - Reference the issues that this PR fixes (use the fixes #(issue nr) syntax). 
  If this PR does not fix any issue, create the issue first and mention that 
  you are willing to work on it.
 **How Has This Been Tested?**
-Please describe the tests that you ran to verify your changes. Provide 
+Please replace this paragraph with a description of how this PR has been 
-instructions so we can reproduce. Include an example pdf if you have one. 
+tested. Include the necessary instructions and files such that others can
 reproduce it.
 **Checklist**
 - [ ] I have formatted my code with [black](https://github.com/psf/black).
 - [ ] I have added tests that prove my fix is effective or that my feature 
  works
 - [ ] I have added docstrings to newly created methods and classes
--- a/.github/workflows/actions.yml
+++ b/.github/workflows/actions.yml
@ -15,6 +15,15 @@ env:
 jobs:
  check-code-formatting:
    name: Check code formatting
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Check code formatting
        uses: psf/black@stable
  check-coding-style:
    name: Check coding style
    runs-on: ubuntu-latest
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -31,7 +31,7 @@ Any contribution is appreciated! You might want to:
 * Include unit tests when possible. In case of bugs, this will help to prevent the same mistake in the future. In case 
  of features, this will show that your code works correctly.
 * Code should work for Python 3.6+.
-* Code should conform to PEP8 coding style.
+* Code should be formatted with [black](https://github.com/psf/black). 
 * New features should be well documented using docstrings.
 * Check spelling and grammar.
 * Don't forget to update the [CHANGELOG.md](CHANGELOG.md#[Unreleased])
@ -68,3 +68,9 @@ Any contribution is appreciated! You might want to:
   ```sh
    nox -e py36
    ```
 4. After changing the code, run the black formatter.
    ```sh
    black .
    ```
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -16,14 +16,13 @@ from typing import List
 import pdfminer
-sys.path.insert(0, os.path.join(
+sys.path.insert(0, os.path.join(os.path.abspath(os.path.dirname(__file__)), "../../"))
    os.path.abspath(os.path.dirname(__file__)), '../../'))
 # -- Project information -----------------------------------------------------
-project = 'pdfminer.six'
+project = "pdfminer.six"
-copyright = '2019, Yusuke Shinyama, Philippe Guglielmetti & Pieter Marsman'
+copyright = "2019, Yusuke Shinyama, Philippe Guglielmetti & Pieter Marsman"
-author = 'Yusuke Shinyama, Philippe Guglielmetti & Pieter Marsman'
+author = "Yusuke Shinyama, Philippe Guglielmetti & Pieter Marsman"
 # The full version, including alpha/beta/rc tags
 release = pdfminer.__version__
@ -35,16 +34,16 @@ release = pdfminer.__version__
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    'sphinxarg.ext',
+    "sphinxarg.ext",
-    'sphinx.ext.autodoc',
+    "sphinx.ext.autodoc",
-    'sphinx.ext.doctest',
+    "sphinx.ext.doctest",
 ]
 # Root rst file
-master_doc = 'index'
+master_doc = "index"
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
@ -57,9 +56,9 @@ exclude_patterns: List[str] = []
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'alabaster'
+html_theme = "alabaster"
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
--- a/noxfile.py
+++ b/noxfile.py
@ -6,53 +6,30 @@ PYTHON_ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"]
@nox.session
 def lint(session):
-    session.install('flake8')
+    session.install("flake8")
-    session.run(
+    session.run("flake8", "pdfminer/", "tools/", "tests/", "--count", "--statistics")
        'flake8',
        'pdfminer/',
        'tools/',
        'tests/',
        '--count',
        '--statistics'
    )
@nox.session
 def types(session):
-    session.install('mypy')
+    session.install("mypy")
    session.run(
-        'mypy',
+        "mypy", "--install-types", "--non-interactive", "--show-error-codes", "."
        '--install-types',
        '--non-interactive',
        '--show-error-codes',
        '.'
    )
@nox.session(python=PYTHON_ALL_VERSIONS)
 def tests(session):
    session.install("-e", ".[dev]")
-    session.run('pytest')
+    session.run("pytest")
@nox.session
 def docs(session):
    session.install("-e", ".[docs]")
    session.run(
-        'python',
+        "python", "-m", "sphinx", "-b", "html", "docs/source", "docs/build/html"
        '-m',
        'sphinx',
        '-b',
        'html',
        'docs/source',
        'docs/build/html'
    )
    session.run(
-        'python',
+        "python", "-m", "sphinx", "-b", "doctest", "docs/source", "docs/build/doctest"
        '-m',
        'sphinx',
        '-b',
        'doctest',
        'docs/source',
        'docs/build/doctest'
    )
--- a/pdfminer/init.py
+++ b/pdfminer/init.py
@ -1,4 +1,4 @@
-__version__ = '20211012'
+__version__ = "20211012"
-if __name__ == '__main__':
+if __name__ == "__main__":
    print(__version__)
--- a/pdfminer/_saslprep.py
+++ b/pdfminer/_saslprep.py
@ -18,7 +18,7 @@
 """An implementation of RFC4013 SASLprep."""
-__all__ = ['saslprep']
+__all__ = ["saslprep"]
 import stringprep
 from typing import Callable, Tuple
@ -37,7 +37,8 @@ _PROHIBITED: Tuple[Callable[[str], bool], ...] = (
    stringprep.in_table_c6,
    stringprep.in_table_c7,
    stringprep.in_table_c8,
-    stringprep.in_table_c9)
+    stringprep.in_table_c9,
 )
 def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str:
@ -63,12 +64,12 @@ def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str:
    in_table_c12 = stringprep.in_table_c12
    in_table_b1 = stringprep.in_table_b1
    data = "".join(
-        ["\u0020" if in_table_c12(elt) else elt
+        ["\u0020" if in_table_c12(elt) else elt for elt in data if not in_table_b1(elt)]
-         for elt in data if not in_table_b1(elt)])
+    )
    # RFC3454 section 2, step 2 - Normalize
    # RFC4013 section 2.2 normalization
-    data = unicodedata.ucd_3_2_0.normalize('NFKC', data)
+    data = unicodedata.ucd_3_2_0.normalize("NFKC", data)
    in_table_d1 = stringprep.in_table_d1
    if in_table_d1(data[0]):
@ -89,7 +90,6 @@ def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str:
    # RFC3454 section 2, step 3 and 4 - Prohibit and check bidi
    for char in data:
        if any(in_table(char) for in_table in prohibited):
-            raise ValueError(
+            raise ValueError("SASLprep: failed prohibited character check")
                "SASLprep: failed prohibited character check")
    return data
--- a/pdfminer/arcfour.py
+++ b/pdfminer/arcfour.py
@ -9,7 +9,6 @@ from typing import Sequence
 class Arcfour:
    def __init__(self, key: Sequence[int]) -> None:
        # because Py3 range is not indexable
        s = [i for i in range(256)]
@ -24,12 +23,12 @@ class Arcfour:
    def process(self, data: bytes) -> bytes:
        (i, j) = (self.i, self.j)
        s = self.s
-        r = b''
+        r = b""
        for c in iter(data):
-            i = (i+1) % 256
+            i = (i + 1) % 256
-            j = (j+s[i]) % 256
+            j = (j + s[i]) % 256
            (s[i], s[j]) = (s[j], s[i])
-            k = s[(s[i]+s[j]) % 256]
+            k = s[(s[i] + s[j]) % 256]
            r += bytes((c ^ k,))
        (self.i, self.j) = (i, j)
        return r
--- a/pdfminer/ascii85.py
+++ b/pdfminer/ascii85.py
@ -21,30 +21,30 @@ def ascii85decode(data: bytes) -> bytes:
    """
    n = b = 0
-    out = b''
+    out = b""
    for i in iter(data):
        c = bytes((i,))
-        if b'!' <= c and c <= b'u':
+        if b"!" <= c and c <= b"u":
            n += 1
-            b = b*85+(ord(c)-33)
+            b = b * 85 + (ord(c) - 33)
            if n == 5:
-                out += struct.pack('>L', b)
+                out += struct.pack(">L", b)
                n = b = 0
-        elif c == b'z':
+        elif c == b"z":
            assert n == 0, str(n)
-            out += b'\0\0\0\0'
+            out += b"\0\0\0\0"
-        elif c == b'~':
+        elif c == b"~":
            if n:
-                for _ in range(5-n):
+                for _ in range(5 - n):
-                    b = b*85+84
+                    b = b * 85 + 84
-                out += struct.pack('>L', b)[:n-1]
+                out += struct.pack(">L", b)[: n - 1]
            break
    return out
 # asciihexdecode(data)
-hex_re = re.compile(br'([a-f\d]{2})', re.IGNORECASE)
+hex_re = re.compile(rb"([a-f\d]{2})", re.IGNORECASE)
-trail_re = re.compile(br'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
+trail_re = re.compile(rb"^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$", re.IGNORECASE)
 def asciihexdecode(data: bytes) -> bytes:
@ -57,15 +57,16 @@ def asciihexdecode(data: bytes) -> bytes:
    the EOD marker after reading an odd number of hexadecimal digits, it
    will behave as if a 0 followed the last digit.
    """
    def decode(x: bytes) -> bytes:
        i = int(x, 16)
        return bytes((i,))
-    out = b''
+    out = b""
    for x in hex_re.findall(data):
        out += decode(x)
    m = trail_re.search(data)
    if m:
-        out += decode(m.group(1)+b'0')
+        out += decode(m.group(1) + b"0")
    return out
--- a/pdfminer/ccitt.py
+++ b/pdfminer/ccitt.py
@ -12,8 +12,18 @@
 import array
-from typing import (Any, Callable, Dict, Iterator, List, MutableSequence,
+from typing import (
-                    Optional, Sequence, Union, cast)
+    Any,
    Callable,
    Dict,
    Iterator,
    List,
    MutableSequence,
    Optional,
    Sequence,
    Union,
    cast,
 )
 def get_bytes(data: bytes) -> Iterator[int]:
@ -46,7 +56,7 @@ class BitParser:
                if p[b] is None:
                    p[b] = [None, None]
                p = p[b]
-            if bits[i] == '1':
+            if bits[i] == "1":
                b = 1
            else:
                b = 0
@ -74,252 +84,252 @@ class BitParser:
 class CCITTG4Parser(BitParser):
    MODE = [None, None]
-    BitParser.add(MODE, 0,   '1')
+    BitParser.add(MODE, 0, "1")
-    BitParser.add(MODE, +1,  '011')
+    BitParser.add(MODE, +1, "011")
-    BitParser.add(MODE, -1,  '010')
+    BitParser.add(MODE, -1, "010")
-    BitParser.add(MODE, 'h', '001')
+    BitParser.add(MODE, "h", "001")
-    BitParser.add(MODE, 'p', '0001')
+    BitParser.add(MODE, "p", "0001")
-    BitParser.add(MODE, +2,  '000011')
+    BitParser.add(MODE, +2, "000011")
-    BitParser.add(MODE, -2,  '000010')
+    BitParser.add(MODE, -2, "000010")
-    BitParser.add(MODE, +3,  '0000011')
+    BitParser.add(MODE, +3, "0000011")
-    BitParser.add(MODE, -3,  '0000010')
+    BitParser.add(MODE, -3, "0000010")
-    BitParser.add(MODE, 'u', '0000001111')
+    BitParser.add(MODE, "u", "0000001111")
-    BitParser.add(MODE, 'x1', '0000001000')
+    BitParser.add(MODE, "x1", "0000001000")
-    BitParser.add(MODE, 'x2', '0000001001')
+    BitParser.add(MODE, "x2", "0000001001")
-    BitParser.add(MODE, 'x3', '0000001010')
+    BitParser.add(MODE, "x3", "0000001010")
-    BitParser.add(MODE, 'x4', '0000001011')
+    BitParser.add(MODE, "x4", "0000001011")
-    BitParser.add(MODE, 'x5', '0000001100')
+    BitParser.add(MODE, "x5", "0000001100")
-    BitParser.add(MODE, 'x6', '0000001101')
+    BitParser.add(MODE, "x6", "0000001101")
-    BitParser.add(MODE, 'x7', '0000001110')
+    BitParser.add(MODE, "x7", "0000001110")
-    BitParser.add(MODE, 'e', '000000000001000000000001')
+    BitParser.add(MODE, "e", "000000000001000000000001")
    WHITE = [None, None]
-    BitParser.add(WHITE, 0, '00110101')
+    BitParser.add(WHITE, 0, "00110101")
-    BitParser.add(WHITE, 1, '000111')
+    BitParser.add(WHITE, 1, "000111")
-    BitParser.add(WHITE, 2, '0111')
+    BitParser.add(WHITE, 2, "0111")
-    BitParser.add(WHITE, 3, '1000')
+    BitParser.add(WHITE, 3, "1000")
-    BitParser.add(WHITE, 4, '1011')
+    BitParser.add(WHITE, 4, "1011")
-    BitParser.add(WHITE, 5, '1100')
+    BitParser.add(WHITE, 5, "1100")
-    BitParser.add(WHITE, 6, '1110')
+    BitParser.add(WHITE, 6, "1110")
-    BitParser.add(WHITE, 7, '1111')
+    BitParser.add(WHITE, 7, "1111")
-    BitParser.add(WHITE, 8, '10011')
+    BitParser.add(WHITE, 8, "10011")
-    BitParser.add(WHITE, 9, '10100')
+    BitParser.add(WHITE, 9, "10100")
-    BitParser.add(WHITE, 10, '00111')
+    BitParser.add(WHITE, 10, "00111")
-    BitParser.add(WHITE, 11, '01000')
+    BitParser.add(WHITE, 11, "01000")
-    BitParser.add(WHITE, 12, '001000')
+    BitParser.add(WHITE, 12, "001000")
-    BitParser.add(WHITE, 13, '000011')
+    BitParser.add(WHITE, 13, "000011")
-    BitParser.add(WHITE, 14, '110100')
+    BitParser.add(WHITE, 14, "110100")
-    BitParser.add(WHITE, 15, '110101')
+    BitParser.add(WHITE, 15, "110101")
-    BitParser.add(WHITE, 16, '101010')
+    BitParser.add(WHITE, 16, "101010")
-    BitParser.add(WHITE, 17, '101011')
+    BitParser.add(WHITE, 17, "101011")
-    BitParser.add(WHITE, 18, '0100111')
+    BitParser.add(WHITE, 18, "0100111")
-    BitParser.add(WHITE, 19, '0001100')
+    BitParser.add(WHITE, 19, "0001100")
-    BitParser.add(WHITE, 20, '0001000')
+    BitParser.add(WHITE, 20, "0001000")
-    BitParser.add(WHITE, 21, '0010111')
+    BitParser.add(WHITE, 21, "0010111")
-    BitParser.add(WHITE, 22, '0000011')
+    BitParser.add(WHITE, 22, "0000011")
-    BitParser.add(WHITE, 23, '0000100')
+    BitParser.add(WHITE, 23, "0000100")
-    BitParser.add(WHITE, 24, '0101000')
+    BitParser.add(WHITE, 24, "0101000")
-    BitParser.add(WHITE, 25, '0101011')
+    BitParser.add(WHITE, 25, "0101011")
-    BitParser.add(WHITE, 26, '0010011')
+    BitParser.add(WHITE, 26, "0010011")
-    BitParser.add(WHITE, 27, '0100100')
+    BitParser.add(WHITE, 27, "0100100")
-    BitParser.add(WHITE, 28, '0011000')
+    BitParser.add(WHITE, 28, "0011000")
-    BitParser.add(WHITE, 29, '00000010')
+    BitParser.add(WHITE, 29, "00000010")
-    BitParser.add(WHITE, 30, '00000011')
+    BitParser.add(WHITE, 30, "00000011")
-    BitParser.add(WHITE, 31, '00011010')
+    BitParser.add(WHITE, 31, "00011010")
-    BitParser.add(WHITE, 32, '00011011')
+    BitParser.add(WHITE, 32, "00011011")
-    BitParser.add(WHITE, 33, '00010010')
+    BitParser.add(WHITE, 33, "00010010")
-    BitParser.add(WHITE, 34, '00010011')
+    BitParser.add(WHITE, 34, "00010011")
-    BitParser.add(WHITE, 35, '00010100')
+    BitParser.add(WHITE, 35, "00010100")
-    BitParser.add(WHITE, 36, '00010101')
+    BitParser.add(WHITE, 36, "00010101")
-    BitParser.add(WHITE, 37, '00010110')
+    BitParser.add(WHITE, 37, "00010110")
-    BitParser.add(WHITE, 38, '00010111')
+    BitParser.add(WHITE, 38, "00010111")
-    BitParser.add(WHITE, 39, '00101000')
+    BitParser.add(WHITE, 39, "00101000")
-    BitParser.add(WHITE, 40, '00101001')
+    BitParser.add(WHITE, 40, "00101001")
-    BitParser.add(WHITE, 41, '00101010')
+    BitParser.add(WHITE, 41, "00101010")
-    BitParser.add(WHITE, 42, '00101011')
+    BitParser.add(WHITE, 42, "00101011")
-    BitParser.add(WHITE, 43, '00101100')
+    BitParser.add(WHITE, 43, "00101100")
-    BitParser.add(WHITE, 44, '00101101')
+    BitParser.add(WHITE, 44, "00101101")
-    BitParser.add(WHITE, 45, '00000100')
+    BitParser.add(WHITE, 45, "00000100")
-    BitParser.add(WHITE, 46, '00000101')
+    BitParser.add(WHITE, 46, "00000101")
-    BitParser.add(WHITE, 47, '00001010')
+    BitParser.add(WHITE, 47, "00001010")
-    BitParser.add(WHITE, 48, '00001011')
+    BitParser.add(WHITE, 48, "00001011")
-    BitParser.add(WHITE, 49, '01010010')
+    BitParser.add(WHITE, 49, "01010010")
-    BitParser.add(WHITE, 50, '01010011')
+    BitParser.add(WHITE, 50, "01010011")
-    BitParser.add(WHITE, 51, '01010100')
+    BitParser.add(WHITE, 51, "01010100")
-    BitParser.add(WHITE, 52, '01010101')
+    BitParser.add(WHITE, 52, "01010101")
-    BitParser.add(WHITE, 53, '00100100')
+    BitParser.add(WHITE, 53, "00100100")
-    BitParser.add(WHITE, 54, '00100101')
+    BitParser.add(WHITE, 54, "00100101")
-    BitParser.add(WHITE, 55, '01011000')
+    BitParser.add(WHITE, 55, "01011000")
-    BitParser.add(WHITE, 56, '01011001')
+    BitParser.add(WHITE, 56, "01011001")
-    BitParser.add(WHITE, 57, '01011010')
+    BitParser.add(WHITE, 57, "01011010")
-    BitParser.add(WHITE, 58, '01011011')
+    BitParser.add(WHITE, 58, "01011011")
-    BitParser.add(WHITE, 59, '01001010')
+    BitParser.add(WHITE, 59, "01001010")
-    BitParser.add(WHITE, 60, '01001011')
+    BitParser.add(WHITE, 60, "01001011")
-    BitParser.add(WHITE, 61, '00110010')
+    BitParser.add(WHITE, 61, "00110010")
-    BitParser.add(WHITE, 62, '00110011')
+    BitParser.add(WHITE, 62, "00110011")
-    BitParser.add(WHITE, 63, '00110100')
+    BitParser.add(WHITE, 63, "00110100")
-    BitParser.add(WHITE, 64, '11011')
+    BitParser.add(WHITE, 64, "11011")
-    BitParser.add(WHITE, 128, '10010')
+    BitParser.add(WHITE, 128, "10010")
-    BitParser.add(WHITE, 192, '010111')
+    BitParser.add(WHITE, 192, "010111")
-    BitParser.add(WHITE, 256, '0110111')
+    BitParser.add(WHITE, 256, "0110111")
-    BitParser.add(WHITE, 320, '00110110')
+    BitParser.add(WHITE, 320, "00110110")
-    BitParser.add(WHITE, 384, '00110111')
+    BitParser.add(WHITE, 384, "00110111")
-    BitParser.add(WHITE, 448, '01100100')
+    BitParser.add(WHITE, 448, "01100100")
-    BitParser.add(WHITE, 512, '01100101')
+    BitParser.add(WHITE, 512, "01100101")
-    BitParser.add(WHITE, 576, '01101000')
+    BitParser.add(WHITE, 576, "01101000")
-    BitParser.add(WHITE, 640, '01100111')
+    BitParser.add(WHITE, 640, "01100111")
-    BitParser.add(WHITE, 704, '011001100')
+    BitParser.add(WHITE, 704, "011001100")
-    BitParser.add(WHITE, 768, '011001101')
+    BitParser.add(WHITE, 768, "011001101")
-    BitParser.add(WHITE, 832, '011010010')
+    BitParser.add(WHITE, 832, "011010010")
-    BitParser.add(WHITE, 896, '011010011')
+    BitParser.add(WHITE, 896, "011010011")
-    BitParser.add(WHITE, 960, '011010100')
+    BitParser.add(WHITE, 960, "011010100")
-    BitParser.add(WHITE, 1024, '011010101')
+    BitParser.add(WHITE, 1024, "011010101")
-    BitParser.add(WHITE, 1088, '011010110')
+    BitParser.add(WHITE, 1088, "011010110")
-    BitParser.add(WHITE, 1152, '011010111')
+    BitParser.add(WHITE, 1152, "011010111")
-    BitParser.add(WHITE, 1216, '011011000')
+    BitParser.add(WHITE, 1216, "011011000")
-    BitParser.add(WHITE, 1280, '011011001')
+    BitParser.add(WHITE, 1280, "011011001")
-    BitParser.add(WHITE, 1344, '011011010')
+    BitParser.add(WHITE, 1344, "011011010")
-    BitParser.add(WHITE, 1408, '011011011')
+    BitParser.add(WHITE, 1408, "011011011")
-    BitParser.add(WHITE, 1472, '010011000')
+    BitParser.add(WHITE, 1472, "010011000")
-    BitParser.add(WHITE, 1536, '010011001')
+    BitParser.add(WHITE, 1536, "010011001")
-    BitParser.add(WHITE, 1600, '010011010')
+    BitParser.add(WHITE, 1600, "010011010")
-    BitParser.add(WHITE, 1664, '011000')
+    BitParser.add(WHITE, 1664, "011000")
-    BitParser.add(WHITE, 1728, '010011011')
+    BitParser.add(WHITE, 1728, "010011011")
-    BitParser.add(WHITE, 1792, '00000001000')
+    BitParser.add(WHITE, 1792, "00000001000")
-    BitParser.add(WHITE, 1856, '00000001100')
+    BitParser.add(WHITE, 1856, "00000001100")
-    BitParser.add(WHITE, 1920, '00000001101')
+    BitParser.add(WHITE, 1920, "00000001101")
-    BitParser.add(WHITE, 1984, '000000010010')
+    BitParser.add(WHITE, 1984, "000000010010")
-    BitParser.add(WHITE, 2048, '000000010011')
+    BitParser.add(WHITE, 2048, "000000010011")
-    BitParser.add(WHITE, 2112, '000000010100')
+    BitParser.add(WHITE, 2112, "000000010100")
-    BitParser.add(WHITE, 2176, '000000010101')
+    BitParser.add(WHITE, 2176, "000000010101")
-    BitParser.add(WHITE, 2240, '000000010110')
+    BitParser.add(WHITE, 2240, "000000010110")
-    BitParser.add(WHITE, 2304, '000000010111')
+    BitParser.add(WHITE, 2304, "000000010111")
-    BitParser.add(WHITE, 2368, '000000011100')
+    BitParser.add(WHITE, 2368, "000000011100")
-    BitParser.add(WHITE, 2432, '000000011101')
+    BitParser.add(WHITE, 2432, "000000011101")
-    BitParser.add(WHITE, 2496, '000000011110')
+    BitParser.add(WHITE, 2496, "000000011110")
-    BitParser.add(WHITE, 2560, '000000011111')
+    BitParser.add(WHITE, 2560, "000000011111")
    BLACK = [None, None]
-    BitParser.add(BLACK, 0, '0000110111')
+    BitParser.add(BLACK, 0, "0000110111")
-    BitParser.add(BLACK, 1, '010')
+    BitParser.add(BLACK, 1, "010")
-    BitParser.add(BLACK, 2, '11')
+    BitParser.add(BLACK, 2, "11")
-    BitParser.add(BLACK, 3, '10')
+    BitParser.add(BLACK, 3, "10")
-    BitParser.add(BLACK, 4, '011')
+    BitParser.add(BLACK, 4, "011")
-    BitParser.add(BLACK, 5, '0011')
+    BitParser.add(BLACK, 5, "0011")
-    BitParser.add(BLACK, 6, '0010')
+    BitParser.add(BLACK, 6, "0010")
-    BitParser.add(BLACK, 7, '00011')
+    BitParser.add(BLACK, 7, "00011")
-    BitParser.add(BLACK, 8, '000101')
+    BitParser.add(BLACK, 8, "000101")
-    BitParser.add(BLACK, 9, '000100')
+    BitParser.add(BLACK, 9, "000100")
-    BitParser.add(BLACK, 10, '0000100')
+    BitParser.add(BLACK, 10, "0000100")
-    BitParser.add(BLACK, 11, '0000101')
+    BitParser.add(BLACK, 11, "0000101")
-    BitParser.add(BLACK, 12, '0000111')
+    BitParser.add(BLACK, 12, "0000111")
-    BitParser.add(BLACK, 13, '00000100')
+    BitParser.add(BLACK, 13, "00000100")
-    BitParser.add(BLACK, 14, '00000111')
+    BitParser.add(BLACK, 14, "00000111")
-    BitParser.add(BLACK, 15, '000011000')
+    BitParser.add(BLACK, 15, "000011000")
-    BitParser.add(BLACK, 16, '0000010111')
+    BitParser.add(BLACK, 16, "0000010111")
-    BitParser.add(BLACK, 17, '0000011000')
+    BitParser.add(BLACK, 17, "0000011000")
-    BitParser.add(BLACK, 18, '0000001000')
+    BitParser.add(BLACK, 18, "0000001000")
-    BitParser.add(BLACK, 19, '00001100111')
+    BitParser.add(BLACK, 19, "00001100111")
-    BitParser.add(BLACK, 20, '00001101000')
+    BitParser.add(BLACK, 20, "00001101000")
-    BitParser.add(BLACK, 21, '00001101100')
+    BitParser.add(BLACK, 21, "00001101100")
-    BitParser.add(BLACK, 22, '00000110111')
+    BitParser.add(BLACK, 22, "00000110111")
-    BitParser.add(BLACK, 23, '00000101000')
+    BitParser.add(BLACK, 23, "00000101000")
-    BitParser.add(BLACK, 24, '00000010111')
+    BitParser.add(BLACK, 24, "00000010111")
-    BitParser.add(BLACK, 25, '00000011000')
+    BitParser.add(BLACK, 25, "00000011000")
-    BitParser.add(BLACK, 26, '000011001010')
+    BitParser.add(BLACK, 26, "000011001010")
-    BitParser.add(BLACK, 27, '000011001011')
+    BitParser.add(BLACK, 27, "000011001011")
-    BitParser.add(BLACK, 28, '000011001100')
+    BitParser.add(BLACK, 28, "000011001100")
-    BitParser.add(BLACK, 29, '000011001101')
+    BitParser.add(BLACK, 29, "000011001101")
-    BitParser.add(BLACK, 30, '000001101000')
+    BitParser.add(BLACK, 30, "000001101000")
-    BitParser.add(BLACK, 31, '000001101001')
+    BitParser.add(BLACK, 31, "000001101001")
-    BitParser.add(BLACK, 32, '000001101010')
+    BitParser.add(BLACK, 32, "000001101010")
-    BitParser.add(BLACK, 33, '000001101011')
+    BitParser.add(BLACK, 33, "000001101011")
-    BitParser.add(BLACK, 34, '000011010010')
+    BitParser.add(BLACK, 34, "000011010010")
-    BitParser.add(BLACK, 35, '000011010011')
+    BitParser.add(BLACK, 35, "000011010011")
-    BitParser.add(BLACK, 36, '000011010100')
+    BitParser.add(BLACK, 36, "000011010100")
-    BitParser.add(BLACK, 37, '000011010101')
+    BitParser.add(BLACK, 37, "000011010101")
-    BitParser.add(BLACK, 38, '000011010110')
+    BitParser.add(BLACK, 38, "000011010110")
-    BitParser.add(BLACK, 39, '000011010111')
+    BitParser.add(BLACK, 39, "000011010111")
-    BitParser.add(BLACK, 40, '000001101100')
+    BitParser.add(BLACK, 40, "000001101100")
-    BitParser.add(BLACK, 41, '000001101101')
+    BitParser.add(BLACK, 41, "000001101101")
-    BitParser.add(BLACK, 42, '000011011010')
+    BitParser.add(BLACK, 42, "000011011010")
-    BitParser.add(BLACK, 43, '000011011011')
+    BitParser.add(BLACK, 43, "000011011011")
-    BitParser.add(BLACK, 44, '000001010100')
+    BitParser.add(BLACK, 44, "000001010100")
-    BitParser.add(BLACK, 45, '000001010101')
+    BitParser.add(BLACK, 45, "000001010101")
-    BitParser.add(BLACK, 46, '000001010110')
+    BitParser.add(BLACK, 46, "000001010110")
-    BitParser.add(BLACK, 47, '000001010111')
+    BitParser.add(BLACK, 47, "000001010111")
-    BitParser.add(BLACK, 48, '000001100100')
+    BitParser.add(BLACK, 48, "000001100100")
-    BitParser.add(BLACK, 49, '000001100101')
+    BitParser.add(BLACK, 49, "000001100101")
-    BitParser.add(BLACK, 50, '000001010010')
+    BitParser.add(BLACK, 50, "000001010010")
-    BitParser.add(BLACK, 51, '000001010011')
+    BitParser.add(BLACK, 51, "000001010011")
-    BitParser.add(BLACK, 52, '000000100100')
+    BitParser.add(BLACK, 52, "000000100100")
-    BitParser.add(BLACK, 53, '000000110111')
+    BitParser.add(BLACK, 53, "000000110111")
-    BitParser.add(BLACK, 54, '000000111000')
+    BitParser.add(BLACK, 54, "000000111000")
-    BitParser.add(BLACK, 55, '000000100111')
+    BitParser.add(BLACK, 55, "000000100111")
-    BitParser.add(BLACK, 56, '000000101000')
+    BitParser.add(BLACK, 56, "000000101000")
-    BitParser.add(BLACK, 57, '000001011000')
+    BitParser.add(BLACK, 57, "000001011000")
-    BitParser.add(BLACK, 58, '000001011001')
+    BitParser.add(BLACK, 58, "000001011001")
-    BitParser.add(BLACK, 59, '000000101011')
+    BitParser.add(BLACK, 59, "000000101011")
-    BitParser.add(BLACK, 60, '000000101100')
+    BitParser.add(BLACK, 60, "000000101100")
-    BitParser.add(BLACK, 61, '000001011010')
+    BitParser.add(BLACK, 61, "000001011010")
-    BitParser.add(BLACK, 62, '000001100110')
+    BitParser.add(BLACK, 62, "000001100110")
-    BitParser.add(BLACK, 63, '000001100111')
+    BitParser.add(BLACK, 63, "000001100111")
-    BitParser.add(BLACK, 64, '0000001111')
+    BitParser.add(BLACK, 64, "0000001111")
-    BitParser.add(BLACK, 128, '000011001000')
+    BitParser.add(BLACK, 128, "000011001000")
-    BitParser.add(BLACK, 192, '000011001001')
+    BitParser.add(BLACK, 192, "000011001001")
-    BitParser.add(BLACK, 256, '000001011011')
+    BitParser.add(BLACK, 256, "000001011011")
-    BitParser.add(BLACK, 320, '000000110011')
+    BitParser.add(BLACK, 320, "000000110011")
-    BitParser.add(BLACK, 384, '000000110100')
+    BitParser.add(BLACK, 384, "000000110100")
-    BitParser.add(BLACK, 448, '000000110101')
+    BitParser.add(BLACK, 448, "000000110101")
-    BitParser.add(BLACK, 512, '0000001101100')
+    BitParser.add(BLACK, 512, "0000001101100")
-    BitParser.add(BLACK, 576, '0000001101101')
+    BitParser.add(BLACK, 576, "0000001101101")
-    BitParser.add(BLACK, 640, '0000001001010')
+    BitParser.add(BLACK, 640, "0000001001010")
-    BitParser.add(BLACK, 704, '0000001001011')
+    BitParser.add(BLACK, 704, "0000001001011")
-    BitParser.add(BLACK, 768, '0000001001100')
+    BitParser.add(BLACK, 768, "0000001001100")
-    BitParser.add(BLACK, 832, '0000001001101')
+    BitParser.add(BLACK, 832, "0000001001101")
-    BitParser.add(BLACK, 896, '0000001110010')
+    BitParser.add(BLACK, 896, "0000001110010")
-    BitParser.add(BLACK, 960, '0000001110011')
+    BitParser.add(BLACK, 960, "0000001110011")
-    BitParser.add(BLACK, 1024, '0000001110100')
+    BitParser.add(BLACK, 1024, "0000001110100")
-    BitParser.add(BLACK, 1088, '0000001110101')
+    BitParser.add(BLACK, 1088, "0000001110101")
-    BitParser.add(BLACK, 1152, '0000001110110')
+    BitParser.add(BLACK, 1152, "0000001110110")
-    BitParser.add(BLACK, 1216, '0000001110111')
+    BitParser.add(BLACK, 1216, "0000001110111")
-    BitParser.add(BLACK, 1280, '0000001010010')
+    BitParser.add(BLACK, 1280, "0000001010010")
-    BitParser.add(BLACK, 1344, '0000001010011')
+    BitParser.add(BLACK, 1344, "0000001010011")
-    BitParser.add(BLACK, 1408, '0000001010100')
+    BitParser.add(BLACK, 1408, "0000001010100")
-    BitParser.add(BLACK, 1472, '0000001010101')
+    BitParser.add(BLACK, 1472, "0000001010101")
-    BitParser.add(BLACK, 1536, '0000001011010')
+    BitParser.add(BLACK, 1536, "0000001011010")
-    BitParser.add(BLACK, 1600, '0000001011011')
+    BitParser.add(BLACK, 1600, "0000001011011")
-    BitParser.add(BLACK, 1664, '0000001100100')
+    BitParser.add(BLACK, 1664, "0000001100100")
-    BitParser.add(BLACK, 1728, '0000001100101')
+    BitParser.add(BLACK, 1728, "0000001100101")
-    BitParser.add(BLACK, 1792, '00000001000')
+    BitParser.add(BLACK, 1792, "00000001000")
-    BitParser.add(BLACK, 1856, '00000001100')
+    BitParser.add(BLACK, 1856, "00000001100")
-    BitParser.add(BLACK, 1920, '00000001101')
+    BitParser.add(BLACK, 1920, "00000001101")
-    BitParser.add(BLACK, 1984, '000000010010')
+    BitParser.add(BLACK, 1984, "000000010010")
-    BitParser.add(BLACK, 2048, '000000010011')
+    BitParser.add(BLACK, 2048, "000000010011")
-    BitParser.add(BLACK, 2112, '000000010100')
+    BitParser.add(BLACK, 2112, "000000010100")
-    BitParser.add(BLACK, 2176, '000000010101')
+    BitParser.add(BLACK, 2176, "000000010101")
-    BitParser.add(BLACK, 2240, '000000010110')
+    BitParser.add(BLACK, 2240, "000000010110")
-    BitParser.add(BLACK, 2304, '000000010111')
+    BitParser.add(BLACK, 2304, "000000010111")
-    BitParser.add(BLACK, 2368, '000000011100')
+    BitParser.add(BLACK, 2368, "000000011100")
-    BitParser.add(BLACK, 2432, '000000011101')
+    BitParser.add(BLACK, 2432, "000000011101")
-    BitParser.add(BLACK, 2496, '000000011110')
+    BitParser.add(BLACK, 2496, "000000011110")
-    BitParser.add(BLACK, 2560, '000000011111')
+    BitParser.add(BLACK, 2560, "000000011111")
    UNCOMPRESSED = [None, None]
-    BitParser.add(UNCOMPRESSED, '1', '1')
+    BitParser.add(UNCOMPRESSED, "1", "1")
-    BitParser.add(UNCOMPRESSED, '01', '01')
+    BitParser.add(UNCOMPRESSED, "01", "01")
-    BitParser.add(UNCOMPRESSED, '001', '001')
+    BitParser.add(UNCOMPRESSED, "001", "001")
-    BitParser.add(UNCOMPRESSED, '0001', '0001')
+    BitParser.add(UNCOMPRESSED, "0001", "0001")
-    BitParser.add(UNCOMPRESSED, '00001', '00001')
+    BitParser.add(UNCOMPRESSED, "00001", "00001")
-    BitParser.add(UNCOMPRESSED, '00000', '000001')
+    BitParser.add(UNCOMPRESSED, "00000", "000001")
-    BitParser.add(UNCOMPRESSED, 'T00', '00000011')
+    BitParser.add(UNCOMPRESSED, "T00", "00000011")
-    BitParser.add(UNCOMPRESSED, 'T10', '00000010')
+    BitParser.add(UNCOMPRESSED, "T10", "00000010")
-    BitParser.add(UNCOMPRESSED, 'T000', '000000011')
+    BitParser.add(UNCOMPRESSED, "T000", "000000011")
-    BitParser.add(UNCOMPRESSED, 'T100', '000000010')
+    BitParser.add(UNCOMPRESSED, "T100", "000000010")
-    BitParser.add(UNCOMPRESSED, 'T0000', '0000000011')
+    BitParser.add(UNCOMPRESSED, "T0000", "0000000011")
-    BitParser.add(UNCOMPRESSED, 'T1000', '0000000010')
+    BitParser.add(UNCOMPRESSED, "T1000", "0000000010")
-    BitParser.add(UNCOMPRESSED, 'T00000', '00000000011')
+    BitParser.add(UNCOMPRESSED, "T00000", "00000000011")
-    BitParser.add(UNCOMPRESSED, 'T10000', '00000000010')
+    BitParser.add(UNCOMPRESSED, "T10000", "00000000010")
    class EOFB(Exception):
        pass
@ -352,21 +362,21 @@ class CCITTG4Parser(BitParser):
        return
    def _parse_mode(self, mode: object) -> BitParserState:
-        if mode == 'p':
+        if mode == "p":
            self._do_pass()
            self._flush_line()
            return self.MODE
-        elif mode == 'h':
+        elif mode == "h":
            self._n1 = 0
            self._accept = self._parse_horiz1
            if self._color:
                return self.WHITE
            else:
                return self.BLACK
-        elif mode == 'u':
+        elif mode == "u":
            self._accept = self._parse_uncompressed
            return self.UNCOMPRESSED
-        elif mode == 'e':
+        elif mode == "e":
            raise self.EOFB
        elif isinstance(mode, int):
            self._do_vertical(mode)
@ -381,7 +391,7 @@ class CCITTG4Parser(BitParser):
        self._n1 += n
        if n < 64:
            self._n2 = 0
-            self._color = 1-self._color
+            self._color = 1 - self._color
            self._accept = self._parse_horiz2
        if self._color:
            return self.WHITE
@ -393,7 +403,7 @@ class CCITTG4Parser(BitParser):
            raise self.InvalidData
        self._n2 += n
        if n < 64:
-            self._color = 1-self._color
+            self._color = 1 - self._color
            self._accept = self._parse_mode
            self._do_horizontal(self._n1, self._n2)
            self._flush_line()
@ -406,7 +416,7 @@ class CCITTG4Parser(BitParser):
    def _parse_uncompressed(self, bits: Optional[str]) -> BitParserState:
        if not bits:
            raise self.InvalidData
-        if bits.startswith('T'):
+        if bits.startswith("T"):
            self._accept = self._parse_mode
            self._color = int(bits[1])
            self._do_uncompressed(bits[2:])
@ -416,33 +426,37 @@ class CCITTG4Parser(BitParser):
            return self.UNCOMPRESSED
    def _get_bits(self) -> str:
-        return ''.join(str(b) for b in self._curline[:self._curpos])
+        return "".join(str(b) for b in self._curline[: self._curpos])
    def _get_refline(self, i: int) -> str:
        if i < 0:
-            return '[]'+''.join(str(b) for b in self._refline)
+            return "[]" + "".join(str(b) for b in self._refline)
        elif len(self._refline) <= i:
-            return ''.join(str(b) for b in self._refline)+'[]'
+            return "".join(str(b) for b in self._refline) + "[]"
        else:
-            return (''.join(str(b) for b in self._refline[:i]) +
+            return (
-                    '['+str(self._refline[i])+']' +
+                "".join(str(b) for b in self._refline[:i])
-                    ''.join(str(b) for b in self._refline[i+1:]))
+                + "["
                + str(self._refline[i])
                + "]"
                + "".join(str(b) for b in self._refline[i + 1 :])
            )
    def reset(self) -> None:
        self._y = 0
-        self._curline = array.array('b', [1]*self.width)
+        self._curline = array.array("b", [1] * self.width)
        self._reset_line()
        self._accept = self._parse_mode
        self._state = self.MODE
        return
    def output_line(self, y: int, bits: Sequence[int]) -> None:
-        print(y, ''.join(str(b) for b in bits))
+        print(y, "".join(str(b) for b in bits))
        return
    def _reset_line(self) -> None:
        self._refline = self._curline
-        self._curline = array.array('b', [1]*self.width)
+        self._curline = array.array("b", [1] * self.width)
        self._curpos = -1
        self._color = 1
        return
@ -457,15 +471,17 @@ class CCITTG4Parser(BitParser):
        return
    def _do_vertical(self, dx: int) -> None:
-        x1 = self._curpos+1
+        x1 = self._curpos + 1
        while 1:
            if x1 == 0:
-                if (self._color == 1 and self._refline[x1] != self._color):
+                if self._color == 1 and self._refline[x1] != self._color:
                    break
            elif x1 == len(self._refline):
                break
-            elif (self._refline[x1-1] == self._color and
+            elif (
-                  self._refline[x1] != self._color):
+                self._refline[x1 - 1] == self._color
                and self._refline[x1] != self._color
            ):
                break
            x1 += 1
        x1 += dx
@ -478,29 +494,33 @@ class CCITTG4Parser(BitParser):
            for x in range(x0, x1):
                self._curline[x] = self._color
        self._curpos = x1
-        self._color = 1-self._color
+        self._color = 1 - self._color
        return
    def _do_pass(self) -> None:
-        x1 = self._curpos+1
+        x1 = self._curpos + 1
        while 1:
            if x1 == 0:
-                if (self._color == 1 and self._refline[x1] != self._color):
+                if self._color == 1 and self._refline[x1] != self._color:
                    break
            elif x1 == len(self._refline):
                break
-            elif (self._refline[x1-1] == self._color and
+            elif (
-                  self._refline[x1] != self._color):
+                self._refline[x1 - 1] == self._color
                and self._refline[x1] != self._color
            ):
                break
            x1 += 1
        while 1:
            if x1 == 0:
-                if (self._color == 0 and self._refline[x1] == self._color):
+                if self._color == 0 and self._refline[x1] == self._color:
                    break
            elif x1 == len(self._refline):
                break
-            elif (self._refline[x1-1] != self._color and
+            elif (
-                  self._refline[x1] == self._color):
+                self._refline[x1 - 1] != self._color
                and self._refline[x1] == self._color
            ):
                break
            x1 += 1
        for x in range(self._curpos, x1):
@ -520,7 +540,7 @@ class CCITTG4Parser(BitParser):
        for _ in range(n2):
            if len(self._curline) <= x:
                break
-            self._curline[x] = 1-self._color
+            self._curline[x] = 1 - self._color
            x += 1
        self._curpos = x
        return
@ -534,34 +554,34 @@ class CCITTG4Parser(BitParser):
 class CCITTFaxDecoder(CCITTG4Parser):
-
+    def __init__(
-    def __init__(self, width: int, bytealign: bool = False,
+        self, width: int, bytealign: bool = False, reversed: bool = False
-                 reversed: bool = False) -> None:
+    ) -> None:
        CCITTG4Parser.__init__(self, width, bytealign=bytealign)
        self.reversed = reversed
-        self._buf = b''
+        self._buf = b""
        return
    def close(self) -> bytes:
        return self._buf
    def output_line(self, y: int, bits: Sequence[int]) -> None:
-        arr = array.array('B', [0]*((len(bits)+7)//8))
+        arr = array.array("B", [0] * ((len(bits) + 7) // 8))
        if self.reversed:
-            bits = [1-b for b in bits]
+            bits = [1 - b for b in bits]
        for (i, b) in enumerate(bits):
            if b:
-                arr[i//8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8]
+                arr[i // 8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8]
        self._buf += arr.tobytes()
        return
 def ccittfaxdecode(data: bytes, params: Dict[str, object]) -> bytes:
-    K = params.get('K')
+    K = params.get("K")
    if K == -1:
-        cols = cast(int, params.get('Columns'))
+        cols = cast(int, params.get("Columns"))
-        bytealign = cast(bool, params.get('EncodedByteAlign'))
+        bytealign = cast(bool, params.get("EncodedByteAlign"))
-        reversed = cast(bool, params.get('BlackIs1'))
+        reversed = cast(bool, params.get("BlackIs1"))
        parser = CCITTFaxDecoder(cols, bytealign=bytealign, reversed=reversed)
    else:
        raise ValueError(K)
@ -573,12 +593,14 @@ def ccittfaxdecode(data: bytes, params: Dict[str, object]) -> bytes:
 def main(argv: List[str]) -> None:
    if not argv[1:]:
        import unittest
        unittest.main()
        return
    class Parser(CCITTG4Parser):
        def __init__(self, width: int, bytealign: bool = False) -> None:
            import pygame  # type: ignore[import]
            CCITTG4Parser.__init__(self, width, bytealign=bytealign)
            self.img = pygame.Surface((self.width, 1000))
            return
@ -593,11 +615,13 @@ def main(argv: List[str]) -> None:
        def close(self) -> None:
            import pygame
-            pygame.image.save(self.img, 'out.bmp')
+
            pygame.image.save(self.img, "out.bmp")
            return
    for path in argv[1:]:
-        fp = open(path, 'rb')
+        fp = open(path, "rb")
-        (_, _, k, w, h, _) = path.split('.')
+        (_, _, k, w, h, _) = path.split(".")
        parser = Parser(int(w))
        parser.feedbytes(fp.read())
        parser.close()
--- a/pdfminer/cmapdb.py
+++ b/pdfminer/cmapdb.py
@ -16,8 +16,20 @@ import os.path
 import pickle as pickle
 import struct
 import sys
-from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List,
+from typing import (
-                    MutableMapping, Optional, TextIO, Tuple, Union, cast)
+    Any,
    BinaryIO,
    Dict,
    Iterable,
    Iterator,
    List,
    MutableMapping,
    Optional,
    TextIO,
    Tuple,
    Union,
    cast,
 )
 from .encodingdb import name2unicode
 from .psparser import KWD
@ -45,7 +57,7 @@ class CMapBase:
        self.attrs: MutableMapping[str, object] = kwargs.copy()
    def is_vertical(self) -> bool:
-        return self.attrs.get('WMode', 0) != 0
+        return self.attrs.get("WMode", 0) != 0
    def set_attr(self, k: str, v: object) -> None:
        self.attrs[k] = v
@ -53,8 +65,7 @@ class CMapBase:
    def add_code2cid(self, code: str, cid: int) -> None:
        pass
-    def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]
+    def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]) -> None:
                       ) -> None:
        pass
    def use_cmap(self, cmap: "CMapBase") -> None:
@ -65,13 +76,12 @@ class CMapBase:
 class CMap(CMapBase):
    def __init__(self, **kwargs: Union[str, int]) -> None:
        CMapBase.__init__(self, **kwargs)
        self.code2cid: Dict[int, object] = {}
    def __repr__(self) -> str:
-        return '<CMap: %s>' % self.attrs.get('CMapName')
+        return "<CMap: %s>" % self.attrs.get("CMapName")
    def use_cmap(self, cmap: CMapBase) -> None:
        assert isinstance(cmap, CMap), str(type(cmap))
@ -84,10 +94,11 @@ class CMap(CMapBase):
                    copy(d, v)
                else:
                    dst[k] = v
        copy(self.code2cid, cmap.code2cid)
    def decode(self, code: bytes) -> Iterator[int]:
-        log.debug('decode: %r, %r', self, code)
+        log.debug("decode: %r, %r", self, code)
        d = self.code2cid
        for i in iter(code):
            if i in d:
@ -100,70 +111,70 @@ class CMap(CMapBase):
            else:
                d = self.code2cid
-    def dump(self, out: TextIO = sys.stdout,
+    def dump(
-             code2cid: Optional[Dict[int, object]] = None,
+        self,
-             code: Tuple[int, ...] = ()) -> None:
+        out: TextIO = sys.stdout,
        code2cid: Optional[Dict[int, object]] = None,
        code: Tuple[int, ...] = (),
    ) -> None:
        if code2cid is None:
            code2cid = self.code2cid
            code = ()
        for (k, v) in sorted(code2cid.items()):
-            c = code+(k,)
+            c = code + (k,)
            if isinstance(v, int):
-                out.write('code %r = cid %d\n' % (c, v))
+                out.write("code %r = cid %d\n" % (c, v))
            else:
                self.dump(out=out, code2cid=cast(Dict[int, object], v), code=c)
 class IdentityCMap(CMapBase):
    def decode(self, code: bytes) -> Tuple[int, ...]:
-        n = len(code)//2
+        n = len(code) // 2
        if n:
-            return struct.unpack('>%dH' % n, code)
+            return struct.unpack(">%dH" % n, code)
        else:
            return ()
 class IdentityCMapByte(IdentityCMap):
    def decode(self, code: bytes) -> Tuple[int, ...]:
        n = len(code)
        if n:
-            return struct.unpack('>%dB' % n, code)
+            return struct.unpack(">%dB" % n, code)
        else:
            return ()
 class UnicodeMap(CMapBase):
    def __init__(self, **kwargs: Union[str, int]) -> None:
        CMapBase.__init__(self, **kwargs)
        self.cid2unichr: Dict[int, str] = {}
    def __repr__(self) -> str:
-        return '<UnicodeMap: %s>' % self.attrs.get('CMapName')
+        return "<UnicodeMap: %s>" % self.attrs.get("CMapName")
    def get_unichr(self, cid: int) -> str:
-        log.debug('get_unichr: %r, %r', self, cid)
+        log.debug("get_unichr: %r, %r", self, cid)
        return self.cid2unichr[cid]
    def dump(self, out: TextIO = sys.stdout) -> None:
        for (k, v) in sorted(self.cid2unichr.items()):
-            out.write('cid %d = unicode %r\n' % (k, v))
+            out.write("cid %d = unicode %r\n" % (k, v))
 class IdentityUnicodeMap(UnicodeMap):
    def get_unichr(self, cid: int) -> str:
        """Interpret character id as unicode codepoint"""
-        log.debug('get_unichr: %r, %r', self, cid)
+        log.debug("get_unichr: %r, %r", self, cid)
        return chr(cid)
 class FileCMap(CMap):
    def add_code2cid(self, code: str, cid: int) -> None:
-        assert isinstance(code, str) and isinstance(cid, int),\
+        assert isinstance(code, str) and isinstance(cid, int), str(
-            str((type(code), type(cid)))
+            (type(code), type(cid))
        )
        d = self.code2cid
        for c in code[:-1]:
            ci = ord(c)
@ -178,9 +189,7 @@ class FileCMap(CMap):
 class FileUnicodeMap(UnicodeMap):
-
+    def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]) -> None:
    def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]
                       ) -> None:
        assert isinstance(cid, int), str(type(cid))
        if isinstance(code, PSLiteral):
            # Interpret as an Adobe glyph name.
@ -188,7 +197,7 @@ class FileUnicodeMap(UnicodeMap):
            self.cid2unichr[cid] = name2unicode(code.name)
        elif isinstance(code, bytes):
            # Interpret as UTF-16BE.
-            self.cid2unichr[cid] = code.decode('UTF-16BE', 'ignore')
+            self.cid2unichr[cid] = code.decode("UTF-16BE", "ignore")
        elif isinstance(code, int):
            self.cid2unichr[cid] = chr(code)
        else:
@ -196,21 +205,19 @@ class FileUnicodeMap(UnicodeMap):
 class PyCMap(CMap):
    def __init__(self, name: str, module: Any) -> None:
        super().__init__(CMapName=name)
        self.code2cid = module.CODE2CID
        if module.IS_VERTICAL:
-            self.attrs['WMode'] = 1
+            self.attrs["WMode"] = 1
 class PyUnicodeMap(UnicodeMap):
    def __init__(self, name: str, module: Any, vertical: bool) -> None:
        super().__init__(CMapName=name)
        if vertical:
            self.cid2unichr = module.CID2UNICHR_V
-            self.attrs['WMode'] = 1
+            self.attrs["WMode"] = 1
        else:
            self.cid2unichr = module.CID2UNICHR_H
@ -226,10 +233,12 @@ class CMapDB:
    @classmethod
    def _load_data(cls, name: str) -> Any:
        name = name.replace("\0", "")
-        filename = '%s.pickle.gz' % name
+        filename = "%s.pickle.gz" % name
-        log.debug('loading: %r', name)
+        log.debug("loading: %r", name)
-        cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'),
+        cmap_paths = (
-                      os.path.join(os.path.dirname(__file__), 'cmap'),)
+            os.environ.get("CMAP_PATH", "/usr/share/pdfminer/"),
            os.path.join(os.path.dirname(__file__), "cmap"),
        )
        for directory in cmap_paths:
            path = os.path.join(directory, filename)
            if os.path.exists(path):
@ -243,13 +252,13 @@ class CMapDB:
    @classmethod
    def get_cmap(cls, name: str) -> CMapBase:
-        if name == 'Identity-H':
+        if name == "Identity-H":
            return IdentityCMap(WMode=0)
-        elif name == 'Identity-V':
+        elif name == "Identity-V":
            return IdentityCMap(WMode=1)
-        elif name == 'OneByteIdentityH':
+        elif name == "OneByteIdentityH":
            return IdentityCMapByte(WMode=0)
-        elif name == 'OneByteIdentityV':
+        elif name == "OneByteIdentityV":
            return IdentityCMapByte(WMode=1)
        try:
            return cls._cmap_cache[name]
@ -265,14 +274,12 @@ class CMapDB:
            return cls._umap_cache[name][vertical]
        except KeyError:
            pass
-        data = cls._load_data('to-unicode-%s' % name)
+        data = cls._load_data("to-unicode-%s" % name)
-        cls._umap_cache[name] = [PyUnicodeMap(name, data, v)
+        cls._umap_cache[name] = [PyUnicodeMap(name, data, v) for v in (False, True)]
                                 for v in (False, True)]
        return cls._umap_cache[name][vertical]
 class CMapParser(PSStackParser[PSKeyword]):
    def __init__(self, cmap: CMapBase, fp: BinaryIO) -> None:
        PSStackParser.__init__(self, fp)
        self.cmap = cmap
@ -287,22 +294,22 @@ class CMapParser(PSStackParser[PSKeyword]):
            pass
        return
-    KEYWORD_BEGINCMAP = KWD(b'begincmap')
+    KEYWORD_BEGINCMAP = KWD(b"begincmap")
-    KEYWORD_ENDCMAP = KWD(b'endcmap')
+    KEYWORD_ENDCMAP = KWD(b"endcmap")
-    KEYWORD_USECMAP = KWD(b'usecmap')
+    KEYWORD_USECMAP = KWD(b"usecmap")
-    KEYWORD_DEF = KWD(b'def')
+    KEYWORD_DEF = KWD(b"def")
-    KEYWORD_BEGINCODESPACERANGE = KWD(b'begincodespacerange')
+    KEYWORD_BEGINCODESPACERANGE = KWD(b"begincodespacerange")
-    KEYWORD_ENDCODESPACERANGE = KWD(b'endcodespacerange')
+    KEYWORD_ENDCODESPACERANGE = KWD(b"endcodespacerange")
-    KEYWORD_BEGINCIDRANGE = KWD(b'begincidrange')
+    KEYWORD_BEGINCIDRANGE = KWD(b"begincidrange")
-    KEYWORD_ENDCIDRANGE = KWD(b'endcidrange')
+    KEYWORD_ENDCIDRANGE = KWD(b"endcidrange")
-    KEYWORD_BEGINCIDCHAR = KWD(b'begincidchar')
+    KEYWORD_BEGINCIDCHAR = KWD(b"begincidchar")
-    KEYWORD_ENDCIDCHAR = KWD(b'endcidchar')
+    KEYWORD_ENDCIDCHAR = KWD(b"endcidchar")
-    KEYWORD_BEGINBFRANGE = KWD(b'beginbfrange')
+    KEYWORD_BEGINBFRANGE = KWD(b"beginbfrange")
-    KEYWORD_ENDBFRANGE = KWD(b'endbfrange')
+    KEYWORD_ENDBFRANGE = KWD(b"endbfrange")
-    KEYWORD_BEGINBFCHAR = KWD(b'beginbfchar')
+    KEYWORD_BEGINBFCHAR = KWD(b"beginbfchar")
-    KEYWORD_ENDBFCHAR = KWD(b'endbfchar')
+    KEYWORD_ENDBFCHAR = KWD(b"endbfchar")
-    KEYWORD_BEGINNOTDEFRANGE = KWD(b'beginnotdefrange')
+    KEYWORD_BEGINNOTDEFRANGE = KWD(b"beginnotdefrange")
-    KEYWORD_ENDNOTDEFRANGE = KWD(b'endnotdefrange')
+    KEYWORD_ENDNOTDEFRANGE = KWD(b"endnotdefrange")
    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        if token is self.KEYWORD_BEGINCMAP:
@ -346,8 +353,12 @@ class CMapParser(PSStackParser[PSKeyword]):
        if token is self.KEYWORD_ENDCIDRANGE:
            objs = [obj for (__, obj) in self.popall()]
            for (s, e, cid) in choplist(3, objs):
-                if (not isinstance(s, bytes) or not isinstance(e, bytes) or
+                if (
-                   not isinstance(cid, int) or len(s) != len(e)):
+                    not isinstance(s, bytes)
                    or not isinstance(e, bytes)
                    or not isinstance(cid, int)
                    or len(s) != len(e)
                ):
                    continue
                sprefix = s[:-4]
                eprefix = e[:-4]
@ -358,9 +369,9 @@ class CMapParser(PSStackParser[PSKeyword]):
                s1 = nunpack(svar)
                e1 = nunpack(evar)
                vlen = len(svar)
-                for i in range(e1-s1+1):
+                for i in range(e1 - s1 + 1):
-                    x = sprefix+struct.pack('>L', s1+i)[-vlen:]
+                    x = sprefix + struct.pack(">L", s1 + i)[-vlen:]
-                    self.cmap.add_cid2unichr(cid+i, x)
+                    self.cmap.add_cid2unichr(cid + i, x)
            return
        if token is self.KEYWORD_BEGINCIDCHAR:
@ -379,23 +390,26 @@ class CMapParser(PSStackParser[PSKeyword]):
        if token is self.KEYWORD_ENDBFRANGE:
            objs = [obj for (__, obj) in self.popall()]
            for (s, e, code) in choplist(3, objs):
-                if (not isinstance(s, bytes) or not isinstance(e, bytes) or
+                if (
-                   len(s) != len(e)):
+                    not isinstance(s, bytes)
                    or not isinstance(e, bytes)
                    or len(s) != len(e)
                ):
                    continue
                s1 = nunpack(s)
                e1 = nunpack(e)
                if isinstance(code, list):
-                    for i in range(e1-s1+1):
+                    for i in range(e1 - s1 + 1):
-                        self.cmap.add_cid2unichr(s1+i, code[i])
+                        self.cmap.add_cid2unichr(s1 + i, code[i])
                else:
                    assert isinstance(code, bytes)
                    var = code[-4:]
                    base = nunpack(var)
                    prefix = code[:-4]
                    vlen = len(var)
-                    for i in range(e1-s1+1):
+                    for i in range(e1 - s1 + 1):
-                        x = prefix+struct.pack('>L', base+i)[-vlen:]
+                        x = prefix + struct.pack(">L", base + i)[-vlen:]
-                        self.cmap.add_cid2unichr(s1+i, x)
+                        self.cmap.add_cid2unichr(s1 + i, x)
            return
        if token is self.KEYWORD_BEGINBFCHAR:
@ -422,7 +436,7 @@ class CMapParser(PSStackParser[PSKeyword]):
 def main(argv: List[str]) -> None:
    args = argv[1:]
    for fname in args:
-        fp = open(fname, 'rb')
+        fp = open(fname, "rb")
        cmap = FileUnicodeMap()
        CMapParser(cmap, fp).run()
        fp.close()
@ -430,5 +444,5 @@ def main(argv: List[str]) -> None:
    return
-if __name__ == '__main__':
+if __name__ == "__main__":
    main(sys.argv)
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@ -1,8 +1,19 @@
 import io
 import logging
 import re
-from typing import (BinaryIO, Dict, Generic, List, Optional, Sequence, TextIO,
+from typing import (
-                    Tuple, TypeVar, Union, cast)
+    BinaryIO,
    Dict,
    Generic,
    List,
    Optional,
    Sequence,
    TextIO,
    Tuple,
    TypeVar,
    Union,
    cast,
 )
 from pdfminer.pdfcolor import PDFColorSpace
 from . import utils
@ -46,7 +57,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
        self,
        rsrcmgr: PDFResourceManager,
        pageno: int = 1,
-        laparams: Optional[LAParams] = None
+        laparams: Optional[LAParams] = None,
    ) -> None:
        PDFTextDevice.__init__(self, rsrcmgr)
        self.pageno = pageno
@ -57,7 +68,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
        (x0, y0, x1, y1) = page.mediabox
        (x0, y0) = apply_matrix_pt(ctm, (x0, y0))
        (x1, y1) = apply_matrix_pt(ctm, (x1, y1))
-        mediabox = (0, 0, abs(x0-x1), abs(y0-y1))
+        mediabox = (0, 0, abs(x0 - x1), abs(y0 - y1))
        self.cur_item = LTPage(self.pageno, mediabox)
    def end_page(self, page: PDFPage) -> None:
@ -80,9 +91,11 @@ class PDFLayoutAnalyzer(PDFTextDevice):
    def render_image(self, name: str, stream: PDFStream) -> None:
        assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
-        item = LTImage(name, stream,
+        item = LTImage(
-                       (self.cur_item.x0, self.cur_item.y0,
+            name,
-                        self.cur_item.x1, self.cur_item.y1))
+            stream,
            (self.cur_item.x0, self.cur_item.y0, self.cur_item.x1, self.cur_item.y1),
        )
        self.cur_item.add(item)
    def paint_path(
@ -91,15 +104,15 @@ class PDFLayoutAnalyzer(PDFTextDevice):
        stroke: bool,
        fill: bool,
        evenodd: bool,
-        path: Sequence[PathSegment]
+        path: Sequence[PathSegment],
    ) -> None:
        """Paint paths described in section 4.4 of the PDF reference manual"""
-        shape = ''.join(x[0] for x in path)
+        shape = "".join(x[0] for x in path)
-        if shape.count('m') > 1:
+        if shape.count("m") > 1:
            # recurse if there are multiple m's in this shape
-            for m in re.finditer(r'm[^m]+', shape):
+            for m in re.finditer(r"m[^m]+", shape):
-                subpath = path[m.start(0):m.end(0)]
+                subpath = path[m.start(0) : m.end(0)]
                self.paint_path(gstate, stroke, fill, evenodd, subpath)
        else:
@ -110,38 +123,68 @@ class PDFLayoutAnalyzer(PDFTextDevice):
            # And, per Section 4.4's Table 4.9, all other path commands place
            # their point-position in their final two arguments. (Any preceding
            # arguments represent control points on Bézier curves.)
-            raw_pts = [cast(Point, p[-2:] if p[0] != 'h' else path[0][-2:])
+            raw_pts = [
-                       for p in path]
+                cast(Point, p[-2:] if p[0] != "h" else path[0][-2:]) for p in path
            ]
            pts = [apply_matrix_pt(self.ctm, pt) for pt in raw_pts]
-            if shape in {'mlh', 'ml'}:
+            if shape in {"mlh", "ml"}:
                # single line segment
                #
                # Note: 'ml', in conditional above, is a frequent anomaly
                # that we want to support.
-                line = LTLine(gstate.linewidth, pts[0], pts[1], stroke,
+                line = LTLine(
-                              fill, evenodd, gstate.scolor, gstate.ncolor)
+                    gstate.linewidth,
                    pts[0],
                    pts[1],
                    stroke,
                    fill,
                    evenodd,
                    gstate.scolor,
                    gstate.ncolor,
                )
                self.cur_item.add(line)
-            elif shape in {'mlllh', 'mllll'}:
+            elif shape in {"mlllh", "mllll"}:
                (x0, y0), (x1, y1), (x2, y2), (x3, y3), _ = pts
-                is_closed_loop = (pts[0] == pts[4])
+                is_closed_loop = pts[0] == pts[4]
-                has_square_coordinates = \
+                has_square_coordinates = (
-                    (x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) \
+                    x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0
-                    or (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)
+                ) or (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)
                if is_closed_loop and has_square_coordinates:
-                    rect = LTRect(gstate.linewidth, (*pts[0], *pts[2]), stroke,
+                    rect = LTRect(
-                                  fill, evenodd, gstate.scolor, gstate.ncolor)
+                        gstate.linewidth,
                        (*pts[0], *pts[2]),
                        stroke,
                        fill,
                        evenodd,
                        gstate.scolor,
                        gstate.ncolor,
                    )
                    self.cur_item.add(rect)
                else:
-                    curve = LTCurve(gstate.linewidth, pts, stroke, fill,
+                    curve = LTCurve(
-                                    evenodd, gstate.scolor, gstate.ncolor)
+                        gstate.linewidth,
                        pts,
                        stroke,
                        fill,
                        evenodd,
                        gstate.scolor,
                        gstate.ncolor,
                    )
                    self.cur_item.add(curve)
            else:
-                curve = LTCurve(gstate.linewidth, pts, stroke, fill, evenodd,
+                curve = LTCurve(
-                                gstate.scolor, gstate.ncolor)
+                    gstate.linewidth,
                    pts,
                    stroke,
                    fill,
                    evenodd,
                    gstate.scolor,
                    gstate.ncolor,
                )
                self.cur_item.add(curve)
    def render_char(
@ -153,7 +196,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
        rise: float,
        cid: int,
        ncs: PDFColorSpace,
-        graphicstate: PDFGraphicState
+        graphicstate: PDFGraphicState,
    ) -> float:
        try:
            text = font.to_unichr(cid)
@ -162,14 +205,24 @@ class PDFLayoutAnalyzer(PDFTextDevice):
            text = self.handle_undefined_char(font, cid)
        textwidth = font.char_width(cid)
        textdisp = font.char_disp(cid)
-        item = LTChar(matrix, font, fontsize, scaling, rise, text, textwidth,
+        item = LTChar(
-                      textdisp, ncs, graphicstate)
+            matrix,
            font,
            fontsize,
            scaling,
            rise,
            text,
            textwidth,
            textdisp,
            ncs,
            graphicstate,
        )
        self.cur_item.add(item)
        return item.adv
    def handle_undefined_char(self, font: PDFFont, cid: int) -> str:
-        log.debug('undefined: %r, %r', font, cid)
+        log.debug("undefined: %r, %r", font, cid)
-        return '(cid:%d)' % cid
+        return "(cid:%d)" % cid
    def receive_layout(self, ltpage: LTPage) -> None:
        pass
@ -180,10 +233,9 @@ class PDFPageAggregator(PDFLayoutAnalyzer):
        self,
        rsrcmgr: PDFResourceManager,
        pageno: int = 1,
-        laparams: Optional[LAParams] = None
+        laparams: Optional[LAParams] = None,
    ) -> None:
-        PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
+        PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams)
                                   laparams=laparams)
        self.result: Optional[LTPage] = None
    def receive_layout(self, ltpage: LTPage) -> None:
@ -195,7 +247,7 @@ class PDFPageAggregator(PDFLayoutAnalyzer):
 # Some PDFConverter children support only binary I/O
-IOType = TypeVar('IOType', TextIO, BinaryIO, AnyIO)
+IOType = TypeVar("IOType", TextIO, BinaryIO, AnyIO)
 class PDFConverter(PDFLayoutAnalyzer, Generic[IOType]):
@ -203,12 +255,11 @@ class PDFConverter(PDFLayoutAnalyzer, Generic[IOType]):
        self,
        rsrcmgr: PDFResourceManager,
        outfp: IOType,
-        codec: str = 'utf-8',
+        codec: str = "utf-8",
        pageno: int = 1,
-        laparams: Optional[LAParams] = None
+        laparams: Optional[LAParams] = None,
    ) -> None:
-        PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
+        PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams)
                                   laparams=laparams)
        self.outfp: IOType = outfp
        self.codec = codec
        self.outfp_binary = self._is_binary_stream(self.outfp)
@ -216,9 +267,9 @@ class PDFConverter(PDFLayoutAnalyzer, Generic[IOType]):
    @staticmethod
    def _is_binary_stream(outfp: AnyIO) -> bool:
        """Test if an stream is binary or not"""
-        if 'b' in getattr(outfp, 'mode', ''):
+        if "b" in getattr(outfp, "mode", ""):
            return True
-        elif hasattr(outfp, 'mode'):
+        elif hasattr(outfp, "mode"):
            # output stream has a mode, but it does not contain 'b'
            return False
        elif isinstance(outfp, io.BytesIO):
@ -236,19 +287,18 @@ class TextConverter(PDFConverter[AnyIO]):
        self,
        rsrcmgr: PDFResourceManager,
        outfp: AnyIO,
-        codec: str = 'utf-8',
+        codec: str = "utf-8",
        pageno: int = 1,
        laparams: Optional[LAParams] = None,
        showpageno: bool = False,
-        imagewriter: Optional[ImageWriter] = None
+        imagewriter: Optional[ImageWriter] = None,
    ) -> None:
-        super().__init__(rsrcmgr, outfp, codec=codec, pageno=pageno,
+        super().__init__(rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams)
                         laparams=laparams)
        self.showpageno = showpageno
        self.imagewriter = imagewriter
    def write_text(self, text: str) -> None:
-        text = utils.compatible_encode_method(text, self.codec, 'ignore')
+        text = utils.compatible_encode_method(text, self.codec, "ignore")
        if self.outfp_binary:
            cast(BinaryIO, self.outfp).write(text.encode())
        else:
@ -262,14 +312,15 @@ class TextConverter(PDFConverter[AnyIO]):
            elif isinstance(item, LTText):
                self.write_text(item.get_text())
            if isinstance(item, LTTextBox):
-                self.write_text('\n')
+                self.write_text("\n")
            elif isinstance(item, LTImage):
                if self.imagewriter is not None:
                    self.imagewriter.export_image(item)
        if self.showpageno:
-            self.write_text('Page %s\n' % ltpage.pageid)
+            self.write_text("Page %s\n" % ltpage.pageid)
        render(ltpage)
-        self.write_text('\f')
+        self.write_text("\f")
    # Some dummy functions to save memory/CPU when all that is wanted
    # is text.  This stops all the image and drawing output from being
@ -286,54 +337,55 @@ class TextConverter(PDFConverter[AnyIO]):
        stroke: bool,
        fill: bool,
        evenodd: bool,
-        path: Sequence[PathSegment]
+        path: Sequence[PathSegment],
    ) -> None:
        return
 class HTMLConverter(PDFConverter[AnyIO]):
    RECT_COLORS = {
-        'figure': 'yellow',
+        "figure": "yellow",
-        'textline': 'magenta',
+        "textline": "magenta",
-        'textbox': 'cyan',
+        "textbox": "cyan",
-        'textgroup': 'red',
+        "textgroup": "red",
-        'curve': 'black',
+        "curve": "black",
-        'page': 'gray',
+        "page": "gray",
    }
    TEXT_COLORS = {
-        'textbox': 'blue',
+        "textbox": "blue",
-        'char': 'black',
+        "char": "black",
    }
    def __init__(
        self,
        rsrcmgr: PDFResourceManager,
        outfp: AnyIO,
-        codec: str = 'utf-8',
+        codec: str = "utf-8",
        pageno: int = 1,
        laparams: Optional[LAParams] = None,
        scale: float = 1,
        fontscale: float = 1.0,
-        layoutmode: str = 'normal',
+        layoutmode: str = "normal",
        showpageno: bool = True,
        pagemargin: int = 50,
        imagewriter: Optional[ImageWriter] = None,
        debug: int = 0,
        rect_colors: Optional[Dict[str, str]] = None,
-        text_colors: Optional[Dict[str, str]] = None
+        text_colors: Optional[Dict[str, str]] = None,
    ) -> None:
-        PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
+        PDFConverter.__init__(
-                              laparams=laparams)
+            self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams
        )
        # write() assumes a codec for binary I/O, or no codec for text I/O.
        if self.outfp_binary == (not self.codec):
            raise ValueError("Codec is required for a binary I/O output")
        if text_colors is None:
-            text_colors = {'char': 'black'}
+            text_colors = {"char": "black"}
        if rect_colors is None:
-            rect_colors = {'curve': 'black', 'page': 'gray'}
+            rect_colors = {"curve": "black", "page": "gray"}
        self.scale = scale
        self.fontscale = fontscale
@ -360,23 +412,27 @@ class HTMLConverter(PDFConverter[AnyIO]):
        return
    def write_header(self) -> None:
-        self.write('<html><head>\n')
+        self.write("<html><head>\n")
        if self.codec:
-            s = '<meta http-equiv="Content-Type" content="text/html; ' \
+            s = (
                '<meta http-equiv="Content-Type" content="text/html; '
                'charset=%s">\n' % self.codec
            )
        else:
            s = '<meta http-equiv="Content-Type" content="text/html">\n'
        self.write(s)
-        self.write('</head><body>\n')
+        self.write("</head><body>\n")
        return
    def write_footer(self) -> None:
-        page_links = ['<a href="#{}">{}</a>'.format(i, i)
+        page_links = [
-                      for i in range(1, self.pageno)]
+            '<a href="#{}">{}</a>'.format(i, i) for i in range(1, self.pageno)
-        s = '<div style="position:absolute; top:0px;">Page: %s</div>\n' % \
+        ]
-            ', '.join(page_links)
+        s = '<div style="position:absolute; top:0px;">Page: %s</div>\n' % ", ".join(
            page_links
        )
        self.write(s)
-        self.write('</body></html>\n')
+        self.write("</body></html>\n")
        return
    def write_text(self, text: str) -> None:
@ -384,71 +440,67 @@ class HTMLConverter(PDFConverter[AnyIO]):
        return
    def place_rect(
-        self,
+        self, color: str, borderwidth: int, x: float, y: float, w: float, h: float
        color: str,
        borderwidth: int,
        x: float,
        y: float,
        w: float,
        h: float
    ) -> None:
        color2 = self.rect_colors.get(color)
        if color2 is not None:
-            s = '<span style="position:absolute; border: %s %dpx solid; ' \
+            s = (
-                'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' % \
+                '<span style="position:absolute; border: %s %dpx solid; '
-                (color2, borderwidth, x * self.scale,
+                'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n'
-                 (self._yoffset - y) * self.scale, w * self.scale,
+                % (
-                 h * self.scale)
+                    color2,
-            self.write(
+                    borderwidth,
-                s)
+                    x * self.scale,
                    (self._yoffset - y) * self.scale,
                    w * self.scale,
                    h * self.scale,
                )
            )
            self.write(s)
        return
-    def place_border(
+    def place_border(self, color: str, borderwidth: int, item: LTComponent) -> None:
-        self,
+        self.place_rect(color, borderwidth, item.x0, item.y1, item.width, item.height)
        color: str,
        borderwidth: int,
        item: LTComponent
    ) -> None:
        self.place_rect(color, borderwidth, item.x0, item.y1, item.width,
                        item.height)
        return
    def place_image(
-        self,
+        self, item: LTImage, borderwidth: int, x: float, y: float, w: float, h: float
        item: LTImage,
        borderwidth: int,
        x: float,
        y: float,
        w: float,
        h: float
    ) -> None:
        if self.imagewriter is not None:
            name = self.imagewriter.export_image(item)
-            s = '<img src="%s" border="%d" style="position:absolute; ' \
+            s = (
-                'left:%dpx; top:%dpx;" width="%d" height="%d" />\n' % \
+                '<img src="%s" border="%d" style="position:absolute; '
-                (enc(name), borderwidth, x * self.scale,
+                'left:%dpx; top:%dpx;" width="%d" height="%d" />\n'
-                 (self._yoffset - y) * self.scale, w * self.scale,
+                % (
-                 h * self.scale)
+                    enc(name),
                    borderwidth,
                    x * self.scale,
                    (self._yoffset - y) * self.scale,
                    w * self.scale,
                    h * self.scale,
                )
            )
            self.write(s)
        return
    def place_text(
-        self,
+        self, color: str, text: str, x: float, y: float, size: float
        color: str,
        text: str,
        x: float,
        y: float,
        size: float
    ) -> None:
        color2 = self.text_colors.get(color)
        if color2 is not None:
-            s = '<span style="position:absolute; color:%s; left:%dpx; ' \
+            s = (
-                'top:%dpx; font-size:%dpx;">' % \
+                '<span style="position:absolute; color:%s; left:%dpx; '
-                (color2, x * self.scale, (self._yoffset - y) * self.scale,
+                'top:%dpx; font-size:%dpx;">'
-                 size * self.scale * self.fontscale)
+                % (
                    color2,
                    x * self.scale,
                    (self._yoffset - y) * self.scale,
                    size * self.scale * self.fontscale,
                )
            )
            self.write(s)
            self.write_text(text)
-            self.write('</span>\n')
+            self.write("</span>\n")
        return
    def begin_div(
@ -459,47 +511,57 @@ class HTMLConverter(PDFConverter[AnyIO]):
        y: float,
        w: float,
        h: float,
-        writing_mode: str = 'False'
+        writing_mode: str = "False",
    ) -> None:
        self._fontstack.append(self._font)
        self._font = None
-        s = '<div style="position:absolute; border: %s %dpx solid; ' \
+        s = (
-            'writing-mode:%s; left:%dpx; top:%dpx; width:%dpx; ' \
+            '<div style="position:absolute; border: %s %dpx solid; '
-            'height:%dpx;">' % \
+            "writing-mode:%s; left:%dpx; top:%dpx; width:%dpx; "
-            (color, borderwidth, writing_mode, x * self.scale,
+            'height:%dpx;">'
-             (self._yoffset - y) * self.scale, w * self.scale, h * self.scale)
+            % (
                color,
                borderwidth,
                writing_mode,
                x * self.scale,
                (self._yoffset - y) * self.scale,
                w * self.scale,
                h * self.scale,
            )
        )
        self.write(s)
        return
    def end_div(self, color: str) -> None:
        if self._font is not None:
-            self.write('</span>')
+            self.write("</span>")
        self._font = self._fontstack.pop()
-        self.write('</div>')
+        self.write("</div>")
        return
    def put_text(self, text: str, fontname: str, fontsize: float) -> None:
        font = (fontname, fontsize)
        if font != self._font:
            if self._font is not None:
-                self.write('</span>')
+                self.write("</span>")
            # Remove subset tag from fontname, see PDF Reference 5.5.3
-            fontname_without_subset_tag = fontname.split('+')[-1]
+            fontname_without_subset_tag = fontname.split("+")[-1]
-            self.write('<span style="font-family: %s; font-size:%dpx">' %
+            self.write(
-                       (fontname_without_subset_tag,
+                '<span style="font-family: %s; font-size:%dpx">'
-                        fontsize * self.scale * self.fontscale))
+                % (fontname_without_subset_tag, fontsize * self.scale * self.fontscale)
            )
            self._font = font
        self.write_text(text)
        return
    def put_newline(self) -> None:
-        self.write('<br>')
+        self.write("<br>")
        return
    def receive_layout(self, ltpage: LTPage) -> None:
        def show_group(item: Union[LTTextGroup, TextGroupElement]) -> None:
            if isinstance(item, LTTextGroup):
-                self.place_border('textgroup', 1, item)
+                self.place_border("textgroup", 1, item)
                for child in item:
                    show_group(child)
            return
@ -508,63 +570,74 @@ class HTMLConverter(PDFConverter[AnyIO]):
            child: LTItem
            if isinstance(item, LTPage):
                self._yoffset += item.y1
-                self.place_border('page', 1, item)
+                self.place_border("page", 1, item)
                if self.showpageno:
-                    self.write('<div style="position:absolute; top:%dpx;">' %
+                    self.write(
-                               ((self._yoffset-item.y1)*self.scale))
+                        '<div style="position:absolute; top:%dpx;">'
-                    self.write('<a name="{}">Page {}</a></div>\n'
+                        % ((self._yoffset - item.y1) * self.scale)
-                               .format(item.pageid, item.pageid))
+                    )
                    self.write(
                        '<a name="{}">Page {}</a></div>\n'.format(
                            item.pageid, item.pageid
                        )
                    )
                for child in item:
                    render(child)
                if item.groups is not None:
                    for group in item.groups:
                        show_group(group)
            elif isinstance(item, LTCurve):
-                self.place_border('curve', 1, item)
+                self.place_border("curve", 1, item)
            elif isinstance(item, LTFigure):
-                self.begin_div('figure', 1, item.x0, item.y1, item.width,
+                self.begin_div("figure", 1, item.x0, item.y1, item.width, item.height)
                               item.height)
                for child in item:
                    render(child)
-                self.end_div('figure')
+                self.end_div("figure")
            elif isinstance(item, LTImage):
-                self.place_image(item, 1, item.x0, item.y1, item.width,
+                self.place_image(item, 1, item.x0, item.y1, item.width, item.height)
                                 item.height)
            else:
-                if self.layoutmode == 'exact':
+                if self.layoutmode == "exact":
                    if isinstance(item, LTTextLine):
-                        self.place_border('textline', 1, item)
+                        self.place_border("textline", 1, item)
                        for child in item:
                            render(child)
                    elif isinstance(item, LTTextBox):
-                        self.place_border('textbox', 1, item)
+                        self.place_border("textbox", 1, item)
-                        self.place_text('textbox', str(item.index+1), item.x0,
+                        self.place_text(
-                                        item.y1, 20)
+                            "textbox", str(item.index + 1), item.x0, item.y1, 20
                        )
                        for child in item:
                            render(child)
                    elif isinstance(item, LTChar):
-                        self.place_border('char', 1, item)
+                        self.place_border("char", 1, item)
-                        self.place_text('char', item.get_text(), item.x0,
+                        self.place_text(
-                                        item.y1, item.size)
+                            "char", item.get_text(), item.x0, item.y1, item.size
                        )
                else:
                    if isinstance(item, LTTextLine):
                        for child in item:
                            render(child)
-                        if self.layoutmode != 'loose':
+                        if self.layoutmode != "loose":
                            self.put_newline()
                    elif isinstance(item, LTTextBox):
-                        self.begin_div('textbox', 1, item.x0, item.y1,
+                        self.begin_div(
-                                       item.width, item.height,
+                            "textbox",
-                                       item.get_writing_mode())
+                            1,
                            item.x0,
                            item.y1,
                            item.width,
                            item.height,
                            item.get_writing_mode(),
                        )
                        for child in item:
                            render(child)
-                        self.end_div('textbox')
+                        self.end_div("textbox")
                    elif isinstance(item, LTChar):
-                        self.put_text(item.get_text(), item.fontname,
+                        self.put_text(item.get_text(), item.fontname, item.size)
                                      item.size)
                    elif isinstance(item, LTText):
                        self.write_text(item.get_text())
            return
        render(ltpage)
        self._yoffset += self.pagemargin
        return
@ -576,20 +649,21 @@ class HTMLConverter(PDFConverter[AnyIO]):
 class XMLConverter(PDFConverter[AnyIO]):
-    CONTROL = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')
+    CONTROL = re.compile("[\x00-\x08\x0b-\x0c\x0e-\x1f]")
    def __init__(
        self,
        rsrcmgr: PDFResourceManager,
        outfp: AnyIO,
-        codec: str = 'utf-8',
+        codec: str = "utf-8",
        pageno: int = 1,
        laparams: Optional[LAParams] = None,
        imagewriter: Optional[ImageWriter] = None,
-        stripcontrol: bool = False
+        stripcontrol: bool = False,
    ) -> None:
-        PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
+        PDFConverter.__init__(
-                              laparams=laparams)
+            self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams
        )
        # write() assumes a codec for binary I/O, or no codec for text I/O.
        if self.outfp_binary == (not self.codec):
@ -612,100 +686,125 @@ class XMLConverter(PDFConverter[AnyIO]):
            self.write('<?xml version="1.0" encoding="%s" ?>\n' % self.codec)
        else:
            self.write('<?xml version="1.0" ?>\n')
-        self.write('<pages>\n')
+        self.write("<pages>\n")
        return
    def write_footer(self) -> None:
-        self.write('</pages>\n')
+        self.write("</pages>\n")
        return
    def write_text(self, text: str) -> None:
        if self.stripcontrol:
-            text = self.CONTROL.sub('', text)
+            text = self.CONTROL.sub("", text)
        self.write(enc(text))
        return
    def receive_layout(self, ltpage: LTPage) -> None:
        def show_group(item: LTItem) -> None:
            if isinstance(item, LTTextBox):
-                self.write('<textbox id="%d" bbox="%s" />\n' %
+                self.write(
-                           (item.index, bbox2str(item.bbox)))
+                    '<textbox id="%d" bbox="%s" />\n'
                    % (item.index, bbox2str(item.bbox))
                )
            elif isinstance(item, LTTextGroup):
                self.write('<textgroup bbox="%s">\n' % bbox2str(item.bbox))
                for child in item:
                    show_group(child)
-                self.write('</textgroup>\n')
+                self.write("</textgroup>\n")
            return
        def render(item: LTItem) -> None:
            child: LTItem
            if isinstance(item, LTPage):
-                s = '<page id="%s" bbox="%s" rotate="%d">\n' % \
+                s = '<page id="%s" bbox="%s" rotate="%d">\n' % (
-                    (item.pageid, bbox2str(item.bbox), item.rotate)
+                    item.pageid,
                    bbox2str(item.bbox),
                    item.rotate,
                )
                self.write(s)
                for child in item:
                    render(child)
                if item.groups is not None:
-                    self.write('<layout>\n')
+                    self.write("<layout>\n")
                    for group in item.groups:
                        show_group(group)
-                    self.write('</layout>\n')
+                    self.write("</layout>\n")
-                self.write('</page>\n')
+                self.write("</page>\n")
            elif isinstance(item, LTLine):
-                s = '<line linewidth="%d" bbox="%s" />\n' % \
+                s = '<line linewidth="%d" bbox="%s" />\n' % (
-                    (item.linewidth, bbox2str(item.bbox))
+                    item.linewidth,
                    bbox2str(item.bbox),
                )
                self.write(s)
            elif isinstance(item, LTRect):
-                s = '<rect linewidth="%d" bbox="%s" />\n' % \
+                s = '<rect linewidth="%d" bbox="%s" />\n' % (
-                    (item.linewidth, bbox2str(item.bbox))
+                    item.linewidth,
                    bbox2str(item.bbox),
                )
                self.write(s)
            elif isinstance(item, LTCurve):
-                s = '<curve linewidth="%d" bbox="%s" pts="%s"/>\n' % \
+                s = '<curve linewidth="%d" bbox="%s" pts="%s"/>\n' % (
-                    (item.linewidth, bbox2str(item.bbox), item.get_pts())
+                    item.linewidth,
                    bbox2str(item.bbox),
                    item.get_pts(),
                )
                self.write(s)
            elif isinstance(item, LTFigure):
-                s = '<figure name="%s" bbox="%s">\n' % \
+                s = '<figure name="%s" bbox="%s">\n' % (item.name, bbox2str(item.bbox))
                    (item.name, bbox2str(item.bbox))
                self.write(s)
                for child in item:
                    render(child)
-                self.write('</figure>\n')
+                self.write("</figure>\n")
            elif isinstance(item, LTTextLine):
                self.write('<textline bbox="%s">\n' % bbox2str(item.bbox))
                for child in item:
                    render(child)
-                self.write('</textline>\n')
+                self.write("</textline>\n")
            elif isinstance(item, LTTextBox):
-                wmode = ''
+                wmode = ""
                if isinstance(item, LTTextBoxVertical):
                    wmode = ' wmode="vertical"'
-                s = '<textbox id="%d" bbox="%s"%s>\n' %\
+                s = '<textbox id="%d" bbox="%s"%s>\n' % (
-                    (item.index, bbox2str(item.bbox), wmode)
+                    item.index,
                    bbox2str(item.bbox),
                    wmode,
                )
                self.write(s)
                for child in item:
                    render(child)
-                self.write('</textbox>\n')
+                self.write("</textbox>\n")
            elif isinstance(item, LTChar):
-                s = '<text font="%s" bbox="%s" colourspace="%s" ' \
+                s = (
-                    'ncolour="%s" size="%.3f">' % \
+                    '<text font="%s" bbox="%s" colourspace="%s" '
-                    (enc(item.fontname), bbox2str(item.bbox),
+                    'ncolour="%s" size="%.3f">'
-                     item.ncs.name, item.graphicstate.ncolor, item.size)
+                    % (
                        enc(item.fontname),
                        bbox2str(item.bbox),
                        item.ncs.name,
                        item.graphicstate.ncolor,
                        item.size,
                    )
                )
                self.write(s)
                self.write_text(item.get_text())
-                self.write('</text>\n')
+                self.write("</text>\n")
            elif isinstance(item, LTText):
-                self.write('<text>%s</text>\n' % item.get_text())
+                self.write("<text>%s</text>\n" % item.get_text())
            elif isinstance(item, LTImage):
                if self.imagewriter is not None:
                    name = self.imagewriter.export_image(item)
-                    self.write('<image src="%s" width="%d" height="%d" />\n' %
+                    self.write(
-                               (enc(name), item.width, item.height))
+                        '<image src="%s" width="%d" height="%d" />\n'
                        % (enc(name), item.width, item.height)
                    )
                else:
-                    self.write('<image width="%d" height="%d" />\n' %
+                    self.write(
-                               (item.width, item.height))
+                        '<image width="%d" height="%d" />\n' % (item.width, item.height)
                    )
            else:
-                assert False, str(('Unhandled', item))
+                assert False, str(("Unhandled", item))
            return
        render(ltpage)
        return
--- a/pdfminer/data_structures.py
+++ b/pdfminer/data_structures.py
@ -11,18 +11,19 @@ class NumberTree:
    See Section 3.8.6 of the PDF Reference.
    """
    def __init__(self, obj: Any):
        self._obj = dict_value(obj)
        self.nums: Optional[Iterable[Any]] = None
        self.kids: Optional[Iterable[Any]] = None
        self.limits: Optional[Iterable[Any]] = None
-        if 'Nums' in self._obj:
+        if "Nums" in self._obj:
-            self.nums = list_value(self._obj['Nums'])
+            self.nums = list_value(self._obj["Nums"])
-        if 'Kids' in self._obj:
+        if "Kids" in self._obj:
-            self.kids = list_value(self._obj['Kids'])
+            self.kids = list_value(self._obj["Kids"])
-        if 'Limits' in self._obj:
+        if "Limits" in self._obj:
-            self.limits = list_value(self._obj['Limits'])
+            self.limits = list_value(self._obj["Limits"])
    def _parse(self) -> List[Tuple[int, Any]]:
        items = []
@ -44,7 +45,7 @@ class NumberTree:
        if settings.STRICT:
            if not all(a[0] <= b[0] for a, b in zip(values, values[1:])):
-                raise PDFSyntaxError('Number tree elements are out of order')
+                raise PDFSyntaxError("Number tree elements are out of order")
        else:
            values.sort(key=lambda t: t[0])
--- a/pdfminer/encodingdb.py
+++ b/pdfminer/encodingdb.py
@ -6,7 +6,7 @@ from .glyphlist import glyphname2unicode
 from .latin_enc import ENCODING
 from .psparser import PSLiteral
-HEXADECIMAL = re.compile(r'[0-9a-fA-F]+')
+HEXADECIMAL = re.compile(r"[0-9a-fA-F]+")
 log = logging.getLogger(__name__)
@ -25,39 +25,41 @@ def name2unicode(name: str) -> str:
    :returns unicode character if name resembles something,
    otherwise a KeyError
    """
-    name = name.split('.')[0]
+    name = name.split(".")[0]
-    components = name.split('_')
+    components = name.split("_")
    if len(components) > 1:
-        return ''.join(map(name2unicode, components))
+        return "".join(map(name2unicode, components))
    else:
        if name in glyphname2unicode:
            return glyphname2unicode[name]
-        elif name.startswith('uni'):
+        elif name.startswith("uni"):
-            name_without_uni = name.strip('uni')
+            name_without_uni = name.strip("uni")
-            if HEXADECIMAL.match(name_without_uni) and \
+            if HEXADECIMAL.match(name_without_uni) and len(name_without_uni) % 4 == 0:
-                    len(name_without_uni) % 4 == 0:
+                unicode_digits = [
-                unicode_digits = [int(name_without_uni[i:i + 4], base=16)
+                    int(name_without_uni[i : i + 4], base=16)
-                                  for i in range(0, len(name_without_uni), 4)]
+                    for i in range(0, len(name_without_uni), 4)
                ]
                for digit in unicode_digits:
                    raise_key_error_for_invalid_unicode(digit)
                characters = map(chr, unicode_digits)
-                return ''.join(characters)
+                return "".join(characters)
-        elif name.startswith('u'):
+        elif name.startswith("u"):
-            name_without_u = name.strip('u')
+            name_without_u = name.strip("u")
-            if HEXADECIMAL.match(name_without_u) and \
+            if HEXADECIMAL.match(name_without_u) and 4 <= len(name_without_u) <= 6:
                    4 <= len(name_without_u) <= 6:
                unicode_digit = int(name_without_u, base=16)
                raise_key_error_for_invalid_unicode(unicode_digit)
                return chr(unicode_digit)
-    raise KeyError('Could not convert unicode name "%s" to character because '
+    raise KeyError(
-                   'it does not match specification' % name)
+        'Could not convert unicode name "%s" to character because '
        "it does not match specification" % name
    )
 def raise_key_error_for_invalid_unicode(unicode_digit: int) -> None:
@ -67,8 +69,10 @@ def raise_key_error_for_invalid_unicode(unicode_digit: int) -> None:
    :raises KeyError if unicode digit is invalid
    """
    if 55295 < unicode_digit < 57344:
-        raise KeyError('Unicode digit %d is invalid because '
+        raise KeyError(
-                       'it is in the range D800 through DFFF' % unicode_digit)
+            "Unicode digit %d is invalid because "
            "it is in the range D800 through DFFF" % unicode_digit
        )
 class EncodingDB:
@ -89,17 +93,15 @@ class EncodingDB:
            pdf2unicode[pdf] = c
    encodings = {
-        'StandardEncoding': std2unicode,
+        "StandardEncoding": std2unicode,
-        'MacRomanEncoding': mac2unicode,
+        "MacRomanEncoding": mac2unicode,
-        'WinAnsiEncoding': win2unicode,
+        "WinAnsiEncoding": win2unicode,
-        'PDFDocEncoding': pdf2unicode,
+        "PDFDocEncoding": pdf2unicode,
    }
    @classmethod
    def get_encoding(
-        cls,
+        cls, name: str, diff: Optional[Iterable[object]] = None
        name: str,
        diff: Optional[Iterable[object]] = None
    ) -> Dict[int, str]:
        cid2unicode = cls.encodings.get(name, cls.std2unicode)
        if diff:
--- a/pdfminer/fontmetrics.py
+++ b/pdfminer/fontmetrics.py
--- a/pdfminer/glyphlist.py
+++ b/pdfminer/glyphlist.py
--- a/pdfminer/high_level.py
+++ b/pdfminer/high_level.py
@ -5,8 +5,7 @@ import sys
 from io import StringIO
 from typing import Any, BinaryIO, Container, Iterator, Optional, cast
-from .converter import XMLConverter, HTMLConverter, TextConverter, \
+from .converter import XMLConverter, HTMLConverter, TextConverter, PDFPageAggregator
    PDFPageAggregator
 from .image import ImageWriter
 from .layout import LAParams, LTPage
 from .pdfdevice import PDFDevice, TagExtractor
@ -18,20 +17,20 @@ from .utils import open_filename, FileOrName, AnyIO
 def extract_text_to_fp(
    inf: BinaryIO,
    outfp: AnyIO,
-    output_type: str = 'text',
+    output_type: str = "text",
-    codec: str = 'utf-8',
+    codec: str = "utf-8",
    laparams: Optional[LAParams] = None,
    maxpages: int = 0,
    page_numbers: Optional[Container[int]] = None,
    password: str = "",
    scale: float = 1.0,
    rotation: int = 0,
-    layoutmode: str = 'normal',
+    layoutmode: str = "normal",
    output_dir: Optional[str] = None,
    strip_control: bool = False,
    debug: bool = False,
    disable_caching: bool = False,
-    **kwargs: Any
+    **kwargs: Any,
 ) -> None:
    """Parses text from inf-file and writes to outfp file-like object.
@ -72,39 +71,52 @@ def extract_text_to_fp(
    rsrcmgr = PDFResourceManager(caching=not disable_caching)
    device: Optional[PDFDevice] = None
-    if output_type != 'text' and outfp == sys.stdout:
+    if output_type != "text" and outfp == sys.stdout:
        outfp = sys.stdout.buffer
-    if output_type == 'text':
+    if output_type == "text":
-        device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
+        device = TextConverter(
-                               imagewriter=imagewriter)
+            rsrcmgr, outfp, codec=codec, laparams=laparams, imagewriter=imagewriter
        )
-    elif output_type == 'xml':
+    elif output_type == "xml":
-        device = XMLConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
+        device = XMLConverter(
-                              imagewriter=imagewriter,
+            rsrcmgr,
-                              stripcontrol=strip_control)
+            outfp,
            codec=codec,
            laparams=laparams,
            imagewriter=imagewriter,
            stripcontrol=strip_control,
        )
-    elif output_type == 'html':
+    elif output_type == "html":
-        device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale,
+        device = HTMLConverter(
-                               layoutmode=layoutmode, laparams=laparams,
+            rsrcmgr,
-                               imagewriter=imagewriter)
+            outfp,
            codec=codec,
            scale=scale,
            layoutmode=layoutmode,
            laparams=laparams,
            imagewriter=imagewriter,
        )
-    elif output_type == 'tag':
+    elif output_type == "tag":
        # Binary I/O is required, but we have no good way to test it here.
        device = TagExtractor(rsrcmgr, cast(BinaryIO, outfp), codec=codec)
    else:
-        msg = f"Output type can be text, html, xml or tag but is " \
+        msg = f"Output type can be text, html, xml or tag but is " f"{output_type}"
              f"{output_type}"
        raise ValueError(msg)
    assert device is not None
    interpreter = PDFPageInterpreter(rsrcmgr, device)
-    for page in PDFPage.get_pages(inf,
+    for page in PDFPage.get_pages(
-                                  page_numbers,
+        inf,
-                                  maxpages=maxpages,
+        page_numbers,
-                                  password=password,
+        maxpages=maxpages,
-                                  caching=not disable_caching):
+        password=password,
        caching=not disable_caching,
    ):
        page.rotate = (page.rotate + rotation) % 360
        interpreter.process_page(page)
@ -113,12 +125,12 @@ def extract_text_to_fp(
 def extract_text(
    pdf_file: FileOrName,
-    password: str = '',
+    password: str = "",
    page_numbers: Optional[Container[int]] = None,
    maxpages: int = 0,
    caching: bool = True,
-    codec: str = 'utf-8',
+    codec: str = "utf-8",
-    laparams: Optional[LAParams] = None
+    laparams: Optional[LAParams] = None,
 ) -> str:
    """Parse and return the text contained in a PDF file.
@ -139,16 +151,15 @@ def extract_text(
    with open_filename(pdf_file, "rb") as fp, StringIO() as output_string:
        fp = cast(BinaryIO, fp)  # we opened in binary mode
        rsrcmgr = PDFResourceManager(caching=caching)
-        device = TextConverter(rsrcmgr, output_string, codec=codec,
+        device = TextConverter(rsrcmgr, output_string, codec=codec, laparams=laparams)
                               laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        for page in PDFPage.get_pages(
-                fp,
+            fp,
-                page_numbers,
+            page_numbers,
-                maxpages=maxpages,
+            maxpages=maxpages,
-                password=password,
+            password=password,
-                caching=caching,
+            caching=caching,
        ):
            interpreter.process_page(page)
@ -157,11 +168,11 @@ def extract_text(
 def extract_pages(
    pdf_file: FileOrName,
-    password: str = '',
+    password: str = "",
    page_numbers: Optional[Container[int]] = None,
    maxpages: int = 0,
    caching: bool = True,
-    laparams: Optional[LAParams] = None
+    laparams: Optional[LAParams] = None,
 ) -> Iterator[LTPage]:
    """Extract and yield LTPage objects
@ -183,8 +194,9 @@ def extract_pages(
        resource_manager = PDFResourceManager(caching=caching)
        device = PDFPageAggregator(resource_manager, laparams=laparams)
        interpreter = PDFPageInterpreter(resource_manager, device)
-        for page in PDFPage.get_pages(fp, page_numbers, maxpages=maxpages,
+        for page in PDFPage.get_pages(
-                                      password=password, caching=caching):
+            fp, page_numbers, maxpages=maxpages, password=password, caching=caching
        ):
            interpreter.process_page(page)
            layout = device.get_result()
            yield layout
--- a/pdfminer/image.py
+++ b/pdfminer/image.py
@ -9,22 +9,15 @@ from .layout import LTImage
 from .pdfcolor import LITERAL_DEVICE_CMYK
 from .pdfcolor import LITERAL_DEVICE_GRAY
 from .pdfcolor import LITERAL_DEVICE_RGB
-from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE, \
+from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE, LITERALS_JPX_DECODE
    LITERALS_JPX_DECODE
 def align32(x: int) -> int:
-    return ((x+3)//4)*4
+    return ((x + 3) // 4) * 4
 class BMPWriter:
-    def __init__(
+    def __init__(self, fp: BinaryIO, bits: int, width: int, height: int) -> None:
        self,
        fp: BinaryIO,
        bits: int,
        width: int,
        height: int
    ) -> None:
        self.fp = fp
        self.bits = bits
        self.width = width
@ -37,30 +30,43 @@ class BMPWriter:
            ncols = 0
        else:
            raise ValueError(bits)
-        self.linesize = align32((self.width*self.bits+7)//8)
+        self.linesize = align32((self.width * self.bits + 7) // 8)
        self.datasize = self.linesize * self.height
-        headersize = 14+40+ncols*4
+        headersize = 14 + 40 + ncols * 4
-        info = struct.pack('<IiiHHIIIIII', 40, self.width, self.height,
+        info = struct.pack(
-                           1, self.bits, 0, self.datasize, 0, 0, ncols, 0)
+            "<IiiHHIIIIII",
            40,
            self.width,
            self.height,
            1,
            self.bits,
            0,
            self.datasize,
            0,
            0,
            ncols,
            0,
        )
        assert len(info) == 40, str(len(info))
-        header = struct.pack('<ccIHHI', b'B', b'M',
+        header = struct.pack(
-                             headersize+self.datasize, 0, 0, headersize)
+            "<ccIHHI", b"B", b"M", headersize + self.datasize, 0, 0, headersize
        )
        assert len(header) == 14, str(len(header))
        self.fp.write(header)
        self.fp.write(info)
        if ncols == 2:
            # B&W color table
            for i in (0, 255):
-                self.fp.write(struct.pack('BBBx', i, i, i))
+                self.fp.write(struct.pack("BBBx", i, i, i))
        elif ncols == 256:
            # grayscale color table
            for i in range(256):
-                self.fp.write(struct.pack('BBBx', i, i, i))
+                self.fp.write(struct.pack("BBBx", i, i, i))
        self.pos0 = self.fp.tell()
        self.pos1 = self.pos0 + self.datasize
    def write_line(self, y: int, data: bytes) -> None:
-        self.fp.seek(self.pos1 - (y+1)*self.linesize)
+        self.fp.seek(self.pos1 - (y + 1) * self.linesize)
        self.fp.write(data)
@ -80,43 +86,46 @@ class ImageWriter:
        is_jbig2 = self.is_jbig2_image(image)
        ext = self._get_image_extension(image, width, height, is_jbig2)
-        name, path = self._create_unique_image_name(self.outdir,
+        name, path = self._create_unique_image_name(self.outdir, image.name, ext)
                                                    image.name, ext)
-        fp = open(path, 'wb')
+        fp = open(path, "wb")
-        if ext == '.jpg':
+        if ext == ".jpg":
            raw_data = image.stream.get_rawdata()
            assert raw_data is not None
            if LITERAL_DEVICE_CMYK in image.colorspace:
                from PIL import Image  # type: ignore[import]
                from PIL import ImageChops
                ifp = BytesIO(raw_data)
                i = Image.open(ifp)
                i = ImageChops.invert(i)
-                i = i.convert('RGB')
+                i = i.convert("RGB")
-                i.save(fp, 'JPEG')
+                i.save(fp, "JPEG")
            else:
                fp.write(raw_data)
-        elif ext == '.jp2':
+        elif ext == ".jp2":
            # if we just write the raw data, most image programs
            # that I have tried cannot open the file. However,
            # open and saving with PIL produces a file that
            # seems to be easily opened by other programs
            from PIL import Image
            raw_data = image.stream.get_rawdata()
            assert raw_data is not None
            ifp = BytesIO(raw_data)
            i = Image.open(ifp)
-            i.save(fp, 'JPEG2000')
+            i.save(fp, "JPEG2000")
        elif is_jbig2:
            input_stream = BytesIO()
            global_streams = self.jbig2_global(image)
            if len(global_streams) > 1:
-                msg = 'There should never be more than one JBIG2Globals ' \
+                msg = (
-                      'associated with a JBIG2 embedded image'
+                    "There should never be more than one JBIG2Globals "
                    "associated with a JBIG2 embedded image"
                )
                raise ValueError(msg)
            if len(global_streams) == 1:
-                input_stream.write(global_streams[0].get_data().rstrip(b'\n'))
+                input_stream.write(global_streams[0].get_data().rstrip(b"\n"))
            input_stream.write(image.stream.get_data())
            input_stream.seek(0)
            reader = JBIG2StreamReader(input_stream)
@ -128,24 +137,24 @@ class ImageWriter:
            bmp = BMPWriter(fp, 1, width, height)
            data = image.stream.get_data()
            i = 0
-            width = (width+7)//8
+            width = (width + 7) // 8
            for y in range(height):
-                bmp.write_line(y, data[i:i+width])
+                bmp.write_line(y, data[i : i + width])
                i += width
        elif image.bits == 8 and LITERAL_DEVICE_RGB in image.colorspace:
            bmp = BMPWriter(fp, 24, width, height)
            data = image.stream.get_data()
            i = 0
-            width = width*3
+            width = width * 3
            for y in range(height):
-                bmp.write_line(y, data[i:i+width])
+                bmp.write_line(y, data[i : i + width])
                i += width
        elif image.bits == 8 and LITERAL_DEVICE_GRAY in image.colorspace:
            bmp = BMPWriter(fp, 8, width, height)
            data = image.stream.get_data()
            i = 0
            for y in range(height):
-                bmp.write_line(y, data[i:i+width])
+                bmp.write_line(y, data[i : i + width])
                i += width
        else:
            fp.write(image.stream.get_data())
@ -168,43 +177,42 @@ class ImageWriter:
        filters = image.stream.get_filters()
        for filter_name, params in filters:
            if filter_name in LITERALS_JBIG2_DECODE:
-                global_streams.append(params['JBIG2Globals'].resolve())
+                global_streams.append(params["JBIG2Globals"].resolve())
        return global_streams
    @staticmethod
    def _get_image_extension(
-        image: LTImage,
+        image: LTImage, width: int, height: int, is_jbig2: bool
        width: int,
        height: int,
        is_jbig2: bool
    ) -> str:
        filters = image.stream.get_filters()
        if len(filters) == 1 and filters[0][0] in LITERALS_DCT_DECODE:
-            ext = '.jpg'
+            ext = ".jpg"
        elif len(filters) == 1 and filters[0][0] in LITERALS_JPX_DECODE:
-            ext = '.jp2'
+            ext = ".jp2"
        elif is_jbig2:
-            ext = '.jb2'
+            ext = ".jb2"
-        elif (image.bits == 1 or
+        elif (
-              image.bits == 8 and
+            image.bits == 1
-              (LITERAL_DEVICE_RGB in image.colorspace or
+            or image.bits == 8
-               LITERAL_DEVICE_GRAY in image.colorspace)):
+            and (
-            ext = '.%dx%d.bmp' % (width, height)
+                LITERAL_DEVICE_RGB in image.colorspace
                or LITERAL_DEVICE_GRAY in image.colorspace
            )
        ):
            ext = ".%dx%d.bmp" % (width, height)
        else:
-            ext = '.%d.%dx%d.img' % (image.bits, width, height)
+            ext = ".%d.%dx%d.img" % (image.bits, width, height)
        return ext
    @staticmethod
    def _create_unique_image_name(
-        dirname: str,
+        dirname: str, image_name: str, ext: str
        image_name: str,
        ext: str
    ) -> Tuple[str, str]:
        name = image_name + ext
        path = os.path.join(dirname, name)
        img_index = 0
        while os.path.exists(path):
-            name = '%s.%d%s' % (image_name, img_index, ext)
+            name = "%s.%d%s" % (image_name, img_index, ext)
            path = os.path.join(dirname, name)
            img_index += 1
        return name, path
--- a/pdfminer/jbig2.py
+++ b/pdfminer/jbig2.py
@ -19,10 +19,10 @@ HEADER_FLAG_PAGE_ASSOC_LONG = 0b01000000
 SEG_TYPE_MASK = 0b00111111
 REF_COUNT_SHORT_MASK = 0b11100000
-REF_COUNT_LONG_MASK = 0x1fffffff
+REF_COUNT_LONG_MASK = 0x1FFFFFFF
 REF_COUNT_LONG = 7
-DATA_LEN_UNKNOWN = 0xffffffff
+DATA_LEN_UNKNOWN = 0xFFFFFFFF
 # segment types
 SEG_TYPE_IMMEDIATE_GEN_REGION = 38
@ -30,7 +30,7 @@ SEG_TYPE_END_OF_PAGE = 49
 SEG_TYPE_END_OF_FILE = 51
 # file literals
-FILE_HEADER_ID = b'\x97\x4A\x42\x32\x0D\x0A\x1A\x0A'
+FILE_HEADER_ID = b"\x97\x4A\x42\x32\x0D\x0A\x1A\x0A"
 FILE_HEAD_FLAG_SEQUENTIAL = 0b00000001
@ -66,12 +66,14 @@ def unpack_int(format: str, buffer: bytes) -> int:
 JBIG2SegmentFlags = Dict[str, Union[int, bool]]
 JBIG2RetentionFlags = Dict[str, Union[int, List[int], List[bool]]]
-JBIG2Segment = Dict[str, Union[bool, int, bytes, JBIG2SegmentFlags,
+JBIG2Segment = Dict[
-                               JBIG2RetentionFlags]]
+    str, Union[bool, int, bytes, JBIG2SegmentFlags, JBIG2RetentionFlags]
 ]
 class JBIG2StreamReader:
    """Read segments from a JBIG2 byte stream"""
    def __init__(self, stream: BinaryIO) -> None:
        self.stream = stream
@ -96,29 +98,23 @@ class JBIG2StreamReader:
        return segments
    def is_eof(self) -> bool:
-        if self.stream.read(1) == b'':
+        if self.stream.read(1) == b"":
            return True
        else:
            self.stream.seek(-1, os.SEEK_CUR)
            return False
    def parse_flags(
-        self,
+        self, segment: JBIG2Segment, flags: int, field: bytes
        segment: JBIG2Segment,
        flags: int,
        field: bytes
    ) -> JBIG2SegmentFlags:
        return {
            "deferred": check_flag(HEADER_FLAG_DEFERRED, flags),
            "page_assoc_long": check_flag(HEADER_FLAG_PAGE_ASSOC_LONG, flags),
-            "type": masked_value(SEG_TYPE_MASK, flags)
+            "type": masked_value(SEG_TYPE_MASK, flags),
        }
    def parse_retention_flags(
-        self,
+        self, segment: JBIG2Segment, flags: int, field: bytes
        segment: JBIG2Segment,
        flags: int,
        field: bytes
    ) -> JBIG2RetentionFlags:
        ref_count = masked_value(REF_COUNT_SHORT_MASK, flags)
        retain_segments = []
@ -159,31 +155,23 @@ class JBIG2StreamReader:
            "ref_segments": ref_segments,
        }
-    def parse_page_assoc(
+    def parse_page_assoc(self, segment: JBIG2Segment, page: int, field: bytes) -> int:
        self,
        segment: JBIG2Segment,
        page: int,
        field: bytes
    ) -> int:
        if cast(JBIG2SegmentFlags, segment["flags"])["page_assoc_long"]:
            field += self.stream.read(3)
            page = unpack_int(">L", field)
        return page
    def parse_data_length(
-        self,
+        self, segment: JBIG2Segment, length: int, field: bytes
        segment: JBIG2Segment,
        length: int,
        field: bytes
    ) -> int:
        if length:
-            if (cast(JBIG2SegmentFlags, segment["flags"])["type"] ==
+            if (
-                    SEG_TYPE_IMMEDIATE_GEN_REGION) \
+                cast(JBIG2SegmentFlags, segment["flags"])["type"]
-                    and (length == DATA_LEN_UNKNOWN):
+                == SEG_TYPE_IMMEDIATE_GEN_REGION
            ) and (length == DATA_LEN_UNKNOWN):
                raise NotImplementedError(
-                    "Working with unknown segment length "
+                    "Working with unknown segment length " "is not implemented yet"
                    "is not implemented yet"
                )
            else:
                segment["raw_data"] = self.stream.read(length)
@ -195,18 +183,16 @@ class JBIG2StreamWriter:
    """Write JBIG2 segments to a file in JBIG2 format"""
    EMPTY_RETENTION_FLAGS: JBIG2RetentionFlags = {
-        'ref_count': 0,
+        "ref_count": 0,
-        'ref_segments': cast(List[int], []),
+        "ref_segments": cast(List[int], []),
-        'retain_segments': cast(List[bool], [])
+        "retain_segments": cast(List[bool], []),
    }
    def __init__(self, stream: BinaryIO) -> None:
        self.stream = stream
    def write_segments(
-        self,
+        self, segments: Iterable[JBIG2Segment], fix_last_page: bool = True
        segments: Iterable[JBIG2Segment],
        fix_last_page: bool = True
    ) -> int:
        data_len = 0
        current_page: Optional[int] = None
@ -222,8 +208,10 @@ class JBIG2StreamWriter:
            if fix_last_page:
                seg_page = cast(int, segment.get("page_assoc"))
-                if cast(JBIG2SegmentFlags, segment["flags"])["type"] == \
+                if (
-                        SEG_TYPE_END_OF_PAGE:
+                    cast(JBIG2SegmentFlags, segment["flags"])["type"]
                    == SEG_TYPE_END_OF_PAGE
                ):
                    current_page = None
                elif seg_page:
                    current_page = seg_page
@ -237,9 +225,7 @@ class JBIG2StreamWriter:
        return data_len
    def write_file(
-        self,
+        self, segments: Iterable[JBIG2Segment], fix_last_page: bool = True
        segments: Iterable[JBIG2Segment],
        fix_last_page: bool = True
    ) -> int:
        header = FILE_HEADER_ID
        header_flags = FILE_HEAD_FLAG_SEQUENTIAL
@ -270,7 +256,7 @@ class JBIG2StreamWriter:
        return data_len
    def encode_segment(self, segment: JBIG2Segment) -> bytes:
-        data = b''
+        data = b""
        for field_format, name in SEG_STRUCT:
            value = segment.get(name)
            encoder = getattr(self, "encode_%s" % name, None)
@ -281,27 +267,26 @@ class JBIG2StreamWriter:
            data += field
        return data
-    def encode_flags(self, value: JBIG2SegmentFlags, segment: JBIG2Segment
+    def encode_flags(self, value: JBIG2SegmentFlags, segment: JBIG2Segment) -> bytes:
                     ) -> bytes:
        flags = 0
        if value.get("deferred"):
            flags |= HEADER_FLAG_DEFERRED
        if "page_assoc_long" in value:
-            flags |= HEADER_FLAG_PAGE_ASSOC_LONG \
+            flags |= HEADER_FLAG_PAGE_ASSOC_LONG if value["page_assoc_long"] else flags
                if value["page_assoc_long"] else flags
        else:
-            flags |= HEADER_FLAG_PAGE_ASSOC_LONG \
+            flags |= (
-                if cast(int, segment.get("page", 0)) > 255 else flags
+                HEADER_FLAG_PAGE_ASSOC_LONG
                if cast(int, segment.get("page", 0)) > 255
                else flags
            )
        flags |= mask_value(SEG_TYPE_MASK, value["type"])
        return pack(">B", flags)
    def encode_retention_flags(
-        self,
+        self, value: JBIG2RetentionFlags, segment: JBIG2Segment
        value: JBIG2RetentionFlags,
        segment: JBIG2Segment
    ) -> bytes:
        flags = []
        flags_format = ">B"
@ -318,15 +303,12 @@ class JBIG2StreamWriter:
        else:
            bytes_count = math.ceil((ref_count + 1) / 8)
            flags_format = ">L" + ("B" * bytes_count)
-            flags_dword = mask_value(
+            flags_dword = mask_value(REF_COUNT_SHORT_MASK, REF_COUNT_LONG) << 24
                REF_COUNT_SHORT_MASK,
                REF_COUNT_LONG
            ) << 24
            flags.append(flags_dword)
            for byte_index in range(bytes_count):
                ret_byte = 0
-                ret_part = retain_segments[byte_index * 8:byte_index * 8 + 8]
+                ret_part = retain_segments[byte_index * 8 : byte_index * 8 + 8]
                for bit_pos, ret_seg in enumerate(ret_part):
                    ret_byte |= 1 << bit_pos if ret_seg else ret_byte
@ -353,26 +335,22 @@ class JBIG2StreamWriter:
        data += cast(bytes, segment["raw_data"])
        return data
-    def get_eop_segment(
+    def get_eop_segment(self, seg_number: int, page_number: int) -> JBIG2Segment:
        self,
        seg_number: int,
        page_number: int
    ) -> JBIG2Segment:
        return {
-            'data_length': 0,
+            "data_length": 0,
-            'flags': {'deferred': False, 'type': SEG_TYPE_END_OF_PAGE},
+            "flags": {"deferred": False, "type": SEG_TYPE_END_OF_PAGE},
-            'number': seg_number,
+            "number": seg_number,
-            'page_assoc': page_number,
+            "page_assoc": page_number,
-            'raw_data': b'',
+            "raw_data": b"",
-            'retention_flags': JBIG2StreamWriter.EMPTY_RETENTION_FLAGS
+            "retention_flags": JBIG2StreamWriter.EMPTY_RETENTION_FLAGS,
        }
    def get_eof_segment(self, seg_number: int) -> JBIG2Segment:
        return {
-            'data_length': 0,
+            "data_length": 0,
-            'flags': {'deferred': False, 'type': SEG_TYPE_END_OF_FILE},
+            "flags": {"deferred": False, "type": SEG_TYPE_END_OF_FILE},
-            'number': seg_number,
+            "number": seg_number,
-            'page_assoc': 0,
+            "page_assoc": 0,
-            'raw_data': b'',
+            "raw_data": b"",
-            'retention_flags': JBIG2StreamWriter.EMPTY_RETENTION_FLAGS
+            "retention_flags": JBIG2StreamWriter.EMPTY_RETENTION_FLAGS,
        }
--- a/pdfminer/latin_enc.py
+++ b/pdfminer/latin_enc.py
@ -7,241 +7,240 @@ This table is extracted from PDF Reference Manual 1.6, pp.925
 from typing import List, Optional, Tuple
-EncodingRow = \
+EncodingRow = Tuple[str, Optional[int], Optional[int], Optional[int], Optional[int]]
    Tuple[str, Optional[int], Optional[int], Optional[int], Optional[int]]
 ENCODING: List[EncodingRow] = [
-  # (name, std, mac, win, pdf)
+    # (name, std, mac, win, pdf)
-  ('A', 65, 65, 65, 65),
+    ("A", 65, 65, 65, 65),
-  ('AE', 225, 174, 198, 198),
+    ("AE", 225, 174, 198, 198),
-  ('Aacute', None, 231, 193, 193),
+    ("Aacute", None, 231, 193, 193),
-  ('Acircumflex', None, 229, 194, 194),
+    ("Acircumflex", None, 229, 194, 194),
-  ('Adieresis', None, 128, 196, 196),
+    ("Adieresis", None, 128, 196, 196),
-  ('Agrave', None, 203, 192, 192),
+    ("Agrave", None, 203, 192, 192),
-  ('Aring', None, 129, 197, 197),
+    ("Aring", None, 129, 197, 197),
-  ('Atilde', None, 204, 195, 195),
+    ("Atilde", None, 204, 195, 195),
-  ('B', 66, 66, 66, 66),
+    ("B", 66, 66, 66, 66),
-  ('C', 67, 67, 67, 67),
+    ("C", 67, 67, 67, 67),
-  ('Ccedilla', None, 130, 199, 199),
+    ("Ccedilla", None, 130, 199, 199),
-  ('D', 68, 68, 68, 68),
+    ("D", 68, 68, 68, 68),
-  ('E', 69, 69, 69, 69),
+    ("E", 69, 69, 69, 69),
-  ('Eacute', None, 131, 201, 201),
+    ("Eacute", None, 131, 201, 201),
-  ('Ecircumflex', None, 230, 202, 202),
+    ("Ecircumflex", None, 230, 202, 202),
-  ('Edieresis', None, 232, 203, 203),
+    ("Edieresis", None, 232, 203, 203),
-  ('Egrave', None, 233, 200, 200),
+    ("Egrave", None, 233, 200, 200),
-  ('Eth', None, None, 208, 208),
+    ("Eth", None, None, 208, 208),
-  ('Euro', None, None, 128, 160),
+    ("Euro", None, None, 128, 160),
-  ('F', 70, 70, 70, 70),
+    ("F", 70, 70, 70, 70),
-  ('G', 71, 71, 71, 71),
+    ("G", 71, 71, 71, 71),
-  ('H', 72, 72, 72, 72),
+    ("H", 72, 72, 72, 72),
-  ('I', 73, 73, 73, 73),
+    ("I", 73, 73, 73, 73),
-  ('Iacute', None, 234, 205, 205),
+    ("Iacute", None, 234, 205, 205),
-  ('Icircumflex', None, 235, 206, 206),
+    ("Icircumflex", None, 235, 206, 206),
-  ('Idieresis', None, 236, 207, 207),
+    ("Idieresis", None, 236, 207, 207),
-  ('Igrave', None, 237, 204, 204),
+    ("Igrave", None, 237, 204, 204),
-  ('J', 74, 74, 74, 74),
+    ("J", 74, 74, 74, 74),
-  ('K', 75, 75, 75, 75),
+    ("K", 75, 75, 75, 75),
-  ('L', 76, 76, 76, 76),
+    ("L", 76, 76, 76, 76),
-  ('Lslash', 232, None, None, 149),
+    ("Lslash", 232, None, None, 149),
-  ('M', 77, 77, 77, 77),
+    ("M", 77, 77, 77, 77),
-  ('N', 78, 78, 78, 78),
+    ("N", 78, 78, 78, 78),
-  ('Ntilde', None, 132, 209, 209),
+    ("Ntilde", None, 132, 209, 209),
-  ('O', 79, 79, 79, 79),
+    ("O", 79, 79, 79, 79),
-  ('OE', 234, 206, 140, 150),
+    ("OE", 234, 206, 140, 150),
-  ('Oacute', None, 238, 211, 211),
+    ("Oacute", None, 238, 211, 211),
-  ('Ocircumflex', None, 239, 212, 212),
+    ("Ocircumflex", None, 239, 212, 212),
-  ('Odieresis', None, 133, 214, 214),
+    ("Odieresis", None, 133, 214, 214),
-  ('Ograve', None, 241, 210, 210),
+    ("Ograve", None, 241, 210, 210),
-  ('Oslash', 233, 175, 216, 216),
+    ("Oslash", 233, 175, 216, 216),
-  ('Otilde', None, 205, 213, 213),
+    ("Otilde", None, 205, 213, 213),
-  ('P', 80, 80, 80, 80),
+    ("P", 80, 80, 80, 80),
-  ('Q', 81, 81, 81, 81),
+    ("Q", 81, 81, 81, 81),
-  ('R', 82, 82, 82, 82),
+    ("R", 82, 82, 82, 82),
-  ('S', 83, 83, 83, 83),
+    ("S", 83, 83, 83, 83),
-  ('Scaron', None, None, 138, 151),
+    ("Scaron", None, None, 138, 151),
-  ('T', 84, 84, 84, 84),
+    ("T", 84, 84, 84, 84),
-  ('Thorn', None, None, 222, 222),
+    ("Thorn", None, None, 222, 222),
-  ('U', 85, 85, 85, 85),
+    ("U", 85, 85, 85, 85),
-  ('Uacute', None, 242, 218, 218),
+    ("Uacute", None, 242, 218, 218),
-  ('Ucircumflex', None, 243, 219, 219),
+    ("Ucircumflex", None, 243, 219, 219),
-  ('Udieresis', None, 134, 220, 220),
+    ("Udieresis", None, 134, 220, 220),
-  ('Ugrave', None, 244, 217, 217),
+    ("Ugrave", None, 244, 217, 217),
-  ('V', 86, 86, 86, 86),
+    ("V", 86, 86, 86, 86),
-  ('W', 87, 87, 87, 87),
+    ("W", 87, 87, 87, 87),
-  ('X', 88, 88, 88, 88),
+    ("X", 88, 88, 88, 88),
-  ('Y', 89, 89, 89, 89),
+    ("Y", 89, 89, 89, 89),
-  ('Yacute', None, None, 221, 221),
+    ("Yacute", None, None, 221, 221),
-  ('Ydieresis', None, 217, 159, 152),
+    ("Ydieresis", None, 217, 159, 152),
-  ('Z', 90, 90, 90, 90),
+    ("Z", 90, 90, 90, 90),
-  ('Zcaron', None, None, 142, 153),
+    ("Zcaron", None, None, 142, 153),
-  ('a', 97, 97, 97, 97),
+    ("a", 97, 97, 97, 97),
-  ('aacute', None, 135, 225, 225),
+    ("aacute", None, 135, 225, 225),
-  ('acircumflex', None, 137, 226, 226),
+    ("acircumflex", None, 137, 226, 226),
-  ('acute', 194, 171, 180, 180),
+    ("acute", 194, 171, 180, 180),
-  ('adieresis', None, 138, 228, 228),
+    ("adieresis", None, 138, 228, 228),
-  ('ae', 241, 190, 230, 230),
+    ("ae", 241, 190, 230, 230),
-  ('agrave', None, 136, 224, 224),
+    ("agrave", None, 136, 224, 224),
-  ('ampersand', 38, 38, 38, 38),
+    ("ampersand", 38, 38, 38, 38),
-  ('aring', None, 140, 229, 229),
+    ("aring", None, 140, 229, 229),
-  ('asciicircum', 94, 94, 94, 94),
+    ("asciicircum", 94, 94, 94, 94),
-  ('asciitilde', 126, 126, 126, 126),
+    ("asciitilde", 126, 126, 126, 126),
-  ('asterisk', 42, 42, 42, 42),
+    ("asterisk", 42, 42, 42, 42),
-  ('at', 64, 64, 64, 64),
+    ("at", 64, 64, 64, 64),
-  ('atilde', None, 139, 227, 227),
+    ("atilde", None, 139, 227, 227),
-  ('b', 98, 98, 98, 98),
+    ("b", 98, 98, 98, 98),
-  ('backslash', 92, 92, 92, 92),
+    ("backslash", 92, 92, 92, 92),
-  ('bar', 124, 124, 124, 124),
+    ("bar", 124, 124, 124, 124),
-  ('braceleft', 123, 123, 123, 123),
+    ("braceleft", 123, 123, 123, 123),
-  ('braceright', 125, 125, 125, 125),
+    ("braceright", 125, 125, 125, 125),
-  ('bracketleft', 91, 91, 91, 91),
+    ("bracketleft", 91, 91, 91, 91),
-  ('bracketright', 93, 93, 93, 93),
+    ("bracketright", 93, 93, 93, 93),
-  ('breve', 198, 249, None, 24),
+    ("breve", 198, 249, None, 24),
-  ('brokenbar', None, None, 166, 166),
+    ("brokenbar", None, None, 166, 166),
-  ('bullet', 183, 165, 149, 128),
+    ("bullet", 183, 165, 149, 128),
-  ('c', 99, 99, 99, 99),
+    ("c", 99, 99, 99, 99),
-  ('caron', 207, 255, None, 25),
+    ("caron", 207, 255, None, 25),
-  ('ccedilla', None, 141, 231, 231),
+    ("ccedilla", None, 141, 231, 231),
-  ('cedilla', 203, 252, 184, 184),
+    ("cedilla", 203, 252, 184, 184),
-  ('cent', 162, 162, 162, 162),
+    ("cent", 162, 162, 162, 162),
-  ('circumflex', 195, 246, 136, 26),
+    ("circumflex", 195, 246, 136, 26),
-  ('colon', 58, 58, 58, 58),
+    ("colon", 58, 58, 58, 58),
-  ('comma', 44, 44, 44, 44),
+    ("comma", 44, 44, 44, 44),
-  ('copyright', None, 169, 169, 169),
+    ("copyright", None, 169, 169, 169),
-  ('currency', 168, 219, 164, 164),
+    ("currency", 168, 219, 164, 164),
-  ('d', 100, 100, 100, 100),
+    ("d", 100, 100, 100, 100),
-  ('dagger', 178, 160, 134, 129),
+    ("dagger", 178, 160, 134, 129),
-  ('daggerdbl', 179, 224, 135, 130),
+    ("daggerdbl", 179, 224, 135, 130),
-  ('degree', None, 161, 176, 176),
+    ("degree", None, 161, 176, 176),
-  ('dieresis', 200, 172, 168, 168),
+    ("dieresis", 200, 172, 168, 168),
-  ('divide', None, 214, 247, 247),
+    ("divide", None, 214, 247, 247),
-  ('dollar', 36, 36, 36, 36),
+    ("dollar", 36, 36, 36, 36),
-  ('dotaccent', 199, 250, None, 27),
+    ("dotaccent", 199, 250, None, 27),
-  ('dotlessi', 245, 245, None, 154),
+    ("dotlessi", 245, 245, None, 154),
-  ('e', 101, 101, 101, 101),
+    ("e", 101, 101, 101, 101),
-  ('eacute', None, 142, 233, 233),
+    ("eacute", None, 142, 233, 233),
-  ('ecircumflex', None, 144, 234, 234),
+    ("ecircumflex", None, 144, 234, 234),
-  ('edieresis', None, 145, 235, 235),
+    ("edieresis", None, 145, 235, 235),
-  ('egrave', None, 143, 232, 232),
+    ("egrave", None, 143, 232, 232),
-  ('eight', 56, 56, 56, 56),
+    ("eight", 56, 56, 56, 56),
-  ('ellipsis', 188, 201, 133, 131),
+    ("ellipsis", 188, 201, 133, 131),
-  ('emdash', 208, 209, 151, 132),
+    ("emdash", 208, 209, 151, 132),
-  ('endash', 177, 208, 150, 133),
+    ("endash", 177, 208, 150, 133),
-  ('equal', 61, 61, 61, 61),
+    ("equal", 61, 61, 61, 61),
-  ('eth', None, None, 240, 240),
+    ("eth", None, None, 240, 240),
-  ('exclam', 33, 33, 33, 33),
+    ("exclam", 33, 33, 33, 33),
-  ('exclamdown', 161, 193, 161, 161),
+    ("exclamdown", 161, 193, 161, 161),
-  ('f', 102, 102, 102, 102),
+    ("f", 102, 102, 102, 102),
-  ('fi', 174, 222, None, 147),
+    ("fi", 174, 222, None, 147),
-  ('five', 53, 53, 53, 53),
+    ("five", 53, 53, 53, 53),
-  ('fl', 175, 223, None, 148),
+    ("fl", 175, 223, None, 148),
-  ('florin', 166, 196, 131, 134),
+    ("florin", 166, 196, 131, 134),
-  ('four', 52, 52, 52, 52),
+    ("four", 52, 52, 52, 52),
-  ('fraction', 164, 218, None, 135),
+    ("fraction", 164, 218, None, 135),
-  ('g', 103, 103, 103, 103),
+    ("g", 103, 103, 103, 103),
-  ('germandbls', 251, 167, 223, 223),
+    ("germandbls", 251, 167, 223, 223),
-  ('grave', 193, 96, 96, 96),
+    ("grave", 193, 96, 96, 96),
-  ('greater', 62, 62, 62, 62),
+    ("greater", 62, 62, 62, 62),
-  ('guillemotleft', 171, 199, 171, 171),
+    ("guillemotleft", 171, 199, 171, 171),
-  ('guillemotright', 187, 200, 187, 187),
+    ("guillemotright", 187, 200, 187, 187),
-  ('guilsinglleft', 172, 220, 139, 136),
+    ("guilsinglleft", 172, 220, 139, 136),
-  ('guilsinglright', 173, 221, 155, 137),
+    ("guilsinglright", 173, 221, 155, 137),
-  ('h', 104, 104, 104, 104),
+    ("h", 104, 104, 104, 104),
-  ('hungarumlaut', 205, 253, None, 28),
+    ("hungarumlaut", 205, 253, None, 28),
-  ('hyphen', 45, 45, 45, 45),
+    ("hyphen", 45, 45, 45, 45),
-  ('i', 105, 105, 105, 105),
+    ("i", 105, 105, 105, 105),
-  ('iacute', None, 146, 237, 237),
+    ("iacute", None, 146, 237, 237),
-  ('icircumflex', None, 148, 238, 238),
+    ("icircumflex", None, 148, 238, 238),
-  ('idieresis', None, 149, 239, 239),
+    ("idieresis", None, 149, 239, 239),
-  ('igrave', None, 147, 236, 236),
+    ("igrave", None, 147, 236, 236),
-  ('j', 106, 106, 106, 106),
+    ("j", 106, 106, 106, 106),
-  ('k', 107, 107, 107, 107),
+    ("k", 107, 107, 107, 107),
-  ('l', 108, 108, 108, 108),
+    ("l", 108, 108, 108, 108),
-  ('less', 60, 60, 60, 60),
+    ("less", 60, 60, 60, 60),
-  ('logicalnot', None, 194, 172, 172),
+    ("logicalnot", None, 194, 172, 172),
-  ('lslash', 248, None, None, 155),
+    ("lslash", 248, None, None, 155),
-  ('m', 109, 109, 109, 109),
+    ("m", 109, 109, 109, 109),
-  ('macron', 197, 248, 175, 175),
+    ("macron", 197, 248, 175, 175),
-  ('minus', None, None, None, 138),
+    ("minus", None, None, None, 138),
-  ('mu', None, 181, 181, 181),
+    ("mu", None, 181, 181, 181),
-  ('multiply', None, None, 215, 215),
+    ("multiply", None, None, 215, 215),
-  ('n', 110, 110, 110, 110),
+    ("n", 110, 110, 110, 110),
-  ('nbspace', None, 202, 160, None),
+    ("nbspace", None, 202, 160, None),
-  ('nine', 57, 57, 57, 57),
+    ("nine", 57, 57, 57, 57),
-  ('ntilde', None, 150, 241, 241),
+    ("ntilde", None, 150, 241, 241),
-  ('numbersign', 35, 35, 35, 35),
+    ("numbersign", 35, 35, 35, 35),
-  ('o', 111, 111, 111, 111),
+    ("o", 111, 111, 111, 111),
-  ('oacute', None, 151, 243, 243),
+    ("oacute", None, 151, 243, 243),
-  ('ocircumflex', None, 153, 244, 244),
+    ("ocircumflex", None, 153, 244, 244),
-  ('odieresis', None, 154, 246, 246),
+    ("odieresis", None, 154, 246, 246),
-  ('oe', 250, 207, 156, 156),
+    ("oe", 250, 207, 156, 156),
-  ('ogonek', 206, 254, None, 29),
+    ("ogonek", 206, 254, None, 29),
-  ('ograve', None, 152, 242, 242),
+    ("ograve", None, 152, 242, 242),
-  ('one', 49, 49, 49, 49),
+    ("one", 49, 49, 49, 49),
-  ('onehalf', None, None, 189, 189),
+    ("onehalf", None, None, 189, 189),
-  ('onequarter', None, None, 188, 188),
+    ("onequarter", None, None, 188, 188),
-  ('onesuperior', None, None, 185, 185),
+    ("onesuperior", None, None, 185, 185),
-  ('ordfeminine', 227, 187, 170, 170),
+    ("ordfeminine", 227, 187, 170, 170),
-  ('ordmasculine', 235, 188, 186, 186),
+    ("ordmasculine", 235, 188, 186, 186),
-  ('oslash', 249, 191, 248, 248),
+    ("oslash", 249, 191, 248, 248),
-  ('otilde', None, 155, 245, 245),
+    ("otilde", None, 155, 245, 245),
-  ('p', 112, 112, 112, 112),
+    ("p", 112, 112, 112, 112),
-  ('paragraph', 182, 166, 182, 182),
+    ("paragraph", 182, 166, 182, 182),
-  ('parenleft', 40, 40, 40, 40),
+    ("parenleft", 40, 40, 40, 40),
-  ('parenright', 41, 41, 41, 41),
+    ("parenright", 41, 41, 41, 41),
-  ('percent', 37, 37, 37, 37),
+    ("percent", 37, 37, 37, 37),
-  ('period', 46, 46, 46, 46),
+    ("period", 46, 46, 46, 46),
-  ('periodcentered', 180, 225, 183, 183),
+    ("periodcentered", 180, 225, 183, 183),
-  ('perthousand', 189, 228, 137, 139),
+    ("perthousand", 189, 228, 137, 139),
-  ('plus', 43, 43, 43, 43),
+    ("plus", 43, 43, 43, 43),
-  ('plusminus', None, 177, 177, 177),
+    ("plusminus", None, 177, 177, 177),
-  ('q', 113, 113, 113, 113),
+    ("q", 113, 113, 113, 113),
-  ('question', 63, 63, 63, 63),
+    ("question", 63, 63, 63, 63),
-  ('questiondown', 191, 192, 191, 191),
+    ("questiondown", 191, 192, 191, 191),
-  ('quotedbl', 34, 34, 34, 34),
+    ("quotedbl", 34, 34, 34, 34),
-  ('quotedblbase', 185, 227, 132, 140),
+    ("quotedblbase", 185, 227, 132, 140),
-  ('quotedblleft', 170, 210, 147, 141),
+    ("quotedblleft", 170, 210, 147, 141),
-  ('quotedblright', 186, 211, 148, 142),
+    ("quotedblright", 186, 211, 148, 142),
-  ('quoteleft', 96, 212, 145, 143),
+    ("quoteleft", 96, 212, 145, 143),
-  ('quoteright', 39, 213, 146, 144),
+    ("quoteright", 39, 213, 146, 144),
-  ('quotesinglbase', 184, 226, 130, 145),
+    ("quotesinglbase", 184, 226, 130, 145),
-  ('quotesingle', 169, 39, 39, 39),
+    ("quotesingle", 169, 39, 39, 39),
-  ('r', 114, 114, 114, 114),
+    ("r", 114, 114, 114, 114),
-  ('registered', None, 168, 174, 174),
+    ("registered", None, 168, 174, 174),
-  ('ring', 202, 251, None, 30),
+    ("ring", 202, 251, None, 30),
-  ('s', 115, 115, 115, 115),
+    ("s", 115, 115, 115, 115),
-  ('scaron', None, None, 154, 157),
+    ("scaron", None, None, 154, 157),
-  ('section', 167, 164, 167, 167),
+    ("section", 167, 164, 167, 167),
-  ('semicolon', 59, 59, 59, 59),
+    ("semicolon", 59, 59, 59, 59),
-  ('seven', 55, 55, 55, 55),
+    ("seven", 55, 55, 55, 55),
-  ('six', 54, 54, 54, 54),
+    ("six", 54, 54, 54, 54),
-  ('slash', 47, 47, 47, 47),
+    ("slash", 47, 47, 47, 47),
-  ('space', 32, 32, 32, 32),
+    ("space", 32, 32, 32, 32),
-  ('space', None, 202, 160, None),
+    ("space", None, 202, 160, None),
-  ('space', None, 202, 173, None),
+    ("space", None, 202, 173, None),
-  ('sterling', 163, 163, 163, 163),
+    ("sterling", 163, 163, 163, 163),
-  ('t', 116, 116, 116, 116),
+    ("t", 116, 116, 116, 116),
-  ('thorn', None, None, 254, 254),
+    ("thorn", None, None, 254, 254),
-  ('three', 51, 51, 51, 51),
+    ("three", 51, 51, 51, 51),
-  ('threequarters', None, None, 190, 190),
+    ("threequarters", None, None, 190, 190),
-  ('threesuperior', None, None, 179, 179),
+    ("threesuperior", None, None, 179, 179),
-  ('tilde', 196, 247, 152, 31),
+    ("tilde", 196, 247, 152, 31),
-  ('trademark', None, 170, 153, 146),
+    ("trademark", None, 170, 153, 146),
-  ('two', 50, 50, 50, 50),
+    ("two", 50, 50, 50, 50),
-  ('twosuperior', None, None, 178, 178),
+    ("twosuperior", None, None, 178, 178),
-  ('u', 117, 117, 117, 117),
+    ("u", 117, 117, 117, 117),
-  ('uacute', None, 156, 250, 250),
+    ("uacute", None, 156, 250, 250),
-  ('ucircumflex', None, 158, 251, 251),
+    ("ucircumflex", None, 158, 251, 251),
-  ('udieresis', None, 159, 252, 252),
+    ("udieresis", None, 159, 252, 252),
-  ('ugrave', None, 157, 249, 249),
+    ("ugrave", None, 157, 249, 249),
-  ('underscore', 95, 95, 95, 95),
+    ("underscore", 95, 95, 95, 95),
-  ('v', 118, 118, 118, 118),
+    ("v", 118, 118, 118, 118),
-  ('w', 119, 119, 119, 119),
+    ("w", 119, 119, 119, 119),
-  ('x', 120, 120, 120, 120),
+    ("x", 120, 120, 120, 120),
-  ('y', 121, 121, 121, 121),
+    ("y", 121, 121, 121, 121),
-  ('yacute', None, None, 253, 253),
+    ("yacute", None, None, 253, 253),
-  ('ydieresis', None, 216, 255, 255),
+    ("ydieresis", None, 216, 255, 255),
-  ('yen', 165, 180, 165, 165),
+    ("yen", 165, 180, 165, 165),
-  ('z', 122, 122, 122, 122),
+    ("z", 122, 122, 122, 122),
-  ('zcaron', None, None, 158, 158),
+    ("zcaron", None, None, 158, 158),
-  ('zero', 48, 48, 48, 48),
+    ("zero", 48, 48, 48, 48),
 ]
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@ -1,7 +1,19 @@
 import heapq
 import logging
-from typing import (Dict, Generic, Iterable, Iterator, List, Optional,
+from typing import (
-                    Sequence, Set, Tuple, TypeVar, Union, cast)
+    Dict,
    Generic,
    Iterable,
    Iterator,
    List,
    Optional,
    Sequence,
    Set,
    Tuple,
    TypeVar,
    Union,
    cast,
 )
 from .pdfcolor import PDFColorSpace
 from .pdffont import PDFFont
@ -25,7 +37,6 @@ logger = logging.getLogger(__name__)
 class IndexAssigner:
    def __init__(self, index: int = 0) -> None:
        self.index = index
@ -74,7 +85,7 @@ class LAParams:
        word_margin: float = 0.1,
        boxes_flow: Optional[float] = 0.5,
        detect_vertical: bool = False,
-        all_texts: bool = False
+        all_texts: bool = False,
    ) -> None:
        self.line_overlap = line_overlap
        self.char_margin = char_margin
@ -88,19 +99,22 @@ class LAParams:
    def _validate(self) -> None:
        if self.boxes_flow is not None:
-            boxes_flow_err_msg = ("LAParam boxes_flow should be None, or a "
+            boxes_flow_err_msg = (
-                                  "number between -1 and +1")
+                "LAParam boxes_flow should be None, or a " "number between -1 and +1"
-            if not (isinstance(self.boxes_flow, int) or
+            )
-                    isinstance(self.boxes_flow, float)):
+            if not (
                isinstance(self.boxes_flow, int) or isinstance(self.boxes_flow, float)
            ):
                raise TypeError(boxes_flow_err_msg)
            if not -1 <= self.boxes_flow <= 1:
                raise ValueError(boxes_flow_err_msg)
    def __repr__(self) -> str:
-        return '<LAParams: char_margin=%.1f, line_margin=%.1f, ' \
+        return (
-               'word_margin=%.1f all_texts=%r>' % \
+            "<LAParams: char_margin=%.1f, line_margin=%.1f, "
-               (self.char_margin, self.line_margin, self.word_margin,
+            "word_margin=%.1f all_texts=%r>"
-                self.all_texts)
+            % (self.char_margin, self.line_margin, self.word_margin, self.all_texts)
        )
 class LTItem:
@ -115,8 +129,7 @@ class LTText:
    """Interface for things that have text"""
    def __repr__(self) -> str:
-        return ('<%s %r>' %
+        return "<%s %r>" % (self.__class__.__name__, self.get_text())
                (self.__class__.__name__, self.get_text()))
    def get_text(self) -> str:
        """Text contained in this object"""
@ -131,8 +144,7 @@ class LTComponent(LTItem):
        self.set_bbox(bbox)
    def __repr__(self) -> str:
-        return ('<%s %s>' %
+        return "<%s %s>" % (self.__class__.__name__, bbox2str(self.bbox))
                (self.__class__.__name__, bbox2str(self.bbox)))
    # Disable comparison.
    def __lt__(self, _: object) -> bool:
@ -153,8 +165,8 @@ class LTComponent(LTItem):
        self.y0 = y0
        self.x1 = x1
        self.y1 = y1
-        self.width = x1-x0
+        self.width = x1 - x0
-        self.height = y1-y0
+        self.height = y1 - y0
        self.bbox = bbox
    def is_empty(self) -> bool:
@ -169,12 +181,12 @@ class LTComponent(LTItem):
        if self.is_hoverlap(obj):
            return 0
        else:
-            return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
+            return min(abs(self.x0 - obj.x1), abs(self.x1 - obj.x0))
    def hoverlap(self, obj: "LTComponent") -> float:
        assert isinstance(obj, LTComponent), str(type(obj))
        if self.is_hoverlap(obj):
-            return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
+            return min(abs(self.x0 - obj.x1), abs(self.x1 - obj.x0))
        else:
            return 0
@ -187,12 +199,12 @@ class LTComponent(LTItem):
        if self.is_voverlap(obj):
            return 0
        else:
-            return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
+            return min(abs(self.y0 - obj.y1), abs(self.y1 - obj.y0))
    def voverlap(self, obj: "LTComponent") -> float:
        assert isinstance(obj, LTComponent), str(type(obj))
        if self.is_voverlap(obj):
-            return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
+            return min(abs(self.y0 - obj.y1), abs(self.y1 - obj.y0))
        else:
            return 0
@ -208,7 +220,7 @@ class LTCurve(LTComponent):
        fill: bool = False,
        evenodd: bool = False,
        stroking_color: Optional[Color] = None,
-        non_stroking_color: Optional[Color] = None
+        non_stroking_color: Optional[Color] = None,
    ) -> None:
        LTComponent.__init__(self, get_bound(pts))
        self.pts = pts
@ -220,7 +232,7 @@ class LTCurve(LTComponent):
        self.non_stroking_color = non_stroking_color
    def get_pts(self) -> str:
-        return ','.join('%.3f,%.3f' % p for p in self.pts)
+        return ",".join("%.3f,%.3f" % p for p in self.pts)
 class LTLine(LTCurve):
@ -238,10 +250,18 @@ class LTLine(LTCurve):
        fill: bool = False,
        evenodd: bool = False,
        stroking_color: Optional[Color] = None,
-        non_stroking_color: Optional[Color] = None
+        non_stroking_color: Optional[Color] = None,
    ) -> None:
-        LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd,
+        LTCurve.__init__(
-                         stroking_color, non_stroking_color)
+            self,
            linewidth,
            [p0, p1],
            stroke,
            fill,
            evenodd,
            stroking_color,
            non_stroking_color,
        )
 class LTRect(LTCurve):
@ -258,12 +278,19 @@ class LTRect(LTCurve):
        fill: bool = False,
        evenodd: bool = False,
        stroking_color: Optional[Color] = None,
-        non_stroking_color: Optional[Color] = None
+        non_stroking_color: Optional[Color] = None,
    ) -> None:
        (x0, y0, x1, y1) = bbox
-        LTCurve.__init__(self, linewidth,
+        LTCurve.__init__(
-                         [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke,
+            self,
-                         fill, evenodd, stroking_color, non_stroking_color)
+            linewidth,
            [(x0, y0), (x1, y0), (x1, y1), (x0, y1)],
            stroke,
            fill,
            evenodd,
            stroking_color,
            non_stroking_color,
        )
 class LTImage(LTComponent):
@ -276,18 +303,20 @@ class LTImage(LTComponent):
        LTComponent.__init__(self, bbox)
        self.name = name
        self.stream = stream
-        self.srcsize = (stream.get_any(('W', 'Width')),
+        self.srcsize = (stream.get_any(("W", "Width")), stream.get_any(("H", "Height")))
-                        stream.get_any(('H', 'Height')))
+        self.imagemask = stream.get_any(("IM", "ImageMask"))
-        self.imagemask = stream.get_any(('IM', 'ImageMask'))
+        self.bits = stream.get_any(("BPC", "BitsPerComponent"), 1)
-        self.bits = stream.get_any(('BPC', 'BitsPerComponent'), 1)
+        self.colorspace = stream.get_any(("CS", "ColorSpace"))
        self.colorspace = stream.get_any(('CS', 'ColorSpace'))
        if not isinstance(self.colorspace, list):
            self.colorspace = [self.colorspace]
    def __repr__(self) -> str:
-        return ('<%s(%s) %s %r>' %
+        return "<%s(%s) %s %r>" % (
-                (self.__class__.__name__, self.name,
+            self.__class__.__name__,
-                 bbox2str(self.bbox), self.srcsize))
+            self.name,
            bbox2str(self.bbox),
            self.srcsize,
        )
 class LTAnno(LTItem, LTText):
@ -320,7 +349,7 @@ class LTChar(LTComponent, LTText):
        textwidth: float,
        textdisp: Union[float, Tuple[Optional[float], float]],
        ncs: PDFColorSpace,
-        graphicstate: PDFGraphicState
+        graphicstate: PDFGraphicState,
    ) -> None:
        LTText.__init__(self)
        self._text = text
@ -337,8 +366,8 @@ class LTChar(LTComponent, LTText):
            if vx is None:
                vx = fontsize * 0.5
            else:
-                vx = vx * fontsize * .001
+                vx = vx * fontsize * 0.001
-            vy = (1000 - vy) * fontsize * .001
+            vy = (1000 - vy) * fontsize * 0.001
            bbox_lower_left = (-vx, vy + rise + self.adv)
            bbox_upper_right = (-vx + fontsize, vy + rise)
        else:
@ -347,7 +376,7 @@ class LTChar(LTComponent, LTText):
            bbox_lower_left = (0, descent + rise)
            bbox_upper_right = (self.adv, descent + rise + fontsize)
        (a, b, c, d, e, f) = self.matrix
-        self.upright = (0 < a*d*scaling and b*c <= 0)
+        self.upright = 0 < a * d * scaling and b * c <= 0
        (x0, y0) = apply_matrix_pt(self.matrix, bbox_lower_left)
        (x1, y1) = apply_matrix_pt(self.matrix, bbox_upper_right)
        if x1 < x0:
@ -362,10 +391,14 @@ class LTChar(LTComponent, LTText):
        return
    def __repr__(self) -> str:
-        return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
+        return "<%s %s matrix=%s font=%r adv=%s text=%r>" % (
-                (self.__class__.__name__, bbox2str(self.bbox),
+            self.__class__.__name__,
-                 matrix2str(self.matrix), self.fontname, self.adv,
+            bbox2str(self.bbox),
-                 self.get_text()))
+            matrix2str(self.matrix),
            self.fontname,
            self.adv,
            self.get_text(),
        )
    def get_text(self) -> str:
        return self._text
@ -375,7 +408,7 @@ class LTChar(LTComponent, LTText):
        return True
-LTItemT = TypeVar('LTItemT', bound=LTItem)
+LTItemT = TypeVar("LTItemT", bound=LTItem)
 class LTContainer(LTComponent, Generic[LTItemT]):
@ -416,8 +449,14 @@ class LTExpandableContainer(LTContainer[LTItemT]):
    # super() LTContainer only considers LTItem (no bounding box).
    def add(self, obj: LTComponent) -> None:  # type: ignore[override]
        LTContainer.add(self, cast(LTItemT, obj))
-        self.set_bbox((min(self.x0, obj.x0), min(self.y0, obj.y0),
+        self.set_bbox(
-                       max(self.x1, obj.x1), max(self.y1, obj.y1)))
+            (
                min(self.x0, obj.x0),
                min(self.y0, obj.y0),
                max(self.x1, obj.x1),
                max(self.y1, obj.y1),
            )
        )
        return
@ -428,8 +467,9 @@ class LTTextContainer(LTExpandableContainer[LTItemT], LTText):
        return
    def get_text(self) -> str:
-        return ''.join(cast(LTText, obj).get_text() for obj in self
+        return "".join(
-                       if isinstance(obj, LTText))
+            cast(LTText, obj).get_text() for obj in self if isinstance(obj, LTText)
        )
 TextLineElement = Union[LTChar, LTAnno]
@ -448,17 +488,20 @@ class LTTextLine(LTTextContainer[TextLineElement]):
        return
    def __repr__(self) -> str:
-        return ('<%s %s %r>' %
+        return "<%s %s %r>" % (
-                (self.__class__.__name__, bbox2str(self.bbox),
+            self.__class__.__name__,
-                 self.get_text()))
+            bbox2str(self.bbox),
            self.get_text(),
        )
    def analyze(self, laparams: LAParams) -> None:
        LTTextContainer.analyze(self, laparams)
-        LTContainer.add(self, LTAnno('\n'))
+        LTContainer.add(self, LTAnno("\n"))
        return
-    def find_neighbors(self, plane: Plane[LTComponentT], ratio: float
+    def find_neighbors(
-                       ) -> List["LTTextLine"]:
+        self, plane: Plane[LTComponentT], ratio: float
    ) -> List["LTTextLine"]:
        raise NotImplementedError
@ -474,15 +517,13 @@ class LTTextLineHorizontal(LTTextLine):
        if isinstance(obj, LTChar) and self.word_margin:
            margin = self.word_margin * max(obj.width, obj.height)
            if self._x1 < obj.x0 - margin:
-                LTContainer.add(self, LTAnno(' '))
+                LTContainer.add(self, LTAnno(" "))
        self._x1 = obj.x1
        super().add(obj)
        return
    def find_neighbors(
-        self,
+        self, plane: Plane[LTComponentT], ratio: float
        plane: Plane[LTComponentT],
        ratio: float
    ) -> List[LTTextLine]:
        """
        Finds neighboring LTTextLineHorizontals in the plane.
@ -494,49 +535,41 @@ class LTTextLineHorizontal(LTTextLine):
        """
        d = ratio * self.height
        objs = plane.find((self.x0, self.y0 - d, self.x1, self.y1 + d))
-        return [obj for obj in objs
+        return [
-                if (isinstance(obj, LTTextLineHorizontal) and
+            obj
-                    self._is_same_height_as(obj, tolerance=d) and
+            for obj in objs
-                    (self._is_left_aligned_with(obj, tolerance=d) or
+            if (
-                     self._is_right_aligned_with(obj, tolerance=d) or
+                isinstance(obj, LTTextLineHorizontal)
-                     self._is_centrally_aligned_with(obj, tolerance=d)))]
+                and self._is_same_height_as(obj, tolerance=d)
                and (
                    self._is_left_aligned_with(obj, tolerance=d)
                    or self._is_right_aligned_with(obj, tolerance=d)
                    or self._is_centrally_aligned_with(obj, tolerance=d)
                )
            )
        ]
-    def _is_left_aligned_with(
+    def _is_left_aligned_with(self, other: LTComponent, tolerance: float = 0) -> bool:
        self,
        other: LTComponent,
        tolerance: float = 0
    ) -> bool:
        """
        Whether the left-hand edge of `other` is within `tolerance`.
        """
        return abs(other.x0 - self.x0) <= tolerance
-    def _is_right_aligned_with(
+    def _is_right_aligned_with(self, other: LTComponent, tolerance: float = 0) -> bool:
        self,
        other: LTComponent,
        tolerance: float = 0
    ) -> bool:
        """
        Whether the right-hand edge of `other` is within `tolerance`.
        """
        return abs(other.x1 - self.x1) <= tolerance
    def _is_centrally_aligned_with(
-        self,
+        self, other: LTComponent, tolerance: float = 0
        other: LTComponent,
        tolerance: float = 0
    ) -> bool:
        """
        Whether the horizontal center of `other` is within `tolerance`.
        """
-        return abs(
+        return abs((other.x0 + other.x1) / 2 - (self.x0 + self.x1) / 2) <= tolerance
            (other.x0 + other.x1) / 2 - (self.x0 + self.x1) / 2) <= tolerance
-    def _is_same_height_as(
+    def _is_same_height_as(self, other: LTComponent, tolerance: float = 0) -> bool:
        self,
        other: LTComponent,
        tolerance: float = 0
    ) -> bool:
        return abs(other.height - self.height) <= tolerance
@ -552,15 +585,13 @@ class LTTextLineVertical(LTTextLine):
        if isinstance(obj, LTChar) and self.word_margin:
            margin = self.word_margin * max(obj.width, obj.height)
            if obj.y1 + margin < self._y0:
-                LTContainer.add(self, LTAnno(' '))
+                LTContainer.add(self, LTAnno(" "))
        self._y0 = obj.y0
        super().add(obj)
        return
    def find_neighbors(
-        self,
+        self, plane: Plane[LTComponentT], ratio: float
        plane: Plane[LTComponentT],
        ratio: float
    ) -> List[LTTextLine]:
        """
        Finds neighboring LTTextLineVerticals in the plane.
@ -572,43 +603,39 @@ class LTTextLineVertical(LTTextLine):
        """
        d = ratio * self.width
        objs = plane.find((self.x0 - d, self.y0, self.x1 + d, self.y1))
-        return [obj for obj in objs
+        return [
-                if (isinstance(obj, LTTextLineVertical) and
+            obj
-                    self._is_same_width_as(obj, tolerance=d) and
+            for obj in objs
-                    (self._is_lower_aligned_with(obj, tolerance=d) or
+            if (
-                     self._is_upper_aligned_with(obj, tolerance=d) or
+                isinstance(obj, LTTextLineVertical)
-                     self._is_centrally_aligned_with(obj, tolerance=d)))]
+                and self._is_same_width_as(obj, tolerance=d)
                and (
                    self._is_lower_aligned_with(obj, tolerance=d)
                    or self._is_upper_aligned_with(obj, tolerance=d)
                    or self._is_centrally_aligned_with(obj, tolerance=d)
                )
            )
        ]
-    def _is_lower_aligned_with(
+    def _is_lower_aligned_with(self, other: LTComponent, tolerance: float = 0) -> bool:
        self,
        other: LTComponent,
        tolerance: float = 0
    ) -> bool:
        """
        Whether the lower edge of `other` is within `tolerance`.
        """
        return abs(other.y0 - self.y0) <= tolerance
-    def _is_upper_aligned_with(
+    def _is_upper_aligned_with(self, other: LTComponent, tolerance: float = 0) -> bool:
        self,
        other: LTComponent,
        tolerance: float = 0
    ) -> bool:
        """
        Whether the upper edge of `other` is within `tolerance`.
        """
        return abs(other.y1 - self.y1) <= tolerance
    def _is_centrally_aligned_with(
-        self,
+        self, other: LTComponent, tolerance: float = 0
        other: LTComponent,
        tolerance: float = 0
    ) -> bool:
        """
        Whether the vertical center of `other` is within `tolerance`.
        """
-        return abs(
+        return abs((other.y0 + other.y1) / 2 - (self.y0 + self.y1) / 2) <= tolerance
            (other.y0 + other.y1) / 2 - (self.y0 + self.y1) / 2) <= tolerance
    def _is_same_width_as(self, other: LTComponent, tolerance: float) -> bool:
        return abs(other.width - self.width) <= tolerance
@ -628,9 +655,12 @@ class LTTextBox(LTTextContainer[LTTextLine]):
        return
    def __repr__(self) -> str:
-        return ('<%s(%s) %s %r>' %
+        return "<%s(%s) %s %r>" % (
-                (self.__class__.__name__,
+            self.__class__.__name__,
-                 self.index, bbox2str(self.bbox), self.get_text()))
+            self.index,
            bbox2str(self.bbox),
            self.get_text(),
        )
    def get_writing_mode(self) -> str:
        raise NotImplementedError
@ -643,7 +673,7 @@ class LTTextBoxHorizontal(LTTextBox):
        return
    def get_writing_mode(self) -> str:
-        return 'lr-tb'
+        return "lr-tb"
 class LTTextBoxVertical(LTTextBox):
@ -653,7 +683,7 @@ class LTTextBoxVertical(LTTextBox):
        return
    def get_writing_mode(self) -> str:
-        return 'tb-rl'
+        return "tb-rl"
 TextGroupElement = Union[LTTextBox, "LTTextGroup"]
@ -674,7 +704,8 @@ class LTTextGroupLRTB(LTTextGroup):
        # reorder the objects from top-left to bottom-right.
        self._objs.sort(
            key=lambda obj: (1 - boxes_flow) * obj.x0
-            - (1 + boxes_flow) * (obj.y0 + obj.y1))
+            - (1 + boxes_flow) * (obj.y0 + obj.y1)
        )
        return
@ -685,8 +716,9 @@ class LTTextGroupTBRL(LTTextGroup):
        boxes_flow = laparams.boxes_flow
        # reorder the objects from top-right to bottom-left.
        self._objs.sort(
-            key=lambda obj: - (1 + boxes_flow) * (obj.x0 + obj.x1)
+            key=lambda obj: -(1 + boxes_flow) * (obj.x0 + obj.x1)
-                            - (1 - boxes_flow) * obj.y1)
+            - (1 - boxes_flow) * obj.y1
        )
        return
@ -698,9 +730,7 @@ class LTLayoutContainer(LTContainer[LTComponent]):
    # group_objects: group text object to textlines.
    def group_objects(
-        self,
+        self, laparams: LAParams, objs: Iterable[LTComponent]
        laparams: LAParams,
        objs: Iterable[LTComponent]
    ) -> Iterator[LTTextLine]:
        obj0 = None
        line = None
@ -716,13 +746,14 @@ class LTLayoutContainer(LTContainer[LTComponent]):
                #
                #          |<--->|
                #        (char_margin)
-                halign = \
+                halign = (
-                    obj0.is_compatible(obj1) \
+                    obj0.is_compatible(obj1)
-                    and obj0.is_voverlap(obj1) \
+                    and obj0.is_voverlap(obj1)
-                    and min(obj0.height, obj1.height) * laparams.line_overlap \
+                    and min(obj0.height, obj1.height) * laparams.line_overlap
-                    < obj0.voverlap(obj1) \
+                    < obj0.voverlap(obj1)
-                    and obj0.hdistance(obj1) \
+                    and obj0.hdistance(obj1)
                    < max(obj0.width, obj1.width) * laparams.char_margin
                )
                # valign: obj0 and obj1 is vertically aligned.
                #
@ -738,17 +769,19 @@ class LTLayoutContainer(LTContainer[LTComponent]):
                #
                #     |<-->|
                #   (line_overlap)
-                valign = \
+                valign = (
-                    laparams.detect_vertical \
+                    laparams.detect_vertical
-                    and obj0.is_compatible(obj1) \
+                    and obj0.is_compatible(obj1)
-                    and obj0.is_hoverlap(obj1) \
+                    and obj0.is_hoverlap(obj1)
-                    and min(obj0.width, obj1.width) * laparams.line_overlap \
+                    and min(obj0.width, obj1.width) * laparams.line_overlap
-                    < obj0.hoverlap(obj1) \
+                    < obj0.hoverlap(obj1)
-                    and obj0.vdistance(obj1) \
+                    and obj0.vdistance(obj1)
                    < max(obj0.height, obj1.height) * laparams.char_margin
                )
-                if ((halign and isinstance(line, LTTextLineHorizontal)) or
+                if (halign and isinstance(line, LTTextLineHorizontal)) or (
-                        (valign and isinstance(line, LTTextLineVertical))):
+                    valign and isinstance(line, LTTextLineVertical)
                ):
                    line.add(obj1)
                elif line is not None:
@ -777,9 +810,7 @@ class LTLayoutContainer(LTContainer[LTComponent]):
        return
    def group_textlines(
-        self,
+        self, laparams: LAParams, lines: Iterable[LTTextLine]
        laparams: LAParams,
        lines: Iterable[LTTextLine]
    ) -> Iterator[LTTextBox]:
        """Group neighboring lines to textboxes"""
        plane: Plane[LTTextLine] = Plane(self.bbox)
@ -812,9 +843,7 @@ class LTLayoutContainer(LTContainer[LTComponent]):
        return
    def group_textboxes(
-        self,
+        self, laparams: LAParams, boxes: Sequence[LTTextBox]
        laparams: LAParams,
        boxes: Sequence[LTTextBox]
    ) -> List[LTTextGroup]:
        """Group textboxes hierarchically.
@ -853,8 +882,11 @@ class LTLayoutContainer(LTContainer[LTComponent]):
            y0 = min(obj1.y0, obj2.y0)
            x1 = max(obj1.x1, obj2.x1)
            y1 = max(obj1.y1, obj2.y1)
-            return (x1 - x0) * (y1 - y0) \
+            return (
-                - obj1.width*obj1.height - obj2.width*obj2.height
+                (x1 - x0) * (y1 - y0)
                - obj1.width * obj1.height
                - obj2.width * obj2.height
            )
        def isany(obj1: ElementT, obj2: ElementT) -> Set[ElementT]:
            """Check if there's any other object between obj1 and obj2."""
@ -868,10 +900,9 @@ class LTLayoutContainer(LTContainer[LTComponent]):
        dists: List[Tuple[bool, float, int, int, ElementT, ElementT]] = []
        for i in range(len(boxes)):
            box1 = boxes[i]
-            for j in range(i+1, len(boxes)):
+            for j in range(i + 1, len(boxes)):
                box2 = boxes[j]
-                dists.append((False, dist(box1, box2), id(box1), id(box2),
+                dists.append((False, dist(box1, box2), id(box1), id(box2), box1, box2))
                              box1, box2))
        heapq.heapify(dists)
        plane.extend(boxes)
@ -883,8 +914,9 @@ class LTLayoutContainer(LTContainer[LTComponent]):
                if not skip_isany and isany(obj1, obj2):
                    heapq.heappush(dists, (True, d, id1, id2, obj1, obj2))
                    continue
-                if isinstance(obj1, (LTTextBoxVertical, LTTextGroupTBRL)) or \
+                if isinstance(obj1, (LTTextBoxVertical, LTTextGroupTBRL)) or isinstance(
-                        isinstance(obj2, (LTTextBoxVertical, LTTextGroupTBRL)):
+                    obj2, (LTTextBoxVertical, LTTextGroupTBRL)
                ):
                    group: LTTextGroup = LTTextGroupTBRL([obj1, obj2])
                else:
                    group = LTTextGroupLRTB([obj1, obj2])
@ -893,8 +925,10 @@ class LTLayoutContainer(LTContainer[LTComponent]):
                done.update([id1, id2])
                for other in plane:
-                    heapq.heappush(dists, (False, dist(group, other),
+                    heapq.heappush(
-                                           id(group), id(other), group, other))
+                        dists,
                        (False, dist(group, other), id(group), id(other), group, other),
                    )
                plane.add(group)
        # By now only groups are in the plane
        return list(cast(LTTextGroup, g) for g in plane)
@ -902,8 +936,7 @@ class LTLayoutContainer(LTContainer[LTComponent]):
    def analyze(self, laparams: LAParams) -> None:
        # textobjs is a list of LTChar objects, i.e.
        # it has all the individual characters in the page.
-        (textobjs, otherobjs) = fsplit(lambda obj: isinstance(obj, LTChar),
+        (textobjs, otherobjs) = fsplit(lambda obj: isinstance(obj, LTChar), self)
                                       self)
        for obj in otherobjs:
            obj.analyze(laparams)
        if not textobjs:
@ -922,6 +955,7 @@ class LTLayoutContainer(LTContainer[LTComponent]):
                    return (0, -box.x1, -box.y0)
                else:
                    return (1, -box.y0, box.x0)
            textboxes.sort(key=getkey)
        else:
            self.groups = self.group_textboxes(laparams, textboxes)
@ -930,8 +964,11 @@ class LTLayoutContainer(LTContainer[LTComponent]):
                group.analyze(laparams)
                assigner.run(group)
            textboxes.sort(key=lambda box: box.index)
-        self._objs = (cast(List[LTComponent], textboxes) + otherobjs
+        self._objs = (
-                      + cast(List[LTComponent], empties))
+            cast(List[LTComponent], textboxes)
            + otherobjs
            + cast(List[LTComponent], empties)
        )
        return
@ -953,9 +990,12 @@ class LTFigure(LTLayoutContainer):
        return
    def __repr__(self) -> str:
-        return ('<%s(%s) %s matrix=%s>' %
+        return "<%s(%s) %s matrix=%s>" % (
-                (self.__class__.__name__, self.name,
+            self.__class__.__name__,
-                 bbox2str(self.bbox), matrix2str(self.matrix)))
+            self.name,
            bbox2str(self.bbox),
            matrix2str(self.matrix),
        )
    def analyze(self, laparams: LAParams) -> None:
        if not laparams.all_texts:
@ -978,6 +1018,9 @@ class LTPage(LTLayoutContainer):
        return
    def __repr__(self) -> str:
-        return ('<%s(%r) %s rotate=%r>' %
+        return "<%s(%r) %s rotate=%r>" % (
-                (self.__class__.__name__, self.pageid,
+            self.__class__.__name__,
-                 bbox2str(self.bbox), self.rotate))
+            self.pageid,
            bbox2str(self.bbox),
            self.rotate,
        )
--- a/pdfminer/lzw.py
+++ b/pdfminer/lzw.py
@ -10,7 +10,6 @@ class CorruptDataError(Exception):
 class LZWDecoder:
    def __init__(self, fp: BinaryIO) -> None:
        self.fp = fp
        self.buff = 0
@ -24,19 +23,19 @@ class LZWDecoder:
        v = 0
        while 1:
            # the number of remaining bits we can get from the current buffer.
-            r = 8-self.bpos
+            r = 8 - self.bpos
            if bits <= r:
                # |-----8-bits-----|
                # |-bpos-|-bits-|  |
                # |      |----r----|
-                v = (v << bits) | ((self.buff >> (r-bits)) & ((1 << bits)-1))
+                v = (v << bits) | ((self.buff >> (r - bits)) & ((1 << bits) - 1))
                self.bpos += bits
                break
            else:
                # |-----8-bits-----|
                # |-bpos-|---bits----...
                # |      |----r----|
-                v = (v << r) | (self.buff & ((1 << r)-1))
+                v = (v << r) | (self.buff & ((1 << r) - 1))
                bits -= r
                x = self.fp.read(1)
                if not x:
@ -46,12 +45,12 @@ class LZWDecoder:
        return v
    def feed(self, code: int) -> bytes:
-        x = b''
+        x = b""
        if code == 256:
            self.table = [bytes((c,)) for c in range(256)]  # 0-255
            self.table.append(None)  # 256
            self.table.append(None)  # 257
-            self.prevbuf = b''
+            self.prevbuf = b""
            self.nbits = 9
        elif code == 257:
            pass
@ -62,9 +61,9 @@ class LZWDecoder:
            assert self.table is not None
            if code < len(self.table):
                x = cast(bytes, self.table[code])  # assume not None
-                self.table.append(self.prevbuf+x[:1])
+                self.table.append(self.prevbuf + x[:1])
            elif code == len(self.table):
-                self.table.append(self.prevbuf+self.prevbuf[:1])
+                self.table.append(self.prevbuf + self.prevbuf[:1])
                x = cast(bytes, self.table[code])
            else:
                raise CorruptDataError
@ -91,11 +90,13 @@ class LZWDecoder:
                break
            yield x
            assert self.table is not None
-            logger.debug('nbits=%d, code=%d, output=%r, table=%r'
+            logger.debug(
-                         % (self.nbits, code, x, self.table[258:]))
+                "nbits=%d, code=%d, output=%r, table=%r"
                % (self.nbits, code, x, self.table[258:])
            )
 def lzwdecode(data: bytes) -> bytes:
    fp = BytesIO(data)
    s = LZWDecoder(fp).run()
-    return b''.join(s)
+    return b"".join(s)
--- a/pdfminer/pdfcolor.py
+++ b/pdfminer/pdfcolor.py
@ -3,33 +3,31 @@ from typing import Dict
 from .psparser import LIT
-LITERAL_DEVICE_GRAY = LIT('DeviceGray')
+LITERAL_DEVICE_GRAY = LIT("DeviceGray")
-LITERAL_DEVICE_RGB = LIT('DeviceRGB')
+LITERAL_DEVICE_RGB = LIT("DeviceRGB")
-LITERAL_DEVICE_CMYK = LIT('DeviceCMYK')
+LITERAL_DEVICE_CMYK = LIT("DeviceCMYK")
 class PDFColorSpace:
    def __init__(self, name: str, ncomponents: int) -> None:
        self.name = name
        self.ncomponents = ncomponents
    def __repr__(self) -> str:
-        return '<PDFColorSpace: %s, ncomponents=%d>' % \
+        return "<PDFColorSpace: %s, ncomponents=%d>" % (self.name, self.ncomponents)
               (self.name, self.ncomponents)
 PREDEFINED_COLORSPACE: Dict[str, PDFColorSpace] = collections.OrderedDict()
 for (name, n) in [
-    ('DeviceGray', 1),  # default value first
+    ("DeviceGray", 1),  # default value first
-    ('CalRGB', 3),
+    ("CalRGB", 3),
-    ('CalGray', 1),
+    ("CalGray", 1),
-    ('Lab', 3),
+    ("Lab", 3),
-    ('DeviceRGB', 3),
+    ("DeviceRGB", 3),
-    ('DeviceCMYK', 4),
+    ("DeviceCMYK", 4),
-    ('Separation', 1),
+    ("Separation", 1),
-    ('Indexed', 1),
+    ("Indexed", 1),
-    ('Pattern', 1),
+    ("Pattern", 1),
 ]:
    PREDEFINED_COLORSPACE[name] = PDFColorSpace(name, n)
--- a/pdfminer/pdfdevice.py
+++ b/pdfminer/pdfdevice.py
@ -1,5 +1,13 @@
-from typing import (BinaryIO, Iterable, List, Optional, Sequence,
+from typing import (
-                    TYPE_CHECKING, Union, cast)
+    BinaryIO,
    Iterable,
    List,
    Optional,
    Sequence,
    TYPE_CHECKING,
    Union,
    cast,
 )
 from pdfminer.psparser import PSLiteral
 from . import utils
@ -21,25 +29,19 @@ PDFTextSeq = Iterable[Union[int, float, bytes]]
 class PDFDevice:
-    """Translate the output of PDFPageInterpreter to the output that is needed
+    """Translate the output of PDFPageInterpreter to the output that is needed"""
    """
    def __init__(self, rsrcmgr: "PDFResourceManager") -> None:
        self.rsrcmgr = rsrcmgr
        self.ctm: Optional[Matrix] = None
    def __repr__(self) -> str:
-        return '<PDFDevice>'
+        return "<PDFDevice>"
    def __enter__(self) -> "PDFDevice":
        return self
-    def __exit__(
+    def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
        self,
        exc_type: object,
        exc_val: object,
        exc_tb: object
    ) -> None:
        self.close()
    def close(self) -> None:
@ -48,21 +50,13 @@ class PDFDevice:
    def set_ctm(self, ctm: Matrix) -> None:
        self.ctm = ctm
-    def begin_tag(
+    def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None:
        self,
        tag: PSLiteral,
        props: Optional["PDFStackT"] = None
    ) -> None:
        pass
    def end_tag(self) -> None:
        pass
-    def do_tag(
+    def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None:
        self,
        tag: PSLiteral,
        props: Optional["PDFStackT"] = None
    ) -> None:
        pass
    def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
@ -83,7 +77,7 @@ class PDFDevice:
        stroke: bool,
        fill: bool,
        evenodd: bool,
-        path: Sequence[PathSegment]
+        path: Sequence[PathSegment],
    ) -> None:
        pass
@ -95,42 +89,61 @@ class PDFDevice:
        textstate: "PDFTextState",
        seq: PDFTextSeq,
        ncs: PDFColorSpace,
-        graphicstate: "PDFGraphicState"
+        graphicstate: "PDFGraphicState",
    ) -> None:
        pass
 class PDFTextDevice(PDFDevice):
    def render_string(
        self,
        textstate: "PDFTextState",
        seq: PDFTextSeq,
        ncs: PDFColorSpace,
-        graphicstate: "PDFGraphicState"
+        graphicstate: "PDFGraphicState",
    ) -> None:
        assert self.ctm is not None
        matrix = utils.mult_matrix(textstate.matrix, self.ctm)
        font = textstate.font
        fontsize = textstate.fontsize
-        scaling = textstate.scaling * .01
+        scaling = textstate.scaling * 0.01
        charspace = textstate.charspace * scaling
        wordspace = textstate.wordspace * scaling
        rise = textstate.rise
        assert font is not None
        if font.is_multibyte():
            wordspace = 0
-        dxscale = .001 * fontsize * scaling
+        dxscale = 0.001 * fontsize * scaling
        if font.is_vertical():
            textstate.linematrix = self.render_string_vertical(
-                seq, matrix, textstate.linematrix, font, fontsize,
+                seq,
-                scaling, charspace, wordspace, rise, dxscale, ncs,
+                matrix,
-                graphicstate)
+                textstate.linematrix,
                font,
                fontsize,
                scaling,
                charspace,
                wordspace,
                rise,
                dxscale,
                ncs,
                graphicstate,
            )
        else:
            textstate.linematrix = self.render_string_horizontal(
-                seq, matrix, textstate.linematrix, font, fontsize,
+                seq,
-                scaling, charspace, wordspace, rise, dxscale, ncs,
+                matrix,
-                graphicstate)
+                textstate.linematrix,
                font,
                fontsize,
                scaling,
                charspace,
                wordspace,
                rise,
                dxscale,
                ncs,
                graphicstate,
            )
    def render_string_horizontal(
        self,
@ -145,21 +158,28 @@ class PDFTextDevice(PDFDevice):
        rise: float,
        dxscale: float,
        ncs: PDFColorSpace,
-        graphicstate: "PDFGraphicState"
+        graphicstate: "PDFGraphicState",
    ) -> Point:
        (x, y) = pos
        needcharspace = False
        for obj in seq:
            if isinstance(obj, (int, float)):
-                x -= obj*dxscale
+                x -= obj * dxscale
                needcharspace = True
            else:
                for cid in font.decode(obj):
                    if needcharspace:
                        x += charspace
                    x += self.render_char(
-                        utils.translate_matrix(matrix, (x, y)), font,
+                        utils.translate_matrix(matrix, (x, y)),
-                        fontsize, scaling, rise, cid, ncs, graphicstate)
+                        font,
                        fontsize,
                        scaling,
                        rise,
                        cid,
                        ncs,
                        graphicstate,
                    )
                    if cid == 32 and wordspace:
                        x += wordspace
                    needcharspace = True
@ -178,21 +198,28 @@ class PDFTextDevice(PDFDevice):
        rise: float,
        dxscale: float,
        ncs: PDFColorSpace,
-        graphicstate: "PDFGraphicState"
+        graphicstate: "PDFGraphicState",
    ) -> Point:
        (x, y) = pos
        needcharspace = False
        for obj in seq:
            if isinstance(obj, (int, float)):
-                y -= obj*dxscale
+                y -= obj * dxscale
                needcharspace = True
            else:
                for cid in font.decode(obj):
                    if needcharspace:
                        y += charspace
                    y += self.render_char(
-                        utils.translate_matrix(matrix, (x, y)), font, fontsize,
+                        utils.translate_matrix(matrix, (x, y)),
-                        scaling, rise, cid, ncs, graphicstate)
+                        font,
                        fontsize,
                        scaling,
                        rise,
                        cid,
                        ncs,
                        graphicstate,
                    )
                    if cid == 32 and wordspace:
                        y += wordspace
                    needcharspace = True
@ -207,18 +234,14 @@ class PDFTextDevice(PDFDevice):
        rise: float,
        cid: int,
        ncs: PDFColorSpace,
-        graphicstate: "PDFGraphicState"
+        graphicstate: "PDFGraphicState",
    ) -> float:
        return 0
 class TagExtractor(PDFDevice):
    def __init__(
-        self,
+        self, rsrcmgr: "PDFResourceManager", outfp: BinaryIO, codec: str = "utf-8"
        rsrcmgr: "PDFResourceManager",
        outfp: BinaryIO,
        codec: str = 'utf-8'
    ) -> None:
        PDFDevice.__init__(self, rsrcmgr)
        self.outfp = outfp
@ -231,11 +254,11 @@ class TagExtractor(PDFDevice):
        textstate: "PDFTextState",
        seq: PDFTextSeq,
        ncs: PDFColorSpace,
-        graphicstate: "PDFGraphicState"
+        graphicstate: "PDFGraphicState",
    ) -> None:
        font = textstate.font
        assert font is not None
-        text = ''
+        text = ""
        for obj in seq:
            if isinstance(obj, str):
                obj = utils.make_compat_bytes(obj)
@ -251,25 +274,29 @@ class TagExtractor(PDFDevice):
        self._write(utils.enc(text))
    def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
-        output = '<page id="%s" bbox="%s" rotate="%d">' %\
+        output = '<page id="%s" bbox="%s" rotate="%d">' % (
-                 (self.pageno, utils.bbox2str(page.mediabox), page.rotate)
+            self.pageno,
            utils.bbox2str(page.mediabox),
            page.rotate,
        )
        self._write(output)
        return
    def end_page(self, page: PDFPage) -> None:
-        self._write('</page>\n')
+        self._write("</page>\n")
        self.pageno += 1
        return
-    def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None
+    def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None:
-                  ) -> None:
+        s = ""
        s = ''
        if isinstance(props, dict):
-            s = ''.join([
+            s = "".join(
-                ' {}="{}"'.format(utils.enc(k), utils.make_compat_str(v))
+                [
-                for (k, v) in sorted(props.items())
+                    ' {}="{}"'.format(utils.enc(k), utils.make_compat_str(v))
-            ])
+                    for (k, v) in sorted(props.items())
-        out_s = '<{}{}>'.format(utils.enc(cast(str, tag.name)), s)
+                ]
            )
        out_s = "<{}{}>".format(utils.enc(cast(str, tag.name)), s)
        self._write(out_s)
        self._stack.append(tag)
        return
@ -277,12 +304,11 @@ class TagExtractor(PDFDevice):
    def end_tag(self) -> None:
        assert self._stack, str(self.pageno)
        tag = self._stack.pop(-1)
-        out_s = '</%s>' % utils.enc(cast(str, tag.name))
+        out_s = "</%s>" % utils.enc(cast(str, tag.name))
        self._write(out_s)
        return
-    def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None
+    def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None:
               ) -> None:
        self.begin_tag(tag, props)
        self._stack.pop(-1)
        return
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@ -3,8 +3,21 @@ import logging
 import re
 import struct
 from hashlib import sha256, md5, sha384, sha512
-from typing import (Any, Callable, Dict, Iterable, Iterator, KeysView, List,
+from typing import (
-                    Optional, Sequence, Tuple, Type, Union, cast)
+    Any,
    Callable,
    Dict,
    Iterable,
    Iterator,
    KeysView,
    List,
    Optional,
    Sequence,
    Tuple,
    Type,
    Union,
    cast,
 )
 from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
@ -13,12 +26,22 @@ from . import settings
 from .arcfour import Arcfour
 from .data_structures import NumberTree
 from .pdfparser import PDFSyntaxError, PDFParser, PDFStreamParser
-from .pdftypes import DecipherCallable, PDFException, PDFTypeError, \
+from .pdftypes import (
-    PDFStream, PDFObjectNotFound, decipher_all, int_value, str_value, \
+    DecipherCallable,
-    list_value, uint_value, dict_value, stream_value
+    PDFException,
    PDFTypeError,
    PDFStream,
    PDFObjectNotFound,
    decipher_all,
    int_value,
    str_value,
    list_value,
    uint_value,
    dict_value,
    stream_value,
 )
 from .psparser import PSEOF, literal_name, LIT, KWD
-from .utils import choplist, decode_text, nunpack, format_int_roman, \
+from .utils import choplist, decode_text, nunpack, format_int_roman, format_int_alpha
    format_int_alpha
 log = logging.getLogger(__name__)
@ -32,6 +55,7 @@ class PDFNoValidXRefWarning(SyntaxWarning):
    Not used anymore because warnings.warn is replaced by logger.Logger.warn.
    """
    pass
@ -60,6 +84,7 @@ class PDFEncryptionWarning(UserWarning):
    Not used anymore because warnings.warn is replaced by logger.Logger.warn.
    """
    pass
@ -68,6 +93,7 @@ class PDFTextExtractionNotAllowedWarning(UserWarning):
    Not used anymore because warnings.warn is replaced by logger.Logger.warn.
    """
    pass
@ -78,15 +104,19 @@ class PDFTextExtractionNotAllowed(PDFEncryptionError):
 class PDFTextExtractionNotAllowedError(PDFTextExtractionNotAllowed):
    def __init__(self, *args: object) -> None:
        from warnings import warn
-        warn('PDFTextExtractionNotAllowedError will be removed in the future. '
+
-             'Use PDFTextExtractionNotAllowed instead.', DeprecationWarning)
+        warn(
            "PDFTextExtractionNotAllowedError will be removed in the future. "
            "Use PDFTextExtractionNotAllowed instead.",
            DeprecationWarning,
        )
        super().__init__(*args)
 # some predefined literals and keywords.
-LITERAL_OBJSTM = LIT('ObjStm')
+LITERAL_OBJSTM = LIT("ObjStm")
-LITERAL_XREF = LIT('XRef')
+LITERAL_XREF = LIT("XRef")
-LITERAL_CATALOG = LIT('Catalog')
+LITERAL_CATALOG = LIT("Catalog")
 class PDFBaseXRef:
@ -107,13 +137,12 @@ class PDFBaseXRef:
 class PDFXRef(PDFBaseXRef):
    def __init__(self) -> None:
        self.offsets: Dict[int, Tuple[Optional[int], int, int]] = {}
        self.trailer: Dict[str, Any] = {}
    def __repr__(self) -> str:
-        return '<PDFXRef: offsets=%r>' % (self.offsets.keys())
+        return "<PDFXRef: offsets=%r>" % (self.offsets.keys())
    def load(self, parser: PDFParser) -> None:
        while True:
@ -123,51 +152,50 @@ class PDFXRef(PDFBaseXRef):
                if not line:
                    continue
            except PSEOF:
-                raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
+                raise PDFNoValidXRef("Unexpected EOF - file corrupted?")
-            if line.startswith(b'trailer'):
+            if line.startswith(b"trailer"):
                parser.seek(pos)
                break
-            f = line.split(b' ')
+            f = line.split(b" ")
            if len(f) != 2:
-                error_msg = 'Trailer not found: {!r}: line={!r}'\
+                error_msg = "Trailer not found: {!r}: line={!r}".format(parser, line)
                    .format(parser, line)
                raise PDFNoValidXRef(error_msg)
            try:
                (start, nobjs) = map(int, f)
            except ValueError:
-                error_msg = 'Invalid line: {!r}: line={!r}'\
+                error_msg = "Invalid line: {!r}: line={!r}".format(parser, line)
                    .format(parser, line)
                raise PDFNoValidXRef(error_msg)
-            for objid in range(start, start+nobjs):
+            for objid in range(start, start + nobjs):
                try:
                    (_, line) = parser.nextline()
                    line = line.strip()
                except PSEOF:
-                    raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
+                    raise PDFNoValidXRef("Unexpected EOF - file corrupted?")
-                f = line.split(b' ')
+                f = line.split(b" ")
                if len(f) != 3:
-                    error_msg = 'Invalid XRef format: {!r}, line={!r}'\
+                    error_msg = "Invalid XRef format: {!r}, line={!r}".format(
-                        .format(parser, line)
+                        parser, line
                    )
                    raise PDFNoValidXRef(error_msg)
                (pos_b, genno_b, use_b) = f
-                if use_b != b'n':
+                if use_b != b"n":
                    continue
                self.offsets[objid] = (None, int(pos_b), int(genno_b))
-        log.debug('xref objects: %r', self.offsets)
+        log.debug("xref objects: %r", self.offsets)
        self.load_trailer(parser)
    def load_trailer(self, parser: PDFParser) -> None:
        try:
            (_, kwd) = parser.nexttoken()
-            assert kwd is KWD(b'trailer'), str(kwd)
+            assert kwd is KWD(b"trailer"), str(kwd)
            (_, dic) = parser.nextobject()
        except PSEOF:
            x = parser.pop(1)
            if not x:
-                raise PDFNoValidXRef('Unexpected EOF - file corrupted')
+                raise PDFNoValidXRef("Unexpected EOF - file corrupted")
            (_, dic) = x[0]
        self.trailer.update(dict_value(dic))
-        log.debug('trailer=%r', self.trailer)
+        log.debug("trailer=%r", self.trailer)
    def get_trailer(self) -> Dict[str, Any]:
        return self.trailer
@ -183,11 +211,10 @@ class PDFXRef(PDFBaseXRef):
 class PDFXRefFallback(PDFXRef):
    def __repr__(self) -> str:
-        return '<PDFXRefFallback: offsets=%r>' % (self.offsets.keys())
+        return "<PDFXRefFallback: offsets=%r>" % (self.offsets.keys())
-    PDFOBJ_CUE = re.compile(r'^(\d+)\s+(\d+)\s+obj\b')
+    PDFOBJ_CUE = re.compile(r"^(\d+)\s+(\d+)\s+obj\b")
    def load(self, parser: PDFParser) -> None:
        parser.seek(0)
@ -196,12 +223,12 @@ class PDFXRefFallback(PDFXRef):
                (pos, line_bytes) = parser.nextline()
            except PSEOF:
                break
-            if line_bytes.startswith(b'trailer'):
+            if line_bytes.startswith(b"trailer"):
                parser.seek(pos)
                self.load_trailer(parser)
-                log.debug('trailer: %r', self.trailer)
+                log.debug("trailer: %r", self.trailer)
                break
-            line = line_bytes.decode('latin-1')  # default pdf encoding
+            line = line_bytes.decode("latin-1")  # default pdf encoding
            m = self.PDFOBJ_CUE.match(line)
            if not m:
                continue
@ -212,14 +239,13 @@ class PDFXRefFallback(PDFXRef):
            # expand ObjStm.
            parser.seek(pos)
            (_, obj) = parser.nextobject()
-            if isinstance(obj, PDFStream) \
+            if isinstance(obj, PDFStream) and obj.get("Type") is LITERAL_OBJSTM:
                    and obj.get('Type') is LITERAL_OBJSTM:
                stream = stream_value(obj)
                try:
-                    n = stream['N']
+                    n = stream["N"]
                except KeyError:
                    if settings.STRICT:
-                        raise PDFSyntaxError('N is not defined: %r' % stream)
+                        raise PDFSyntaxError("N is not defined: %r" % stream)
                    n = 0
                parser1 = PDFStreamParser(stream.get_data())
                objs: List[int] = []
@ -229,14 +255,13 @@ class PDFXRefFallback(PDFXRef):
                        objs.append(cast(int, obj))
                except PSEOF:
                    pass
-                n = min(n, len(objs)//2)
+                n = min(n, len(objs) // 2)
                for index in range(n):
-                    objid1 = objs[index*2]
+                    objid1 = objs[index * 2]
                    self.offsets[objid1] = (objid, index, 0)
 class PDFXRefStream(PDFBaseXRef):
    def __init__(self) -> None:
        self.data: Optional[bytes] = None
        self.entlen: Optional[int] = None
@ -246,31 +271,32 @@ class PDFXRefStream(PDFBaseXRef):
        self.ranges: List[Tuple[int, int]] = []
    def __repr__(self) -> str:
-        return '<PDFXRefStream: ranges=%r>' % (self.ranges)
+        return "<PDFXRefStream: ranges=%r>" % (self.ranges)
    def load(self, parser: PDFParser) -> None:
        (_, objid) = parser.nexttoken()  # ignored
        (_, genno) = parser.nexttoken()  # ignored
        (_, kwd) = parser.nexttoken()
        (_, stream) = parser.nextobject()
-        if not isinstance(stream, PDFStream) \
+        if not isinstance(stream, PDFStream) or stream.get("Type") is not LITERAL_XREF:
-                or stream.get('Type') is not LITERAL_XREF:
+            raise PDFNoValidXRef("Invalid PDF stream spec.")
-            raise PDFNoValidXRef('Invalid PDF stream spec.')
+        size = stream["Size"]
-        size = stream['Size']
+        index_array = stream.get("Index", (0, size))
        index_array = stream.get('Index', (0, size))
        if len(index_array) % 2 != 0:
-            raise PDFSyntaxError('Invalid index number')
+            raise PDFSyntaxError("Invalid index number")
-        self.ranges.extend(cast(Iterator[Tuple[int, int]],
+        self.ranges.extend(cast(Iterator[Tuple[int, int]], choplist(2, index_array)))
-                                choplist(2, index_array)))
+        (self.fl1, self.fl2, self.fl3) = stream["W"]
-        (self.fl1, self.fl2, self.fl3) = stream['W']
+        assert self.fl1 is not None and self.fl2 is not None and self.fl3 is not None
        assert (self.fl1 is not None and self.fl2 is not None
                and self.fl3 is not None)
        self.data = stream.get_data()
-        self.entlen = self.fl1+self.fl2+self.fl3
+        self.entlen = self.fl1 + self.fl2 + self.fl3
        self.trailer = stream.attrs
-        log.debug('xref stream: objid=%s, fields=%d,%d,%d',
+        log.debug(
-                  ', '.join(map(repr, self.ranges)),
+            "xref stream: objid=%s, fields=%d,%d,%d",
-                  self.fl1, self.fl2, self.fl3)
+            ", ".join(map(repr, self.ranges)),
            self.fl1,
            self.fl2,
            self.fl3,
        )
        return
    def get_trailer(self) -> Dict[str, Any]:
@ -282,16 +308,16 @@ class PDFXRefStream(PDFBaseXRef):
                assert self.entlen is not None
                assert self.data is not None
                offset = self.entlen * i
-                ent = self.data[offset:offset+self.entlen]
+                ent = self.data[offset : offset + self.entlen]
-                f1 = nunpack(ent[:self.fl1], 1)
+                f1 = nunpack(ent[: self.fl1], 1)
                if f1 == 1 or f1 == 2:
-                    yield start+i
+                    yield start + i
        return
    def get_pos(self, objid: int) -> Tuple[Optional[int], int, int]:
        index = 0
        for (start, nobjs) in self.ranges:
-            if start <= objid and objid < start+nobjs:
+            if start <= objid and objid < start + nobjs:
                index += objid - start
                break
            else:
@ -300,13 +326,12 @@ class PDFXRefStream(PDFBaseXRef):
            raise KeyError(objid)
        assert self.entlen is not None
        assert self.data is not None
-        assert (self.fl1 is not None and self.fl2 is not None
+        assert self.fl1 is not None and self.fl2 is not None and self.fl3 is not None
                and self.fl3 is not None)
        offset = self.entlen * index
-        ent = self.data[offset:offset+self.entlen]
+        ent = self.data[offset : offset + self.entlen]
-        f1 = nunpack(ent[:self.fl1], 1)
+        f1 = nunpack(ent[: self.fl1], 1)
-        f2 = nunpack(ent[self.fl1:self.fl1+self.fl2])
+        f2 = nunpack(ent[self.fl1 : self.fl1 + self.fl2])
-        f3 = nunpack(ent[self.fl1+self.fl2:])
+        f3 = nunpack(ent[self.fl1 + self.fl2 :])
        if f1 == 1:
            return (None, f2, f3)
        elif f1 == 2:
@ -318,15 +343,14 @@ class PDFXRefStream(PDFBaseXRef):
 class PDFStandardSecurityHandler:
-    PASSWORD_PADDING = (b'(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08'
+    PASSWORD_PADDING = (
-                        b'..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz')
+        b"(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08"
        b"..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz"
    )
    supported_revisions: Tuple[int, ...] = (2, 3)
    def __init__(
-        self,
+        self, docid: Sequence[bytes], param: Dict[str, Any], password: str = ""
        docid: Sequence[bytes],
        param: Dict[str, Any],
        password: str = ''
    ) -> None:
        self.docid = docid
        self.param = param
@ -337,18 +361,18 @@ class PDFStandardSecurityHandler:
    def init(self) -> None:
        self.init_params()
        if self.r not in self.supported_revisions:
-            error_msg = 'Unsupported revision: param=%r' % self.param
+            error_msg = "Unsupported revision: param=%r" % self.param
            raise PDFEncryptionError(error_msg)
        self.init_key()
        return
    def init_params(self) -> None:
-        self.v = int_value(self.param.get('V', 0))
+        self.v = int_value(self.param.get("V", 0))
-        self.r = int_value(self.param['R'])
+        self.r = int_value(self.param["R"])
-        self.p = uint_value(self.param['P'], 32)
+        self.p = uint_value(self.param["P"], 32)
-        self.o = str_value(self.param['O'])
+        self.o = str_value(self.param["O"])
-        self.u = str_value(self.param['U'])
+        self.u = str_value(self.param["U"])
-        self.length = int_value(self.param.get('Length', 40))
+        self.length = int_value(self.param.get("Length", 40))
        return
    def init_key(self) -> None:
@ -376,7 +400,7 @@ class PDFStandardSecurityHandler:
            hash.update(self.docid[0])  # 3
            result = Arcfour(key).encrypt(hash.digest())  # 4
            for i in range(1, 20):  # 5
-                k = b''.join(bytes((c ^ i,)) for c in iter(key))
+                k = b"".join(bytes((c ^ i,)) for c in iter(key))
                result = Arcfour(k).encrypt(result)
            result += result  # 6
            return result
@ -387,11 +411,11 @@ class PDFStandardSecurityHandler:
        hash = md5(password)  # 2
        hash.update(self.o)  # 3
        # See https://github.com/pdfminer/pdfminer.six/issues/186
-        hash.update(struct.pack('<L', self.p))  # 4
+        hash.update(struct.pack("<L", self.p))  # 4
        hash.update(self.docid[0])  # 5
        if self.r >= 4:
            if not cast(PDFStandardSecurityHandlerV4, self).encrypt_metadata:
-                hash.update(b'\xff\xff\xff\xff')
+                hash.update(b"\xff\xff\xff\xff")
        result = hash.digest()
        n = 5
        if self.r >= 3:
@ -437,7 +461,7 @@ class PDFStandardSecurityHandler:
        else:
            user_password = self.o
            for i in range(19, -1, -1):
-                k = b''.join(bytes((c ^ i,)) for c in iter(key))
+                k = b"".join(bytes((c ^ i,)) for c in iter(key))
                user_password = Arcfour(k).decrypt(user_password)
        return self.authenticate_user_password(user_password)
@ -446,16 +470,15 @@ class PDFStandardSecurityHandler:
        objid: int,
        genno: int,
        data: bytes,
-        attrs: Optional[Dict[str, Any]] = None
+        attrs: Optional[Dict[str, Any]] = None,
    ) -> bytes:
        return self.decrypt_rc4(objid, genno, data)
    def decrypt_rc4(self, objid: int, genno: int, data: bytes) -> bytes:
        assert self.key is not None
-        key = self.key + struct.pack('<L', objid)[:3] \
+        key = self.key + struct.pack("<L", objid)[:3] + struct.pack("<L", genno)[:2]
            + struct.pack('<L', genno)[:2]
        hash = md5(key)
-        key = hash.digest()[:min(len(key), 16)]
+        key = hash.digest()[: min(len(key), 16)]
        return Arcfour(key).decrypt(data)
@ -466,34 +489,30 @@ class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):
    def init_params(self) -> None:
        super().init_params()
        self.length = 128
-        self.cf = dict_value(self.param.get('CF'))
+        self.cf = dict_value(self.param.get("CF"))
-        self.stmf = literal_name(self.param['StmF'])
+        self.stmf = literal_name(self.param["StmF"])
-        self.strf = literal_name(self.param['StrF'])
+        self.strf = literal_name(self.param["StrF"])
-        self.encrypt_metadata = bool(self.param.get('EncryptMetadata', True))
+        self.encrypt_metadata = bool(self.param.get("EncryptMetadata", True))
        if self.stmf != self.strf:
-            error_msg = 'Unsupported crypt filter: param=%r' % self.param
+            error_msg = "Unsupported crypt filter: param=%r" % self.param
            raise PDFEncryptionError(error_msg)
        self.cfm = {}
        for k, v in self.cf.items():
-            f = self.get_cfm(literal_name(v['CFM']))
+            f = self.get_cfm(literal_name(v["CFM"]))
            if f is None:
-                error_msg = 'Unknown crypt filter method: param=%r' \
+                error_msg = "Unknown crypt filter method: param=%r" % self.param
                            % self.param
                raise PDFEncryptionError(error_msg)
            self.cfm[k] = f
-        self.cfm['Identity'] = self.decrypt_identity
+        self.cfm["Identity"] = self.decrypt_identity
        if self.strf not in self.cfm:
-            error_msg = 'Undefined crypt filter: param=%r' % self.param
+            error_msg = "Undefined crypt filter: param=%r" % self.param
            raise PDFEncryptionError(error_msg)
        return
-    def get_cfm(
+    def get_cfm(self, name: str) -> Optional[Callable[[int, int, bytes], bytes]]:
-        self,
+        if name == "V2":
        name: str
    ) -> Optional[Callable[[int, int, bytes], bytes]]:
        if name == 'V2':
            return self.decrypt_rc4
-        elif name == 'AESV2':
+        elif name == "AESV2":
            return self.decrypt_aes128
        else:
            return None
@ -504,11 +523,11 @@ class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):
        genno: int,
        data: bytes,
        attrs: Optional[Dict[str, Any]] = None,
-        name: Optional[str] = None
+        name: Optional[str] = None,
    ) -> bytes:
        if not self.encrypt_metadata and attrs is not None:
-            t = attrs.get('Type')
+            t = attrs.get("Type")
-            if t is not None and literal_name(t) == 'Metadata':
+            if t is not None and literal_name(t) == "Metadata":
                return data
        if name is None:
            name = self.strf
@ -519,15 +538,21 @@ class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):
    def decrypt_aes128(self, objid: int, genno: int, data: bytes) -> bytes:
        assert self.key is not None
-        key = self.key + struct.pack('<L', objid)[:3] \
+        key = (
-            + struct.pack('<L', genno)[:2] + b'sAlT'
+            self.key
            + struct.pack("<L", objid)[:3]
            + struct.pack("<L", genno)[:2]
            + b"sAlT"
        )
        hash = md5(key)
-        key = hash.digest()[:min(len(key), 16)]
+        key = hash.digest()[: min(len(key), 16)]
        initialization_vector = data[:16]
        ciphertext = data[16:]
-        cipher = Cipher(algorithms.AES(key),
+        cipher = Cipher(
-                        modes.CBC(initialization_vector),
+            algorithms.AES(key),
-                        backend=default_backend())  # type: ignore
+            modes.CBC(initialization_vector),
            backend=default_backend(),
        )  # type: ignore
        return cipher.decryptor().update(ciphertext)  # type: ignore
@ -538,8 +563,8 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
    def init_params(self) -> None:
        super().init_params()
        self.length = 256
-        self.oe = str_value(self.param['OE'])
+        self.oe = str_value(self.param["OE"])
-        self.ue = str_value(self.param['UE'])
+        self.ue = str_value(self.param["UE"])
        self.o_hash = self.o[:32]
        self.o_validation_salt = self.o[32:40]
        self.o_key_salt = self.o[40:]
@ -548,11 +573,8 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
        self.u_key_salt = self.u[40:]
        return
-    def get_cfm(
+    def get_cfm(self, name: str) -> Optional[Callable[[int, int, bytes], bytes]]:
-        self,
+        if name == "AESV3":
        name: str
    ) -> Optional[Callable[[int, int, bytes], bytes]]:
        if name == 'AESV3':
            return self.decrypt_aes256
        else:
            return None
@ -562,16 +584,16 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
        hash = self._password_hash(password_b, self.o_validation_salt, self.u)
        if hash == self.o_hash:
            hash = self._password_hash(password_b, self.o_key_salt, self.u)
-            cipher = Cipher(algorithms.AES(hash),
+            cipher = Cipher(
-                            modes.CBC(b'\0' * 16),
+                algorithms.AES(hash), modes.CBC(b"\0" * 16), backend=default_backend()
-                            backend=default_backend())  # type: ignore
+            )  # type: ignore
            return cipher.decryptor().update(self.oe)  # type: ignore
        hash = self._password_hash(password_b, self.u_validation_salt)
        if hash == self.u_hash:
            hash = self._password_hash(password_b, self.u_key_salt)
-            cipher = Cipher(algorithms.AES(hash),
+            cipher = Cipher(
-                            modes.CBC(b'\0' * 16),
+                algorithms.AES(hash), modes.CBC(b"\0" * 16), backend=default_backend()
-                            backend=default_backend())  # type: ignore
+            )  # type: ignore
            return cipher.decryptor().update(self.ue)  # type: ignore
        return None
@ -579,16 +601,14 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
        if self.r == 6:
            # saslprep expects non-empty strings, apparently
            if not password:
-                return b''
+                return b""
            from ._saslprep import saslprep
            password = saslprep(password)
-        return password.encode('utf-8')[:127]
+        return password.encode("utf-8")[:127]
    def _password_hash(
-        self,
+        self, password: bytes, salt: bytes, vector: Optional[bytes] = None
        password: bytes,
        salt: bytes,
        vector: Optional[bytes] = None
    ) -> bytes:
        """
        Compute password hash depending on revision number
@ -598,10 +618,7 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
        return self._r6_password(password, salt[0:8], vector)
    def _r5_password(
-        self,
+        self, password: bytes, salt: bytes, vector: Optional[bytes] = None
        password: bytes,
        salt: bytes,
        vector: Optional[bytes] = None
    ) -> bytes:
        """
        Compute the password for revision 5
@ -613,10 +630,7 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
        return hash.digest()
    def _r6_password(
-        self,
+        self, password: bytes, salt: bytes, vector: Optional[bytes] = None
        password: bytes,
        salt: bytes,
        vector: Optional[bytes] = None
    ) -> bytes:
        """
        Compute the password for revision 6
@ -629,10 +643,8 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
        hashes = (sha256, sha384, sha512)
        round_no = last_byte_val = 0
        while round_no < 64 or last_byte_val > round_no - 32:
-            k1 = (password + k + (vector or b'')) * 64
+            k1 = (password + k + (vector or b"")) * 64
-            e = self._aes_cbc_encrypt(
+            e = self._aes_cbc_encrypt(key=k[:16], iv=k[16:32], data=k1)
                key=k[:16], iv=k[16:32], data=k1
            )
            # compute the first 16 bytes of e,
            # interpreted as an unsigned integer mod 3
            next_hash = hashes[self._bytes_mod_3(e[:16])]
@ -646,12 +658,7 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
        # 256 is 1 mod 3, so we can just sum 'em
        return sum(b % 3 for b in input_bytes) % 3
-    def _aes_cbc_encrypt(
+    def _aes_cbc_encrypt(self, key: bytes, iv: bytes, data: bytes) -> bytes:
        self,
        key: bytes,
        iv: bytes,
        data: bytes
    ) -> bytes:
        cipher = Cipher(algorithms.AES(key), modes.CBC(iv))
        encryptor = cipher.encryptor()  # type: ignore
        return encryptor.update(data) + encryptor.finalize()  # type: ignore
@ -660,9 +667,11 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
        initialization_vector = data[:16]
        ciphertext = data[16:]
        assert self.key is not None
-        cipher = Cipher(algorithms.AES(self.key),
+        cipher = Cipher(
-                        modes.CBC(initialization_vector),
+            algorithms.AES(self.key),
-                        backend=default_backend())  # type: ignore
+            modes.CBC(initialization_vector),
            backend=default_backend(),
        )  # type: ignore
        return cipher.decryptor().update(ciphertext)  # type: ignore
@ -689,9 +698,9 @@ class PDFDocument:
    def __init__(
        self,
        parser: PDFParser,
-        password: str = '',
+        password: str = "",
        caching: bool = True,
-        fallback: bool = True
+        fallback: bool = True,
    ) -> None:
        "Set the document to use a given PDFParser object."
        self.caching = caching
@ -723,43 +732,42 @@ class PDFDocument:
            if not trailer:
                continue
            # If there's an encryption info, remember it.
-            if 'Encrypt' in trailer:
+            if "Encrypt" in trailer:
-                if 'ID' in trailer:
+                if "ID" in trailer:
-                    id_value = list_value(trailer['ID'])
+                    id_value = list_value(trailer["ID"])
                else:
                    # Some documents may not have a /ID, use two empty
                    # byte strings instead. Solves
                    # https://github.com/pdfminer/pdfminer.six/issues/594
-                    id_value = (b'', b'')
+                    id_value = (b"", b"")
-                self.encryption = (id_value,
+                self.encryption = (id_value, dict_value(trailer["Encrypt"]))
                                   dict_value(trailer['Encrypt']))
                self._initialize_password(password)
-            if 'Info' in trailer:
+            if "Info" in trailer:
-                self.info.append(dict_value(trailer['Info']))
+                self.info.append(dict_value(trailer["Info"]))
-            if 'Root' in trailer:
+            if "Root" in trailer:
                # Every PDF file must have exactly one /Root dictionary.
-                self.catalog = dict_value(trailer['Root'])
+                self.catalog = dict_value(trailer["Root"])
                break
        else:
-            raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
+            raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
-        if self.catalog.get('Type') is not LITERAL_CATALOG:
+        if self.catalog.get("Type") is not LITERAL_CATALOG:
            if settings.STRICT:
-                raise PDFSyntaxError('Catalog not found!')
+                raise PDFSyntaxError("Catalog not found!")
        return
-    KEYWORD_OBJ = KWD(b'obj')
+    KEYWORD_OBJ = KWD(b"obj")
    # _initialize_password(password=b'')
    #   Perform the initialization with a given password.
-    def _initialize_password(self, password: str = '') -> None:
+    def _initialize_password(self, password: str = "") -> None:
        assert self.encryption is not None
        (docid, param) = self.encryption
-        if literal_name(param.get('Filter')) != 'Standard':
+        if literal_name(param.get("Filter")) != "Standard":
-            raise PDFEncryptionError('Unknown filter: param=%r' % param)
+            raise PDFEncryptionError("Unknown filter: param=%r" % param)
-        v = int_value(param.get('V', 0))
+        v = int_value(param.get("V", 0))
        factory = self.security_handler_registry.get(v)
        if factory is None:
-            raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
+            raise PDFEncryptionError("Unknown algorithm: param=%r" % param)
        handler = factory(docid, param, password)
        self.decipher = handler.decrypt
        self.is_printable = handler.is_printable()
@ -769,12 +777,7 @@ class PDFDocument:
        self._parser.fallback = False  # need to read streams with exact length
        return
-    def _getobj_objstm(
+    def _getobj_objstm(self, stream: PDFStream, index: int, objid: int) -> object:
        self,
        stream: PDFStream,
        index: int,
        objid: int
    ) -> object:
        if stream.objid in self._parsed_objs:
            (objs, n) = self._parsed_objs[stream.objid]
        else:
@ -782,22 +785,22 @@ class PDFDocument:
            if self.caching:
                assert stream.objid is not None
                self._parsed_objs[stream.objid] = (objs, n)
-        i = n*2+index
+        i = n * 2 + index
        try:
            obj = objs[i]
        except IndexError:
-            raise PDFSyntaxError('index too big: %r' % index)
+            raise PDFSyntaxError("index too big: %r" % index)
        return obj
    def _get_objects(self, stream: PDFStream) -> Tuple[List[object], int]:
-        if stream.get('Type') is not LITERAL_OBJSTM:
+        if stream.get("Type") is not LITERAL_OBJSTM:
            if settings.STRICT:
-                raise PDFSyntaxError('Not a stream object: %r' % stream)
+                raise PDFSyntaxError("Not a stream object: %r" % stream)
        try:
-            n = cast(int, stream['N'])
+            n = cast(int, stream["N"])
        except KeyError:
            if settings.STRICT:
-                raise PDFSyntaxError('N is not defined: %r' % stream)
+                raise PDFSyntaxError("N is not defined: %r" % stream)
            n = 0
        parser = PDFStreamParser(stream.get_data())
        parser.set_document(self)
@ -830,11 +833,10 @@ class PDFDocument:
                objid1 = x[-2]
        # #### end hack around malformed pdf files
        if objid1 != objid:
-            raise PDFSyntaxError('objid mismatch: {!r}={!r}'
+            raise PDFSyntaxError("objid mismatch: {!r}={!r}".format(objid1, objid))
                                 .format(objid1, objid))
-        if kwd != KWD(b'obj'):
+        if kwd != KWD(b"obj"):
-            raise PDFSyntaxError('Invalid object spec: offset=%r' % pos)
+            raise PDFSyntaxError("Invalid object spec: offset=%r" % pos)
        (_, obj) = self._parser.nextobject()
        return obj
@ -846,8 +848,8 @@ class PDFDocument:
        :raises PDFObjectNotFound if objid does not exist in PDF
        """
        if not self.xrefs:
-            raise PDFException('PDFDocument is not initialized')
+            raise PDFException("PDFDocument is not initialized")
-        log.debug('getobj: objid=%r', objid)
+        log.debug("getobj: objid=%r", objid)
        if objid in self._cached_objs:
            (obj, genno) = self._cached_objs[objid]
        else:
@ -863,8 +865,7 @@ class PDFDocument:
                    else:
                        obj = self._getobj_parse(index, objid)
                        if self.decipher:
-                            obj = decipher_all(self.decipher, objid, genno,
+                            obj = decipher_all(self.decipher, objid, genno, obj)
                                               obj)
                    if isinstance(obj, PDFStream):
                        obj.set_objid(objid, genno)
@ -873,7 +874,7 @@ class PDFDocument:
                    continue
            else:
                raise PDFObjectNotFound(objid)
-            log.debug('register: objid=%r: %r', objid, obj)
+            log.debug("register: objid=%r: %r", objid, obj)
            if self.caching:
                self._cached_objs[objid] = (obj, genno)
        return obj
@ -881,25 +882,25 @@ class PDFDocument:
    OutlineType = Tuple[Any, Any, Any, Any, Any]
    def get_outlines(self) -> Iterator[OutlineType]:
-        if 'Outlines' not in self.catalog:
+        if "Outlines" not in self.catalog:
            raise PDFNoOutlines
-        def search(entry: object, level: int
+        def search(entry: object, level: int) -> Iterator[PDFDocument.OutlineType]:
                   ) -> Iterator[PDFDocument.OutlineType]:
            entry = dict_value(entry)
-            if 'Title' in entry:
+            if "Title" in entry:
-                if 'A' in entry or 'Dest' in entry:
+                if "A" in entry or "Dest" in entry:
-                    title = decode_text(str_value(entry['Title']))
+                    title = decode_text(str_value(entry["Title"]))
-                    dest = entry.get('Dest')
+                    dest = entry.get("Dest")
-                    action = entry.get('A')
+                    action = entry.get("A")
-                    se = entry.get('SE')
+                    se = entry.get("SE")
                    yield (level, title, dest, action, se)
-            if 'First' in entry and 'Last' in entry:
+            if "First" in entry and "Last" in entry:
-                yield from search(entry['First'], level+1)
+                yield from search(entry["First"], level + 1)
-            if 'Next' in entry:
+            if "Next" in entry:
-                yield from search(entry['Next'], level)
+                yield from search(entry["Next"], level)
            return
-        return search(self.catalog['Outlines'], 0)
+
        return search(self.catalog["Outlines"], 0)
    def get_page_labels(self) -> Iterator[str]:
        """
@ -913,51 +914,49 @@ class PDFDocument:
        assert self.catalog is not None
        try:
-            page_labels = PageLabels(self.catalog['PageLabels'])
+            page_labels = PageLabels(self.catalog["PageLabels"])
        except (PDFTypeError, KeyError):
            raise PDFNoPageLabels
        return page_labels.labels
-    def lookup_name(
+    def lookup_name(self, cat: str, key: Union[str, bytes]) -> Any:
        self,
        cat: str,
        key: Union[str, bytes]
    ) -> Any:
        try:
-            names = dict_value(self.catalog['Names'])
+            names = dict_value(self.catalog["Names"])
        except (PDFTypeError, KeyError):
            raise KeyError((cat, key))
        # may raise KeyError
        d0 = dict_value(names[cat])
        def lookup(d: Dict[str, Any]) -> Any:
-            if 'Limits' in d:
+            if "Limits" in d:
-                (k1, k2) = list_value(d['Limits'])
+                (k1, k2) = list_value(d["Limits"])
                if key < k1 or k2 < key:
                    return None
-            if 'Names' in d:
+            if "Names" in d:
-                objs = list_value(d['Names'])
+                objs = list_value(d["Names"])
-                names = dict(cast(Iterator[Tuple[Union[str, bytes], Any]],
+                names = dict(
-                                  choplist(2, objs)))
+                    cast(Iterator[Tuple[Union[str, bytes], Any]], choplist(2, objs))
                )
                return names[key]
-            if 'Kids' in d:
+            if "Kids" in d:
-                for c in list_value(d['Kids']):
+                for c in list_value(d["Kids"]):
                    v = lookup(dict_value(c))
                    if v:
                        return v
            raise KeyError((cat, key))
        return lookup(d0)
    def get_dest(self, name: Union[str, bytes]) -> Any:
        try:
            # PDF-1.2 or later
-            obj = self.lookup_name('Dests', name)
+            obj = self.lookup_name("Dests", name)
        except KeyError:
            # PDF-1.1 or prior
-            if 'Dests' not in self.catalog:
+            if "Dests" not in self.catalog:
                raise PDFDestinationNotFound(name)
-            d0 = dict_value(self.catalog['Dests'])
+            d0 = dict_value(self.catalog["Dests"])
            if name not in d0:
                raise PDFDestinationNotFound(name)
            obj = d0[name]
@ -970,23 +969,20 @@ class PDFDocument:
        prev = None
        for line in parser.revreadlines():
            line = line.strip()
-            log.debug('find_xref: %r', line)
+            log.debug("find_xref: %r", line)
-            if line == b'startxref':
+            if line == b"startxref":
                break
            if line:
                prev = line
        else:
-            raise PDFNoValidXRef('Unexpected EOF')
+            raise PDFNoValidXRef("Unexpected EOF")
-        log.debug('xref found: pos=%r', prev)
+        log.debug("xref found: pos=%r", prev)
        assert prev is not None
        return int(prev)
    # read xref table
    def read_xref_from(
-        self,
+        self, parser: PDFParser, start: int, xrefs: List[PDFBaseXRef]
        parser: PDFParser,
        start: int,
        xrefs: List[PDFBaseXRef]
    ) -> None:
        """Reads XRefs from the given location."""
        parser.seek(start)
@ -994,8 +990,8 @@ class PDFDocument:
        try:
            (pos, token) = parser.nexttoken()
        except PSEOF:
-            raise PDFNoValidXRef('Unexpected EOF')
+            raise PDFNoValidXRef("Unexpected EOF")
-        log.debug('read_xref_from: start=%d, token=%r', start, token)
+        log.debug("read_xref_from: start=%d, token=%r", start, token)
        if isinstance(token, int):
            # XRefStream: PDF-1.5
            parser.seek(pos)
@ -1009,13 +1005,13 @@ class PDFDocument:
            xref.load(parser)
        xrefs.append(xref)
        trailer = xref.get_trailer()
-        log.debug('trailer: %r', trailer)
+        log.debug("trailer: %r", trailer)
-        if 'XRefStm' in trailer:
+        if "XRefStm" in trailer:
-            pos = int_value(trailer['XRefStm'])
+            pos = int_value(trailer["XRefStm"])
            self.read_xref_from(parser, pos, xrefs)
-        if 'Prev' in trailer:
+        if "Prev" in trailer:
            # find previous xref
-            pos = int_value(trailer['Prev'])
+            pos = int_value(trailer["Prev"])
            self.read_xref_from(parser, pos, xrefs)
        return
@ -1033,16 +1029,16 @@ class PageLabels(NumberTree):
        # The tree must begin with page index 0
        if len(ranges) == 0 or ranges[0][0] != 0:
            if settings.STRICT:
-                raise PDFSyntaxError('PageLabels is missing page index 0')
+                raise PDFSyntaxError("PageLabels is missing page index 0")
            else:
                # Try to cope, by assuming empty labels for the initial pages
                ranges.insert(0, (0, {}))
        for (next, (start, label_dict_unchecked)) in enumerate(ranges, 1):
            label_dict = dict_value(label_dict_unchecked)
-            style = label_dict.get('S')
+            style = label_dict.get("S")
-            prefix = decode_text(str_value(label_dict.get('P', b'')))
+            prefix = decode_text(str_value(label_dict.get("P", b"")))
-            first_value = int_value(label_dict.get('St', 1))
+            first_value = int_value(label_dict.get("St", 1))
            if next == len(ranges):
                # This is the last specified range. It continues until the end
@ -1061,18 +1057,18 @@ class PageLabels(NumberTree):
    def _format_page_label(value: int, style: Any) -> str:
        """Format page label value in a specific style"""
        if style is None:
-            label = ''
+            label = ""
-        elif style is LIT('D'):  # Decimal arabic numerals
+        elif style is LIT("D"):  # Decimal arabic numerals
            label = str(value)
-        elif style is LIT('R'):  # Uppercase roman numerals
+        elif style is LIT("R"):  # Uppercase roman numerals
            label = format_int_roman(value).upper()
-        elif style is LIT('r'):  # Lowercase roman numerals
+        elif style is LIT("r"):  # Lowercase roman numerals
            label = format_int_roman(value)
-        elif style is LIT('A'):  # Uppercase letters A-Z, AA-ZZ...
+        elif style is LIT("A"):  # Uppercase letters A-Z, AA-ZZ...
            label = format_int_alpha(value).upper()
-        elif style is LIT('a'):  # Lowercase letters a-z, aa-zz...
+        elif style is LIT("a"):  # Lowercase letters a-z, aa-zz...
            label = format_int_alpha(value)
        else:
-            log.warning('Unknown page label style: %r', style)
+            log.warning("Unknown page label style: %r", style)
-            label = ''
+            label = ""
        return label
--- a/pdfminer/pdffont.py
+++ b/pdfminer/pdffont.py
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@ -50,11 +50,11 @@ class PDFInterpreterError(PDFException):
    pass
-LITERAL_PDF = LIT('PDF')
+LITERAL_PDF = LIT("PDF")
-LITERAL_TEXT = LIT('Text')
+LITERAL_TEXT = LIT("Text")
-LITERAL_FONT = LIT('Font')
+LITERAL_FONT = LIT("Font")
-LITERAL_FORM = LIT('Form')
+LITERAL_FORM = LIT("Form")
-LITERAL_IMAGE = LIT('Image')
+LITERAL_IMAGE = LIT("Image")
 class PDFTextState:
@ -75,12 +75,23 @@ class PDFTextState:
        # self.linematrix is set
    def __repr__(self) -> str:
-        return '<PDFTextState: font=%r, fontsize=%r, charspace=%r, ' \
+        return (
-               'wordspace=%r, scaling=%r, leading=%r, render=%r, rise=%r, ' \
+            "<PDFTextState: font=%r, fontsize=%r, charspace=%r, "
-               'matrix=%r, linematrix=%r>' \
+            "wordspace=%r, scaling=%r, leading=%r, render=%r, rise=%r, "
-               % (self.font, self.fontsize, self.charspace, self.wordspace,
+            "matrix=%r, linematrix=%r>"
-                  self.scaling, self.leading, self.render, self.rise,
+            % (
-                  self.matrix, self.linematrix)
+                self.font,
                self.fontsize,
                self.charspace,
                self.wordspace,
                self.scaling,
                self.leading,
                self.render,
                self.rise,
                self.matrix,
                self.linematrix,
            )
        )
    def copy(self) -> "PDFTextState":
        obj = PDFTextState()
@ -102,13 +113,13 @@ class PDFTextState:
 Color = Union[
-    float,                              # Greyscale
+    float,  # Greyscale
-    Tuple[float, float, float],         # R, G, B
+    Tuple[float, float, float],  # R, G, B
-    Tuple[float, float, float, float]]  # C, M, Y, K
+    Tuple[float, float, float, float],
 ]  # C, M, Y, K
 class PDFGraphicState:
    def __init__(self) -> None:
        self.linewidth: float = 0
        self.linecap: Optional[object] = None
@ -138,12 +149,22 @@ class PDFGraphicState:
        return obj
    def __repr__(self) -> str:
-        return ('<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, '
+        return (
-                ' miterlimit=%r, dash=%r, intent=%r, flatness=%r, '
+            "<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, "
-                ' stroking color=%r, non stroking color=%r>' %
+            " miterlimit=%r, dash=%r, intent=%r, flatness=%r, "
-                (self.linewidth, self.linecap, self.linejoin,
+            " stroking color=%r, non stroking color=%r>"
-                 self.miterlimit, self.dash, self.intent, self.flatness,
+            % (
-                 self.scolor, self.ncolor))
+                self.linewidth,
                self.linecap,
                self.linejoin,
                self.miterlimit,
                self.dash,
                self.intent,
                self.flatness,
                self.scolor,
                self.ncolor,
            )
        )
 class PDFResourceManager:
@ -179,41 +200,41 @@ class PDFResourceManager:
        if objid and objid in self._cached_fonts:
            font = self._cached_fonts[objid]
        else:
-            log.debug('get_font: create: objid=%r, spec=%r', objid, spec)
+            log.debug("get_font: create: objid=%r, spec=%r", objid, spec)
            if settings.STRICT:
-                if spec['Type'] is not LITERAL_FONT:
+                if spec["Type"] is not LITERAL_FONT:
-                    raise PDFFontError('Type is not /Font')
+                    raise PDFFontError("Type is not /Font")
            # Create a Font object.
-            if 'Subtype' in spec:
+            if "Subtype" in spec:
-                subtype = literal_name(spec['Subtype'])
+                subtype = literal_name(spec["Subtype"])
            else:
                if settings.STRICT:
-                    raise PDFFontError('Font Subtype is not specified.')
+                    raise PDFFontError("Font Subtype is not specified.")
-                subtype = 'Type1'
+                subtype = "Type1"
-            if subtype in ('Type1', 'MMType1'):
+            if subtype in ("Type1", "MMType1"):
                # Type1 Font
                font = PDFType1Font(self, spec)
-            elif subtype == 'TrueType':
+            elif subtype == "TrueType":
                # TrueType Font
                font = PDFTrueTypeFont(self, spec)
-            elif subtype == 'Type3':
+            elif subtype == "Type3":
                # Type3 Font
                font = PDFType3Font(self, spec)
-            elif subtype in ('CIDFontType0', 'CIDFontType2'):
+            elif subtype in ("CIDFontType0", "CIDFontType2"):
                # CID Font
                font = PDFCIDFont(self, spec)
-            elif subtype == 'Type0':
+            elif subtype == "Type0":
                # Type0 Font
-                dfonts = list_value(spec['DescendantFonts'])
+                dfonts = list_value(spec["DescendantFonts"])
                assert dfonts
                subspec = dict_value(dfonts[0]).copy()
-                for k in ('Encoding', 'ToUnicode'):
+                for k in ("Encoding", "ToUnicode"):
                    if k in spec:
                        subspec[k] = resolve1(spec[k])
                font = self.get_font(None, subspec)
            else:
                if settings.STRICT:
-                    raise PDFFontError('Invalid Font spec: %r' % spec)
+                    raise PDFFontError("Invalid Font spec: %r" % spec)
                font = PDFType1Font(self, spec)  # this is so wrong!
            if objid and self.caching:
                self._cached_fonts[objid] = font
@ -221,7 +242,6 @@ class PDFResourceManager:
 class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
    def __init__(self, streams: Sequence[object]) -> None:
        self.streams = streams
        self.istream = 0
@ -236,7 +256,7 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
                strm = stream_value(self.streams[self.istream])
                self.istream += 1
            else:
-                raise PSEOF('Unexpected EOF, file truncated?')
+                raise PSEOF("Unexpected EOF, file truncated?")
            self.fp = BytesIO(strm.get_data())
    def seek(self, pos: int) -> None:
@ -255,14 +275,10 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
            self.fp = None  # type: ignore[assignment]
        self.charpos = 0
-    def get_inline_data(
+    def get_inline_data(self, pos: int, target: bytes = b"EI") -> Tuple[int, bytes]:
        self,
        pos: int,
        target: bytes = b'EI'
    ) -> Tuple[int, bytes]:
        self.seek(pos)
        i = 0
-        data = b''
+        data = b""
        while i <= len(target):
            self.fillbuf()
            if i:
@ -279,36 +295,35 @@ class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):
            else:
                try:
                    j = self.buf.index(target[0], self.charpos)
-                    data += self.buf[self.charpos:j+1]
+                    data += self.buf[self.charpos : j + 1]
-                    self.charpos = j+1
+                    self.charpos = j + 1
                    i = 1
                except ValueError:
-                    data += self.buf[self.charpos:]
+                    data += self.buf[self.charpos :]
                    self.charpos = len(self.buf)
-        data = data[:-(len(target)+1)]  # strip the last part
+        data = data[: -(len(target) + 1)]  # strip the last part
-        data = re.sub(br'(\x0d\x0a|[\x0d\x0a])$', b'', data)
+        data = re.sub(rb"(\x0d\x0a|[\x0d\x0a])$", b"", data)
        return (pos, data)
    def flush(self) -> None:
        self.add_results(*self.popall())
-    KEYWORD_BI = KWD(b'BI')
+    KEYWORD_BI = KWD(b"BI")
-    KEYWORD_ID = KWD(b'ID')
+    KEYWORD_ID = KWD(b"ID")
-    KEYWORD_EI = KWD(b'EI')
+    KEYWORD_EI = KWD(b"EI")
    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        if token is self.KEYWORD_BI:
            # inline image within a content stream
-            self.start_type(pos, 'inline')
+            self.start_type(pos, "inline")
        elif token is self.KEYWORD_ID:
            try:
-                (_, objs) = self.end_type('inline')
+                (_, objs) = self.end_type("inline")
                if len(objs) % 2 != 0:
-                    error_msg = 'Invalid dictionary construct: {!r}' \
+                    error_msg = "Invalid dictionary construct: {!r}".format(objs)
                        .format(objs)
                    raise PSTypeError(error_msg)
                d = {literal_name(k): v for (k, v) in choplist(2, objs)}
-                (pos, data) = self.get_inline_data(pos+len(b'ID '))
+                (pos, data) = self.get_inline_data(pos + len(b"ID "))
                obj = PDFStream(d, data)
                self.push((pos, obj))
                self.push((pos, self.KEYWORD_EI))
@ -351,32 +366,30 @@ class PDFPageInterpreter:
                name = literal_name(spec[0])
            else:
                name = literal_name(spec)
-            if name == 'ICCBased' and isinstance(spec, list) \
+            if name == "ICCBased" and isinstance(spec, list) and 2 <= len(spec):
-                    and 2 <= len(spec):
+                return PDFColorSpace(name, stream_value(spec[1])["N"])
-                return PDFColorSpace(name, stream_value(spec[1])['N'])
+            elif name == "DeviceN" and isinstance(spec, list) and 2 <= len(spec):
            elif name == 'DeviceN' and isinstance(spec, list) \
                    and 2 <= len(spec):
                return PDFColorSpace(name, len(list_value(spec[1])))
            else:
                return PREDEFINED_COLORSPACE.get(name)
        for (k, v) in dict_value(resources).items():
-            log.debug('Resource: %r: %r', k, v)
+            log.debug("Resource: %r: %r", k, v)
-            if k == 'Font':
+            if k == "Font":
                for (fontid, spec) in dict_value(v).items():
                    objid = None
                    if isinstance(spec, PDFObjRef):
                        objid = spec.objid
                    spec = dict_value(spec)
                    self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec)
-            elif k == 'ColorSpace':
+            elif k == "ColorSpace":
                for (csid, spec) in dict_value(v).items():
                    colorspace = get_colorspace(resolve1(spec))
                    if colorspace is not None:
                        self.csmap[csid] = colorspace
-            elif k == 'ProcSet':
+            elif k == "ProcSet":
                self.rsrcmgr.get_procset(list_value(v))
-            elif k == 'XObject':
+            elif k == "XObject":
                for (xobjid, xobjstrm) in dict_value(v).items():
                    self.xobjmap[xobjid] = xobjstrm
        return
@ -410,14 +423,11 @@ class PDFPageInterpreter:
        self.argstack = self.argstack[:-n]
        return x
-    def get_current_state(
+    def get_current_state(self) -> Tuple[Matrix, PDFTextState, PDFGraphicState]:
        self
    ) -> Tuple[Matrix, PDFTextState, PDFGraphicState]:
        return (self.ctm, self.textstate.copy(), self.graphicstate.copy())
    def set_current_state(
-        self,
+        self, state: Tuple[Matrix, PDFTextState, PDFGraphicState]
        state: Tuple[Matrix, PDFTextState, PDFGraphicState]
    ) -> None:
        (self.ctm, self.textstate, self.graphicstate) = state
        self.device.set_ctm(self.ctm)
@ -441,11 +451,10 @@ class PDFPageInterpreter:
        c1: PDFStackT,
        d1: PDFStackT,
        e1: PDFStackT,
-        f1: PDFStackT
+        f1: PDFStackT,
    ) -> None:
        """Concatenate matrix to current transformation matrix"""
-        self.ctm = \
+        self.ctm = mult_matrix(cast(Matrix, (a1, b1, c1, d1, e1, f1)), self.ctm)
            mult_matrix(cast(Matrix, (a1, b1, c1, d1, e1, f1)), self.ctm)
        self.device.set_ctm(self.ctm)
        return
@ -491,12 +500,12 @@ class PDFPageInterpreter:
    def do_m(self, x: PDFStackT, y: PDFStackT) -> None:
        """Begin new subpath"""
-        self.curpath.append(('m', cast(float, x), cast(float, y)))
+        self.curpath.append(("m", cast(float, x), cast(float, y)))
        return
    def do_l(self, x: PDFStackT, y: PDFStackT) -> None:
        """Append straight line segment to path"""
-        self.curpath.append(('l', cast(float, x), cast(float, y)))
+        self.curpath.append(("l", cast(float, x), cast(float, y)))
        return
    def do_c(
@ -506,66 +515,57 @@ class PDFPageInterpreter:
        x2: PDFStackT,
        y2: PDFStackT,
        x3: PDFStackT,
-        y3: PDFStackT
+        y3: PDFStackT,
    ) -> None:
        """Append curved segment to path (three control points)"""
-        self.curpath.append(('c', cast(float, x1), cast(float, y1),
+        self.curpath.append(
-                             cast(float, x2), cast(float, y2),
+            (
-                             cast(float, x3), cast(float, y3)))
+                "c",
                cast(float, x1),
                cast(float, y1),
                cast(float, x2),
                cast(float, y2),
                cast(float, x3),
                cast(float, y3),
            )
        )
        return
-    def do_v(
+    def do_v(self, x2: PDFStackT, y2: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None:
        self,
        x2: PDFStackT,
        y2: PDFStackT,
        x3: PDFStackT,
        y3: PDFStackT
    ) -> None:
        """Append curved segment to path (initial point replicated)"""
-        self.curpath.append(('v', cast(float, x2), cast(float, y2),
+        self.curpath.append(
-                             cast(float, x3), cast(float, y3)))
+            ("v", cast(float, x2), cast(float, y2), cast(float, x3), cast(float, y3))
        )
        return
-    def do_y(
+    def do_y(self, x1: PDFStackT, y1: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None:
        self,
        x1: PDFStackT,
        y1: PDFStackT,
        x3: PDFStackT,
        y3: PDFStackT
    ) -> None:
        """Append curved segment to path (final point replicated)"""
-        self.curpath.append(('y', cast(float, x1), cast(float, y1),
+        self.curpath.append(
-                             cast(float, x3), cast(float, y3)))
+            ("y", cast(float, x1), cast(float, y1), cast(float, x3), cast(float, y3))
        )
        return
    def do_h(self) -> None:
        """Close subpath"""
-        self.curpath.append(('h',))
+        self.curpath.append(("h",))
        return
-    def do_re(
+    def do_re(self, x: PDFStackT, y: PDFStackT, w: PDFStackT, h: PDFStackT) -> None:
        self,
        x: PDFStackT,
        y: PDFStackT,
        w: PDFStackT,
        h: PDFStackT
    ) -> None:
        """Append rectangle to path"""
        x = cast(float, x)
        y = cast(float, y)
        w = cast(float, w)
        h = cast(float, h)
-        self.curpath.append(('m', x, y))
+        self.curpath.append(("m", x, y))
-        self.curpath.append(('l', x+w, y))
+        self.curpath.append(("l", x + w, y))
-        self.curpath.append(('l', x+w, y+h))
+        self.curpath.append(("l", x + w, y + h))
-        self.curpath.append(('l', x, y+h))
+        self.curpath.append(("l", x, y + h))
-        self.curpath.append(('h',))
+        self.curpath.append(("h",))
        return
    def do_S(self) -> None:
        """Stroke path"""
-        self.device.paint_path(self.graphicstate, True, False, False,
+        self.device.paint_path(self.graphicstate, True, False, False, self.curpath)
                               self.curpath)
        self.curpath = []
        return
@ -577,8 +577,7 @@ class PDFPageInterpreter:
    def do_f(self) -> None:
        """Fill path using nonzero winding number rule"""
-        self.device.paint_path(self.graphicstate, False, True, False,
+        self.device.paint_path(self.graphicstate, False, True, False, self.curpath)
                               self.curpath)
        self.curpath = []
        return
@ -588,22 +587,19 @@ class PDFPageInterpreter:
    def do_f_a(self) -> None:
        """Fill path using even-odd rule"""
-        self.device.paint_path(self.graphicstate, False, True, True,
+        self.device.paint_path(self.graphicstate, False, True, True, self.curpath)
                               self.curpath)
        self.curpath = []
        return
    def do_B(self) -> None:
        """Fill and stroke path using nonzero winding number rule"""
-        self.device.paint_path(self.graphicstate, True, True, False,
+        self.device.paint_path(self.graphicstate, True, True, False, self.curpath)
                               self.curpath)
        self.curpath = []
        return
    def do_B_a(self) -> None:
        """Fill and stroke path using even-odd rule"""
-        self.device.paint_path(self.graphicstate, True, True, True,
+        self.device.paint_path(self.graphicstate, True, True, True, self.curpath)
                               self.curpath)
        self.curpath = []
        return
@ -641,7 +637,7 @@ class PDFPageInterpreter:
            self.scs = self.csmap[literal_name(name)]
        except KeyError:
            if settings.STRICT:
-                raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
+                raise PDFInterpreterError("Undefined ColorSpace: %r" % name)
        return
    def do_cs(self, name: PDFStackT) -> None:
@ -650,7 +646,7 @@ class PDFPageInterpreter:
            self.ncs = self.csmap[literal_name(name)]
        except KeyError:
            if settings.STRICT:
-                raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
+                raise PDFInterpreterError("Undefined ColorSpace: %r" % name)
        return
    def do_G(self, gray: PDFStackT) -> None:
@ -665,38 +661,32 @@ class PDFPageInterpreter:
    def do_RG(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
        """Set RGB color for stroking operations"""
-        self.graphicstate.scolor = \
+        self.graphicstate.scolor = (cast(float, r), cast(float, g), cast(float, b))
            (cast(float, r), cast(float, g), cast(float, b))
        return
    def do_rg(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
        """Set RGB color for nonstroking operations"""
-        self.graphicstate.ncolor = \
+        self.graphicstate.ncolor = (cast(float, r), cast(float, g), cast(float, b))
            (cast(float, r), cast(float, g), cast(float, b))
        return
-    def do_K(
+    def do_K(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None:
        self,
        c: PDFStackT,
        m: PDFStackT,
        y: PDFStackT,
        k: PDFStackT
    ) -> None:
        """Set CMYK color for stroking operations"""
-        self.graphicstate.scolor = \
+        self.graphicstate.scolor = (
-            (cast(float, c), cast(float, m), cast(float, y), cast(float, k))
+            cast(float, c),
            cast(float, m),
            cast(float, y),
            cast(float, k),
        )
        return
-    def do_k(
+    def do_k(self, c: PDFStackT, m: PDFStackT, y: PDFStackT, k: PDFStackT) -> None:
        self,
        c: PDFStackT,
        m: PDFStackT,
        y: PDFStackT,
        k: PDFStackT
    ) -> None:
        """Set CMYK color for nonstroking operations"""
-        self.graphicstate.ncolor = \
+        self.graphicstate.ncolor = (
-            (cast(float, c), cast(float, m), cast(float, y), cast(float, k))
+            cast(float, c),
            cast(float, m),
            cast(float, y),
            cast(float, k),
        )
        return
    def do_SCN(self) -> None:
@ -705,7 +695,7 @@ class PDFPageInterpreter:
            n = self.scs.ncomponents
        else:
            if settings.STRICT:
-                raise PDFInterpreterError('No colorspace specified!')
+                raise PDFInterpreterError("No colorspace specified!")
            n = 1
        self.graphicstate.scolor = cast(Color, self.pop(n))
        return
@ -716,7 +706,7 @@ class PDFPageInterpreter:
            n = self.ncs.ncomponents
        else:
            if settings.STRICT:
-                raise PDFInterpreterError('No colorspace specified!')
+                raise PDFInterpreterError("No colorspace specified!")
            n = 1
        self.graphicstate.ncolor = cast(Color, self.pop(n))
        return
@ -831,7 +821,7 @@ class PDFPageInterpreter:
            self.textstate.font = self.fontmap[literal_name(fontid)]
        except KeyError:
            if settings.STRICT:
-                raise PDFInterpreterError('Undefined Font id: %r' % fontid)
+                raise PDFInterpreterError("Undefined Font id: %r" % fontid)
            self.textstate.font = self.rsrcmgr.get_font(None, {})
        self.textstate.fontsize = cast(float, fontsize)
        return
@ -854,7 +844,7 @@ class PDFPageInterpreter:
        tx = cast(float, tx)
        ty = cast(float, ty)
        (a, b, c, d, e, f) = self.textstate.matrix
-        self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f)
+        self.textstate.matrix = (a, b, c, d, tx * a + ty * c + e, tx * b + ty * d + f)
        self.textstate.linematrix = (0, 0)
        return
@ -863,7 +853,7 @@ class PDFPageInterpreter:
        tx = cast(float, tx)
        ty = cast(float, ty)
        (a, b, c, d, e, f) = self.textstate.matrix
-        self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f)
+        self.textstate.matrix = (a, b, c, d, tx * a + ty * c + e, tx * b + ty * d + f)
        self.textstate.leading = ty
        self.textstate.linematrix = (0, 0)
        return
@ -875,7 +865,7 @@ class PDFPageInterpreter:
        c: PDFStackT,
        d: PDFStackT,
        e: PDFStackT,
-        f: PDFStackT
+        f: PDFStackT,
    ) -> None:
        """Set text matrix and text line matrix"""
        self.textstate.matrix = cast(Matrix, (a, b, c, d, e, f))
@ -885,8 +875,14 @@ class PDFPageInterpreter:
    def do_T_a(self) -> None:
        """Move to start of next text line"""
        (a, b, c, d, e, f) = self.textstate.matrix
-        self.textstate.matrix = (a, b, c, d, self.textstate.leading*c+e,
+        self.textstate.matrix = (
-                                 self.textstate.leading*d+f)
+            a,
            b,
            c,
            d,
            self.textstate.leading * c + e,
            self.textstate.leading * d + f,
        )
        self.textstate.linematrix = (0, 0)
        return
@ -894,11 +890,12 @@ class PDFPageInterpreter:
        """Show text, allowing individual glyph positioning"""
        if self.textstate.font is None:
            if settings.STRICT:
-                raise PDFInterpreterError('No font specified!')
+                raise PDFInterpreterError("No font specified!")
            return
        assert self.ncs is not None
-        self.device.render_string(self.textstate, cast(PDFTextSeq, seq),
+        self.device.render_string(
-                                  self.ncs, self.graphicstate.copy())
+            self.textstate, cast(PDFTextSeq, seq), self.ncs, self.graphicstate.copy()
        )
        return
    def do_Tj(self, s: PDFStackT) -> None:
@ -935,7 +932,7 @@ class PDFPageInterpreter:
    def do_EI(self, obj: PDFStackT) -> None:
        """End inline image object"""
-        if isinstance(obj, PDFStream) and 'W' in obj and 'H' in obj:
+        if isinstance(obj, PDFStream) and "W" in obj and "H" in obj:
            iobjid = str(id(obj))
            self.device.begin_figure(iobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
            self.device.render_image(iobjid, obj)
@ -949,28 +946,28 @@ class PDFPageInterpreter:
            xobj = stream_value(self.xobjmap[xobjid])
        except KeyError:
            if settings.STRICT:
-                raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
+                raise PDFInterpreterError("Undefined xobject id: %r" % xobjid)
            return
-        log.debug('Processing xobj: %r', xobj)
+        log.debug("Processing xobj: %r", xobj)
-        subtype = xobj.get('Subtype')
+        subtype = xobj.get("Subtype")
-        if subtype is LITERAL_FORM and 'BBox' in xobj:
+        if subtype is LITERAL_FORM and "BBox" in xobj:
            interpreter = self.dup()
-            bbox = cast(Rect, list_value(xobj['BBox']))
+            bbox = cast(Rect, list_value(xobj["BBox"]))
-            matrix = cast(Matrix, list_value(
+            matrix = cast(Matrix, list_value(xobj.get("Matrix", MATRIX_IDENTITY)))
                xobj.get('Matrix', MATRIX_IDENTITY)))
            # According to PDF reference 1.7 section 4.9.1, XObjects in
            # earlier PDFs (prior to v1.2) use the page's Resources entry
            # instead of having their own Resources entry.
-            xobjres = xobj.get('Resources')
+            xobjres = xobj.get("Resources")
            if xobjres:
                resources = dict_value(xobjres)
            else:
                resources = self.resources.copy()
            self.device.begin_figure(xobjid, bbox, matrix)
-            interpreter.render_contents(resources, [xobj],
+            interpreter.render_contents(
-                                        ctm=mult_matrix(matrix, self.ctm))
+                resources, [xobj], ctm=mult_matrix(matrix, self.ctm)
            )
            self.device.end_figure(xobjid)
-        elif subtype is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
+        elif subtype is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj:
            self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
            self.device.render_image(xobjid, xobj)
            self.device.end_figure(xobjid)
@ -980,7 +977,7 @@ class PDFPageInterpreter:
        return
    def process_page(self, page: PDFPage) -> None:
-        log.debug('Processing page: %r', page)
+        log.debug("Processing page: %r", page)
        (x0, y0, x1, y1) = page.mediabox
        if page.rotate == 90:
            ctm = (0, -1, 1, 0, -y0, x1)
@ -999,14 +996,15 @@ class PDFPageInterpreter:
        self,
        resources: Dict[object, object],
        streams: Sequence[object],
-        ctm: Matrix = MATRIX_IDENTITY
+        ctm: Matrix = MATRIX_IDENTITY,
    ) -> None:
        """Render the content streams.
        This method may be called recursively.
        """
-        log.debug('render_contents: resources=%r, streams=%r, ctm=%r',
+        log.debug(
-                  resources, streams, ctm)
+            "render_contents: resources=%r, streams=%r, ctm=%r", resources, streams, ctm
        )
        self.init_resources(resources)
        self.init_state(ctm)
        self.execute(list_value(streams))
@ -1025,22 +1023,23 @@ class PDFPageInterpreter:
                break
            if isinstance(obj, PSKeyword):
                name = keyword_name(obj)
-                method = 'do_%s' % name.replace('*', '_a').replace('"', '_w')\
+                method = "do_%s" % name.replace("*", "_a").replace('"', "_w").replace(
-                    .replace("'", '_q')
+                    "'", "_q"
                )
                if hasattr(self, method):
                    func = getattr(self, method)
-                    nargs = func.__code__.co_argcount-1
+                    nargs = func.__code__.co_argcount - 1
                    if nargs:
                        args = self.pop(nargs)
-                        log.debug('exec: %s %r', name, args)
+                        log.debug("exec: %s %r", name, args)
                        if len(args) == nargs:
                            func(*args)
                    else:
-                        log.debug('exec: %s', name)
+                        log.debug("exec: %s", name)
                        func()
                else:
                    if settings.STRICT:
-                        error_msg = 'Unknown operator: %r' % name
+                        error_msg = "Unknown operator: %r" % name
                        raise PDFInterpreterError(error_msg)
            else:
                self.push(obj)
--- a/pdfminer/pdfpage.py
+++ b/pdfminer/pdfpage.py
@ -4,8 +4,7 @@ from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple
 from pdfminer.utils import Rect
 from . import settings
-from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed, \
+from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed, PDFNoPageLabels
    PDFNoPageLabels
 from .pdfparser import PDFParser
 from .pdftypes import PDFObjectNotFound
 from .pdftypes import dict_value
@ -17,8 +16,8 @@ from .psparser import LIT
 log = logging.getLogger(__name__)
 # some predefined literals and keywords.
-LITERAL_PAGE = LIT('Page')
+LITERAL_PAGE = LIT("Page")
-LITERAL_PAGES = LIT('Pages')
+LITERAL_PAGES = LIT("Pages")
 class PDFPage:
@ -44,11 +43,7 @@ class PDFPage:
    """
    def __init__(
-        self,
+        self, doc: PDFDocument, pageid: object, attrs: object, label: Optional[str]
        doc: PDFDocument,
        pageid: object,
        attrs: object,
        label: Optional[str]
    ) -> None:
        """Initialize a page object.
@ -61,19 +56,20 @@ class PDFPage:
        self.pageid = pageid
        self.attrs = dict_value(attrs)
        self.label = label
-        self.lastmod = resolve1(self.attrs.get('LastModified'))
+        self.lastmod = resolve1(self.attrs.get("LastModified"))
-        self.resources: Dict[object, object] = \
+        self.resources: Dict[object, object] = resolve1(
-            resolve1(self.attrs.get('Resources', dict()))
+            self.attrs.get("Resources", dict())
-        self.mediabox: Rect = resolve1(self.attrs['MediaBox'])
+        )
-        if 'CropBox' in self.attrs:
+        self.mediabox: Rect = resolve1(self.attrs["MediaBox"])
-            self.cropbox: Rect = resolve1(self.attrs['CropBox'])
+        if "CropBox" in self.attrs:
            self.cropbox: Rect = resolve1(self.attrs["CropBox"])
        else:
            self.cropbox = self.mediabox
-        self.rotate = (int_value(self.attrs.get('Rotate', 0))+360) % 360
+        self.rotate = (int_value(self.attrs.get("Rotate", 0)) + 360) % 360
-        self.annots = self.attrs.get('Annots')
+        self.annots = self.attrs.get("Annots")
-        self.beads = self.attrs.get('B')
+        self.beads = self.attrs.get("B")
-        if 'Contents' in self.attrs:
+        if "Contents" in self.attrs:
-            contents = resolve1(self.attrs['Contents'])
+            contents = resolve1(self.attrs["Contents"])
        else:
            contents = []
        if not isinstance(contents, list):
@ -81,16 +77,16 @@ class PDFPage:
        self.contents: List[object] = contents
    def __repr__(self) -> str:
-        return '<PDFPage: Resources={!r}, MediaBox={!r}>'\
+        return "<PDFPage: Resources={!r}, MediaBox={!r}>".format(
-            .format(self.resources, self.mediabox)
+            self.resources, self.mediabox
        )
-    INHERITABLE_ATTRS = {'Resources', 'MediaBox', 'CropBox', 'Rotate'}
+    INHERITABLE_ATTRS = {"Resources", "MediaBox", "CropBox", "Rotate"}
    @classmethod
    def create_pages(cls, document: PDFDocument) -> Iterator["PDFPage"]:
        def search(
-            obj: object,
+            obj: object, parent: Dict[str, object]
            parent: Dict[str, object]
        ) -> Iterator[Tuple[int, Dict[object, Dict[object, object]]]]:
            if isinstance(obj, int):
                objid = obj
@ -104,16 +100,16 @@ class PDFPage:
                if k in cls.INHERITABLE_ATTRS and k not in tree:
                    tree[k] = v
-            tree_type = tree.get('Type')
+            tree_type = tree.get("Type")
            if tree_type is None and not settings.STRICT:  # See #64
-                tree_type = tree.get('type')
+                tree_type = tree.get("type")
-            if tree_type is LITERAL_PAGES and 'Kids' in tree:
+            if tree_type is LITERAL_PAGES and "Kids" in tree:
-                log.debug('Pages: Kids=%r', tree['Kids'])
+                log.debug("Pages: Kids=%r", tree["Kids"])
-                for c in list_value(tree['Kids']):
+                for c in list_value(tree["Kids"]):
                    yield from search(c, tree)
            elif tree_type is LITERAL_PAGE:
-                log.debug('Page: %r', tree)
+                log.debug("Page: %r", tree)
                yield (objid, tree)
        try:
@ -122,8 +118,8 @@ class PDFPage:
            page_labels = itertools.repeat(None)
        pages = False
-        if 'Pages' in document.catalog:
+        if "Pages" in document.catalog:
-            objects = search(document.catalog['Pages'], document.catalog)
+            objects = search(document.catalog["Pages"], document.catalog)
            for (objid, tree) in objects:
                yield cls(document, objid, tree, next(page_labels))
                pages = True
@ -133,8 +129,7 @@ class PDFPage:
                for objid in xref.get_objids():
                    try:
                        obj = document.getobj(objid)
-                        if isinstance(obj, dict) \
+                        if isinstance(obj, dict) and obj.get("Type") is LITERAL_PAGE:
                                and obj.get('Type') is LITERAL_PAGE:
                            yield cls(document, objid, obj, next(page_labels))
                    except PDFObjectNotFound:
                        pass
@ -146,9 +141,9 @@ class PDFPage:
        fp: BinaryIO,
        pagenos: Optional[Container[int]] = None,
        maxpages: int = 0,
-        password: str = '',
+        password: str = "",
        caching: bool = True,
-        check_extractable: bool = False
+        check_extractable: bool = False,
    ) -> Iterator["PDFPage"]:
        # Create a PDF parser object associated with the file object.
        parser = PDFParser(fp)
@ -158,20 +153,22 @@ class PDFPage:
        # If not, warn the user and proceed.
        if not doc.is_extractable:
            if check_extractable:
-                error_msg = 'Text extraction is not allowed: %r' % fp
+                error_msg = "Text extraction is not allowed: %r" % fp
                raise PDFTextExtractionNotAllowed(error_msg)
            else:
-                warning_msg = 'The PDF %r contains a metadata field '\
+                warning_msg = (
-                            'indicating that it should not allow '   \
+                    "The PDF %r contains a metadata field "
-                            'text extraction. Ignoring this field '  \
+                    "indicating that it should not allow "
-                            'and proceeding. Use the check_extractable ' \
+                    "text extraction. Ignoring this field "
-                            'if you want to raise an error in this case' % fp
+                    "and proceeding. Use the check_extractable "
                    "if you want to raise an error in this case" % fp
                )
                log.warning(warning_msg)
        # Process each page contained in the document.
        for (pageno, page) in enumerate(cls.create_pages(doc)):
            if pagenos and (pageno not in pagenos):
                continue
            yield page
-            if maxpages and maxpages <= pageno+1:
+            if maxpages and maxpages <= pageno + 1:
                break
        return
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@ -51,12 +51,12 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]):
        """Associates the parser with a PDFDocument object."""
        self.doc = doc
-    KEYWORD_R = KWD(b'R')
+    KEYWORD_R = KWD(b"R")
-    KEYWORD_NULL = KWD(b'null')
+    KEYWORD_NULL = KWD(b"null")
-    KEYWORD_ENDOBJ = KWD(b'endobj')
+    KEYWORD_ENDOBJ = KWD(b"endobj")
-    KEYWORD_STREAM = KWD(b'stream')
+    KEYWORD_STREAM = KWD(b"stream")
-    KEYWORD_XREF = KWD(b'xref')
+    KEYWORD_XREF = KWD(b"xref")
-    KEYWORD_STARTXREF = KWD(b'startxref')
+    KEYWORD_STARTXREF = KWD(b"startxref")
    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        """Handles PDF-related keywords."""
@ -76,8 +76,7 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]):
            if len(self.curstack) >= 2:
                try:
                    ((_, objid), (_, genno)) = self.pop(2)
-                    (objid, genno) = (
+                    (objid, genno) = (int(objid), int(genno))  # type: ignore[arg-type]
                        int(objid), int(genno))  # type: ignore[arg-type]
                    assert self.doc is not None
                    obj = PDFObjRef(self.doc, objid, genno)
                    self.push((pos, obj))
@ -90,30 +89,30 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]):
            objlen = 0
            if not self.fallback:
                try:
-                    objlen = int_value(dic['Length'])
+                    objlen = int_value(dic["Length"])
                except KeyError:
                    if settings.STRICT:
-                        raise PDFSyntaxError('/Length is undefined: %r' % dic)
+                        raise PDFSyntaxError("/Length is undefined: %r" % dic)
            self.seek(pos)
            try:
                (_, line) = self.nextline()  # 'stream'
            except PSEOF:
                if settings.STRICT:
-                    raise PDFSyntaxError('Unexpected EOF')
+                    raise PDFSyntaxError("Unexpected EOF")
                return
            pos += len(line)
            self.fp.seek(pos)
            data = bytearray(self.fp.read(objlen))
-            self.seek(pos+objlen)
+            self.seek(pos + objlen)
            while 1:
                try:
                    (linepos, line) = self.nextline()
                except PSEOF:
                    if settings.STRICT:
-                        raise PDFSyntaxError('Unexpected EOF')
+                        raise PDFSyntaxError("Unexpected EOF")
                    break
-                if b'endstream' in line:
+                if b"endstream" in line:
-                    i = line.index(b'endstream')
+                    i = line.index(b"endstream")
                    objlen += i
                    if self.fallback:
                        data += line[:i]
@ -121,10 +120,15 @@ class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]):
                objlen += len(line)
                if self.fallback:
                    data += line
-            self.seek(pos+objlen)
+            self.seek(pos + objlen)
            # XXX limit objlen not to exceed object boundary
-            log.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos,
+            log.debug(
-                      objlen, dic, data[:10])
+                "Stream: pos=%d, objlen=%d, dic=%r, data=%r...",
                pos,
                objlen,
                dic,
                data[:10],
            )
            assert self.doc is not None
            stream = PDFStream(dic, bytes(data), self.doc.decipher)
            self.push((pos, stream))
@ -149,15 +153,14 @@ class PDFStreamParser(PDFParser):
    def flush(self) -> None:
        self.add_results(*self.popall())
-    KEYWORD_OBJ = KWD(b'obj')
+    KEYWORD_OBJ = KWD(b"obj")
    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        if token is self.KEYWORD_R:
            # reference to indirect object
            try:
                ((_, objid), (_, genno)) = self.pop(2)
-                (objid, genno) = (
+                (objid, genno) = (int(objid), int(genno))  # type: ignore[arg-type]
                    int(objid), int(genno))  # type: ignore[arg-type]
                obj = PDFObjRef(self.doc, objid, genno)
                self.push((pos, obj))
            except PSSyntaxError:
@ -167,7 +170,7 @@ class PDFStreamParser(PDFParser):
            if settings.STRICT:
                # See PDF Spec 3.4.6: Only the object values are stored in the
                # stream; the obj and endobj keywords are not used.
-                raise PDFSyntaxError('Keyword endobj found in stream')
+                raise PDFSyntaxError("Keyword endobj found in stream")
            return
        # others
        self.push((pos, token))
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@ -2,8 +2,17 @@ import io
 import logging
 import sys
 import zlib
-from typing import (TYPE_CHECKING, Any, Dict, Iterable, Optional, Union, List,
+from typing import (
-                    Tuple, cast)
+    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Optional,
    Union,
    List,
    Tuple,
    cast,
 )
 from . import settings
 from .ascii85 import ascii85decode
@ -21,18 +30,18 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
-LITERAL_CRYPT = LIT('Crypt')
+LITERAL_CRYPT = LIT("Crypt")
 # Abbreviation of Filter names in PDF 4.8.6. "Inline Images"
-LITERALS_FLATE_DECODE = (LIT('FlateDecode'), LIT('Fl'))
+LITERALS_FLATE_DECODE = (LIT("FlateDecode"), LIT("Fl"))
-LITERALS_LZW_DECODE = (LIT('LZWDecode'), LIT('LZW'))
+LITERALS_LZW_DECODE = (LIT("LZWDecode"), LIT("LZW"))
-LITERALS_ASCII85_DECODE = (LIT('ASCII85Decode'), LIT('A85'))
+LITERALS_ASCII85_DECODE = (LIT("ASCII85Decode"), LIT("A85"))
-LITERALS_ASCIIHEX_DECODE = (LIT('ASCIIHexDecode'), LIT('AHx'))
+LITERALS_ASCIIHEX_DECODE = (LIT("ASCIIHexDecode"), LIT("AHx"))
-LITERALS_RUNLENGTH_DECODE = (LIT('RunLengthDecode'), LIT('RL'))
+LITERALS_RUNLENGTH_DECODE = (LIT("RunLengthDecode"), LIT("RL"))
-LITERALS_CCITTFAX_DECODE = (LIT('CCITTFaxDecode'), LIT('CCF'))
+LITERALS_CCITTFAX_DECODE = (LIT("CCITTFaxDecode"), LIT("CCF"))
-LITERALS_DCT_DECODE = (LIT('DCTDecode'), LIT('DCT'))
+LITERALS_DCT_DECODE = (LIT("DCTDecode"), LIT("DCT"))
-LITERALS_JBIG2_DECODE = (LIT('JBIG2Decode'),)
+LITERALS_JBIG2_DECODE = (LIT("JBIG2Decode"),)
-LITERALS_JPX_DECODE = (LIT('JPXDecode'),)
+LITERALS_JPX_DECODE = (LIT("JPXDecode"),)
 if sys.version_info >= (3, 8):
@ -40,8 +49,14 @@ if sys.version_info >= (3, 8):
    class DecipherCallable(Protocol):
        """Fully typed a decipher callback, with optional parameter."""
-        def __call__(self, objid: int, genno: int, data: bytes,
+
-                     attrs: Optional[Dict[str, Any]] = None) -> bytes:
+        def __call__(
            self,
            objid: int,
            genno: int,
            data: bytes,
            attrs: Optional[Dict[str, Any]] = None,
        ) -> bytes:
            raise NotImplementedError
 else:  # Fallback for older Python
@ -75,21 +90,15 @@ class PDFNotImplementedError(PDFException):
 class PDFObjRef(PDFObject):
-
+    def __init__(self, doc: Optional["PDFDocument"], objid: int, _: object) -> None:
    def __init__(
        self,
        doc: Optional["PDFDocument"],
        objid: int,
        _: object
    ) -> None:
        if objid == 0:
            if settings.STRICT:
-                raise PDFValueError('PDF object id cannot be 0.')
+                raise PDFValueError("PDF object id cannot be 0.")
        self.doc = doc
        self.objid = objid
    def __repr__(self) -> str:
-        return '<PDFObjRef:%d>' % (self.objid)
+        return "<PDFObjRef:%d>" % (self.objid)
    def resolve(self, default: object = None) -> Any:
        assert self.doc is not None
@ -126,14 +135,8 @@ def resolve_all(x: object, default: object = None) -> Any:
    return x
-def decipher_all(
+def decipher_all(decipher: DecipherCallable, objid: int, genno: int, x: object) -> Any:
-    decipher: DecipherCallable,
+    """Recursively deciphers the given object."""
    objid: int,
    genno: int,
    x: object
 ) -> Any:
    """Recursively deciphers the given object.
    """
    if isinstance(x, bytes):
        return decipher(objid, genno, x)
    if isinstance(x, list):
@ -148,7 +151,7 @@ def int_value(x: object) -> int:
    x = resolve1(x)
    if not isinstance(x, int):
        if settings.STRICT:
-            raise PDFTypeError('Integer required: %r' % x)
+            raise PDFTypeError("Integer required: %r" % x)
        return 0
    return x
@ -157,7 +160,7 @@ def float_value(x: object) -> float:
    x = resolve1(x)
    if not isinstance(x, float):
        if settings.STRICT:
-            raise PDFTypeError('Float required: %r' % x)
+            raise PDFTypeError("Float required: %r" % x)
        return 0.0
    return x
@ -166,7 +169,7 @@ def num_value(x: object) -> float:
    x = resolve1(x)
    if not isinstance(x, (int, float)):  # == utils.isnumber(x)
        if settings.STRICT:
-            raise PDFTypeError('Int or Float required: %r' % x)
+            raise PDFTypeError("Int or Float required: %r" % x)
        return 0
    return x
@ -184,8 +187,8 @@ def str_value(x: object) -> bytes:
    x = resolve1(x)
    if not isinstance(x, bytes):
        if settings.STRICT:
-            raise PDFTypeError('String required: %r' % x)
+            raise PDFTypeError("String required: %r" % x)
-        return b''
+        return b""
    return x
@ -193,7 +196,7 @@ def list_value(x: object) -> Union[List[Any], Tuple[Any, ...]]:
    x = resolve1(x)
    if not isinstance(x, (list, tuple)):
        if settings.STRICT:
-            raise PDFTypeError('List required: %r' % x)
+            raise PDFTypeError("List required: %r" % x)
        return []
    return x
@ -202,8 +205,8 @@ def dict_value(x: object) -> Dict[Any, Any]:
    x = resolve1(x)
    if not isinstance(x, dict):
        if settings.STRICT:
-            logger.error('PDFTypeError : Dict required: %r', x)
+            logger.error("PDFTypeError : Dict required: %r", x)
-            raise PDFTypeError('Dict required: %r' % x)
+            raise PDFTypeError("Dict required: %r" % x)
        return {}
    return x
@ -212,8 +215,8 @@ def stream_value(x: object) -> "PDFStream":
    x = resolve1(x)
    if not isinstance(x, PDFStream):
        if settings.STRICT:
-            raise PDFTypeError('PDFStream required: %r' % x)
+            raise PDFTypeError("PDFStream required: %r" % x)
-        return PDFStream({}, b'')
+        return PDFStream({}, b"")
    return x
@ -223,7 +226,7 @@ def decompress_corrupted(data: bytes) -> bytes:
    """
    d = zlib.decompressobj()
    f = io.BytesIO(data)
-    result_str = b''
+    result_str = b""
    buffer = f.read(1)
    i = 0
    try:
@ -239,12 +242,11 @@ def decompress_corrupted(data: bytes) -> bytes:
 class PDFStream(PDFObject):
    def __init__(
        self,
        attrs: Dict[str, Any],
        rawdata: bytes,
-        decipher: Optional[DecipherCallable] = None
+        decipher: Optional[DecipherCallable] = None,
    ) -> None:
        assert isinstance(attrs, dict), str(type(attrs))
        self.attrs = attrs
@ -261,12 +263,18 @@ class PDFStream(PDFObject):
    def __repr__(self) -> str:
        if self.data is None:
            assert self.rawdata is not None
-            return '<PDFStream(%r): raw=%d, %r>' % \
+            return "<PDFStream(%r): raw=%d, %r>" % (
-                   (self.objid, len(self.rawdata), self.attrs)
+                self.objid,
                len(self.rawdata),
                self.attrs,
            )
        else:
            assert self.data is not None
-            return '<PDFStream(%r): len=%d, %r>' % \
+            return "<PDFStream(%r): len=%d, %r>" % (
-                   (self.objid, len(self.data), self.attrs)
+                self.objid,
                len(self.data),
                self.attrs,
            )
    def __contains__(self, name: object) -> bool:
        return name in self.attrs
@ -284,8 +292,8 @@ class PDFStream(PDFObject):
        return default
    def get_filters(self) -> List[Tuple[Any, Any]]:
-        filters = self.get_any(('F', 'Filter'))
+        filters = self.get_any(("F", "Filter"))
-        params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
+        params = self.get_any(("DP", "DecodeParms", "FDecodeParms"), {})
        if not filters:
            return []
        if not isinstance(filters, list):
@ -298,15 +306,16 @@ class PDFStream(PDFObject):
        # resolve filter if possible
        _filters = []
        for fltr in filters:
-            if hasattr(fltr, 'resolve'):
+            if hasattr(fltr, "resolve"):
                fltr = fltr.resolve()[0]
            _filters.append(fltr)
        # return list solves https://github.com/pdfminer/pdfminer.six/issues/15
        return list(zip(_filters, params))
    def decode(self) -> None:
-        assert self.data is None \
+        assert self.data is None and self.rawdata is not None, str(
-               and self.rawdata is not None, str((self.data, self.rawdata))
+            (self.data, self.rawdata)
        )
        data = self.rawdata
        if self.decipher:
            # Handle encryption
@ -326,14 +335,13 @@ class PDFStream(PDFObject):
                except zlib.error as e:
                    if settings.STRICT:
-                        error_msg = 'Invalid zlib bytes: {!r}, {!r}'\
+                        error_msg = "Invalid zlib bytes: {!r}, {!r}".format(e, data)
                            .format(e, data)
                        raise PDFException(error_msg)
                    try:
                        data = decompress_corrupted(data)
                    except zlib.error:
-                        data = b''
+                        data = b""
            elif f in LITERALS_LZW_DECODE:
                data = lzwdecode(data)
@ -356,25 +364,26 @@ class PDFStream(PDFObject):
                pass
            elif f == LITERAL_CRYPT:
                # not yet..
-                raise PDFNotImplementedError('/Crypt filter is unsupported')
+                raise PDFNotImplementedError("/Crypt filter is unsupported")
            else:
-                raise PDFNotImplementedError('Unsupported filter: %r' % f)
+                raise PDFNotImplementedError("Unsupported filter: %r" % f)
            # apply predictors
-            if params and 'Predictor' in params:
+            if params and "Predictor" in params:
-                pred = int_value(params['Predictor'])
+                pred = int_value(params["Predictor"])
                if pred == 1:
                    # no predictor
                    pass
                elif 10 <= pred:
                    # PNG predictor
-                    colors = int_value(params.get('Colors', 1))
+                    colors = int_value(params.get("Colors", 1))
-                    columns = int_value(params.get('Columns', 1))
+                    columns = int_value(params.get("Columns", 1))
-                    raw_bits_per_component = params.get('BitsPerComponent', 8)
+                    raw_bits_per_component = params.get("BitsPerComponent", 8)
                    bitspercomponent = int_value(raw_bits_per_component)
-                    data = apply_png_predictor(pred, colors, columns,
+                    data = apply_png_predictor(
-                                               bitspercomponent, data)
+                        pred, colors, columns, bitspercomponent, data
                    )
                else:
-                    error_msg = 'Unsupported predictor: %r' % pred
+                    error_msg = "Unsupported predictor: %r" % pred
                    raise PDFNotImplementedError(error_msg)
        self.data = data
        self.rawdata = None
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@ -4,8 +4,19 @@
 import logging
 import re
-from typing import (Any, BinaryIO, Dict, Generic, Iterator, List,
+from typing import (
-                    Optional, Tuple, Type, TypeVar, Union)
+    Any,
    BinaryIO,
    Dict,
    Generic,
    Iterator,
    List,
    Optional,
    Tuple,
    Type,
    TypeVar,
    Union,
 )
 from . import settings
 from .utils import choplist
@ -59,7 +70,7 @@ class PSLiteral(PSObject):
    def __repr__(self) -> str:
        name = self.name
-        return '/%r' % name
+        return "/%r" % name
 class PSKeyword(PSObject):
@ -79,10 +90,10 @@ class PSKeyword(PSObject):
    def __repr__(self) -> str:
        name = self.name
-        return '/%r' % name
+        return "/%r" % name
-_SymbolT = TypeVar('_SymbolT', PSLiteral, PSKeyword)
+_SymbolT = TypeVar("_SymbolT", PSLiteral, PSKeyword)
 class PSSymbolTable(Generic[_SymbolT]):
@ -110,25 +121,25 @@ PSLiteralTable = PSSymbolTable(PSLiteral)
 PSKeywordTable = PSSymbolTable(PSKeyword)
 LIT = PSLiteralTable.intern
 KWD = PSKeywordTable.intern
-KEYWORD_PROC_BEGIN = KWD(b'{')
+KEYWORD_PROC_BEGIN = KWD(b"{")
-KEYWORD_PROC_END = KWD(b'}')
+KEYWORD_PROC_END = KWD(b"}")
-KEYWORD_ARRAY_BEGIN = KWD(b'[')
+KEYWORD_ARRAY_BEGIN = KWD(b"[")
-KEYWORD_ARRAY_END = KWD(b']')
+KEYWORD_ARRAY_END = KWD(b"]")
-KEYWORD_DICT_BEGIN = KWD(b'<<')
+KEYWORD_DICT_BEGIN = KWD(b"<<")
-KEYWORD_DICT_END = KWD(b'>>')
+KEYWORD_DICT_END = KWD(b">>")
 def literal_name(x: object) -> Any:
    if not isinstance(x, PSLiteral):
        if settings.STRICT:
-            raise PSTypeError('Literal required: {!r}'.format(x))
+            raise PSTypeError("Literal required: {!r}".format(x))
        else:
            name = x
    else:
        name = x.name
        if not isinstance(name, str):
            try:
-                name = str(name, 'utf-8')
+                name = str(name, "utf-8")
            except Exception:
                pass
    return name
@ -137,34 +148,34 @@ def literal_name(x: object) -> Any:
 def keyword_name(x: object) -> Any:
    if not isinstance(x, PSKeyword):
        if settings.STRICT:
-            raise PSTypeError('Keyword required: %r' % x)
+            raise PSTypeError("Keyword required: %r" % x)
        else:
            name = x
    else:
-        name = str(x.name, 'utf-8', 'ignore')
+        name = str(x.name, "utf-8", "ignore")
    return name
-EOL = re.compile(br'[\r\n]')
+EOL = re.compile(rb"[\r\n]")
-SPC = re.compile(br'\s')
+SPC = re.compile(rb"\s")
-NONSPC = re.compile(br'\S')
+NONSPC = re.compile(rb"\S")
-HEX = re.compile(br'[0-9a-fA-F]')
+HEX = re.compile(rb"[0-9a-fA-F]")
-END_LITERAL = re.compile(br'[#/%\[\]()<>{}\s]')
+END_LITERAL = re.compile(rb"[#/%\[\]()<>{}\s]")
-END_HEX_STRING = re.compile(br'[^\s0-9a-fA-F]')
+END_HEX_STRING = re.compile(rb"[^\s0-9a-fA-F]")
-HEX_PAIR = re.compile(br'[0-9a-fA-F]{2}|.')
+HEX_PAIR = re.compile(rb"[0-9a-fA-F]{2}|.")
-END_NUMBER = re.compile(br'[^0-9]')
+END_NUMBER = re.compile(rb"[^0-9]")
-END_KEYWORD = re.compile(br'[#/%\[\]()<>{}\s]')
+END_KEYWORD = re.compile(rb"[#/%\[\]()<>{}\s]")
-END_STRING = re.compile(br'[()\134]')
+END_STRING = re.compile(rb"[()\134]")
-OCT_STRING = re.compile(br'[0-7]')
+OCT_STRING = re.compile(rb"[0-7]")
 ESC_STRING = {
-    b'b': 8,
+    b"b": 8,
-    b't': 9,
+    b"t": 9,
-    b'n': 10,
+    b"n": 10,
-    b'f': 12,
+    b"f": 12,
-    b'r': 13,
+    b"r": 13,
-    b'(': 40,
+    b"(": 40,
-    b')': 41,
+    b")": 41,
-    b'\\': 92
+    b"\\": 92,
 }
@ -173,8 +184,8 @@ PSBaseParserToken = Union[float, bool, PSLiteral, PSKeyword, bytes]
 class PSBaseParser:
-    """Most basic PostScript parser that performs only tokenization.
+    """Most basic PostScript parser that performs only tokenization."""
-    """
+
    BUFSIZ = 4096
    def __init__(self, fp: BinaryIO) -> None:
@ -182,8 +193,7 @@ class PSBaseParser:
        self.seek(0)
    def __repr__(self) -> str:
-        return '<%s: %r, bufpos=%d>' % (self.__class__.__name__, self.fp,
+        return "<%s: %r, bufpos=%d>" % (self.__class__.__name__, self.fp, self.bufpos)
                                        self.bufpos)
    def flush(self) -> None:
        return
@ -193,29 +203,28 @@ class PSBaseParser:
        return
    def tell(self) -> int:
-        return self.bufpos+self.charpos
+        return self.bufpos + self.charpos
    def poll(self, pos: Optional[int] = None, n: int = 80) -> None:
        pos0 = self.fp.tell()
        if not pos:
-            pos = self.bufpos+self.charpos
+            pos = self.bufpos + self.charpos
        self.fp.seek(pos)
-        log.debug('poll(%d): %r', pos, self.fp.read(n))
+        log.debug("poll(%d): %r", pos, self.fp.read(n))
        self.fp.seek(pos0)
        return
    def seek(self, pos: int) -> None:
-        """Seeks the parser to the given position.
+        """Seeks the parser to the given position."""
-        """
+        log.debug("seek: %r", pos)
        log.debug('seek: %r', pos)
        self.fp.seek(pos)
        # reset the status for nextline()
        self.bufpos = pos
-        self.buf = b''
+        self.buf = b""
        self.charpos = 0
        # reset the status for nexttoken()
        self._parse1 = self._parse_main
-        self._curtoken = b''
+        self._curtoken = b""
        self._curtokenpos = 0
        self._tokens: List[Tuple[int, PSBaseParserToken]] = []
        return
@ -227,37 +236,36 @@ class PSBaseParser:
        self.bufpos = self.fp.tell()
        self.buf = self.fp.read(self.BUFSIZ)
        if not self.buf:
-            raise PSEOF('Unexpected EOF')
+            raise PSEOF("Unexpected EOF")
        self.charpos = 0
        return
    def nextline(self) -> Tuple[int, bytes]:
-        """Fetches a next line that ends either with \\r or \\n.
+        """Fetches a next line that ends either with \\r or \\n."""
-        """
+        linebuf = b""
        linebuf = b''
        linepos = self.bufpos + self.charpos
        eol = False
        while 1:
            self.fillbuf()
            if eol:
-                c = self.buf[self.charpos:self.charpos+1]
+                c = self.buf[self.charpos : self.charpos + 1]
                # handle b'\r\n'
-                if c == b'\n':
+                if c == b"\n":
                    linebuf += c
                    self.charpos += 1
                break
            m = EOL.search(self.buf, self.charpos)
            if m:
-                linebuf += self.buf[self.charpos:m.end(0)]
+                linebuf += self.buf[self.charpos : m.end(0)]
                self.charpos = m.end(0)
-                if linebuf[-1:] == b'\r':
+                if linebuf[-1:] == b"\r":
                    eol = True
                else:
                    break
            else:
-                linebuf += self.buf[self.charpos:]
+                linebuf += self.buf[self.charpos :]
                self.charpos = len(self.buf)
-        log.debug('nextline: %r, %r', linepos, linebuf)
+        log.debug("nextline: %r, %r", linepos, linebuf)
        return (linepos, linebuf)
@ -268,22 +276,22 @@ class PSBaseParser:
        """
        self.fp.seek(0, 2)
        pos = self.fp.tell()
-        buf = b''
+        buf = b""
        while 0 < pos:
            prevpos = pos
-            pos = max(0, pos-self.BUFSIZ)
+            pos = max(0, pos - self.BUFSIZ)
            self.fp.seek(pos)
-            s = self.fp.read(prevpos-pos)
+            s = self.fp.read(prevpos - pos)
            if not s:
                break
            while 1:
-                n = max(s.rfind(b'\r'), s.rfind(b'\n'))
+                n = max(s.rfind(b"\r"), s.rfind(b"\n"))
                if n == -1:
                    buf = s + buf
                    break
                yield s[n:] + buf
                s = s[:n]
-                buf = b''
+                buf = b""
        return
    def _parse_main(self, s: bytes, i: int) -> int:
@ -291,44 +299,44 @@ class PSBaseParser:
        if not m:
            return len(s)
        j = m.start(0)
-        c = s[j:j+1]
+        c = s[j : j + 1]
-        self._curtokenpos = self.bufpos+j
+        self._curtokenpos = self.bufpos + j
-        if c == b'%':
+        if c == b"%":
-            self._curtoken = b'%'
+            self._curtoken = b"%"
            self._parse1 = self._parse_comment
-            return j+1
+            return j + 1
-        elif c == b'/':
+        elif c == b"/":
-            self._curtoken = b''
+            self._curtoken = b""
            self._parse1 = self._parse_literal
-            return j+1
+            return j + 1
-        elif c in b'-+' or c.isdigit():
+        elif c in b"-+" or c.isdigit():
            self._curtoken = c
            self._parse1 = self._parse_number
-            return j+1
+            return j + 1
-        elif c == b'.':
+        elif c == b".":
            self._curtoken = c
            self._parse1 = self._parse_float
-            return j+1
+            return j + 1
        elif c.isalpha():
            self._curtoken = c
            self._parse1 = self._parse_keyword
-            return j+1
+            return j + 1
-        elif c == b'(':
+        elif c == b"(":
-            self._curtoken = b''
+            self._curtoken = b""
            self.paren = 1
            self._parse1 = self._parse_string
-            return j+1
+            return j + 1
-        elif c == b'<':
+        elif c == b"<":
-            self._curtoken = b''
+            self._curtoken = b""
            self._parse1 = self._parse_wopen
-            return j+1
+            return j + 1
-        elif c == b'>':
+        elif c == b">":
-            self._curtoken = b''
+            self._curtoken = b""
            self._parse1 = self._parse_wclose
-            return j+1
+            return j + 1
        else:
            self._add_token(KWD(c))
-            return j+1
+            return j + 1
    def _add_token(self, obj: PSBaseParserToken) -> None:
        self._tokens.append((self._curtokenpos, obj))
@ -353,13 +361,13 @@ class PSBaseParser:
            return len(s)
        j = m.start(0)
        self._curtoken += s[i:j]
-        c = s[j:j+1]
+        c = s[j : j + 1]
-        if c == b'#':
+        if c == b"#":
-            self.hex = b''
+            self.hex = b""
            self._parse1 = self._parse_literal_hex
-            return j+1
+            return j + 1
        try:
-            name: Union[str, bytes] = str(self._curtoken, 'utf-8')
+            name: Union[str, bytes] = str(self._curtoken, "utf-8")
        except Exception:
            name = self._curtoken
        self._add_token(LIT(name))
@ -367,10 +375,10 @@ class PSBaseParser:
        return j
    def _parse_literal_hex(self, s: bytes, i: int) -> int:
-        c = s[i:i+1]
+        c = s[i : i + 1]
        if HEX.match(c) and len(self.hex) < 2:
            self.hex += c
-            return i+1
+            return i + 1
        if self.hex:
            self._curtoken += bytes((int(self.hex, 16),))
        self._parse1 = self._parse_literal
@ -383,11 +391,11 @@ class PSBaseParser:
            return len(s)
        j = m.start(0)
        self._curtoken += s[i:j]
-        c = s[j:j+1]
+        c = s[j : j + 1]
-        if c == b'.':
+        if c == b".":
            self._curtoken += c
            self._parse1 = self._parse_float
-            return j+1
+            return j + 1
        try:
            self._add_token(int(self._curtoken))
        except ValueError:
@ -416,9 +424,9 @@ class PSBaseParser:
            return len(s)
        j = m.start(0)
        self._curtoken += s[i:j]
-        if self._curtoken == b'true':
+        if self._curtoken == b"true":
            token: Union[bool, PSKeyword] = True
-        elif self._curtoken == b'false':
+        elif self._curtoken == b"false":
            token = False
        else:
            token = KWD(self._curtoken)
@ -433,34 +441,34 @@ class PSBaseParser:
            return len(s)
        j = m.start(0)
        self._curtoken += s[i:j]
-        c = s[j:j+1]
+        c = s[j : j + 1]
-        if c == b'\\':
+        if c == b"\\":
-            self.oct = b''
+            self.oct = b""
            self._parse1 = self._parse_string_1
-            return j+1
+            return j + 1
-        if c == b'(':
+        if c == b"(":
            self.paren += 1
            self._curtoken += c
-            return j+1
+            return j + 1
-        if c == b')':
+        if c == b")":
            self.paren -= 1
            if self.paren:
                # WTF, they said balanced parens need no special treatment.
                self._curtoken += c
-                return j+1
+                return j + 1
        self._add_token(self._curtoken)
        self._parse1 = self._parse_main
-        return j+1
+        return j + 1
    def _parse_string_1(self, s: bytes, i: int) -> int:
        """Parse literal strings
        PDF Reference 3.2.3
        """
-        c = s[i:i+1]
+        c = s[i : i + 1]
        if OCT_STRING.match(c) and len(self.oct) < 3:
            self.oct += c
-            return i+1
+            return i + 1
        elif self.oct:
            self._curtoken += bytes((int(self.oct, 8),))
@ -470,18 +478,18 @@ class PSBaseParser:
        elif c in ESC_STRING:
            self._curtoken += bytes((ESC_STRING[c],))
-        elif c == b'\r' and len(s) > i+1 and s[i+1:i+2] == b'\n':
+        elif c == b"\r" and len(s) > i + 1 and s[i + 1 : i + 2] == b"\n":
            # If current and next character is \r\n skip both because enters
            # after a \ are ignored
            i += 1
        # default action
        self._parse1 = self._parse_string
-        return i+1
+        return i + 1
    def _parse_wopen(self, s: bytes, i: int) -> int:
-        c = s[i:i+1]
+        c = s[i : i + 1]
-        if c == b'<':
+        if c == b"<":
            self._add_token(KEYWORD_DICT_BEGIN)
            self._parse1 = self._parse_main
            i += 1
@ -490,8 +498,8 @@ class PSBaseParser:
        return i
    def _parse_wclose(self, s: bytes, i: int) -> int:
-        c = s[i:i+1]
+        c = s[i : i + 1]
-        if c == b'>':
+        if c == b">":
            self._add_token(KEYWORD_DICT_END)
            i += 1
        self._parse1 = self._parse_main
@ -504,8 +512,9 @@ class PSBaseParser:
            return len(s)
        j = m.start(0)
        self._curtoken += s[i:j]
-        token = HEX_PAIR.sub(lambda m: bytes((int(m.group(0), 16),)),
+        token = HEX_PAIR.sub(
-                             SPC.sub(b'', self._curtoken))
+            lambda m: bytes((int(m.group(0), 16),)), SPC.sub(b"", self._curtoken)
        )
        self._add_token(token)
        self._parse1 = self._parse_main
        return j
@ -515,7 +524,7 @@ class PSBaseParser:
            self.fillbuf()
            self.charpos = self._parse1(self.buf, self.charpos)
        token = self._tokens.pop(0)
-        log.debug('nexttoken: %r', token)
+        log.debug("nexttoken: %r", token)
        return token
@ -530,15 +539,13 @@ PSStackEntry = Tuple[int, PSStackType[ExtraT]]
 class PSStackParser(PSBaseParser, Generic[ExtraT]):
    def __init__(self, fp: BinaryIO) -> None:
        PSBaseParser.__init__(self, fp)
        self.reset()
        return
    def reset(self) -> None:
-        self.context: List[Tuple[int, Optional[str],
+        self.context: List[Tuple[int, Optional[str], List[PSStackEntry[ExtraT]]]] = []
                           List[PSStackEntry[ExtraT]]]] = []
        self.curtype: Optional[str] = None
        self.curstack: List[PSStackEntry[ExtraT]] = []
        self.results: List[PSStackEntry[ExtraT]] = []
@ -565,25 +572,24 @@ class PSStackParser(PSBaseParser, Generic[ExtraT]):
    def add_results(self, *objs: PSStackEntry[ExtraT]) -> None:
        try:
-            log.debug('add_results: %r', objs)
+            log.debug("add_results: %r", objs)
        except Exception:
-            log.debug('add_results: (unprintable object)')
+            log.debug("add_results: (unprintable object)")
        self.results.extend(objs)
        return
    def start_type(self, pos: int, type: str) -> None:
        self.context.append((pos, self.curtype, self.curstack))
        (self.curtype, self.curstack) = (type, [])
-        log.debug('start_type: pos=%r, type=%r', pos, type)
+        log.debug("start_type: pos=%r, type=%r", pos, type)
        return
    def end_type(self, type: str) -> Tuple[int, List[PSStackType[ExtraT]]]:
        if self.curtype != type:
-            raise PSTypeError('Type mismatch: {!r} != {!r}'
+            raise PSTypeError("Type mismatch: {!r} != {!r}".format(self.curtype, type))
                              .format(self.curtype, type))
        objs = [obj for (_, obj) in self.curstack]
        (pos, self.curtype, self.curstack) = self.context.pop()
-        log.debug('end_type: pos=%r, type=%r, objs=%r', pos, type, objs)
+        log.debug("end_type: pos=%r, type=%r, objs=%r", pos, type, objs)
        return (pos, objs)
    def do_keyword(self, pos: int, token: PSKeyword) -> None:
@ -604,47 +610,55 @@ class PSStackParser(PSBaseParser, Generic[ExtraT]):
                self.push((pos, token))
            elif token == KEYWORD_ARRAY_BEGIN:
                # begin array
-                self.start_type(pos, 'a')
+                self.start_type(pos, "a")
            elif token == KEYWORD_ARRAY_END:
                # end array
                try:
-                    self.push(self.end_type('a'))
+                    self.push(self.end_type("a"))
                except PSTypeError:
                    if settings.STRICT:
                        raise
            elif token == KEYWORD_DICT_BEGIN:
                # begin dictionary
-                self.start_type(pos, 'd')
+                self.start_type(pos, "d")
            elif token == KEYWORD_DICT_END:
                # end dictionary
                try:
-                    (pos, objs) = self.end_type('d')
+                    (pos, objs) = self.end_type("d")
                    if len(objs) % 2 != 0:
-                        error_msg = 'Invalid dictionary construct: %r' % objs
+                        error_msg = "Invalid dictionary construct: %r" % objs
                        raise PSSyntaxError(error_msg)
-                    d = {literal_name(k): v
+                    d = {
-                         for (k, v) in choplist(2, objs) if v is not None}
+                        literal_name(k): v
                        for (k, v) in choplist(2, objs)
                        if v is not None
                    }
                    self.push((pos, d))
                except PSTypeError:
                    if settings.STRICT:
                        raise
            elif token == KEYWORD_PROC_BEGIN:
                # begin proc
-                self.start_type(pos, 'p')
+                self.start_type(pos, "p")
            elif token == KEYWORD_PROC_END:
                # end proc
                try:
-                    self.push(self.end_type('p'))
+                    self.push(self.end_type("p"))
                except PSTypeError:
                    if settings.STRICT:
                        raise
            elif isinstance(token, PSKeyword):
-                log.debug('do_keyword: pos=%r, token=%r, stack=%r', pos,
+                log.debug(
-                          token, self.curstack)
+                    "do_keyword: pos=%r, token=%r, stack=%r", pos, token, self.curstack
                )
                self.do_keyword(pos, token)
            else:
-                log.error('unknown token: pos=%r, token=%r, stack=%r', pos,
+                log.error(
-                          token, self.curstack)
+                    "unknown token: pos=%r, token=%r, stack=%r",
                    pos,
                    token,
                    self.curstack,
                )
                self.do_keyword(pos, token)
                raise
            if self.context:
@ -653,7 +667,7 @@ class PSStackParser(PSBaseParser, Generic[ExtraT]):
                self.flush()
        obj = self.results.pop(0)
        try:
-            log.debug('nextobject: %r', obj)
+            log.debug("nextobject: %r", obj)
        except Exception:
-            log.debug('nextobject: (unprintable object)')
+            log.debug("nextobject: (unprintable object)")
        return obj
--- a/pdfminer/runlength.py
+++ b/pdfminer/runlength.py
@ -20,7 +20,7 @@ def rldecode(data: bytes) -> bytes:
        (2 to 128) times during decompression. A length value of 128
        denotes EOD.
    """
-    decoded = b''
+    decoded = b""
    i = 0
    while i < len(data):
        length = data[i]
@ -28,13 +28,13 @@ def rldecode(data: bytes) -> bytes:
            break
        if length >= 0 and length < 128:
-            for j in range(i+1, (i+1)+(length+1)):
+            for j in range(i + 1, (i + 1) + (length + 1)):
                decoded += bytes((data[j],))
-            i = (i+1) + (length+1)
+            i = (i + 1) + (length + 1)
        if length > 128:
-            run = bytes((data[i+1],))*(257-length)
+            run = bytes((data[i + 1],)) * (257 - length)
            decoded += run
-            i = (i+1) + 1
+            i = (i + 1) + 1
    return decoded
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@ -6,9 +6,24 @@ import pathlib
 import string
 import struct
 from html import escape
-from typing import (Any, BinaryIO, Callable, Dict, Generic, Iterable, Iterator,
+from typing import (
-                    List, Optional, Set, TextIO, Tuple, TypeVar, Union,
+    Any,
-                    TYPE_CHECKING, cast)
+    BinaryIO,
    Callable,
    Dict,
    Generic,
    Iterable,
    Iterator,
    List,
    Optional,
    Set,
    TextIO,
    Tuple,
    TypeVar,
    Union,
    TYPE_CHECKING,
    cast,
 )
 if TYPE_CHECKING:
    from .layout import LTComponent
@ -30,12 +45,8 @@ class open_filename(object):
    (str or pathlib.PurePath type is supported) and closes it on exit,
    (just like `open`), but does nothing for file-like objects.
    """
-    def __init__(
+
-        self,
+    def __init__(self, filename: FileOrName, *args: Any, **kwargs: Any) -> None:
        filename: FileOrName,
        *args: Any,
        **kwargs: Any
    ) -> None:
        if isinstance(filename, pathlib.PurePath):
            filename = str(filename)
        if isinstance(filename, str):
@ -45,17 +56,12 @@ class open_filename(object):
            self.file_handler = cast(AnyIO, filename)
            self.closing = False
        else:
-            raise TypeError('Unsupported input type: %s' % type(filename))
+            raise TypeError("Unsupported input type: %s" % type(filename))
    def __enter__(self) -> AnyIO:
        return self.file_handler
-    def __exit__(
+    def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
        self,
        exc_type: object,
        exc_val: object,
        exc_tb: object
    ) -> None:
        if self.closing:
            self.file_handler.close()
@ -70,7 +76,7 @@ def make_compat_str(o: object) -> str:
    """Converts everything to string, if bytes guessing the encoding."""
    if isinstance(o, bytes):
        enc = chardet.detect(o)
-        return o.decode(enc['encoding'])
+        return o.decode(enc["encoding"])
    else:
        return str(o)
@ -80,20 +86,18 @@ def shorten_str(s: str, size: int) -> str:
        return s[:size]
    if len(s) > size:
        length = (size - 5) // 2
-        return '{} ... {}'.format(s[:length], s[-length:])
+        return "{} ... {}".format(s[:length], s[-length:])
    else:
        return s
 def compatible_encode_method(
-    bytesorstring: Union[bytes, str],
+    bytesorstring: Union[bytes, str], encoding: str = "utf-8", erraction: str = "ignore"
    encoding: str = 'utf-8',
    erraction: str = 'ignore'
 ) -> str:
    """When Py2 str.encode is called, it often means bytes.encode in Py3.
-     This does either.
+    This does either.
-     """
+    """
    if isinstance(bytesorstring, str):
        return bytesorstring
    assert isinstance(bytesorstring, bytes), str(type(bytesorstring))
@ -119,11 +123,7 @@ def paeth_predictor(left: int, above: int, upper_left: int) -> int:
 def apply_png_predictor(
-    pred: int,
+    pred: int, colors: int, columns: int, bitspercomponent: int, data: bytes
    colors: int,
    columns: int,
    bitspercomponent: int,
    data: bytes
 ) -> bytes:
    """Reverse the effect of the PNG predictor
@ -135,12 +135,12 @@ def apply_png_predictor(
    nbytes = colors * columns * bitspercomponent // 8
    bpp = colors * bitspercomponent // 8  # number of bytes per complete pixel
-    buf = b''
+    buf = b""
-    line_above = b'\x00' * columns
+    line_above = b"\x00" * columns
    for scanline_i in range(0, len(data), nbytes + 1):
        filter_type = data[scanline_i]
-        line_encoded = data[scanline_i + 1:scanline_i + 1 + nbytes]
+        line_encoded = data[scanline_i + 1 : scanline_i + 1 + nbytes]
-        raw = b''
+        raw = b""
        if filter_type == 0:
            # Filter type 0: None
@ -223,10 +223,11 @@ Point = Tuple[float, float]
 Rect = Tuple[float, float, float, float]
 Matrix = Tuple[float, float, float, float, float, float]
 PathSegment = Union[
-    Tuple[str],                                             # Literal['h']
+    Tuple[str],  # Literal['h']
-    Tuple[str, float, float],                               # Literal['m', 'l']
+    Tuple[str, float, float],  # Literal['m', 'l']
-    Tuple[str, float, float, float, float],                 # Literal['v', 'y']
+    Tuple[str, float, float, float, float],  # Literal['v', 'y']
-    Tuple[str, float, float, float, float, float, float]]   # Literal['c']
+    Tuple[str, float, float, float, float, float, float],
 ]  # Literal['c']
 #  Matrix operations
 MATRIX_IDENTITY: Matrix = (1, 0, 0, 1, 0, 0)
@ -236,9 +237,14 @@ def mult_matrix(m1: Matrix, m0: Matrix) -> Matrix:
    (a1, b1, c1, d1, e1, f1) = m1
    (a0, b0, c0, d0, e0, f0) = m0
    """Returns the multiplication of two matrices."""
-    return (a0 * a1 + c0 * b1, b0 * a1 + d0 * b1,
+    return (
-            a0 * c1 + c0 * d1, b0 * c1 + d0 * d1,
+        a0 * a1 + c0 * b1,
-            a0 * e1 + c0 * f1 + e0, b0 * e1 + d0 * f1 + f0)
+        b0 * a1 + d0 * b1,
        a0 * c1 + c0 * d1,
        b0 * c1 + d0 * d1,
        a0 * e1 + c0 * f1 + e0,
        b0 * e1 + d0 * f1 + f0,
    )
 def translate_matrix(m: Matrix, v: Point) -> Matrix:
@ -264,11 +270,12 @@ def apply_matrix_norm(m: Matrix, v: Point) -> Point:
 #  Utility functions
 def isnumber(x: object) -> bool:
    return isinstance(x, (int, float))
-_T = TypeVar('_T')
+_T = TypeVar("_T")
 def uniq(objs: Iterable[_T]) -> Iterator[_T]:
@ -282,10 +289,7 @@ def uniq(objs: Iterable[_T]) -> Iterator[_T]:
    return
-def fsplit(
+def fsplit(pred: Callable[[_T], bool], objs: Iterable[_T]) -> Tuple[List[_T], List[_T]]:
    pred: Callable[[_T], bool],
    objs: Iterable[_T]
 ) -> Tuple[List[_T], List[_T]]:
    """Split a list into two classes according to the predicate."""
    t = []
    f = []
@ -315,9 +319,7 @@ def get_bound(pts: Iterable[Point]) -> Rect:
 def pick(
-    seq: Iterable[_T],
+    seq: Iterable[_T], func: Callable[[_T], float], maxobj: Optional[_T] = None
    func: Callable[[_T], float],
    maxobj: Optional[_T] = None
 ) -> Optional[_T]:
    """Picks the object obj where func(obj) has the highest value."""
    maxscore = None
@ -347,77 +349,303 @@ def nunpack(s: bytes, default: int = 0) -> int:
    elif length == 1:
        return ord(s)
    elif length == 2:
-        return cast(int, struct.unpack('>H', s)[0])
+        return cast(int, struct.unpack(">H", s)[0])
    elif length == 3:
-        return cast(int, struct.unpack('>L', b'\x00' + s)[0])
+        return cast(int, struct.unpack(">L", b"\x00" + s)[0])
    elif length == 4:
-        return cast(int, struct.unpack('>L', s)[0])
+        return cast(int, struct.unpack(">L", s)[0])
    elif length == 8:
-        return cast(int, struct.unpack('>Q', s)[0])
+        return cast(int, struct.unpack(">Q", s)[0])
    else:
-        raise TypeError('invalid length: %d' % length)
+        raise TypeError("invalid length: %d" % length)
-PDFDocEncoding = ''.join(chr(x) for x in (
+PDFDocEncoding = "".join(
-    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+    chr(x)
-    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
+    for x in (
-    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0017, 0x0017,
+        0x0000,
-    0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc,
+        0x0001,
-    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+        0x0002,
-    0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
+        0x0003,
-    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+        0x0004,
-    0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
+        0x0005,
-    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+        0x0006,
-    0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
+        0x0007,
-    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+        0x0008,
-    0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
+        0x0009,
-    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+        0x000A,
-    0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
+        0x000B,
-    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+        0x000C,
-    0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000,
+        0x000D,
-    0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,
+        0x000E,
-    0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
+        0x000F,
-    0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160,
+        0x0010,
-    0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000,
+        0x0011,
-    0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
+        0x0012,
-    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af,
+        0x0013,
-    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
+        0x0014,
-    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
+        0x0015,
-    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
+        0x0017,
-    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
+        0x0017,
-    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
+        0x02D8,
-    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
+        0x02C7,
-    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
+        0x02C6,
-    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+        0x02D9,
-    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
+        0x02DD,
-    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
+        0x02DB,
-))
+        0x02DA,
        0x02DC,
        0x0020,
        0x0021,
        0x0022,
        0x0023,
        0x0024,
        0x0025,
        0x0026,
        0x0027,
        0x0028,
        0x0029,
        0x002A,
        0x002B,
        0x002C,
        0x002D,
        0x002E,
        0x002F,
        0x0030,
        0x0031,
        0x0032,
        0x0033,
        0x0034,
        0x0035,
        0x0036,
        0x0037,
        0x0038,
        0x0039,
        0x003A,
        0x003B,
        0x003C,
        0x003D,
        0x003E,
        0x003F,
        0x0040,
        0x0041,
        0x0042,
        0x0043,
        0x0044,
        0x0045,
        0x0046,
        0x0047,
        0x0048,
        0x0049,
        0x004A,
        0x004B,
        0x004C,
        0x004D,
        0x004E,
        0x004F,
        0x0050,
        0x0051,
        0x0052,
        0x0053,
        0x0054,
        0x0055,
        0x0056,
        0x0057,
        0x0058,
        0x0059,
        0x005A,
        0x005B,
        0x005C,
        0x005D,
        0x005E,
        0x005F,
        0x0060,
        0x0061,
        0x0062,
        0x0063,
        0x0064,
        0x0065,
        0x0066,
        0x0067,
        0x0068,
        0x0069,
        0x006A,
        0x006B,
        0x006C,
        0x006D,
        0x006E,
        0x006F,
        0x0070,
        0x0071,
        0x0072,
        0x0073,
        0x0074,
        0x0075,
        0x0076,
        0x0077,
        0x0078,
        0x0079,
        0x007A,
        0x007B,
        0x007C,
        0x007D,
        0x007E,
        0x0000,
        0x2022,
        0x2020,
        0x2021,
        0x2026,
        0x2014,
        0x2013,
        0x0192,
        0x2044,
        0x2039,
        0x203A,
        0x2212,
        0x2030,
        0x201E,
        0x201C,
        0x201D,
        0x2018,
        0x2019,
        0x201A,
        0x2122,
        0xFB01,
        0xFB02,
        0x0141,
        0x0152,
        0x0160,
        0x0178,
        0x017D,
        0x0131,
        0x0142,
        0x0153,
        0x0161,
        0x017E,
        0x0000,
        0x20AC,
        0x00A1,
        0x00A2,
        0x00A3,
        0x00A4,
        0x00A5,
        0x00A6,
        0x00A7,
        0x00A8,
        0x00A9,
        0x00AA,
        0x00AB,
        0x00AC,
        0x0000,
        0x00AE,
        0x00AF,
        0x00B0,
        0x00B1,
        0x00B2,
        0x00B3,
        0x00B4,
        0x00B5,
        0x00B6,
        0x00B7,
        0x00B8,
        0x00B9,
        0x00BA,
        0x00BB,
        0x00BC,
        0x00BD,
        0x00BE,
        0x00BF,
        0x00C0,
        0x00C1,
        0x00C2,
        0x00C3,
        0x00C4,
        0x00C5,
        0x00C6,
        0x00C7,
        0x00C8,
        0x00C9,
        0x00CA,
        0x00CB,
        0x00CC,
        0x00CD,
        0x00CE,
        0x00CF,
        0x00D0,
        0x00D1,
        0x00D2,
        0x00D3,
        0x00D4,
        0x00D5,
        0x00D6,
        0x00D7,
        0x00D8,
        0x00D9,
        0x00DA,
        0x00DB,
        0x00DC,
        0x00DD,
        0x00DE,
        0x00DF,
        0x00E0,
        0x00E1,
        0x00E2,
        0x00E3,
        0x00E4,
        0x00E5,
        0x00E6,
        0x00E7,
        0x00E8,
        0x00E9,
        0x00EA,
        0x00EB,
        0x00EC,
        0x00ED,
        0x00EE,
        0x00EF,
        0x00F0,
        0x00F1,
        0x00F2,
        0x00F3,
        0x00F4,
        0x00F5,
        0x00F6,
        0x00F7,
        0x00F8,
        0x00F9,
        0x00FA,
        0x00FB,
        0x00FC,
        0x00FD,
        0x00FE,
        0x00FF,
    )
 )
 def decode_text(s: bytes) -> str:
    """Decodes a PDFDocEncoding string to Unicode."""
-    if s.startswith(b'\xfe\xff'):
+    if s.startswith(b"\xfe\xff"):
-        return str(s[2:], 'utf-16be', 'ignore')
+        return str(s[2:], "utf-16be", "ignore")
    else:
-        return ''.join(PDFDocEncoding[c] for c in s)
+        return "".join(PDFDocEncoding[c] for c in s)
 def enc(x: str) -> str:
    """Encodes a string for SGML/XML/HTML"""
    if isinstance(x, bytes):
-        return ''
+        return ""
    return escape(x)
 def bbox2str(bbox: Rect) -> str:
    (x0, y0, x1, y1) = bbox
-    return '{:.3f},{:.3f},{:.3f},{:.3f}'.format(x0, y0, x1, y1)
+    return "{:.3f},{:.3f},{:.3f},{:.3f}".format(x0, y0, x1, y1)
 def matrix2str(m: Matrix) -> str:
    (a, b, c, d, e, f) = m
-    return '[{:.2f},{:.2f},{:.2f},{:.2f}, ({:.2f},{:.2f})]'\
+    return "[{:.2f},{:.2f},{:.2f},{:.2f}, ({:.2f},{:.2f})]".format(a, b, c, d, e, f)
        .format(a, b, c, d, e, f)
 def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point:
@ -446,7 +674,7 @@ def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point:
        return max(0, iw), max(0, ih)
-LTComponentT = TypeVar('LTComponentT', bound='LTComponent')
+LTComponentT = TypeVar("LTComponentT", bound="LTComponent")
 class Plane(Generic[LTComponentT]):
@ -465,7 +693,7 @@ class Plane(Generic[LTComponentT]):
        (self.x0, self.y0, self.x1, self.y1) = bbox
    def __repr__(self) -> str:
-        return '<Plane objs=%r>' % list(self)
+        return "<Plane objs=%r>" % list(self)
    def __iter__(self) -> Iterator[LTComponentT]:
        return (obj for obj in self._seq if obj in self._objs)
@ -524,14 +752,13 @@ class Plane(Generic[LTComponentT]):
                if obj in done:
                    continue
                done.add(obj)
-                if obj.x1 <= x0 or x1 <= obj.x0 or obj.y1 <= y0 \
+                if obj.x1 <= x0 or x1 <= obj.x0 or obj.y1 <= y0 or y1 <= obj.y0:
                        or y1 <= obj.y0:
                    continue
                yield obj
-ROMAN_ONES = ['i', 'x', 'c', 'm']
+ROMAN_ONES = ["i", "x", "c", "m"]
-ROMAN_FIVES = ['v', 'l', 'd']
+ROMAN_FIVES = ["v", "l", "d"]
 def format_int_roman(value: int) -> str:
@ -557,7 +784,7 @@ def format_int_roman(value: int) -> str:
            result.insert(1 if over_five else 0, ROMAN_ONES[index] * remainder)
        index += 1
-    return ''.join(result)
+    return "".join(result)
 def format_int_alpha(value: int) -> str:
@ -571,4 +798,4 @@ def format_int_alpha(value: int) -> str:
        result.append(string.ascii_lowercase[remainder])
    result.reverse()
-    return ''.join(result)
+    return "".join(result)
--- a/setup.py
+++ b/setup.py
@ -8,52 +8,52 @@ sys.path.append(str(Path(__file__).parent))
 import pdfminer as package
-with open(path.join(path.abspath(path.dirname(__file__)), 'README.md')) as f:
+with open(path.join(path.abspath(path.dirname(__file__)), "README.md")) as f:
    readme = f.read()
 setup(
-    name='pdfminer.six',
+    name="pdfminer.six",
    version=package.__version__,
-    packages=['pdfminer'],
+    packages=["pdfminer"],
-    package_data={'pdfminer': ['cmap/*.pickle.gz', 'py.typed']},
+    package_data={"pdfminer": ["cmap/*.pickle.gz", "py.typed"]},
    install_requires=[
        'chardet ; python_version > "3.0"',
-        'cryptography',
+        "cryptography",
    ],
    extras_require={
-        "dev": ["pytest", "nox", "mypy == 0.931"],
+        "dev": ["pytest", "nox", "black", "mypy == 0.931"],
        "docs": ["sphinx", "sphinx-argparse"],
    },
-    description='PDF parser and analyzer',
+    description="PDF parser and analyzer",
    long_description=readme,
-    long_description_content_type='text/markdown',
+    long_description_content_type="text/markdown",
-    license='MIT/X',
+    license="MIT/X",
-    author='Yusuke Shinyama + Philippe Guglielmetti',
+    author="Yusuke Shinyama + Philippe Guglielmetti",
-    author_email='pdfminer@goulu.net',
+    author_email="pdfminer@goulu.net",
-    url='https://github.com/pdfminer/pdfminer.six',
+    url="https://github.com/pdfminer/pdfminer.six",
    scripts=[
-        'tools/pdf2txt.py',
+        "tools/pdf2txt.py",
-        'tools/dumppdf.py',
+        "tools/dumppdf.py",
    ],
    keywords=[
-        'pdf parser',
+        "pdf parser",
-        'pdf converter',
+        "pdf converter",
-        'layout analysis',
+        "layout analysis",
-        'text mining',
+        "text mining",
    ],
-    python_requires='>=3.6',
+    python_requires=">=3.6",
    classifiers=[
-        'Programming Language :: Python',
+        "Programming Language :: Python",
-        'Programming Language :: Python :: 3.6',
+        "Programming Language :: Python :: 3.6",
-        'Programming Language :: Python :: 3.7',
+        "Programming Language :: Python :: 3.7",
-        'Programming Language :: Python :: 3.8',
+        "Programming Language :: Python :: 3.8",
-        'Programming Language :: Python :: 3.9',
+        "Programming Language :: Python :: 3.9",
-        'Programming Language :: Python :: 3 :: Only',
+        "Programming Language :: Python :: 3 :: Only",
-        'Development Status :: 5 - Production/Stable',
+        "Development Status :: 5 - Production/Stable",
-        'Environment :: Console',
+        "Environment :: Console",
-        'Intended Audience :: Developers',
+        "Intended Audience :: Developers",
-        'Intended Audience :: Science/Research',
+        "Intended Audience :: Science/Research",
-        'License :: OSI Approved :: MIT License',
+        "License :: OSI Approved :: MIT License",
-        'Topic :: Text Processing',
+        "Topic :: Text Processing",
    ],
 )
--- a/tests/helpers.py
+++ b/tests/helpers.py
@ -2,7 +2,6 @@ import os
 def absolute_sample_path(relative_sample_path):
-    sample_dir = os.path.abspath(
+    sample_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../samples"))
        os.path.join(os.path.dirname(__file__), '../samples'))
    sample_file = os.path.join(sample_dir, relative_sample_path)
    return sample_file
--- a/tests/tempfilepath.py
+++ b/tests/tempfilepath.py
@ -4,7 +4,7 @@ import tempfile
 import os
-class TemporaryFilePath():
+class TemporaryFilePath:
    """Context manager class, which generates temporary file name
    Coonroraly to standard tempfile.NamedTemporaryFile(), it does not
@ -40,9 +40,9 @@ class TemporaryFilePath():
        `tempfile.NamedTemporaryFile` will create and delete a file, and
        this method only returns the filepath of the non-existing file.
        """
-        with tempfile.NamedTemporaryFile(suffix=self.suffix,
+        with tempfile.NamedTemporaryFile(
-                                         prefix=self.prefix,
+            suffix=self.suffix, prefix=self.prefix, dir=self.dir
-                                         dir=self.dir) as file:
+        ) as file:
            self.temp_file_name = file.name
        return self.temp_file_name
--- a/tests/test_converter.py
+++ b/tests/test_converter.py
@ -9,14 +9,14 @@ from pdfminer.pdfinterp import PDFGraphicState
 class TestPaintPath:
    def test_paint_path(self):
-        path = [('m', 6, 7), ('l', 7, 7)]
+        path = [("m", 6, 7), ("l", 7, 7)]
        analyzer = self._get_analyzer()
        analyzer.cur_item = LTContainer([0, 100, 0, 100])
        analyzer.paint_path(PDFGraphicState(), False, False, False, path)
        assert len(analyzer.cur_item._objs) == 1
    def test_paint_path_mlllh(self):
-        path = [('m', 6, 7), ('l', 7, 7), ('l', 7, 91), ('l', 6, 91), ('h',)]
+        path = [("m", 6, 7), ("l", 7, 7), ("l", 7, 91), ("l", 6, 91), ("h",)]
        analyzer = self._get_analyzer()
        analyzer.cur_item = LTContainer([0, 100, 0, 100])
        analyzer.paint_path(PDFGraphicState(), False, False, False, path)
@ -25,9 +25,21 @@ class TestPaintPath:
    def test_paint_path_multiple_mlllh(self):
        """Path from samples/contrib/issue-00369-excel.pdf"""
        path = [
-            ('m', 6, 7), ('l', 7, 7), ('l', 7, 91), ('l', 6, 91), ('h',),
+            ("m", 6, 7),
-            ('m', 4, 7), ('l', 6, 7), ('l', 6, 91), ('l', 4, 91), ('h',),
+            ("l", 7, 7),
-            ('m', 67, 2), ('l', 68, 2), ('l', 68, 3), ('l', 67, 3), ('h',)
+            ("l", 7, 91),
            ("l", 6, 91),
            ("h",),
            ("m", 4, 7),
            ("l", 6, 7),
            ("l", 6, 91),
            ("l", 4, 91),
            ("h",),
            ("m", 67, 2),
            ("l", 68, 2),
            ("l", 68, 3),
            ("l", 67, 3),
            ("h",),
        ]
        analyzer = self._get_analyzer()
        analyzer.cur_item = LTContainer([0, 100, 0, 100])
@ -177,34 +189,34 @@ class TestPaintPath:
            return analyzer.cur_item._objs
        # "c" operator
-        assert parse([
+        assert parse(
-            ("m", 72.41, 433.89),
+            [
-            ("c", 72.41, 434.45, 71.96, 434.89, 71.41, 434.89),
+                ("m", 72.41, 433.89),
-        ])[0].pts == [
+                ("c", 72.41, 434.45, 71.96, 434.89, 71.41, 434.89),
            ]
        )[0].pts == [
            (72.41, 433.89),
            (71.41, 434.89),
        ]
        # "v" operator
-        assert parse([
+        assert parse([("m", 72.41, 433.89), ("v", 71.96, 434.89, 71.41, 434.89)])[
-            ("m", 72.41, 433.89),
+            0
-            ("v", 71.96, 434.89, 71.41, 434.89),
+        ].pts == [
        ])[0].pts == [
            (72.41, 433.89),
            (71.41, 434.89),
        ]
        # "y" operator
-        assert parse([
+        assert parse([("m", 72.41, 433.89), ("y", 72.41, 434.45, 71.41, 434.89)])[
-            ("m", 72.41, 433.89),
+            0
-            ("y", 72.41, 434.45, 71.41, 434.89),
+        ].pts == [
        ])[0].pts == [
            (72.41, 433.89),
            (71.41, 434.89),
        ]
-class TestBinaryDetector():
+class TestBinaryDetector:
    def test_stringio(self):
        assert not PDFConverter._is_binary_stream(io.StringIO())
@ -212,11 +224,11 @@ class TestBinaryDetector():
        assert PDFConverter._is_binary_stream(io.BytesIO())
    def test_tmpfile(self):
-        with TemporaryFile(mode='w') as f:
+        with TemporaryFile(mode="w") as f:
            assert not PDFConverter._is_binary_stream(f)
    def test_binary_tmpfile(self):
-        with TemporaryFile(mode='wb') as f:
+        with TemporaryFile(mode="wb") as f:
            assert PDFConverter._is_binary_stream(f)
    def test_non_file_like_object_defaults_to_binary(self):
--- a/tests/test_encodingdb.py
+++ b/tests/test_encodingdb.py
@ -13,31 +13,31 @@ from pdfminer.psparser import PSLiteral
 def test_name2unicode_name_in_agl():
    """The name "Lcommaaccent" has a single component,
    which is mapped to the string U+013B by AGL"""
-    assert '\u013B' == name2unicode('Lcommaaccent')
+    assert "\u013B" == name2unicode("Lcommaaccent")
 def test_name2unicode_uni():
    """The components "Lcommaaccent," "uni013B," and "u013B"
    all map to the string U+013B"""
-    assert '\u013B' == name2unicode('uni013B')
+    assert "\u013B" == name2unicode("uni013B")
 def test_name2unicode_uni_lowercase():
    """The components "Lcommaaccent," "uni013B," and "u013B"
    all map to the string U+013B"""
-    assert '\u013B' == name2unicode('uni013b')
+    assert "\u013B" == name2unicode("uni013b")
 def test_name2unicode_uni_with_sequence_of_digits():
    """The name "uni20AC0308" has a single component,
    which is mapped to the string U+20AC U+0308"""
-    assert '\u20AC\u0308' == name2unicode('uni20AC0308')
+    assert "\u20AC\u0308" == name2unicode("uni20AC0308")
 def test_name2unicode_uni_with_sequence_of_digits_lowercase():
    """The name "uni20AC0308" has a single component,
    which is mapped to the string U+20AC U+0308"""
-    assert '\u20AC\u0308' == name2unicode('uni20ac0308')
+    assert "\u20AC\u0308" == name2unicode("uni20ac0308")
 def test_name2unicode_uni_empty_string():
@ -46,7 +46,7 @@ def test_name2unicode_uni_empty_string():
    According to the specification this should be mapped to an empty string,
    but we also want to support lowercase hexadecimals"""
-    assert '\u20ac' == name2unicode('uni20ac')
+    assert "\u20ac" == name2unicode("uni20ac")
 def test_name2unicode_uni_empty_string_long():
@ -60,7 +60,7 @@ def test_name2unicode_uni_empty_string_long():
    glyph name "u1040C.
    """
    with pytest.raises(KeyError):
-        name2unicode('uniD801DC0C')
+        name2unicode("uniD801DC0C")
 def test_name2unicode_uni_empty_string_long_lowercase():
@ -73,57 +73,59 @@ def test_name2unicode_uni_empty_string_long_lowercase():
    This character can be correctly mapped by using the
    glyph name "u1040C."""
    with pytest.raises(KeyError):
-        name2unicode('uniD801DC0C')
+        name2unicode("uniD801DC0C")
 def test_name2unicode_uni_pua():
-    """"Ogoneksmall" and "uniF6FB" both map to the string that corresponds to
+    """ "Ogoneksmall" and "uniF6FB" both map to the string that corresponds to
-     U+F6FB."""
+    U+F6FB."""
-    assert '\uF6FB' == name2unicode('uniF6FB')
+    assert "\uF6FB" == name2unicode("uniF6FB")
 def test_name2unicode_uni_pua_lowercase():
-    """"Ogoneksmall" and "uniF6FB" both map to the string that corresponds to
+    """ "Ogoneksmall" and "uniF6FB" both map to the string that corresponds to
-     U+F6FB."""
+    U+F6FB."""
-    assert '\uF6FB' == name2unicode('unif6fb')
+    assert "\uF6FB" == name2unicode("unif6fb")
 def test_name2unicode_u_with_4_digits():
    """The components "Lcommaaccent," "uni013B," and "u013B" all map to the
    string U+013B"""
-    assert '\u013B' == name2unicode('u013B')
+    assert "\u013B" == name2unicode("u013B")
 def test_name2unicode_u_with_4_digits_lowercase():
    """The components "Lcommaaccent," "uni013B," and "u013B" all map to the
    string U+013B"""
-    assert '\u013B' == name2unicode('u013b')
+    assert "\u013B" == name2unicode("u013b")
 def test_name2unicode_u_with_5_digits():
    """The name "u1040C" has a single component, which is mapped to the string
-     U+1040C"""
+    U+1040C"""
-    assert '\U0001040C' == name2unicode('u1040C')
+    assert "\U0001040C" == name2unicode("u1040C")
 def test_name2unicode_u_with_5_digits_lowercase():
    """The name "u1040C" has a single component, which is mapped to the string
-     U+1040C"""
+    U+1040C"""
-    assert '\U0001040C' == name2unicode('u1040c')
+    assert "\U0001040C" == name2unicode("u1040c")
 def test_name2unicode_multiple_components():
    """The name "Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the
    string U+013B U+20AC U+0308 U+1040C"""
-    assert '\u013B\u20AC\u0308\U0001040C' == \
+    assert "\u013B\u20AC\u0308\U0001040C" == name2unicode(
-           name2unicode('Lcommaaccent_uni20AC0308_u1040C.alternate')
+        "Lcommaaccent_uni20AC0308_u1040C.alternate"
    )
 def test_name2unicode_multiple_components_lowercase():
    """The name "Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the
-     string U+013B U+20AC U+0308 U+1040C"""
+    string U+013B U+20AC U+0308 U+1040C"""
-    assert '\u013B\u20AC\u0308\U0001040C' == \
+    assert "\u013B\u20AC\u0308\U0001040C" == name2unicode(
-           name2unicode('Lcommaaccent_uni20ac0308_u1040c.alternate')
+        "Lcommaaccent_uni20ac0308_u1040c.alternate"
    )
 def test_name2unicode_foo():
@ -131,26 +133,26 @@ def test_name2unicode_foo():
    because 'foo' is not in AGL,
    and because it does not start with a 'u.'"""
    with pytest.raises(KeyError):
-        name2unicode('foo')
+        name2unicode("foo")
 def test_name2unicode_notdef():
    """The name ".notdef" is reduced to an empty string (step 1)
    and mapped to an empty string (step 3)"""
    with pytest.raises(KeyError):
-        name2unicode('.notdef')
+        name2unicode(".notdef")
 def test_name2unicode_pua_ogoneksmall():
-    """"
+    """ "
    Ogoneksmall" and "uniF6FB" both map to the string
    that corresponds to U+F6FB."""
-    assert '\uF6FB' == name2unicode('Ogoneksmall')
+    assert "\uF6FB" == name2unicode("Ogoneksmall")
 def test_name2unicode_overflow_error():
    with pytest.raises(KeyError):
-        name2unicode('226215240241240240240240')
+        name2unicode("226215240241240240240240")
 def test_get_encoding_with_invalid_differences():
@ -158,5 +160,5 @@ def test_get_encoding_with_invalid_differences():
    Regression test for https://github.com/pdfminer/pdfminer.six/issues/385
    """
-    invalid_differences = [PSLiteral('ubuntu'), PSLiteral('1234')]
+    invalid_differences = [PSLiteral("ubuntu"), PSLiteral("1234")]
-    EncodingDB.get_encoding('StandardEncoding', invalid_differences)
+    EncodingDB.get_encoding("StandardEncoding", invalid_differences)
--- a/tests/test_font_size.py
+++ b/tests/test_font_size.py
@ -4,7 +4,7 @@ from pdfminer.layout import LTChar, LTTextBox
 def test_font_size():
-    path = absolute_sample_path('font-size-test.pdf')
+    path = absolute_sample_path("font-size-test.pdf")
    for page in extract_pages(path):
        for text_box in page:
            if isinstance(text_box, LTTextBox):
--- a/tests/test_highlevel_extracttext.py
+++ b/tests/test_highlevel_extracttext.py
@ -22,19 +22,19 @@ def run_with_file(sample_path):
 test_strings = {
    "simple1.pdf": "Hello \n\nWorld\n\nHello \n\nWorld\n\n"
-                   "H e l l o  \n\nW o r l d\n\n"
+    "H e l l o  \n\nW o r l d\n\n"
-                   "H e l l o  \n\nW o r l d\n\n\f",
+    "H e l l o  \n\nW o r l d\n\n\f",
    "simple1.pdf_no_boxes_flow": "Hello \n\nWorld\n\nHello \n\nWorld\n\n"
-                                 "H e l l o  \n\nW o r l d\n\n"
+    "H e l l o  \n\nW o r l d\n\n"
-                                 "H e l l o  \n\nW o r l d\n\n\f",
+    "H e l l o  \n\nW o r l d\n\n\f",
    "simple2.pdf": "\f",
    "simple3.pdf": "Hello\n\nHello\nあ\nい\nう\nえ\nお\nあ\nい\nう\nえ\nお\n"
-                   "World\n\nWorld\n\n\f",
+    "World\n\nWorld\n\n\f",
    "simple4.pdf": "Text1\nText2\nText3\n\n\f",
    "simple5.pdf": "Heading\n\n"
-                   "Link to heading that is working with vim-pandoc.\n\n"
+    "Link to heading that is working with vim-pandoc.\n\n"
-                   "Link to heading “that is” not working with vim-pandoc.\n\n"
+    "Link to heading “that is” not working with vim-pandoc.\n\n"
-                   "Subheading\n\nSome “more text”\n\n1\n\n\f",
+    "Subheading\n\nSome “more text”\n\n1\n\n\f",
    "zen_of_python_corrupted.pdf": "Mai 30, 18 13:27\n\nzen_of_python.txt",
    "contrib/issue_566_test_1.pdf": "ISSUE Date：2019-4-25 Buyer：黎荣",
    "contrib/issue_566_test_2.pdf": "甲方：中国饮料有限公司（盖章）",
@ -102,7 +102,7 @@ class TestExtractText(unittest.TestCase):
        test_file = "zen_of_python_corrupted.pdf"
        s = run_with_file(test_file)
        expected = test_strings[test_file]
-        self.assertEqual(s[:len(expected)], expected)
+        self.assertEqual(s[: len(expected)], expected)
    def test_issue_566_cmap_bytes(self):
        test_file = "contrib/issue_566_test_1.pdf"
@ -129,37 +129,43 @@ class TestExtractPages(unittest.TestCase):
    def test_line_margin(self):
        # The lines have margin 0.2 relative to the height.
        # Extract with line_margin 0.19 should break into 3 separate textboxes.
-        pages = list(extract_pages(
+        pages = list(
-            self._get_test_file_path(), laparams=LAParams(line_margin=0.19)))
+            extract_pages(
                self._get_test_file_path(), laparams=LAParams(line_margin=0.19)
            )
        )
        self.assertEqual(len(pages), 1)
        page = pages[0]
-        elements = [element for element in page
+        elements = [element for element in page if isinstance(element, LTTextContainer)]
                    if isinstance(element, LTTextContainer)]
        self.assertEqual(len(elements), 3)
        self.assertEqual(elements[0].get_text(), "Text1\n")
        self.assertEqual(elements[1].get_text(), "Text2\n")
        self.assertEqual(elements[2].get_text(), "Text3\n")
        # Extract with line_margin 0.21 should merge into one textbox.
-        pages = list(extract_pages(
+        pages = list(
-            self._get_test_file_path(), laparams=LAParams(line_margin=0.21)))
+            extract_pages(
                self._get_test_file_path(), laparams=LAParams(line_margin=0.21)
            )
        )
        self.assertEqual(len(pages), 1)
        page = pages[0]
-        elements = [element for element in page
+        elements = [element for element in page if isinstance(element, LTTextContainer)]
                    if isinstance(element, LTTextContainer)]
        self.assertEqual(len(elements), 1)
        self.assertEqual(elements[0].get_text(), "Text1\nText2\nText3\n")
    def test_no_boxes_flow(self):
-        pages = list(extract_pages(
+        pages = list(
-            self._get_test_file_path(), laparams=LAParams(boxes_flow=None)))
+            extract_pages(
                self._get_test_file_path(), laparams=LAParams(boxes_flow=None)
            )
        )
        self.assertEqual(len(pages), 1)
        page = pages[0]
-        elements = [element for element in page
+        elements = [element for element in page if isinstance(element, LTTextContainer)]
                    if isinstance(element, LTTextContainer)]
        self.assertEqual(len(elements), 1)
        self.assertEqual(elements[0].get_text(), "Text1\nText2\nText3\n")
--- a/tests/test_layout.py
+++ b/tests/test_layout.py
@ -46,8 +46,7 @@ class TestFindNeigbors(unittest.TestCase):
        right_aligned_below.set_bbox((15, 2, 20, 4))
        plane.add(right_aligned_below)
-        centrally_aligned_overlapping = LTTextLineHorizontal(
+        centrally_aligned_overlapping = LTTextLineHorizontal(laparams.word_margin)
            laparams.word_margin)
        centrally_aligned_overlapping.set_bbox((13, 5, 17, 7))
        plane.add(centrally_aligned_overlapping)
@ -86,8 +85,7 @@ class TestFindNeigbors(unittest.TestCase):
        top_aligned_left.set_bbox((2, 15, 4, 20))
        plane.add(top_aligned_left)
-        centrally_aligned_overlapping = LTTextLineVertical(
+        centrally_aligned_overlapping = LTTextLineVertical(laparams.word_margin)
            laparams.word_margin)
        centrally_aligned_overlapping.set_bbox((5, 13, 7, 17))
        plane.add(centrally_aligned_overlapping)
--- a/tests/test_pdfdocument.py
+++ b/tests/test_pdfdocument.py
@ -9,9 +9,8 @@ from pdfminer.pdftypes import PDFObjectNotFound, dict_value, int_value
 class TestPdfDocument(object):
    def test_get_zero_objid_raises_pdfobjectnotfound(self):
-        with open(absolute_sample_path('simple1.pdf'), 'rb') as in_file:
+        with open(absolute_sample_path("simple1.pdf"), "rb") as in_file:
            parser = PDFParser(in_file)
            doc = PDFDocument(parser)
            with pytest.raises(PDFObjectNotFound):
@ -21,24 +20,29 @@ class TestPdfDocument(object):
        # Some documents may be encrypted but not have an /ID key in
        # their trailer. Tests
        # https://github.com/pdfminer/pdfminer.six/issues/594
-        path = absolute_sample_path('encryption/encrypted_doc_no_id.pdf')
+        path = absolute_sample_path("encryption/encrypted_doc_no_id.pdf")
-        with open(path, 'rb') as fp:
+        with open(path, "rb") as fp:
            parser = PDFParser(fp)
            doc = PDFDocument(parser)
-            assert doc.info == [{'Producer': b'European Patent Office'}]
+            assert doc.info == [{"Producer": b"European Patent Office"}]
    def test_page_labels(self):
-        path = absolute_sample_path('contrib/pagelabels.pdf')
+        path = absolute_sample_path("contrib/pagelabels.pdf")
-        with open(path, 'rb') as fp:
+        with open(path, "rb") as fp:
            parser = PDFParser(fp)
            doc = PDFDocument(parser)
-            total_pages = int_value(dict_value(doc.catalog['Pages'])['Count'])
+            total_pages = int_value(dict_value(doc.catalog["Pages"])["Count"])
-            assert list(itertools.islice(doc.get_page_labels(), total_pages)) \
+            assert list(itertools.islice(doc.get_page_labels(), total_pages)) == [
-                   == ['iii', 'iv', '1', '2', '1']
+                "iii",
                "iv",
                "1",
                "2",
                "1",
            ]
    def test_no_page_labels(self):
-        path = absolute_sample_path('simple1.pdf')
+        path = absolute_sample_path("simple1.pdf")
-        with open(path, 'rb') as fp:
+        with open(path, "rb") as fp:
            parser = PDFParser(fp)
            doc = PDFDocument(parser)
--- a/tests/test_pdfencoding.py
+++ b/tests/test_pdfencoding.py
@ -9,96 +9,95 @@ from pdfminer.psparser import PSLiteral
 class TestPDFEncoding:
    def test_cmapname_onebyteidentityV(self):
-        stream = PDFStream({'CMapName': PSLiteral('OneByteIdentityV')}, '')
+        stream = PDFStream({"CMapName": PSLiteral("OneByteIdentityV")}, "")
-        spec = {'Encoding': stream}
+        spec = {"Encoding": stream}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMapByte)
    def test_cmapname_onebyteidentityH(self):
-        stream = PDFStream({'CMapName': PSLiteral('OneByteIdentityH')}, '')
+        stream = PDFStream({"CMapName": PSLiteral("OneByteIdentityH")}, "")
-        spec = {'Encoding': stream}
+        spec = {"Encoding": stream}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMapByte)
    def test_cmapname_V(self):
-        stream = PDFStream({'CMapName': PSLiteral('V')}, '')
+        stream = PDFStream({"CMapName": PSLiteral("V")}, "")
-        spec = {'Encoding': stream}
+        spec = {"Encoding": stream}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, CMap)
    def test_cmapname_H(self):
-        stream = PDFStream({'CMapName': PSLiteral('H')}, '')
+        stream = PDFStream({"CMapName": PSLiteral("H")}, "")
-        spec = {'Encoding': stream}
+        spec = {"Encoding": stream}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, CMap)
    def test_encoding_identityH(self):
-        spec = {'Encoding': PSLiteral('Identity-H')}
+        spec = {"Encoding": PSLiteral("Identity-H")}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMap)
    def test_encoding_identityV(self):
-        spec = {'Encoding': PSLiteral('Identity-V')}
+        spec = {"Encoding": PSLiteral("Identity-V")}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMap)
    def test_encoding_identityH_as_PSLiteral_stream(self):
-        stream = PDFStream({'CMapName': PSLiteral('Identity-H')}, '')
+        stream = PDFStream({"CMapName": PSLiteral("Identity-H")}, "")
-        spec = {'Encoding': stream}
+        spec = {"Encoding": stream}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMap)
    def test_encoding_identityV_as_PSLiteral_stream(self):
-        stream = PDFStream({'CMapName': PSLiteral('Identity-V')}, '')
+        stream = PDFStream({"CMapName": PSLiteral("Identity-V")}, "")
-        spec = {'Encoding': stream}
+        spec = {"Encoding": stream}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMap)
    def test_encoding_identityH_as_stream(self):
-        stream = PDFStream({'CMapName': 'Identity-H'}, '')
+        stream = PDFStream({"CMapName": "Identity-H"}, "")
-        spec = {'Encoding': stream}
+        spec = {"Encoding": stream}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMap)
    def test_encoding_identityV_as_stream(self):
-        stream = PDFStream({'CMapName': 'Identity-V'}, '')
+        stream = PDFStream({"CMapName": "Identity-V"}, "")
-        spec = {'Encoding': stream}
+        spec = {"Encoding": stream}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMap)
    def test_encoding_DLIdentH(self):
-        spec = {'Encoding': PSLiteral('DLIdent-H')}
+        spec = {"Encoding": PSLiteral("DLIdent-H")}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMap)
    def test_encoding_DLIdentV(self):
-        spec = {'Encoding': PSLiteral('DLIdent-V')}
+        spec = {"Encoding": PSLiteral("DLIdent-V")}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMap)
    def test_encoding_DLIdentH_as_PSLiteral_stream(self):
-        stream = PDFStream({'CMapName': PSLiteral('DLIdent-H')}, '')
+        stream = PDFStream({"CMapName": PSLiteral("DLIdent-H")}, "")
-        spec = {'Encoding': stream}
+        spec = {"Encoding": stream}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMap)
    def test_encoding_DLIdentV_as_PSLiteral_stream(self):
-        stream = PDFStream({'CMapName': PSLiteral('DLIdent-V')}, '')
+        stream = PDFStream({"CMapName": PSLiteral("DLIdent-V")}, "")
-        spec = {'Encoding': stream}
+        spec = {"Encoding": stream}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMap)
    def test_encoding_DLIdentH_as_stream(self):
-        stream = PDFStream({'CMapName': 'DLIdent-H'}, '')
+        stream = PDFStream({"CMapName": "DLIdent-H"}, "")
-        spec = {'Encoding': stream}
+        spec = {"Encoding": stream}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMap)
    def test_encoding_DLIdentV_as_stream(self):
-        stream = PDFStream({'CMapName': 'DLIdent-V'}, '')
+        stream = PDFStream({"CMapName": "DLIdent-V"}, "")
-        spec = {'Encoding': stream}
+        spec = {"Encoding": stream}
        font = PDFCIDFont(None, spec)
        assert isinstance(font.cmap, IdentityCMap)
--- a/tests/test_pdffont.py
+++ b/tests/test_pdffont.py
@ -8,12 +8,12 @@ def test_get_cmap_from_pickle():
    Regression test for https://github.com/pdfminer/pdfminer.six/issues/391
    """
-    cmap_name = 'UniGB-UCS2-H'
+    cmap_name = "UniGB-UCS2-H"
-    spec = {'Encoding': PSLiteral(cmap_name)}
+    spec = {"Encoding": PSLiteral(cmap_name)}
    resource_manager = PDFResourceManager()
    font = PDFCIDFont(resource_manager, spec)
    cmap = font.get_cmap_from_spec(spec, False)
-    assert cmap.attrs.get('CMapName') == cmap_name
+    assert cmap.attrs.get("CMapName") == cmap_name
    assert len(cmap.code2cid) > 0
--- a/tests/test_pdfminer_ccitt.py
+++ b/tests/test_pdfminer_ccitt.py
@ -1,7 +1,7 @@
 from pdfminer.ccitt import CCITTG4Parser, CCITTFaxDecoder
-class TestCCITTG4Parser():
+class TestCCITTG4Parser:
    def get_parser(self, bits):
        parser = CCITTG4Parser(len(bits))
        parser._curline = [int(c) for c in bits]
@ -9,60 +9,60 @@ class TestCCITTG4Parser():
        return parser
    def test_b1(self):
-        parser = self.get_parser('00000')
+        parser = self.get_parser("00000")
        parser._do_vertical(0)
        assert parser._curpos == 0
        return
    def test_b2(self):
-        parser = self.get_parser('10000')
+        parser = self.get_parser("10000")
        parser._do_vertical(-1)
        assert parser._curpos == 0
        return
    def test_b3(self):
-        parser = self.get_parser('000111')
+        parser = self.get_parser("000111")
        parser._do_pass()
        assert parser._curpos == 3
-        assert parser._get_bits() == '111'
+        assert parser._get_bits() == "111"
        return
    def test_b4(self):
-        parser = self.get_parser('00000')
+        parser = self.get_parser("00000")
        parser._do_vertical(+2)
        assert parser._curpos == 2
-        assert parser._get_bits() == '11'
+        assert parser._get_bits() == "11"
        return
    def test_b5(self):
-        parser = self.get_parser('11111111100')
+        parser = self.get_parser("11111111100")
        parser._do_horizontal(0, 3)
        assert parser._curpos == 3
        parser._do_vertical(1)
        assert parser._curpos == 10
-        assert parser._get_bits() == '0001111111'
+        assert parser._get_bits() == "0001111111"
        return
    def test_e1(self):
-        parser = self.get_parser('10000')
+        parser = self.get_parser("10000")
        parser._do_vertical(0)
        assert parser._curpos == 1
        parser._do_vertical(0)
        assert parser._curpos == 5
-        assert parser._get_bits() == '10000'
+        assert parser._get_bits() == "10000"
        return
    def test_e2(self):
-        parser = self.get_parser('10011')
+        parser = self.get_parser("10011")
        parser._do_vertical(0)
        assert parser._curpos == 1
        parser._do_vertical(2)
        assert parser._curpos == 5
-        assert parser._get_bits() == '10000'
+        assert parser._get_bits() == "10000"
        return
    def test_e3(self):
-        parser = self.get_parser('011111')
+        parser = self.get_parser("011111")
        parser._color = 0
        parser._do_vertical(0)
        assert parser._color == 1
@ -72,90 +72,90 @@ class TestCCITTG4Parser():
        assert parser._curpos == 4
        parser._do_vertical(0)
        assert parser._curpos == 6
-        assert parser._get_bits() == '011100'
+        assert parser._get_bits() == "011100"
        return
    def test_e4(self):
-        parser = self.get_parser('10000')
+        parser = self.get_parser("10000")
        parser._do_vertical(0)
        assert parser._curpos == 1
        parser._do_vertical(-2)
        assert parser._curpos == 3
        parser._do_vertical(0)
        assert parser._curpos == 5
-        assert parser._get_bits() == '10011'
+        assert parser._get_bits() == "10011"
        return
    def test_e5(self):
-        parser = self.get_parser('011000')
+        parser = self.get_parser("011000")
        parser._color = 0
        parser._do_vertical(0)
        assert parser._curpos == 1
        parser._do_vertical(3)
        assert parser._curpos == 6
-        assert parser._get_bits() == '011111'
+        assert parser._get_bits() == "011111"
        return
    def test_e6(self):
-        parser = self.get_parser('11001')
+        parser = self.get_parser("11001")
        parser._do_pass()
        assert parser._curpos == 4
        parser._do_vertical(0)
        assert parser._curpos == 5
-        assert parser._get_bits() == '11111'
+        assert parser._get_bits() == "11111"
        return
    def test_e7(self):
-        parser = self.get_parser('0000000000')
+        parser = self.get_parser("0000000000")
        parser._curpos = 2
        parser._color = 1
        parser._do_horizontal(2, 6)
        assert parser._curpos == 10
-        assert parser._get_bits() == '1111000000'
+        assert parser._get_bits() == "1111000000"
        return
    def test_e8(self):
-        parser = self.get_parser('001100000')
+        parser = self.get_parser("001100000")
        parser._curpos = 1
        parser._color = 0
        parser._do_vertical(0)
        assert parser._curpos == 2
        parser._do_horizontal(7, 0)
        assert parser._curpos == 9
-        assert parser._get_bits() == '101111111'
+        assert parser._get_bits() == "101111111"
        return
    def test_m1(self):
-        parser = self.get_parser('10101')
+        parser = self.get_parser("10101")
        parser._do_pass()
        assert parser._curpos == 2
        parser._do_pass()
        assert parser._curpos == 4
-        assert parser._get_bits() == '1111'
+        assert parser._get_bits() == "1111"
        return
    def test_m2(self):
-        parser = self.get_parser('101011')
+        parser = self.get_parser("101011")
        parser._do_vertical(-1)
        parser._do_vertical(-1)
        parser._do_vertical(1)
        parser._do_horizontal(1, 1)
-        assert parser._get_bits() == '011101'
+        assert parser._get_bits() == "011101"
        return
    def test_m3(self):
-        parser = self.get_parser('10111011')
+        parser = self.get_parser("10111011")
        parser._do_vertical(-1)
        parser._do_pass()
        parser._do_vertical(1)
        parser._do_vertical(1)
-        assert parser._get_bits() == '00000001'
+        assert parser._get_bits() == "00000001"
        return
 class TestCCITTFaxDecoder:
    def test_b1(self):
        decoder = CCITTFaxDecoder(5)
-        decoder.output_line(0, b'0')
+        decoder.output_line(0, b"0")
-        assert decoder.close() == b'\x80'
+        assert decoder.close() == b"\x80"
        return
--- a/tests/test_pdfminer_crypto.py
+++ b/tests/test_pdfminer_crypto.py
@ -18,36 +18,37 @@ def dehex(b):
    return binascii.unhexlify(b)
-class TestAscii85():
+class TestAscii85:
    def test_ascii85decode(self):
        """The sample string is taken from:
        http://en.wikipedia.org/w/index.php?title=Ascii85"""
-        assert ascii85decode(b'9jqo^BlbD-BleB1DJ+*+F(f,q') \
+        assert ascii85decode(b"9jqo^BlbD-BleB1DJ+*+F(f,q") == b"Man is distinguished"
-               == b'Man is distinguished'
+        assert ascii85decode(b"E,9)oF*2M7/c~>") == b"pleasure."
        assert ascii85decode(b'E,9)oF*2M7/c~>') == b'pleasure.'
    def test_asciihexdecode(self):
-        assert asciihexdecode(b'61 62 2e6364   65') == b'ab.cde'
+        assert asciihexdecode(b"61 62 2e6364   65") == b"ab.cde"
-        assert asciihexdecode(b'61 62 2e6364   657>') == b'ab.cdep'
+        assert asciihexdecode(b"61 62 2e6364   657>") == b"ab.cdep"
-        assert asciihexdecode(b'7>') == b'p'
+        assert asciihexdecode(b"7>") == b"p"
-class TestArcfour():
+class TestArcfour:
    def test(self):
-        assert hex(Arcfour(b'Key').process(b'Plaintext')) \
+        assert hex(Arcfour(b"Key").process(b"Plaintext")) == b"bbf316e8d940af0ad3"
-               == b'bbf316e8d940af0ad3'
+        assert hex(Arcfour(b"Wiki").process(b"pedia")) == b"1021bf0420"
-        assert hex(Arcfour(b'Wiki').process(b'pedia')) == b'1021bf0420'
+        assert (
-        assert hex(Arcfour(b'Secret').process(b'Attack at dawn')) \
+            hex(Arcfour(b"Secret").process(b"Attack at dawn"))
-               == b'45a01f645fc35b383552544b9bf5'
+            == b"45a01f645fc35b383552544b9bf5"
        )
-class TestLzw():
+class TestLzw:
    def test_lzwdecode(self):
-        assert lzwdecode(b'\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01') \
+        assert (
-               == b'\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42'
+            lzwdecode(b"\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01")
            == b"\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42"
        )
-class TestRunlength():
+class TestRunlength:
    def test_rldecode(self):
-        assert rldecode(b'\x05123456\xfa7\x04abcde\x80junk') \
+        assert rldecode(b"\x05123456\xfa7\x04abcde\x80junk") == b"1234567777777abcde"
               == b'1234567777777abcde'
--- a/tests/test_pdfminer_psparser.py
+++ b/tests/test_pdfminer_psparser.py
@ -8,7 +8,7 @@ logger = logging.getLogger(__name__)
 class TestPSBaseParser:
    """Simplistic Test cases"""
-    TESTDATA = br'''%!PS
+    TESTDATA = rb"""%!PS
 begin end
 "  @ #
 /a/BCD /Some_Name /foo#5f#xbaa
@ -26,33 +26,83 @@ baa)
 func/a/b{(c)do*}def
 [ 1 (z) ! ]
 << /foo (bar) >>
-'''
+"""
    TOKENS = [
-        (5, KWD(b'begin')), (11, KWD(b'end')), (16, KWD(b'"')),
+        (5, KWD(b"begin")),
-        (19, KWD(b'@')), (21, KWD(b'#')), (23, LIT('a')), (25, LIT('BCD')),
+        (11, KWD(b"end")),
-        (30, LIT('Some_Name')), (41, LIT('foo_xbaa')), (54, 0), (56, 1),
+        (16, KWD(b'"')),
-        (59, -2), (62, 0.5),  (65, 1.234), (71, b'abc'), (77, b''),
+        (19, KWD(b"@")),
-        (80, b'abc ( def ) ghi'), (98, b'def \x00 4ghi'),
+        (21, KWD(b"#")),
-        (118, b'bach\\slask'), (132, b'foo\nbaa'),
+        (23, LIT("a")),
-        (143, b'this % is not a comment.'), (170, b'foo\nbaa'),
+        (25, LIT("BCD")),
-        (180, b'foobaa'), (191, b''), (194, b' '), (199, b'@@ '),
+        (30, LIT("Some_Name")),
-        (211, b'\xab\xcd\x00\x124\x05'),  (226, KWD(b'func')), (230, LIT('a')),
+        (41, LIT("foo_xbaa")),
-        (232, LIT('b')), (234, KWD(b'{')), (235, b'c'), (238, KWD(b'do*')),
+        (54, 0),
-        (241, KWD(b'}')), (242, KWD(b'def')), (246, KWD(b'[')), (248, 1),
+        (56, 1),
-        (250, b'z'), (254, KWD(b'!')), (256, KWD(b']')), (258, KWD(b'<<')),
+        (59, -2),
-        (261, LIT('foo')), (266, b'bar'), (272, KWD(b'>>'))
+        (62, 0.5),
        (65, 1.234),
        (71, b"abc"),
        (77, b""),
        (80, b"abc ( def ) ghi"),
        (98, b"def \x00 4ghi"),
        (118, b"bach\\slask"),
        (132, b"foo\nbaa"),
        (143, b"this % is not a comment."),
        (170, b"foo\nbaa"),
        (180, b"foobaa"),
        (191, b""),
        (194, b" "),
        (199, b"@@ "),
        (211, b"\xab\xcd\x00\x124\x05"),
        (226, KWD(b"func")),
        (230, LIT("a")),
        (232, LIT("b")),
        (234, KWD(b"{")),
        (235, b"c"),
        (238, KWD(b"do*")),
        (241, KWD(b"}")),
        (242, KWD(b"def")),
        (246, KWD(b"[")),
        (248, 1),
        (250, b"z"),
        (254, KWD(b"!")),
        (256, KWD(b"]")),
        (258, KWD(b"<<")),
        (261, LIT("foo")),
        (266, b"bar"),
        (272, KWD(b">>")),
    ]
    OBJS = [
-        (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')),
+        (23, LIT("a")),
-        (41, LIT('foo_xbaa')), (54, 0), (56, 1), (59, -2), (62, 0.5),
+        (25, LIT("BCD")),
-        (65, 1.234), (71, b'abc'), (77, b''), (80, b'abc ( def ) ghi'),
+        (30, LIT("Some_Name")),
-        (98, b'def \x00 4ghi'), (118, b'bach\\slask'), (132, b'foo\nbaa'),
+        (41, LIT("foo_xbaa")),
-        (143, b'this % is not a comment.'), (170, b'foo\nbaa'),
+        (54, 0),
-        (180, b'foobaa'), (191, b''), (194, b' '), (199, b'@@ '),
+        (56, 1),
-        (211, b'\xab\xcd\x00\x124\x05'), (230, LIT('a')), (232, LIT('b')),
+        (59, -2),
-        (234, [b'c']), (246, [1, b'z']), (258, {'foo': b'bar'}),
+        (62, 0.5),
        (65, 1.234),
        (71, b"abc"),
        (77, b""),
        (80, b"abc ( def ) ghi"),
        (98, b"def \x00 4ghi"),
        (118, b"bach\\slask"),
        (132, b"foo\nbaa"),
        (143, b"this % is not a comment."),
        (170, b"foo\nbaa"),
        (180, b"foobaa"),
        (191, b""),
        (194, b" "),
        (199, b"@@ "),
        (211, b"\xab\xcd\x00\x124\x05"),
        (230, LIT("a")),
        (232, LIT("b")),
        (234, [b"c"]),
        (246, [1, b"z"]),
        (258, {"foo": b"bar"}),
    ]
    def get_tokens(self, s):
--- a/tests/test_pdfpage.py
+++ b/tests/test_pdfpage.py
@ -6,10 +6,10 @@ from pdfminer.pdfparser import PDFParser
 class TestPdfPage(object):
    def test_page_labels(self):
-        path = absolute_sample_path('contrib/pagelabels.pdf')
+        path = absolute_sample_path("contrib/pagelabels.pdf")
-        expected_labels = ['iii', 'iv', '1', '2', '1']
+        expected_labels = ["iii", "iv", "1", "2", "1"]
-        with open(path, 'rb') as fp:
+        with open(path, "rb") as fp:
            parser = PDFParser(fp)
            doc = PDFDocument(parser)
            for (i, page) in enumerate(PDFPage.create_pages(doc)):
--- a/tests/test_tools_dumppdf.py
+++ b/tests/test_tools_dumppdf.py
@ -11,48 +11,47 @@ def run(filename, options=None):
    absolute_path = absolute_sample_path(filename)
    with TemporaryFilePath() as output_file_name:
        if options:
-            s = 'dumppdf -o %s %s %s' % (output_file_name,
+            s = "dumppdf -o %s %s %s" % (output_file_name, options, absolute_path)
                                         options, absolute_path)
        else:
-            s = 'dumppdf -o %s %s' % (output_file_name, absolute_path)
+            s = "dumppdf -o %s %s" % (output_file_name, absolute_path)
-        dumppdf.main(s.split(' ')[1:])
+        dumppdf.main(s.split(" ")[1:])
 class TestDumpPDF(unittest.TestCase):
    def test_simple1(self):
-        run('simple1.pdf', '-t -a')
+        run("simple1.pdf", "-t -a")
    def test_simple2(self):
-        run('simple2.pdf', '-t -a')
+        run("simple2.pdf", "-t -a")
    def test_jo(self):
-        run('jo.pdf', '-t -a')
+        run("jo.pdf", "-t -a")
    def test_simple3(self):
-        run('simple3.pdf', '-t -a')
+        run("simple3.pdf", "-t -a")
    def test_2(self):
-        run('nonfree/dmca.pdf', '-t -a')
+        run("nonfree/dmca.pdf", "-t -a")
    def test_3(self):
-        run('nonfree/f1040nr.pdf')
+        run("nonfree/f1040nr.pdf")
    def test_4(self):
-        run('nonfree/i1040nr.pdf')
+        run("nonfree/i1040nr.pdf")
    def test_5(self):
-        run('nonfree/kampo.pdf', '-t -a')
+        run("nonfree/kampo.pdf", "-t -a")
    def test_6(self):
-        run('nonfree/naacl06-shinyama.pdf', '-t -a')
+        run("nonfree/naacl06-shinyama.pdf", "-t -a")
    def test_simple1_raw(self):
        """Known issue: crash in dumpxml writing binary to text stream."""
        with pytest.raises(TypeError):
-            run('simple1.pdf', '-r -a')
+            run("simple1.pdf", "-r -a")
    def test_simple1_binary(self):
        """Known issue: crash in dumpxml writing binary to text stream."""
        with pytest.raises(TypeError):
-            run('simple1.pdf', '-b -a')
+            run("simple1.pdf", "-b -a")
--- a/tests/test_tools_pdf2txt.py
+++ b/tests/test_tools_pdf2txt.py
@ -12,115 +12,119 @@ def run(sample_path, options=None):
    absolute_path = absolute_sample_path(sample_path)
    with TemporaryFilePath() as output_file_name:
        if options:
-            s = 'pdf2txt -o{} {} {}' \
+            s = "pdf2txt -o{} {} {}".format(output_file_name, options, absolute_path)
                .format(output_file_name, options, absolute_path)
        else:
-            s = 'pdf2txt -o{} {}'.format(output_file_name, absolute_path)
+            s = "pdf2txt -o{} {}".format(output_file_name, absolute_path)
-        pdf2txt.main(s.split(' ')[1:])
+        pdf2txt.main(s.split(" ")[1:])
-class TestPdf2Txt():
+class TestPdf2Txt:
    def test_jo(self):
-        run('jo.pdf')
+        run("jo.pdf")
    def test_simple1(self):
-        run('simple1.pdf')
+        run("simple1.pdf")
    def test_simple2(self):
-        run('simple2.pdf')
+        run("simple2.pdf")
    def test_simple3(self):
-        run('simple3.pdf')
+        run("simple3.pdf")
    def test_sample_one_byte_identity_encode(self):
-        run('sampleOneByteIdentityEncode.pdf')
+        run("sampleOneByteIdentityEncode.pdf")
    def test_nonfree_175(self):
        """Regression test for:
        https://github.com/pdfminer/pdfminer.six/issues/65
        """
-        run('nonfree/175.pdf')
+        run("nonfree/175.pdf")
    def test_nonfree_dmca(self):
-        run('nonfree/dmca.pdf')
+        run("nonfree/dmca.pdf")
    def test_nonfree_f1040nr(self):
-        run('nonfree/f1040nr.pdf', '-p 1')
+        run("nonfree/f1040nr.pdf", "-p 1")
    def test_nonfree_i1040nr(self):
-        run('nonfree/i1040nr.pdf', '-p 1')
+        run("nonfree/i1040nr.pdf", "-p 1")
    def test_nonfree_kampo(self):
-        run('nonfree/kampo.pdf')
+        run("nonfree/kampo.pdf")
    def test_nonfree_naacl06_shinyama(self):
-        run('nonfree/naacl06-shinyama.pdf')
+        run("nonfree/naacl06-shinyama.pdf")
    def test_nlp2004slides(self):
-        run('nonfree/nlp2004slides.pdf', '-p 1')
+        run("nonfree/nlp2004slides.pdf", "-p 1")
    def test_contrib_2b(self):
-        run('contrib/2b.pdf', '-A -t xml')
+        run("contrib/2b.pdf", "-A -t xml")
    def test_contrib_issue_350(self):
        """Regression test for
        https://github.com/pdfminer/pdfminer.six/issues/350"""
-        run('contrib/issue-00352-asw-oct96-p41.pdf')
+        run("contrib/issue-00352-asw-oct96-p41.pdf")
    def test_scancode_patchelf(self):
        """Regression test for https://github.com/euske/pdfminer/issues/96"""
-        run('scancode/patchelf.pdf')
+        run("scancode/patchelf.pdf")
    def test_contrib_hash_two_complement(self):
        """Check that unsigned integer is added correctly to encryption hash.et
        See https://github.com/pdfminer/pdfminer.six/issues/186
        """
-        run('contrib/issue-00352-hash-twos-complement.pdf')
+        run("contrib/issue-00352-hash-twos-complement.pdf")
    def test_contrib_excel(self):
        """Regression test for
-         https://github.com/pdfminer/pdfminer.six/issues/369
+        https://github.com/pdfminer/pdfminer.six/issues/369
-         """
+        """
-        run('contrib/issue-00369-excel.pdf', '-t html')
+        run("contrib/issue-00369-excel.pdf", "-t html")
    def test_encryption_aes128(self):
-        run('encryption/aes-128.pdf', '-P foo')
+        run("encryption/aes-128.pdf", "-P foo")
    def test_encryption_aes128m(self):
-        run('encryption/aes-128-m.pdf', '-P foo')
+        run("encryption/aes-128-m.pdf", "-P foo")
    def test_encryption_aes256(self):
-        run('encryption/aes-256.pdf', '-P foo')
+        run("encryption/aes-256.pdf", "-P foo")
    def test_encryption_aes256m(self):
-        run('encryption/aes-256-m.pdf', '-P foo')
+        run("encryption/aes-256-m.pdf", "-P foo")
    def test_encryption_aes256_r6_user(self):
-        run('encryption/aes-256-r6.pdf', '-P usersecret')
+        run("encryption/aes-256-r6.pdf", "-P usersecret")
    def test_encryption_aes256_r6_owner(self):
-        run('encryption/aes-256-r6.pdf', '-P ownersecret')
+        run("encryption/aes-256-r6.pdf", "-P ownersecret")
    def test_encryption_base(self):
-        run('encryption/base.pdf', '-P foo')
+        run("encryption/base.pdf", "-P foo")
    def test_encryption_rc4_40(self):
-        run('encryption/rc4-40.pdf', '-P foo')
+        run("encryption/rc4-40.pdf", "-P foo")
    def test_encryption_rc4_128(self):
-        run('encryption/rc4-128.pdf', '-P foo')
+        run("encryption/rc4-128.pdf", "-P foo")
 class TestDumpImages:
    @staticmethod
    def extract_images(input_file, *args):
        output_dir = mkdtemp()
        with TemporaryFilePath() as output_file_name:
-            commands = ['-o', output_file_name, '--output-dir',
+            commands = [
-                        output_dir, input_file, *args]
+                "-o",
                output_file_name,
                "--output-dir",
                output_dir,
                input_file,
                *args,
            ]
            pdf2txt.main(commands)
        image_files = os.listdir(output_dir)
        rmtree(output_dir)
@ -132,39 +136,38 @@ class TestDumpImages:
        Regression test for:
        https://github.com/pdfminer/pdfminer.six/issues/131
        """
-        filepath = absolute_sample_path('../samples/nonfree/dmca.pdf')
+        filepath = absolute_sample_path("../samples/nonfree/dmca.pdf")
-        image_files = self.extract_images(filepath, '-p', '1')
+        image_files = self.extract_images(filepath, "-p", "1")
-        assert image_files[0].endswith('bmp')
+        assert image_files[0].endswith("bmp")
    def test_nonfree_175(self):
        """Extract images of pdf containing jpg images"""
-        self.extract_images(absolute_sample_path('../samples/nonfree/175.pdf'))
+        self.extract_images(absolute_sample_path("../samples/nonfree/175.pdf"))
    def test_jbig2_image_export(self):
        """Extract images of pdf containing jbig2 images
        Feature test for: https://github.com/pdfminer/pdfminer.six/pull/46
        """
-        input_file = absolute_sample_path(
-            '../samples/contrib/pdf-with-jbig2.pdf')
+        input_file = absolute_sample_path("../samples/contrib/pdf-with-jbig2.pdf")
        output_dir = mkdtemp()
        with TemporaryFilePath() as output_file_name:
-            commands = ['-o', output_file_name, '--output-dir',
-                        output_dir, input_file]
+            commands = ["-o", output_file_name, "--output-dir", output_dir, input_file]
            pdf2txt.main(commands)
        image_files = os.listdir(output_dir)
        try:
-            assert image_files[0].endswith('.jb2')
+            assert image_files[0].endswith(".jb2")
-            assert filecmp.cmp(output_dir + '/' + image_files[0],
-                               absolute_sample_path(
-                                   '../samples/contrib/XIPLAYER0.jb2'))
+            assert filecmp.cmp(
+                output_dir + "/" + image_files[0],
+                absolute_sample_path("../samples/contrib/XIPLAYER0.jb2"),
+            )
        finally:
            rmtree(output_dir)
    def test_contrib_matplotlib(self):
        """Test a pdf with Type3 font"""
-        run('contrib/matplotlib.pdf')
+        run("contrib/matplotlib.pdf")
    def test_nonfree_cmp_itext_logo(self):
        """Test a pdf with Type3 font"""
-        run('nonfree/cmp_itext_logo.pdf')
+        run("nonfree/cmp_itext_logo.pdf")
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@ -4,8 +4,13 @@ import pytest
 from helpers import absolute_sample_path
 from pdfminer.layout import LTComponent
-from pdfminer.utils import open_filename, Plane, shorten_str, \
-    format_int_roman, format_int_alpha
+from pdfminer.utils import (
+    open_filename,
+    Plane,
+    shorten_str,
+    format_int_roman,
+    format_int_alpha,
+)
 class TestOpenFilename:
@ -48,14 +53,12 @@ class TestPlane:
        assert result == [obj]
    def test_find_if_object_is_smaller_than_gridsize(self):
-        plane, obj = self.given_plane_with_one_object(object_size=1,
-                                                      gridsize=100)
+        plane, obj = self.given_plane_with_one_object(object_size=1, gridsize=100)
        result = list(plane.find((0, 0, 100, 100)))
        assert result == [obj]
    def test_find_object_if_much_larger_than_gridsize(self):
-        plane, obj = self.given_plane_with_one_object(object_size=100,
-                                                      gridsize=10)
+        plane, obj = self.given_plane_with_one_object(object_size=100, gridsize=10)
        result = list(plane.find((0, 0, 100, 100)))
        assert result == [obj]
@ -70,43 +73,43 @@ class TestPlane:
 class TestFunctions(object):
    def test_shorten_str(self):
-        s = shorten_str('Hello there World', 15)
+        s = shorten_str("Hello there World", 15)
-        assert s == 'Hello ... World'
+        assert s == "Hello ... World"
    def test_shorten_short_str_is_same(self):
-        s = 'Hello World'
+        s = "Hello World"
        assert shorten_str(s, 50) == s
    def test_shorten_to_really_short(self):
-        assert shorten_str('Hello World', 5) == 'Hello'
+        assert shorten_str("Hello World", 5) == "Hello"
    def test_format_int_alpha(self):
-        assert format_int_alpha(1) == 'a'
+        assert format_int_alpha(1) == "a"
-        assert format_int_alpha(2) == 'b'
+        assert format_int_alpha(2) == "b"
-        assert format_int_alpha(26) == 'z'
+        assert format_int_alpha(26) == "z"
-        assert format_int_alpha(27) == 'aa'
+        assert format_int_alpha(27) == "aa"
-        assert format_int_alpha(28) == 'ab'
+        assert format_int_alpha(28) == "ab"
-        assert format_int_alpha(26 * 2) == 'az'
+        assert format_int_alpha(26 * 2) == "az"
-        assert format_int_alpha(26 * 2 + 1) == 'ba'
+        assert format_int_alpha(26 * 2 + 1) == "ba"
-        assert format_int_alpha(26 * 27) == 'zz'
+        assert format_int_alpha(26 * 27) == "zz"
-        assert format_int_alpha(26 * 27 + 1) == 'aaa'
+        assert format_int_alpha(26 * 27 + 1) == "aaa"
    def test_format_int_roman(self):
-        assert format_int_roman(1) == 'i'
+        assert format_int_roman(1) == "i"
-        assert format_int_roman(2) == 'ii'
+        assert format_int_roman(2) == "ii"
-        assert format_int_roman(3) == 'iii'
+        assert format_int_roman(3) == "iii"
-        assert format_int_roman(4) == 'iv'
+        assert format_int_roman(4) == "iv"
-        assert format_int_roman(5) == 'v'
+        assert format_int_roman(5) == "v"
-        assert format_int_roman(6) == 'vi'
+        assert format_int_roman(6) == "vi"
-        assert format_int_roman(7) == 'vii'
+        assert format_int_roman(7) == "vii"
-        assert format_int_roman(8) == 'viii'
+        assert format_int_roman(8) == "viii"
-        assert format_int_roman(9) == 'ix'
+        assert format_int_roman(9) == "ix"
-        assert format_int_roman(10) == 'x'
+        assert format_int_roman(10) == "x"
-        assert format_int_roman(11) == 'xi'
+        assert format_int_roman(11) == "xi"
-        assert format_int_roman(20) == 'xx'
+        assert format_int_roman(20) == "xx"
-        assert format_int_roman(40) == 'xl'
+        assert format_int_roman(40) == "xl"
-        assert format_int_roman(45) == 'xlv'
+        assert format_int_roman(45) == "xlv"
-        assert format_int_roman(50) == 'l'
+        assert format_int_roman(50) == "l"
-        assert format_int_roman(90) == 'xc'
+        assert format_int_roman(90) == "xc"
-        assert format_int_roman(91) == 'xci'
+        assert format_int_roman(91) == "xci"
-        assert format_int_roman(100) == 'c'
+        assert format_int_roman(100) == "c"
--- a/tools/conv_afm.py
+++ b/tools/conv_afm.py
@ -7,39 +7,38 @@ import fileinput
 def main(argv):
    fonts = {}
    for line in fileinput.input():
-        f = line.strip().split(' ')
+        f = line.strip().split(" ")
        if not f:
            continue
        k = f[0]
-        if k == 'FontName':
+        if k == "FontName":
            fontname = f[1]
-            props = {'FontName': fontname, 'Flags': 0}
+            props = {"FontName": fontname, "Flags": 0}
            chars = {}
            fonts[fontname] = (props, chars)
-        elif k == 'C':
+        elif k == "C":
            cid = int(f[1])
            if 0 <= cid and cid <= 255:
                width = int(f[4])
                chars[cid] = width
-        elif k in ('CapHeight', 'XHeight', 'ItalicAngle',
-                   'Ascender', 'Descender'):
-            k = {'Ascender': 'Ascent', 'Descender': 'Descent'}.get(k, k)
+        elif k in ("CapHeight", "XHeight", "ItalicAngle", "Ascender", "Descender"):
+            k = {"Ascender": "Ascent", "Descender": "Descent"}.get(k, k)
            props[k] = float(f[1])
-        elif k in ('FontName', 'FamilyName', 'Weight'):
+        elif k in ("FontName", "FamilyName", "Weight"):
-            k = {'FamilyName': 'FontFamily', 'Weight': 'FontWeight'}.get(k, k)
+            k = {"FamilyName": "FontFamily", "Weight": "FontWeight"}.get(k, k)
            props[k] = f[1]
-        elif k == 'IsFixedPitch':
+        elif k == "IsFixedPitch":
-            if f[1].lower() == 'true':
+            if f[1].lower() == "true":
-                props['Flags'] = 64
+                props["Flags"] = 64
-        elif k == 'FontBBox':
+        elif k == "FontBBox":
            props[k] = tuple(map(float, f[1:5]))
-    print('# -*- python -*-')
+    print("# -*- python -*-")
-    print('FONT_METRICS = {')
+    print("FONT_METRICS = {")
    for (fontname, (props, chars)) in fonts.items():
-        print(' {!r}: {!r},'.format(fontname, (props, chars)))
+        print(" {!r}: {!r},".format(fontname, (props, chars)))
-    print('}')
+    print("}")
    return 0
-if __name__ == '__main__':
+if __name__ == "__main__":
    sys.exit(main(sys.argv))  # type: ignore[no-untyped-call]
--- a/tools/conv_cmap.py
+++ b/tools/conv_cmap.py
@ -6,7 +6,6 @@ import codecs
 class CMapConverter:
    def __init__(self, enc2codec={}):
        self.enc2codec = enc2codec
        self.code2cid = {}  # {'cmapname': ...}
@ -19,12 +18,12 @@ class CMapConverter:
        return self.code2cid.keys()
    def get_maps(self, enc):
-        if enc.endswith('-H'):
+        if enc.endswith("-H"):
            (hmapenc, vmapenc) = (enc, None)
-        elif enc == 'H':
+        elif enc == "H":
-            (hmapenc, vmapenc) = ('H', 'V')
+            (hmapenc, vmapenc) = ("H", "V")
        else:
-            (hmapenc, vmapenc) = (enc+'-H', enc+'-V')
+            (hmapenc, vmapenc) = (enc + "-H", enc + "-V")
        if hmapenc in self.code2cid:
            hmap = self.code2cid[hmapenc]
        else:
@ -43,12 +42,12 @@ class CMapConverter:
    def load(self, fp):
        encs = None
        for line in fp:
-            (line, _, _) = line.strip().partition('#')
+            (line, _, _) = line.strip().partition("#")
            if not line:
                continue
-            values = line.split('\t')
+            values = line.split("\t")
            if encs is None:
-                assert values[0] == 'CID', str(values)
+                assert values[0] == "CID", str(values)
                encs = values
                continue
@ -68,7 +67,7 @@ class CMapConverter:
            def add(unimap, enc, code):
                try:
                    codec = self.enc2codec[enc]
-                    c = code.decode(codec, 'strict')
+                    c = code.decode(codec, "strict")
                    if len(c) == 1:
                        if c not in unimap:
                            unimap[c] = 0
@ -89,20 +88,20 @@ class CMapConverter:
            unimap_h = {}
            unimap_v = {}
            for (enc, value) in zip(encs, values):
-                if enc == 'CID':
+                if enc == "CID":
                    continue
-                if value == '*':
+                if value == "*":
                    continue
                # hcodes, vcodes: encoded bytes for each writing mode.
                hcodes = []
                vcodes = []
-                for code in value.split(','):
+                for code in value.split(","):
-                    vertical = code.endswith('v')
+                    vertical = code.endswith("v")
                    if vertical:
                        code = code[:-1]
                    try:
-                        code = codecs.decode(code, 'hex_codec')
+                        code = codecs.decode(code, "hex_codec")
                    except Exception:
                        code = chr(int(code, 16))
                    if vertical:
@ -155,17 +154,19 @@ def main(argv):
    import os.path
    def usage():
-        print('usage: %s [-c enc=codec] output_dir regname [cid2code.txt ...]'
-              % argv[0])
+        print(
+            "usage: %s [-c enc=codec] output_dir regname [cid2code.txt ...]" % argv[0]
+        )
        return 100
    try:
-        (opts, args) = getopt.getopt(argv[1:], 'c:')
+        (opts, args) = getopt.getopt(argv[1:], "c:")
    except getopt.GetoptError:
        return usage()
    enc2codec = {}
    for (k, v) in opts:
-        if k == '-c':
+        if k == "-c":
-            (enc, _, codec) = v.partition('=')
+            (enc, _, codec) = v.partition("=")
            enc2codec[enc] = codec
    if not args:
        return usage()
@ -176,27 +177,27 @@ def main(argv):
    converter = CMapConverter(enc2codec)
    for path in args:
-        print('reading: %r...' % path)
+        print("reading: %r..." % path)
        fp = open(path)
        converter.load(fp)
        fp.close()
    for enc in converter.get_encs():
-        fname = '%s.pickle.gz' % enc
+        fname = "%s.pickle.gz" % enc
        path = os.path.join(outdir, fname)
-        print('writing: %r...' % path)
+        print("writing: %r..." % path)
-        fp = gzip.open(path, 'wb')
+        fp = gzip.open(path, "wb")
        converter.dump_cmap(fp, enc)
        fp.close()
-    fname = 'to-unicode-%s.pickle.gz' % regname
+    fname = "to-unicode-%s.pickle.gz" % regname
    path = os.path.join(outdir, fname)
-    print('writing: %r...' % path)
+    print("writing: %r..." % path)
-    fp = gzip.open(path, 'wb')
+    fp = gzip.open(path, "wb")
    converter.dump_unicodemap(fp)
    fp.close()
    return
-if __name__ == '__main__':
+if __name__ == "__main__":
    sys.exit(main(sys.argv))  # type: ignore[no-untyped-call]
--- a/tools/conv_glyphlist.py
+++ b/tools/conv_glyphlist.py
@ -8,20 +8,19 @@ def main(argv):
    state = 0
    for line in fileinput.input():
        line = line.strip()
-        if not line or line.startswith('#'):
+        if not line or line.startswith("#"):
            if state == 1:
                state = 2
-                print('}\n')
+                print("}\n")
            print(line)
            continue
        if state == 0:
-            print('\nglyphname2unicode = {')
+            print("\nglyphname2unicode = {")
            state = 1
-        (name, x) = line.split(';')
+        (name, x) = line.split(";")
-        codes = x.split(' ')
+        codes = x.split(" ")
-        print(' {!r}: u\'{}\','
-              .format(name, ''.join('\\u%s' % code for code in codes)))
+        print(" {!r}: u'{}',".format(name, "".join("\\u%s" % code for code in codes)))
-if __name__ == '__main__':
+if __name__ == "__main__":
    sys.exit(main(sys.argv))  # type: ignore[no-untyped-call]
--- a/tools/dumppdf.py
+++ b/tools/dumppdf.py
@ -4,8 +4,7 @@ import logging
 import os.path
 import re
 import sys
-from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, \
-    Union, cast
+from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, Union, cast
 from argparse import ArgumentParser
 import pdfminer
@ -25,33 +24,33 @@ ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
 def escape(s: Union[str, bytes]) -> str:
    if isinstance(s, bytes):
-        us = str(s, 'latin-1')
+        us = str(s, "latin-1")
    else:
        us = s
-    return ESC_PAT.sub(lambda m: '&#%d;' % ord(m.group(0)), us)
+    return ESC_PAT.sub(lambda m: "&#%d;" % ord(m.group(0)), us)
 def dumpxml(out: TextIO, obj: object, codec: Optional[str] = None) -> None:
    if obj is None:
-        out.write('<null />')
+        out.write("<null />")
        return
    if isinstance(obj, dict):
        out.write('<dict size="%d">\n' % len(obj))
        for (k, v) in obj.items():
-            out.write('<key>%s</key>\n' % k)
+            out.write("<key>%s</key>\n" % k)
-            out.write('<value>')
+            out.write("<value>")
            dumpxml(out, v)
-            out.write('</value>\n')
+            out.write("</value>\n")
-        out.write('</dict>')
+        out.write("</dict>")
        return
    if isinstance(obj, list):
        out.write('<list size="%d">\n' % len(obj))
        for v in obj:
            dumpxml(out, v)
-            out.write('\n')
+            out.write("\n")
-        out.write('</list>')
+        out.write("</list>")
        return
    if isinstance(obj, (str, bytes)):
@ -59,21 +58,20 @@ def dumpxml(out: TextIO, obj: object, codec: Optional[str] = None) -> None:
        return
    if isinstance(obj, PDFStream):
-        if codec == 'raw':
+        if codec == "raw":
            # Bug: writing bytes to text I/O. This will raise TypeError.
            out.write(obj.get_rawdata())  # type: ignore [arg-type]
-        elif codec == 'binary':
+        elif codec == "binary":
            # Bug: writing bytes to text I/O. This will raise TypeError.
            out.write(obj.get_data())  # type: ignore [arg-type]
        else:
-            out.write('<stream>\n<props>\n')
+            out.write("<stream>\n<props>\n")
            dumpxml(out, obj.attrs)
-            out.write('\n</props>\n')
+            out.write("\n</props>\n")
-            if codec == 'text':
+            if codec == "text":
                data = obj.get_data()
-                out.write('<data size="%d">%s</data>\n'
-                          % (len(data), escape(data)))
-            out.write('</stream>')
+                out.write('<data size="%d">%s</data>\n' % (len(data), escape(data)))
+            out.write("</stream>")
        return
    if isinstance(obj, PDFObjRef):
@ -82,38 +80,36 @@ def dumpxml(out: TextIO, obj: object, codec: Optional[str] = None) -> None:
    if isinstance(obj, PSKeyword):
        # Likely bug: obj.name is bytes, not str
-        out.write('<keyword>%s</keyword>'
-                  % obj.name)  # type: ignore [str-bytes-safe]
+        out.write("<keyword>%s</keyword>" % obj.name)  # type: ignore [str-bytes-safe]
        return
    if isinstance(obj, PSLiteral):
        # Likely bug: obj.name may be bytes, not str
-        out.write('<literal>%s</literal>'
-                  % obj.name)  # type: ignore [str-bytes-safe]
+        out.write("<literal>%s</literal>" % obj.name)  # type: ignore [str-bytes-safe]
        return
    if isnumber(obj):
-        out.write('<number>%s</number>' % obj)
+        out.write("<number>%s</number>" % obj)
        return
    raise TypeError(obj)
 def dumptrailers(
-    out: TextIO,
-    doc: PDFDocument,
-    show_fallback_xref: bool = False
+    out: TextIO, doc: PDFDocument, show_fallback_xref: bool = False
 ) -> None:
    for xref in doc.xrefs:
        if not isinstance(xref, PDFXRefFallback) or show_fallback_xref:
-            out.write('<trailer>\n')
+            out.write("<trailer>\n")
            dumpxml(out, xref.get_trailer())
-            out.write('\n</trailer>\n\n')
+            out.write("\n</trailer>\n\n")
    no_xrefs = all(isinstance(xref, PDFXRefFallback) for xref in doc.xrefs)
    if no_xrefs and not show_fallback_xref:
-        msg = 'This PDF does not have an xref. Use --show-fallback-xref if ' \
-              'you want to display the content of a fallback xref that ' \
-              'contains all objects.'
+        msg = (
+            "This PDF does not have an xref. Use --show-fallback-xref if "
+            "you want to display the content of a fallback xref that "
+            "contains all objects."
+        )
        logger.warning(msg)
    return
@ -122,10 +118,10 @@ def dumpallobjs(
    out: TextIO,
    doc: PDFDocument,
    codec: Optional[str] = None,
-    show_fallback_xref: bool = False
+    show_fallback_xref: bool = False,
 ) -> None:
    visited = set()
-    out.write('<pdf>')
+    out.write("<pdf>")
    for xref in doc.xrefs:
        for objid in xref.get_objids():
            if objid in visited:
@ -137,11 +133,11 @@ def dumpallobjs(
                    continue
                out.write('<object id="%d">\n' % objid)
                dumpxml(out, obj, codec=codec)
-                out.write('\n</object>\n\n')
+                out.write("\n</object>\n\n")
            except PDFObjectNotFound as e:
-                print('not found: %r' % e)
+                print("not found: %r" % e)
    dumptrailers(out, doc, show_fallback_xref)
-    out.write('</pdf>')
+    out.write("</pdf>")
    return
@ -150,16 +146,18 @@ def dumpoutline(
    fname: str,
    objids: Any,
    pagenos: Container[int],
-    password: str = '',
+    password: str = "",
    dumpall: bool = False,
    codec: Optional[str] = None,
-    extractdir: Optional[str] = None
+    extractdir: Optional[str] = None,
 ) -> None:
-    fp = open(fname, 'rb')
+    fp = open(fname, "rb")
    parser = PDFParser(fp)
    doc = PDFDocument(parser, password)
-    pages = {page.pageid: pageno for (pageno, page)
-             in enumerate(PDFPage.create_pages(doc), 1)}
+    pages = {
+        page.pageid: pageno
+        for (pageno, page) in enumerate(PDFPage.create_pages(doc), 1)
+    }
    def resolve_dest(dest: object) -> Any:
        if isinstance(dest, (str, bytes)):
@ -167,14 +165,14 @@ def dumpoutline(
        elif isinstance(dest, PSLiteral):
            dest = resolve1(doc.get_dest(dest.name))
        if isinstance(dest, dict):
-            dest = dest['D']
+            dest = dest["D"]
        if isinstance(dest, PDFObjRef):
            dest = dest.resolve()
        return dest
    try:
        outlines = doc.get_outlines()
-        outfp.write('<outlines>\n')
+        outfp.write("<outlines>\n")
        for (level, title, dest, a, se) in outlines:
            pageno = None
            if dest:
@ -183,21 +181,20 @@ def dumpoutline(
            elif a:
                action = a
                if isinstance(action, dict):
-                    subtype = action.get('S')
+                    subtype = action.get("S")
-                    if subtype and repr(subtype) == '/\'GoTo\'' and action.get(
-                            'D'):
-                        dest = resolve_dest(action['D'])
+                    if subtype and repr(subtype) == "/'GoTo'" and action.get("D"):
+                        dest = resolve_dest(action["D"])
                        pageno = pages[dest[0].objid]
            s = escape(title)
            outfp.write('<outline level="{!r}" title="{}">\n'.format(level, s))
            if dest is not None:
-                outfp.write('<dest>')
+                outfp.write("<dest>")
                dumpxml(outfp, dest)
-                outfp.write('</dest>\n')
+                outfp.write("</dest>\n")
            if pageno is not None:
-                outfp.write('<pageno>%r</pageno>\n' % pageno)
+                outfp.write("<pageno>%r</pageno>\n" % pageno)
-            outfp.write('</outline>\n')
+            outfp.write("</outline>\n")
-        outfp.write('</outlines>\n')
+        outfp.write("</outlines>\n")
    except PDFNoOutlines:
        pass
    parser.close()
@ -205,43 +202,48 @@ def dumpoutline(
    return
-LITERAL_FILESPEC = LIT('Filespec')
+LITERAL_FILESPEC = LIT("Filespec")
-LITERAL_EMBEDDEDFILE = LIT('EmbeddedFile')
+LITERAL_EMBEDDEDFILE = LIT("EmbeddedFile")
 def extractembedded(fname: str, password: str, extractdir: str) -> None:
    def extract1(objid: int, obj: Dict[str, Any]) -> None:
-        filename = os.path.basename(obj.get('UF') or
-                                    cast(bytes, obj.get('F')).decode())
-        fileref = obj['EF'].get('UF') or obj['EF'].get('F')
+        filename = os.path.basename(obj.get("UF") or cast(bytes, obj.get("F")).decode())
+        fileref = obj["EF"].get("UF") or obj["EF"].get("F")
        fileobj = doc.getobj(fileref.objid)
        if not isinstance(fileobj, PDFStream):
-            error_msg = 'unable to process PDF: reference for %r is not a ' \
-                        'PDFStream' % filename
+            error_msg = (
+                "unable to process PDF: reference for %r is not a "
+                "PDFStream" % filename
+            )
            raise PDFValueError(error_msg)
-        if fileobj.get('Type') is not LITERAL_EMBEDDEDFILE:
+        if fileobj.get("Type") is not LITERAL_EMBEDDEDFILE:
            raise PDFValueError(
-                'unable to process PDF: reference for %r '
-                'is not an EmbeddedFile' % (filename))
-        path = os.path.join(extractdir, '%.6d-%s' % (objid, filename))
+                "unable to process PDF: reference for %r "
+                "is not an EmbeddedFile" % (filename)
+            )
+        path = os.path.join(extractdir, "%.6d-%s" % (objid, filename))
        if os.path.exists(path):
-            raise IOError('file exists: %r' % path)
+            raise IOError("file exists: %r" % path)
-        print('extracting: %r' % path)
+        print("extracting: %r" % path)
        os.makedirs(os.path.dirname(path), exist_ok=True)
-        out = open(path, 'wb')
+        out = open(path, "wb")
        out.write(fileobj.get_data())
        out.close()
        return
-    with open(fname, 'rb') as fp:
+    with open(fname, "rb") as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser, password)
        extracted_objids = set()
        for xref in doc.xrefs:
            for objid in xref.get_objids():
                obj = doc.getobj(objid)
-                if objid not in extracted_objids and isinstance(obj, dict) \
-                        and obj.get('Type') is LITERAL_FILESPEC:
+                if (
+                    objid not in extracted_objids
+                    and isinstance(obj, dict)
+                    and obj.get("Type") is LITERAL_FILESPEC
+                ):
                    extracted_objids.add(objid)
                    extract1(objid, obj)
    return
@ -252,13 +254,13 @@ def dumppdf(
    fname: str,
    objids: Iterable[int],
    pagenos: Container[int],
-    password: str = '',
+    password: str = "",
    dumpall: bool = False,
    codec: Optional[str] = None,
    extractdir: Optional[str] = None,
-    show_fallback_xref: bool = False
+    show_fallback_xref: bool = False,
 ) -> None:
-    fp = open(fname, 'rb')
+    fp = open(fname, "rb")
    parser = PDFParser(fp)
    doc = PDFDocument(parser, password)
    if objids:
@ -279,71 +281,125 @@ def dumppdf(
    if (not objids) and (not pagenos) and (not dumpall):
        dumptrailers(outfp, doc, show_fallback_xref)
    fp.close()
-    if codec not in ('raw', 'binary'):
+    if codec not in ("raw", "binary"):
-        outfp.write('\n')
+        outfp.write("\n")
    return
 def create_parser() -> ArgumentParser:
    parser = ArgumentParser(description=__doc__, add_help=True)
-    parser.add_argument('files', type=str, default=None, nargs='+',
-                        help='One or more paths to PDF files.')
+    parser.add_argument(
+        "files",
+        type=str,
+        default=None,
+        nargs="+",
+        help="One or more paths to PDF files.",
+    )
    parser.add_argument(
-        "--version", "-v", action="version",
-        version="pdfminer.six v{}".format(pdfminer.__version__))
+        "--version",
+        "-v",
+        action="version",
+        version="pdfminer.six v{}".format(pdfminer.__version__),
+    )
    parser.add_argument(
-        '--debug', '-d', default=False, action='store_true',
-        help='Use debug logging level.')
+        "--debug",
+        "-d",
+        default=False,
+        action="store_true",
+        help="Use debug logging level.",
+    )
    procedure_parser = parser.add_mutually_exclusive_group()
    procedure_parser.add_argument(
-        '--extract-toc', '-T', default=False, action='store_true',
-        help='Extract structure of outline')
+        "--extract-toc",
+        "-T",
+        default=False,
+        action="store_true",
+        help="Extract structure of outline",
+    )
    procedure_parser.add_argument(
-        '--extract-embedded', '-E', type=str,
+        "--extract-embedded", "-E", type=str, help="Extract embedded files"
-        help='Extract embedded files')
+    )
    parse_params = parser.add_argument_group(
-        'Parser', description='Used during PDF parsing')
+        "Parser", description="Used during PDF parsing"
+    )
    parse_params.add_argument(
-        '--page-numbers', type=int, default=None, nargs='+',
-        help='A space-seperated list of page numbers to parse.')
+        "--page-numbers",
+        type=int,
+        default=None,
+        nargs="+",
+        help="A space-seperated list of page numbers to parse.",
+    )
    parse_params.add_argument(
-        '--pagenos', '-p', type=str,
-        help='A comma-separated list of page numbers to parse. Included for '
-             'legacy applications, use --page-numbers for more idiomatic '
-             'argument entry.')
+        "--pagenos",
+        "-p",
+        type=str,
+        help="A comma-separated list of page numbers to parse. Included for "
+        "legacy applications, use --page-numbers for more idiomatic "
+        "argument entry.",
+    )
    parse_params.add_argument(
-        '--objects', '-i', type=str,
-        help='Comma separated list of object numbers to extract')
+        "--objects",
+        "-i",
+        type=str,
+        help="Comma separated list of object numbers to extract",
+    )
    parse_params.add_argument(
-        '--all', '-a', default=False, action='store_true',
-        help='If the structure of all objects should be extracted')
+        "--all",
+        "-a",
+        default=False,
+        action="store_true",
+        help="If the structure of all objects should be extracted",
+    )
    parse_params.add_argument(
-        '--show-fallback-xref', action='store_true',
-        help='Additionally show the fallback xref. Use this if the PDF '
-             'has zero or only invalid xref\'s. This setting is ignored if '
-             '--extract-toc or --extract-embedded is used.')
+        "--show-fallback-xref",
+        action="store_true",
+        help="Additionally show the fallback xref. Use this if the PDF "
+        "has zero or only invalid xref's. This setting is ignored if "
+        "--extract-toc or --extract-embedded is used.",
+    )
    parse_params.add_argument(
-        '--password', '-P', type=str, default='',
-        help='The password to use for decrypting PDF file.')
+        "--password",
+        "-P",
+        type=str,
+        default="",
+        help="The password to use for decrypting PDF file.",
+    )
    output_params = parser.add_argument_group(
-        'Output', description='Used during output generation.')
+        "Output", description="Used during output generation."
+    )
    output_params.add_argument(
-        '--outfile', '-o', type=str, default='-',
+        "--outfile",
+        "-o",
+        type=str,
+        default="-",
        help='Path to file where output is written. Or "-" (default) to '
-             'write to stdout.')
+        "write to stdout.",
+    )
    codec_parser = output_params.add_mutually_exclusive_group()
    codec_parser.add_argument(
-        '--raw-stream', '-r', default=False, action='store_true',
-        help='Write stream objects without encoding')
+        "--raw-stream",
+        "-r",
+        default=False,
+        action="store_true",
+        help="Write stream objects without encoding",
+    )
    )
    codec_parser.add_argument(
-        '--binary-stream', '-b', default=False, action='store_true',
-        help='Write stream objects with binary encoding')
+        "--binary-stream",
+        "-b",
+        default=False,
+        action="store_true",
+        help="Write stream objects with binary encoding",
+    )
    )
    codec_parser.add_argument(
-        '--text-stream', '-t', default=False, action='store_true',
-        help='Write stream objects as plain text')
+        "--text-stream",
+        "-t",
+        default=False,
+        action="store_true",
+        help="Write stream objects as plain text",
+    )
    )
    return parser
@ -355,53 +411,63 @@ def main(argv: Optional[List[str]] = None) -> None:
    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)
-    if args.outfile == '-':
+    if args.outfile == "-":
        outfp = sys.stdout
    else:
-        outfp = open(args.outfile, 'w')
+        outfp = open(args.outfile, "w")
    if args.objects:
-        objids = [int(x) for x in args.objects.split(',')]
+        objids = [int(x) for x in args.objects.split(",")]
    else:
        objids = []
    if args.page_numbers:
        pagenos = {x - 1 for x in args.page_numbers}
    elif args.pagenos:
-        pagenos = {int(x) - 1 for x in args.pagenos.split(',')}
+        pagenos = {int(x) - 1 for x in args.pagenos.split(",")}
    else:
        pagenos = set()
    password = args.password
    if args.raw_stream:
-        codec: Optional[str] = 'raw'
+        codec: Optional[str] = "raw"
    elif args.binary_stream:
-        codec = 'binary'
+        codec = "binary"
    elif args.text_stream:
-        codec = 'text'
+        codec = "text"
    else:
        codec = None
    for fname in args.files:
        if args.extract_toc:
            dumpoutline(
-                outfp, fname, objids, pagenos, password=password,
+                outfp,
-                dumpall=args.all, codec=codec, extractdir=None
+                fname,
                objids,
                pagenos,
                password=password,
                dumpall=args.all,
                codec=codec,
                extractdir=None,
            )
        elif args.extract_embedded:
-            extractembedded(
+            extractembedded(fname, password=password, extractdir=args.extract_embedded)
                fname, password=password, extractdir=args.extract_embedded
            )
        else:
            dumppdf(
-                outfp, fname, objids, pagenos, password=password,
+                outfp,
-                dumpall=args.all, codec=codec, extractdir=None,
+                fname,
-                show_fallback_xref=args.show_fallback_xref
+                objids,
                pagenos,
                password=password,
                dumpall=args.all,
                codec=codec,
                extractdir=None,
                show_fallback_xref=args.show_fallback_xref,
            )
    outfp.close()
-if __name__ == '__main__':
+if __name__ == "__main__":
    main()
--- a/tools/pdf2txt.py
+++ b/tools/pdf2txt.py
@ -12,10 +12,7 @@ from pdfminer.utils import AnyIO
 logging.basicConfig()
-OUTPUT_TYPES = ((".htm", "html"),
+OUTPUT_TYPES = ((".htm", "html"), (".html", "html"), (".xml", "xml"), (".tag", "tag"))
                (".html", "html"),
                (".xml", "xml"),
                (".tag", "tag"))
 def float_or_disabled(x: str) -> Optional[float]:
@ -29,17 +26,17 @@ def float_or_disabled(x: str) -> Optional[float]:
 def extract_text(
    files: Iterable[str] = [],
-    outfile: str = '-',
+    outfile: str = "-",
    laparams: Optional[LAParams] = None,
-    output_type: str = 'text',
+    output_type: str = "text",
-    codec: str = 'utf-8',
+    codec: str = "utf-8",
    strip_control: bool = False,
    maxpages: int = 0,
    page_numbers: Optional[Container[int]] = None,
    password: str = "",
    scale: float = 1.0,
    rotation: int = 0,
-    layoutmode: str = 'normal',
+    layoutmode: str = "normal",
    output_dir: Optional[str] = None,
    debug: bool = False,
    disable_caching: bool = False,
@ -56,7 +53,7 @@ def extract_text(
    if outfile == "-":
        outfp: AnyIO = sys.stdout
        if sys.stdout.encoding is not None:
-            codec = 'utf-8'
+            codec = "utf-8"
    else:
        outfp = open(outfile, "wb")
@ -69,118 +66,211 @@ def extract_text(
 def parse_args(args: Optional[List[str]]) -> argparse.Namespace:
    parser = argparse.ArgumentParser(description=__doc__, add_help=True)
    parser.add_argument(
-        "files", type=str, default=None, nargs="+",
+        "files",
-        help="One or more paths to PDF files.")
+        type=str,
        default=None,
        nargs="+",
        help="One or more paths to PDF files.",
    )
    parser.add_argument(
-        "--version", "-v", action="version",
+        "--version",
-        version="pdfminer.six v{}".format(pdfminer.__version__))
+        "-v",
        action="version",
        version="pdfminer.six v{}".format(pdfminer.__version__),
    )
    parser.add_argument(
-        "--debug", "-d", default=False, action="store_true",
+        "--debug",
-        help="Use debug logging level.")
+        "-d",
        default=False,
        action="store_true",
        help="Use debug logging level.",
    )
    parser.add_argument(
-        "--disable-caching", "-C", default=False, action="store_true",
+        "--disable-caching",
-        help="If caching or resources, such as fonts, should be disabled.")
+        "-C",
        default=False,
        action="store_true",
        help="If caching or resources, such as fonts, should be disabled.",
    )
    parse_params = parser.add_argument_group(
-        'Parser', description='Used during PDF parsing')
+        "Parser", description="Used during PDF parsing"
    )
    parse_params.add_argument(
-        "--page-numbers", type=int, default=None, nargs="+",
+        "--page-numbers",
-        help="A space-seperated list of page numbers to parse.")
+        type=int,
        default=None,
        nargs="+",
        help="A space-seperated list of page numbers to parse.",
    )
    parse_params.add_argument(
-        "--pagenos", "-p", type=str,
+        "--pagenos",
        "-p",
        type=str,
        help="A comma-separated list of page numbers to parse. "
-             "Included for legacy applications, use --page-numbers "
+        "Included for legacy applications, use --page-numbers "
-             "for more idiomatic argument entry.")
+        "for more idiomatic argument entry.",
    )
    parse_params.add_argument(
-        "--maxpages", "-m", type=int, default=0,
+        "--maxpages",
-        help="The maximum number of pages to parse.")
+        "-m",
        type=int,
        default=0,
        help="The maximum number of pages to parse.",
    )
    parse_params.add_argument(
-        "--password", "-P", type=str, default="",
+        "--password",
-        help="The password to use for decrypting PDF file.")
+        "-P",
        type=str,
        default="",
        help="The password to use for decrypting PDF file.",
    )
    parse_params.add_argument(
-        "--rotation", "-R", default=0, type=int,
+        "--rotation",
        "-R",
        default=0,
        type=int,
        help="The number of degrees to rotate the PDF "
-             "before other types of processing.")
+        "before other types of processing.",
    )
    la_params = LAParams()  # will be used for defaults
    la_param_group = parser.add_argument_group(
-        'Layout analysis', description='Used during layout analysis.')
+        "Layout analysis", description="Used during layout analysis."
    )
    la_param_group.add_argument(
-        "--no-laparams", "-n", default=False, action="store_true",
+        "--no-laparams",
-        help="If layout analysis parameters should be ignored.")
+        "-n",
-    la_param_group.add_argument(
+        default=False,
        "--detect-vertical", "-V", default=la_params.detect_vertical,
        action="store_true",
-        help="If vertical text should be considered during layout analysis")
+        help="If layout analysis parameters should be ignored.",
    )
    la_param_group.add_argument(
-        "--line-overlap", type=float, default=la_params.line_overlap,
+        "--detect-vertical",
-        help='If two characters have more overlap than this they '
+        "-V",
-             'are considered to be on the same line. The overlap is specified '
+        default=la_params.detect_vertical,
-             'relative to the minimum height of both characters.')
+        action="store_true",
        help="If vertical text should be considered during layout analysis",
    )
    la_param_group.add_argument(
-        "--char-margin", "-M", type=float, default=la_params.char_margin,
+        "--line-overlap",
        type=float,
        default=la_params.line_overlap,
        help="If two characters have more overlap than this they "
        "are considered to be on the same line. The overlap is specified "
        "relative to the minimum height of both characters.",
    )
    la_param_group.add_argument(
        "--char-margin",
        "-M",
        type=float,
        default=la_params.char_margin,
        help="If two characters are closer together than this margin they "
-             "are considered to be part of the same line. The margin is "
+        "are considered to be part of the same line. The margin is "
-             "specified relative to the width of the character.")
+        "specified relative to the width of the character.",
    )
    la_param_group.add_argument(
-        "--word-margin", "-W", type=float, default=la_params.word_margin,
+        "--word-margin",
        "-W",
        type=float,
        default=la_params.word_margin,
        help="If two characters on the same line are further apart than this "
-             "margin then they are considered to be two separate words, and "
+        "margin then they are considered to be two separate words, and "
-             "an intermediate space will be added for readability. The margin "
+        "an intermediate space will be added for readability. The margin "
-             "is specified relative to the width of the character.")
+        "is specified relative to the width of the character.",
    )
    la_param_group.add_argument(
-        "--line-margin", "-L", type=float, default=la_params.line_margin,
+        "--line-margin",
        "-L",
        type=float,
        default=la_params.line_margin,
        help="If two lines are close together they are considered to "
-             "be part of the same paragraph. The margin is specified "
+        "be part of the same paragraph. The margin is specified "
-             "relative to the height of a line.")
+        "relative to the height of a line.",
    )
    la_param_group.add_argument(
-        "--boxes-flow", "-F", type=float_or_disabled,
+        "--boxes-flow",
        "-F",
        type=float_or_disabled,
        default=la_params.boxes_flow,
        help="Specifies how much a horizontal and vertical position of a "
-             "text matters when determining the order of lines. The value "
+        "text matters when determining the order of lines. The value "
-             "should be within the range of -1.0 (only horizontal position "
+        "should be within the range of -1.0 (only horizontal position "
-             "matters) to +1.0 (only vertical position matters). You can also "
+        "matters) to +1.0 (only vertical position matters). You can also "
-             "pass `disabled` to disable advanced layout analysis, and "
+        "pass `disabled` to disable advanced layout analysis, and "
-             "instead return text based on the position of the bottom left "
+        "instead return text based on the position of the bottom left "
-             "corner of the text box.")
+        "corner of the text box.",
    )
    la_param_group.add_argument(
-        "--all-texts", "-A", default=la_params.all_texts, action="store_true",
+        "--all-texts",
-        help="If layout analysis should be performed on text in figures.")
+        "-A",
        default=la_params.all_texts,
        action="store_true",
        help="If layout analysis should be performed on text in figures.",
    )
    output_params = parser.add_argument_group(
-        'Output', description='Used during output generation.')
+        "Output", description="Used during output generation."
    )
    output_params.add_argument(
-        "--outfile", "-o", type=str, default="-",
+        "--outfile",
        "-o",
        type=str,
        default="-",
        help="Path to file where output is written. "
-             "Or \"-\" (default) to write to stdout.")
+        'Or "-" (default) to write to stdout.',
    )
    output_params.add_argument(
-        "--output_type", "-t", type=str, default="text",
+        "--output_type",
-        help="Type of output to generate {text,html,xml,tag}.")
+        "-t",
        type=str,
        default="text",
        help="Type of output to generate {text,html,xml,tag}.",
    )
    output_params.add_argument(
-        "--codec", "-c", type=str, default="utf-8",
+        "--codec",
-        help="Text encoding to use in output file.")
+        "-c",
        type=str,
        default="utf-8",
        help="Text encoding to use in output file.",
    )
    output_params.add_argument(
-        "--output-dir", "-O", default=None,
+        "--output-dir",
        "-O",
        default=None,
        help="The output directory to put extracted images in. If not given, "
-             "images are not extracted.")
+        "images are not extracted.",
    )
    output_params.add_argument(
-        "--layoutmode", "-Y", default="normal",
+        "--layoutmode",
-        type=str, help="Type of layout to use when generating html "
+        "-Y",
-                       "{normal,exact,loose}. If normal,each line is"
+        default="normal",
-                       " positioned separately in the html. If exact"
+        type=str,
-                       ", each character is positioned separately in"
+        help="Type of layout to use when generating html "
-                       " the html. If loose, same result as normal "
+        "{normal,exact,loose}. If normal,each line is"
-                       "but with an additional newline after each "
+        " positioned separately in the html. If exact"
-                       "text line. Only used when output_type is html.")
+        ", each character is positioned separately in"
        " the html. If loose, same result as normal "
        "but with an additional newline after each "
        "text line. Only used when output_type is html.",
    )
    output_params.add_argument(
-        "--scale", "-s", type=float, default=1.0,
+        "--scale",
        "-s",
        type=float,
        default=1.0,
        help="The amount of zoom to use when generating html file. "
-             "Only used when output_type is html.")
+        "Only used when output_type is html.",
    )
    output_params.add_argument(
-        "--strip-control", "-S", default=False, action="store_true",
+        "--strip-control",
        "-S",
        default=False,
        action="store_true",
        help="Remove control statement from text. "
-             "Only used when output_type is xml.")
+        "Only used when output_type is xml.",
    )
    parsed_args = parser.parse_args(args=args)
@ -199,13 +289,10 @@ def parse_args(args: Optional[List[str]]) -> argparse.Namespace:
        )
    if parsed_args.page_numbers:
-        parsed_args.page_numbers = {x-1 for x in parsed_args.page_numbers}
+        parsed_args.page_numbers = {x - 1 for x in parsed_args.page_numbers}
    if parsed_args.pagenos:
-        parsed_args.page_numbers = {
+        parsed_args.page_numbers = {int(x) - 1 for x in parsed_args.pagenos.split(",")}
            int(x) - 1
            for x in parsed_args.pagenos.split(",")
        }
    if parsed_args.output_type == "text" and parsed_args.outfile != "-":
        for override, alttype in OUTPUT_TYPES:
@ -222,5 +309,5 @@ def main(args: Optional[List[str]] = None) -> int:
    return 0
-if __name__ == '__main__':
+if __name__ == "__main__":
    sys.exit(main())
--- a/tools/pdfdiff.py
+++ b/tools/pdfdiff.py
@ -21,14 +21,20 @@ def compare(file1: str, file2: str, **kwargs: Any) -> Iterable[str]:
    # If any LAParams group arguments were passed,
    # create an LAParams object and
    # populate with given args. Otherwise, set it to None.
-    if kwargs.get('laparams', None) is None:
+    if kwargs.get("laparams", None) is None:
        laparams = layout.LAParams()
-        for param in ("all_texts", "detect_vertical", "word_margin",
+        for param in (
-                      "char_margin", "line_margin", "boxes_flow"):
+            "all_texts",
            "detect_vertical",
            "word_margin",
            "char_margin",
            "line_margin",
            "boxes_flow",
        ):
            paramv = kwargs.get(param, None)
            if paramv is not None:
                setattr(laparams, param, paramv)
-        kwargs['laparams'] = laparams
+        kwargs["laparams"] = laparams
    s1 = io.StringIO()
    with open(file1, "rb") as fp:
@ -39,81 +45,140 @@ def compare(file1: str, file2: str, **kwargs: Any) -> Iterable[str]:
        high_level.extract_text_to_fp(fp, s2, **kwargs)
    import difflib
    s1.seek(0)
    s2.seek(0)
    s1_lines, s2_lines = s1.readlines(), s2.readlines()
    import os.path
    try:
-        extension = os.path.splitext(kwargs['outfile'])[1][1:4]
+        extension = os.path.splitext(kwargs["outfile"])[1][1:4]
-        if extension.lower() == 'htm':
+        if extension.lower() == "htm":
            return difflib.HtmlDiff().make_file(s1_lines, s2_lines)
    except KeyError:
        pass
-    return difflib.unified_diff(s1_lines, s2_lines, n=kwargs['context_lines'])
+    return difflib.unified_diff(s1_lines, s2_lines, n=kwargs["context_lines"])
 # main
 def main(args: Optional[List[str]] = None) -> int:
    import argparse
    P = argparse.ArgumentParser(description=__doc__)
    P.add_argument("file1", type=str, default=None, help="File 1 to compare.")
    P.add_argument("file2", type=str, default=None, help="File 2 to compare.")
-    P.add_argument("-o", "--outfile", type=str, default="-",
+    P.add_argument(
-                   help="Output file(default/'-' is stdout) if .htm or .html,"
+        "-o",
-                        " create an HTML table (or a complete HTML file "
+        "--outfile",
-                        "containing the table) showing a side by side, "
+        type=str,
-                        "line by line comparison of text with inter-line and "
+        default="-",
-                        "intra-line change  highlights. The table can be "
+        help="Output file(default/'-' is stdout) if .htm or .html,"
-                        "generated in either full or "
+        " create an HTML table (or a complete HTML file "
-                        "contextual difference mode.")
+        "containing the table) showing a side by side, "
-    P.add_argument("-N", "--context-lines", default=3, type=int,
+        "line by line comparison of text with inter-line and "
-                   help="context lines shown")
+        "intra-line change  highlights. The table can be "
-    P.add_argument("-d", "--debug", default=False, action="store_true",
+        "generated in either full or "
-                   help="Debug output.")
+        "contextual difference mode.",
    )
    P.add_argument(
        "-N", "--context-lines", default=3, type=int, help="context lines shown"
    )
    P.add_argument(
        "-d", "--debug", default=False, action="store_true", help="Debug output."
    )
    # params for pdf2txt
-    P.add_argument("-p", "--pagenos", type=str,
+    P.add_argument(
-                   help="Comma-separated list of page numbers to parse. "
+        "-p",
-                        "Included for legacy applications, "
+        "--pagenos",
-                        "use --page-numbers for more "
+        type=str,
-                        "idiomatic argument entry.")
+        help="Comma-separated list of page numbers to parse. "
-    P.add_argument("--page-numbers", type=int, default=None, nargs="+",
+        "Included for legacy applications, "
-                   help="Alternative to --pagenos with space-separated "
+        "use --page-numbers for more "
-                        "numbers; supercedes --pagenos where it is used.")
+        "idiomatic argument entry.",
-    P.add_argument("-m", "--maxpages", type=int, default=0,
+    )
-                   help="Maximum pages to parse")
+    P.add_argument(
-    P.add_argument("-P", "--password", type=str, default="",
+        "--page-numbers",
-                   help="Decryption password for both PDFs")
+        type=int,
-    P.add_argument("-t", "--output_type", type=str, default="text",
+        default=None,
-                   help="pdf2txt type: text|html|xml|tag (default is text)")
+        nargs="+",
-    P.add_argument("-c", "--codec", type=str, default="utf-8",
+        help="Alternative to --pagenos with space-separated "
-                   help="Text encoding")
+        "numbers; supercedes --pagenos where it is used.",
    )
    P.add_argument(
        "-m", "--maxpages", type=int, default=0, help="Maximum pages to parse"
    )
    P.add_argument(
        "-P",
        "--password",
        type=str,
        default="",
        help="Decryption password for both PDFs",
    )
    P.add_argument(
        "-t",
        "--output_type",
        type=str,
        default="text",
        help="pdf2txt type: text|html|xml|tag (default is text)",
    )
    P.add_argument("-c", "--codec", type=str, default="utf-8", help="Text encoding")
    P.add_argument("-s", "--scale", type=float, default=1.0, help="Scale")
-    P.add_argument("-A", "--all-texts", default=None, action="store_true",
+    P.add_argument(
-                   help="LAParams all texts")
+        "-A",
-    P.add_argument("-V", "--detect-vertical", default=None,
+        "--all-texts",
-                   action="store_true", help="LAParams detect vertical")
+        default=None,
-    P.add_argument("-W", "--word-margin", type=float, default=None,
+        action="store_true",
-                   help="LAParams word margin")
+        help="LAParams all texts",
-    P.add_argument("-M", "--char-margin", type=float, default=None,
+    )
-                   help="LAParams char margin")
+    P.add_argument(
-    P.add_argument("-L", "--line-margin", type=float, default=None,
+        "-V",
-                   help="LAParams line margin")
+        "--detect-vertical",
-    P.add_argument("-F", "--boxes-flow", type=float, default=None,
+        default=None,
-                   help="LAParams boxes flow")
+        action="store_true",
-    P.add_argument("-Y", "--layoutmode", default="normal", type=str,
+        help="LAParams detect vertical",
-                   help="HTML Layout Mode")
+    )
-    P.add_argument("-n", "--no-laparams", default=False,
+    P.add_argument(
-                   action="store_true", help="Pass None as LAParams")
+        "-W", "--word-margin", type=float, default=None, help="LAParams word margin"
-    P.add_argument("-R", "--rotation", default=0, type=int,
+    )
-                   help="Rotation")
+    P.add_argument(
-    P.add_argument("-O", "--output-dir", default=None,
+        "-M", "--char-margin", type=float, default=None, help="LAParams char margin"
-                   help="Output directory for images")
+    )
-    P.add_argument("-C", "--disable-caching", default=False,
+    P.add_argument(
-                   action="store_true", help="Disable caching")
+        "-L", "--line-margin", type=float, default=None, help="LAParams line margin"
-    P.add_argument("-S", "--strip-control", default=False,
+    )
-                   action="store_true", help="Strip control in XML mode")
+    P.add_argument(
        "-F", "--boxes-flow", type=float, default=None, help="LAParams boxes flow"
    )
    P.add_argument(
        "-Y", "--layoutmode", default="normal", type=str, help="HTML Layout Mode"
    )
    P.add_argument(
        "-n",
        "--no-laparams",
        default=False,
        action="store_true",
        help="Pass None as LAParams",
    )
    P.add_argument("-R", "--rotation", default=0, type=int, help="Rotation")
    P.add_argument(
        "-O", "--output-dir", default=None, help="Output directory for images"
    )
    P.add_argument(
        "-C",
        "--disable-caching",
        default=False,
        action="store_true",
        help="Disable caching",
    )
    P.add_argument(
        "-S",
        "--strip-control",
        default=False,
        action="store_true",
        help="Strip control in XML mode",
    )
    A = P.parse_args(args=args)
@ -121,26 +186,28 @@ def main(args: Optional[List[str]] = None) -> int:
        logging.getLogger().setLevel(logging.DEBUG)
    if A.page_numbers:
-        A.page_numbers = {x-1 for x in A.page_numbers}
+        A.page_numbers = {x - 1 for x in A.page_numbers}
    if A.pagenos:
-        A.page_numbers = {int(x)-1 for x in A.pagenos.split(",")}
+        A.page_numbers = {int(x) - 1 for x in A.pagenos.split(",")}
    if A.output_type == "text" and A.outfile != "-":
-        for override, alttype in ((".htm",  "html"),
+        for override, alttype in (
-                                  (".html", "html"),
+            (".htm", "html"),
-                                  (".xml",  "xml"),
+            (".html", "html"),
-                                  (".tag",  "tag")):
+            (".xml", "xml"),
            (".tag", "tag"),
        ):
            if A.outfile.endswith(override):
                A.output_type = alttype
    if A.outfile == "-":
        outfp = sys.stdout
    else:
-        outfp = open(A.outfile, "w", encoding='utf-8')
+        outfp = open(A.outfile, "w", encoding="utf-8")
    outfp.writelines(compare(**vars(A)))
    outfp.close()
    return 0
-if __name__ == '__main__':
+if __name__ == "__main__":
    sys.exit(main())
--- a/tools/pdfstats.py
+++ b/tools/pdfstats.py
@ -21,7 +21,7 @@ _, SCRIPT = os.path.split(__file__)
 def msg(*args: object, **kwargs: Any) -> None:
-    print(' '.join(map(str, args)), **kwargs)  # noqa E999
+    print(" ".join(map(str, args)), **kwargs)  # noqa E999
 def flat_iter(obj: object) -> Iterator[object]:
@ -35,22 +35,22 @@ def main(args: List[str]) -> int:
    msg(SCRIPT, args)
    if len(args) != 1:
-        msg('Parse a PDF file and print some pdfminer-specific stats')
+        msg("Parse a PDF file and print some pdfminer-specific stats")
-        msg('Usage:', SCRIPT, '<PDF-filename>')
+        msg("Usage:", SCRIPT, "<PDF-filename>")
        return 1
-    infilename, = args
+    (infilename,) = args
    lt_types: Counter[str] = collections.Counter()
-    with open(infilename, 'rb') as pdf_file:
+    with open(infilename, "rb") as pdf_file:
        # Create a PDF parser object associated with the file object.
        parser = PDFParser(pdf_file)
        # Create a PDF document object that stores the document structure.
        # Supply the password for initialization.
-        password = ''
+        password = ""
        document = PDFDocument(parser, password)
        # Check if the document allows text extraction.
        if not document.is_extractable:
@ -63,7 +63,7 @@ def main(args: List[str]) -> int:
        laparams = LAParams(
            detect_vertical=True,
            all_texts=True,
-            )
+        )
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
@ -75,11 +75,11 @@ def main(args: List[str]) -> int:
            lt_types.update(type(item).__name__ for item in flat_iter(layout))
-    msg('page_count', page_count)
+    msg("page_count", page_count)
-    msg('lt_types:', ' '.join('{}:{}'.format(*tc) for tc in lt_types.items()))
+    msg("lt_types:", " ".join("{}:{}".format(*tc) for tc in lt_types.items()))
    return 0
-if __name__ == '__main__':
+if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
--- a/tools/prof.py
+++ b/tools/prof.py
@ -7,15 +7,16 @@ def prof_main(argv: List[str]) -> int:
    import hotshot.stats  # type: ignore[import]
    def usage() -> int:
-        print('usage: %s module.function [args ...]' % argv[0])
+        print("usage: %s module.function [args ...]" % argv[0])
        return 100
    args = argv[1:]
    if len(args) < 1:
        return usage()
    name = args.pop(0)
-    prof = name+'.prof'
+    prof = name + ".prof"
-    i = name.rindex('.')
+    i = name.rindex(".")
-    (modname, funcname) = (name[:i], name[i+1:])
+    (modname, funcname) = (name[:i], name[i + 1 :])
    # Type error: fromlist expects sequence of strings; presumably the intent
    # is to retrieve the named module rather than a top-level package (as in
@ -31,10 +32,10 @@ def prof_main(argv: List[str]) -> int:
    else:
        stats = hotshot.stats.load(prof)
        stats.strip_dirs()
-        stats.sort_stats('time', 'calls')
+        stats.sort_stats("time", "calls")
        stats.print_stats(1000)
    return 0
-if __name__ == '__main__':
+if __name__ == "__main__":
    sys.exit(prof_main(sys.argv))