Add type annotations (#661)

Squashed commit of the following: commit fa229f7b7591c07aea4e5a4545f9e0c34246e1cd Merge: eaab3c6 c3e3499 Author: Andrew Baumann <ab@ab.id.au> Date: Mon Sep 6 20:33:06 2021 -0700 Merge branch 'develop' into mypy (and fixed types) commit eaab3c65e2e3ab5f1f400cfc5186a3834c4ffe34 Author: Andrew Baumann <ab@ab.id.au> Date: Mon Sep 6 20:00:45 2021 -0700 reformat all multi-line function defs to one-arg-per-line commit 3fe2b69eed9197009d9da6776462f580ebf0dfa3 Author: Andrew Baumann <ab@ab.id.au> Date: Mon Sep 6 15:58:48 2021 -0700 ccitt nit -- avoid casting needlessly commit 15983d8c1e7162632fde43752c9d1c15938cd980 Author: Andrew Baumann <ab@ab.id.au> Date: Mon Sep 6 15:58:36 2021 -0700 tweak CHANGELOG commit 13dc0babf782938e7d5b5e482d4c5adf92d82702 Author: Andrew Baumann <ab@ab.id.au> Date: Mon Sep 6 15:43:46 2021 -0700 add failing tests for dumppdf crash commit 6b509c517876b8c15ac5a98a963884e23bd2e4d8 Author: Andrew Baumann <ab@ab.id.au> Date: Mon Sep 6 15:24:23 2021 -0700 ccitt: apply misc PR feedback commit feb031ba86d3f22e41cfbbda13f17c039359f1e6 Author: Andrew Baumann <ab@ab.id.au> Date: Mon Sep 6 15:18:26 2021 -0700 add missing None return type to all __init__ methods commit c0d62d6c54c7ec37b40bea54a3f6a7a618ec0ec6 Author: Andrew Baumann <ab@ab.id.au> Date: Mon Sep 6 15:13:08 2021 -0700 minor cleanup, remove a few more Any types commit b52a0594e1998a492c172538a9b35491c5fc5f52 Author: Andrew Baumann <ab@ab.id.au> Date: Sun Sep 5 22:37:28 2021 -0700 tighten up types, avoid Any in favour of explicit casts commit e58fd48bd14f31bebd2de8259f12630ac02756d6 Author: Andrew Baumann <ab@ab.id.au> Date: Sun Sep 5 14:10:49 2021 -0700 annotate ccitt.py, and fix one definite bug (array.tostring was renamed tobytes) commit 605290633e55595e5e0045840df5c5b1d9de843a Author: Andrew Baumann <ab@ab.id.au> Date: Sat Sep 4 22:37:38 2021 -0700 python 3.7 back-compat commit 4dbcf8760f8a1d3e3d99f085476f86e6a043c80c Author: Andrew Baumann <ab@ab.id.au> Date: Sat Sep 4 22:32:43 2021 -0700 
annotate pdfminer.jbig2 commit 0d40b7c03a8028dc44acd3f457eac71abd681827 Author: Andrew Baumann <ab@ab.id.au> Date: Sat Sep 4 22:31:33 2021 -0700 annotate pdf2txt.py commit 5f82eb4f5646b5d1285252689191e0a14557ec7b Author: Andrew Baumann <ab@ab.id.au> Date: Sat Sep 4 09:16:31 2021 -0700 cleanup: make Plane generic commit 624fc92b88473ff36a174760883f34c22109da2b Author: Andrew Baumann <ab@ab.id.au> Date: Fri Sep 3 23:16:51 2021 -0700 bluntly ignore calls to cryptography.hazmat commit 96b20439c169f40dbb114cabba6a582ad1ebe91e Author: Andrew Baumann <ab@ab.id.au> Date: Fri Sep 3 23:01:06 2021 -0700 finish annotating, and disallow_untyped_defs for pdfminer.* _except_ ccitt and jbig2 commit 0ab586347861b72b1d16880dc9293f9ad597e20a Author: Andrew Baumann <ab@ab.id.au> Date: Fri Sep 3 21:51:56 2021 -0700 annotate pdffont commit 4b689f1bcbdaf654feb9de81023e318ca310a12e Author: Andrew Baumann <ab@ab.id.au> Date: Fri Sep 3 18:30:02 2021 -0700 annotate a couple more scripts; document sketchy code commit 291981ff3d273952ec9c92ef8ab948473558b787 Author: Andrew Baumann <ab@ab.id.au> Date: Fri Sep 3 15:02:01 2021 -0700 pacify flake8 commit 45d2ce91ff333f3b7e34322b16e9c52b99b7a972 Author: Andrew Baumann <ab@ab.id.au> Date: Fri Sep 3 14:31:48 2021 -0700 annotate dumppdf, and comment likely bugs commit 7278d83851cb336a1be3803a0993b5ec0ad39b4c Author: Andrew Baumann <ab@ab.id.au> Date: Fri Sep 3 13:49:58 2021 -0700 enable mypy on tests and tools, fix one implicit reexport bug commit 4a83166ef4e4733cd2113f43188b585a4fda392b Author: Andrew Baumann <ab@ab.id.au> Date: Fri Sep 3 13:25:59 2021 -0700 pdfdocument: per dumppdf.py, get_dest accepts either bytes or str commit 43701e1bee068df98f378a253c9c2150ee4ad9f7 Author: Andrew Baumann <ab@ab.id.au> Date: Fri Sep 3 13:25:00 2021 -0700 layout: LAParams.boxes_flow may be None commit 164f81652f1788e74837466f0ab593e94079bc0f Author: Andrew Baumann <ab@ab.id.au> Date: Fri Sep 3 09:45:09 2021 -0700 add whitespace, pacify flake8 commit 
893b9fb9ec918032b36a30456fc0b7a217da86d8 Author: Andrew Baumann <ab@ab.id.au> Date: Fri Sep 3 09:40:33 2021 -0700 support old Python without typing.Protocol commit dc245084102b7b04c3f5599d75b5d62ba4290787 Author: Andrew Baumann <ab@ab.id.au> Date: Fri Sep 3 09:12:03 2021 -0700 Move "# type: ignore" comments to fix mypy on Python < 3.8 The placement of these comments got more flexible in 3.8 due to https://github.com/python/mypy/issues/1032 Satisfying older Python and fitting in flake8's 79-character line limit was quite a challenge! commit da03afe7bd2cf3336e611f467f1c901455940ae8 Author: Andrew Baumann <ab@ab.id.au> Date: Thu Sep 2 22:59:58 2021 -0700 fix text output from HTMLConverter commit 5401276a2ed3b74a385ebcab5152485224146161 Author: Andrew Baumann <ab@ab.id.au> Date: Thu Sep 2 22:40:22 2021 -0700 annotate high_level.py and the immediately-reachable internal APIs (mostly converters) commit cc490513f8f17a7adc0bcbab2e0e86f37e832300 Author: Andrew Baumann <ab@ab.id.au> Date: Thu Sep 2 17:04:35 2021 -0700 * expand and improve annotations in cmap, encryption/decompression and fonts * disallow untyped calls; this way, we have a core set of typed code that can grow over time (just not for ccitt, because there's a ton of work lurking there) * expand "typing: none" comments to suppress a specific error code commit 92df54ba1d53d5dbbd5442757dd85be5b1851f99 Author: Andrew Baumann <ab@ab.id.au> Date: Wed Sep 1 20:50:59 2021 -0700 update CHANGELOG commit f72aaead45d0615e472a9b3190c9551a6b67b36e Merge: ff787a9 8ea9f10 Author: Andrew Baumann <ab@ab.id.au> Date: Wed Sep 1 20:47:03 2021 -0700 Merge branch 'develop' into mypy commit ff787a93986c60361536a97182a41774f4a53ac3 Author: Andrew Baumann <ab@ab.id.au> Date: Sat Aug 21 21:46:14 2021 -0700 be more precise about types on ps/pdf stacks, remove most of the Any annotations commit be1550189e10717f6827dbb7009d6e8c8b3f4c62 Author: Andrew Baumann <ab@ab.id.au> Date: Sat Aug 21 10:13:58 2021 -0700 silence missing imports, 
(maybe?) hook to tox commit ff4b6a9bd46b352583d823d39065652c9a6f05f4 Author: Andrew Baumann <ab@ab.id.au> Date: Fri Aug 20 22:49:06 2021 -0700 turn on more strict checks, and untangle the layout mess with generics Status: $ mypy pdfminer pdfminer/ccitt.py:565: error: Cannot find implementation or library stub for module named "pygame" pdfminer/ccitt.py:565: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports pdfminer/pdfdocument.py:7: error: Skipping analyzing "cryptography.hazmat.backends": found module but no type hints or library stubs pdfminer/pdfdocument.py:8: error: Skipping analyzing "cryptography.hazmat.primitives.ciphers": found module but no type hints or library stubs pdfminer/pdfdevice.py:191: error: Argument 1 to "write" of "IO" has incompatible type "str"; expected "bytes" pdfminer/image.py:84: error: Cannot find implementation or library stub for module named "PIL" Found 5 errors in 4 files (checked 27 source files) pdfdevice.py:191 appears to be a real bug commit 5c9c0b19d26ae391aea0e69c2c819261cc04460c Author: Andrew Baumann <ab@ab.id.au> Date: Fri Aug 20 17:22:41 2021 -0700 finish annotating layout commit 0e6871c16abb29df2868ab145b4ce451b4b6c777 Author: Andrew Baumann <ab@ab.id.au> Date: Fri Aug 20 16:54:46 2021 -0700 general progress on annotations * finish utils * annotate more of pdfinterp, pdfdevice * document reason for # type: ignore comments * fix cyclic imports * satisfy flake8 commit 17d59f42917fbf9b2b2eb844d3e83a8f2a3f123a Author: Andrew Baumann <ab@ab.id.au> Date: Thu Aug 19 21:38:50 2021 -0700 WIP on type annotations With the possible exception of psparser.py, this is far from complete. 
$ mypy pdfminer pdfminer/ccitt.py:565: error: Cannot find implementation or library stub for module named "pygame" pdfminer/ccitt.py:565: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports pdfminer/pdfdocument.py:7: error: Skipping analyzing "cryptography.hazmat.backends": found module but no type hints or library stubs pdfminer/pdfdocument.py:8: error: Skipping analyzing "cryptography.hazmat.primitives.ciphers": found module but no type hints or library stubs pdfminer/image.py:84: error: Cannot find implementation or library stub for module named "PIL"
2021-10-09 07:23:28 -07:00 · 2021-10-09 07:23:28 -07:00 · 9406040d8e
parent 33d7dde4d1
commit 9406040d8e
38 changed files with 2155 additions and 1101 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Added
 - Add support for PDF 2.0 (ISO 32000-2) AES-256 encryption ([#614](https://github.com/pdfminer/pdfminer.six/pull/614))
 - Support for Paeth PNG filter compression (predictor value = 4) ([#537](https://github.com/pdfminer/pdfminer.six/pull/537))
+- Type annotations ([#661](https://github.com/pdfminer/pdfminer.six/pull/661))

 ### Fixed
 - `KeyError` when `'Encrypt'` but not `'ID'` present in `trailer` ([#594](https://github.com/pdfminer/pdfminer.six/pull/594))
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -12,6 +12,7 @@

 import os
 import sys
+from typing import List

 import pdfminer

@@ -48,7 +49,7 @@ templates_path = ['_templates']
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = []
+exclude_patterns: List[str] = []


 # -- Options for HTML output -------------------------------------------------
--- a/mypy.ini
+++ b/mypy.ini
@@ -0,0 +1,27 @@
+[mypy]
+warn_unused_configs = True
+disallow_any_generics = True
+disallow_subclassing_any = True
+disallow_untyped_calls = True
+disallow_incomplete_defs = True
+disallow_untyped_decorators = True
+no_implicit_optional = True
+warn_redundant_casts = True
+warn_return_any = True
+no_implicit_reexport = True
+strict_equality = True
+
+# This seems impossible to turn on in a version-independent manner
+warn_unused_ignores = False
+
+[mypy-pdfminer.*]
+disallow_untyped_defs = True
+
+[mypy-cryptography.hazmat.*]
+ignore_missing_imports = True
+
+[mypy-nose.*]
+ignore_missing_imports = True
+
+[mypy-setuptools]
+ignore_missing_imports = True
--- a/pdfminer/_saslprep.py
+++ b/pdfminer/_saslprep.py
@@ -21,10 +21,11 @@
 __all__ = ['saslprep']

 import stringprep
+from typing import Callable, Tuple
 import unicodedata

 # RFC4013 section 2.3 prohibited output.
-_PROHIBITED = (
+_PROHIBITED: Tuple[Callable[[str], bool], ...] = (
    # A strict reading of RFC 4013 requires table c12 here, but
    # characters from it are mapped to SPACE in the Map step. Can
    # normalization reintroduce them somehow?
@@ -39,7 +40,7 @@ _PROHIBITED = (
    stringprep.in_table_c9)


-def saslprep(data: str, prohibit_unassigned_code_points=True) -> str:
+def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str:
    """An implementation of RFC4013 SASLprep.
    :param data:
        The string to SASLprep.
--- a/pdfminer/arcfour.py
+++ b/pdfminer/arcfour.py
@@ -5,9 +5,12 @@ This code is in the public domain.
 """


+from typing import Sequence
+
+
 class Arcfour:

-    def __init__(self, key):
+    def __init__(self, key: Sequence[int]) -> None:
        # because Py3 range is not indexable
        s = [i for i in range(256)]
        j = 0
@@ -19,7 +22,7 @@ class Arcfour:
        (self.i, self.j) = (0, 0)
        return

-    def process(self, data):
+    def process(self, data: bytes) -> bytes:
        (i, j) = (self.i, self.j)
        s = self.s
        r = b''
--- a/pdfminer/ascii85.py
+++ b/pdfminer/ascii85.py
@@ -9,7 +9,7 @@ import struct


 # ascii85decode(data)
-def ascii85decode(data):
+def ascii85decode(data: bytes) -> bytes:
    """
    In ASCII85 encoding, every four bytes are encoded with five ASCII
    letters, using 85 different types of characters (as 256**4 < 85**5).
@@ -47,7 +47,7 @@ hex_re = re.compile(br'([a-f\d]{2})', re.IGNORECASE)
 trail_re = re.compile(br'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)


-def asciihexdecode(data):
+def asciihexdecode(data: bytes) -> bytes:
    """
    ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
    For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
@@ -57,7 +57,7 @@ def asciihexdecode(data):
    the EOD marker after reading an odd number of hexadecimal digits, it
    will behave as if a 0 followed the last digit.
    """
-    def decode(x):
+    def decode(x: bytes) -> bytes:
        i = int(x, 16)
        return bytes((i,))

--- a/pdfminer/ccitt.py
+++ b/pdfminer/ccitt.py
@@ -11,25 +11,39 @@
 #    FOR GROUP 4 FACSIMILE APPARATUS"


-import sys
 import array
+from typing import (Any, Callable, Dict, Iterator, List, MutableSequence,
+                    Optional, Sequence, Union, cast)


-def get_bytes(data):
+def get_bytes(data: bytes) -> Iterator[int]:
    yield from data


+# Workaround https://github.com/python/mypy/issues/731
+BitParserState = MutableSequence[Any]
+# A better definition (not supported by mypy) would be:
+# BitParserState = MutableSequence[Union["BitParserState", int, str, None]]
+
+
 class BitParser:
-    def __init__(self):
+    _state: BitParserState
+
+    # _accept is declared Optional solely as a workaround for
+    # https://github.com/python/mypy/issues/708
+    _accept: Optional[Callable[[Any], BitParserState]]
+
+    def __init__(self) -> None:
        self._pos = 0
        return

    @classmethod
-    def add(cls, root, v, bits):
-        p = root
+    def add(cls, root: BitParserState, v: Union[int, str], bits: str) -> None:
+        p: BitParserState = root
        b = None
        for i in range(len(bits)):
            if 0 < i:
+                assert b is not None
                if p[b] is None:
                    p[b] = [None, None]
                p = p[b]
@@ -37,16 +51,17 @@ class BitParser:
                b = 1
            else:
                b = 0
+        assert b is not None
        p[b] = v
        return

-    def feedbytes(self, data):
+    def feedbytes(self, data: bytes) -> None:
        for byte in get_bytes(data):
            for m in (128, 64, 32, 16, 8, 4, 2, 1):
                self._parse_bit(byte & m)
        return

-    def _parse_bit(self, x):
+    def _parse_bit(self, x: object) -> None:
        if x:
            v = self._state[1]
        else:
@@ -55,6 +70,7 @@ class BitParser:
        if isinstance(v, list):
            self._state = v
        else:
+            assert self._accept is not None
            self._state = self._accept(v)
        return

@@ -318,14 +334,16 @@ class CCITTG4Parser(BitParser):
    class ByteSkip(Exception):
        pass

-    def __init__(self, width, bytealign=False):
+    _color: int
+
+    def __init__(self, width: int, bytealign: bool = False) -> None:
        BitParser.__init__(self)
        self.width = width
        self.bytealign = bytealign
        self.reset()
        return

-    def feedbytes(self, data):
+    def feedbytes(self, data: bytes) -> None:
        for byte in get_bytes(data):
            try:
                for m in (128, 64, 32, 16, 8, 4, 2, 1):
@@ -337,7 +355,7 @@ class CCITTG4Parser(BitParser):
                break
        return

-    def _parse_mode(self, mode):
+    def _parse_mode(self, mode: object) -> BitParserState:
        if mode == 'p':
            self._do_pass()
            self._flush_line()
@@ -361,7 +379,7 @@ class CCITTG4Parser(BitParser):
        else:
            raise self.InvalidData(mode)

-    def _parse_horiz1(self, n):
+    def _parse_horiz1(self, n: Any) -> BitParserState:
        if n is None:
            raise self.InvalidData
        self._n1 += n
@@ -374,7 +392,7 @@ class CCITTG4Parser(BitParser):
        else:
            return self.BLACK

-    def _parse_horiz2(self, n):
+    def _parse_horiz2(self, n: Any) -> BitParserState:
        if n is None:
            raise self.InvalidData
        self._n2 += n
@@ -389,7 +407,7 @@ class CCITTG4Parser(BitParser):
        else:
            return self.BLACK

-    def _parse_uncompressed(self, bits):
+    def _parse_uncompressed(self, bits: Optional[str]) -> BitParserState:
        if not bits:
            raise self.InvalidData
        if bits.startswith('T'):
@@ -401,10 +419,10 @@ class CCITTG4Parser(BitParser):
            self._do_uncompressed(bits)
            return self.UNCOMPRESSED

-    def _get_bits(self):
+    def _get_bits(self) -> str:
        return ''.join(str(b) for b in self._curline[:self._curpos])

-    def _get_refline(self, i):
+    def _get_refline(self, i: int) -> str:
        if i < 0:
            return '[]'+''.join(str(b) for b in self._refline)
        elif len(self._refline) <= i:
@@ -414,7 +432,7 @@ class CCITTG4Parser(BitParser):
                    '['+str(self._refline[i])+']' +
                    ''.join(str(b) for b in self._refline[i+1:]))

-    def reset(self):
+    def reset(self) -> None:
        self._y = 0
        self._curline = array.array('b', [1]*self.width)
        self._reset_line()
@@ -422,18 +440,18 @@ class CCITTG4Parser(BitParser):
        self._state = self.MODE
        return

-    def output_line(self, y, bits):
+    def output_line(self, y: int, bits: Sequence[int]) -> None:
        print(y, ''.join(str(b) for b in bits))
        return

-    def _reset_line(self):
+    def _reset_line(self) -> None:
        self._refline = self._curline
        self._curline = array.array('b', [1]*self.width)
        self._curpos = -1
        self._color = 1
        return

-    def _flush_line(self):
+    def _flush_line(self) -> None:
        if self.width <= self._curpos:
            self.output_line(self._y, self._curline)
            self._y += 1
@@ -442,7 +460,7 @@ class CCITTG4Parser(BitParser):
                raise self.ByteSkip
        return

-    def _do_vertical(self, dx):
+    def _do_vertical(self, dx: int) -> None:
        x1 = self._curpos+1
        while 1:
            if x1 == 0:
@@ -467,7 +485,7 @@ class CCITTG4Parser(BitParser):
        self._color = 1-self._color
        return

-    def _do_pass(self):
+    def _do_pass(self) -> None:
        x1 = self._curpos+1
        while 1:
            if x1 == 0:
@@ -494,7 +512,7 @@ class CCITTG4Parser(BitParser):
        self._curpos = x1
        return

-    def _do_horizontal(self, n1, n2):
+    def _do_horizontal(self, n1: int, n2: int) -> None:
        if self._curpos < 0:
            self._curpos = 0
        x = self._curpos
@@ -511,7 +529,7 @@ class CCITTG4Parser(BitParser):
        self._curpos = x
        return

-    def _do_uncompressed(self, bits):
+    def _do_uncompressed(self, bits: str) -> None:
        for c in bits:
            self._curline[self._curpos] = int(c)
            self._curpos += 1
@@ -521,32 +539,33 @@ class CCITTG4Parser(BitParser):

 class CCITTFaxDecoder(CCITTG4Parser):

-    def __init__(self, width, bytealign=False, reversed=False):
+    def __init__(self, width: int, bytealign: bool = False,
+                 reversed: bool = False) -> None:
        CCITTG4Parser.__init__(self, width, bytealign=bytealign)
        self.reversed = reversed
        self._buf = b''
        return

-    def close(self):
+    def close(self) -> bytes:
        return self._buf

-    def output_line(self, y, bits):
-        bytes = array.array('B', [0]*((len(bits)+7)//8))
+    def output_line(self, y: int, bits: Sequence[int]) -> None:
+        arr = array.array('B', [0]*((len(bits)+7)//8))
        if self.reversed:
            bits = [1-b for b in bits]
        for (i, b) in enumerate(bits):
            if b:
-                bytes[i//8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8]
-        self._buf += bytes.tostring()
+                arr[i//8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8]
+        self._buf += arr.tobytes()
        return


-def ccittfaxdecode(data, params):
+def ccittfaxdecode(data: bytes, params: Dict[str, object]) -> bytes:
    K = params.get('K')
-    cols = params.get('Columns')
-    bytealign = params.get('EncodedByteAlign')
-    reversed = params.get('BlackIs1')
    if K == -1:
+        cols = cast(int, params.get('Columns'))
+        bytealign = cast(bool, params.get('EncodedByteAlign'))
+        reversed = cast(bool, params.get('BlackIs1'))
        parser = CCITTFaxDecoder(cols, bytealign=bytealign, reversed=reversed)
    else:
        raise ValueError(K)
@@ -555,19 +574,20 @@ def ccittfaxdecode(data, params):


 # test
-def main(argv):
+def main(argv: List[str]) -> None:
    if not argv[1:]:
        import unittest
-        return unittest.main()
+        unittest.main()
+        return

    class Parser(CCITTG4Parser):
-        def __init__(self, width, bytealign=False):
-            import pygame
+        def __init__(self, width: int, bytealign: bool = False) -> None:
+            import pygame  # type: ignore[import]
            CCITTG4Parser.__init__(self, width, bytealign=bytealign)
            self.img = pygame.Surface((self.width, 1000))
            return

-        def output_line(self, y, bits):
+        def output_line(self, y: int, bits: Sequence[int]) -> None:
            for (x, b) in enumerate(bits):
                if b:
                    self.img.set_at((x, y), (255, 255, 255))
@@ -575,7 +595,7 @@ def main(argv):
                    self.img.set_at((x, y), (0, 0, 0))
            return

-        def close(self):
+        def close(self) -> None:
            import pygame
            pygame.image.save(self.img, 'out.bmp')
            return
@@ -587,7 +607,3 @@ def main(argv):
        parser.close()
        fp.close()
    return
-
-
-if __name__ == '__main__':
-    sys.exit(main(sys.argv))
--- a/pdfminer/cmapdb.py
+++ b/pdfminer/cmapdb.py
@@ -16,9 +16,12 @@ import gzip
 import pickle as pickle
 import struct
 import logging
+from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List,
+                    MutableMapping, Optional, TextIO, Tuple, Union, cast)
 from .psparser import PSStackParser
 from .psparser import PSSyntaxError
 from .psparser import PSEOF
+from .psparser import PSKeyword
 from .psparser import PSLiteral
 from .psparser import literal_name
 from .psparser import KWD
@@ -38,44 +41,48 @@ class CMapBase:

    debug = 0

-    def __init__(self, **kwargs):
-        self.attrs = kwargs.copy()
+    def __init__(self, **kwargs: object) -> None:
+        self.attrs: MutableMapping[str, object] = kwargs.copy()
        return

-    def is_vertical(self):
+    def is_vertical(self) -> bool:
        return self.attrs.get('WMode', 0) != 0

-    def set_attr(self, k, v):
+    def set_attr(self, k: str, v: object) -> None:
        self.attrs[k] = v
        return

-    def add_code2cid(self, code, cid):
+    def add_code2cid(self, code: str, cid: int) -> None:
        return

-    def add_cid2unichr(self, cid, code):
+    def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]
+                       ) -> None:
        return

-    def use_cmap(self, cmap):
+    def use_cmap(self, cmap: "CMapBase") -> None:
        return

+    def decode(self, code: bytes) -> Iterable[int]:
+        raise NotImplementedError
+

 class CMap(CMapBase):

-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs: Union[str, int]) -> None:
        CMapBase.__init__(self, **kwargs)
-        self.code2cid = {}
+        self.code2cid: Dict[int, object] = {}
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<CMap: %s>' % self.attrs.get('CMapName')

-    def use_cmap(self, cmap):
+    def use_cmap(self, cmap: CMapBase) -> None:
        assert isinstance(cmap, CMap), str(type(cmap))

-        def copy(dst, src):
+        def copy(dst: Dict[int, object], src: Dict[int, object]) -> None:
            for (k, v) in src.items():
                if isinstance(v, dict):
-                    d = {}
+                    d: Dict[int, object] = {}
                    dst[k] = d
                    copy(d, v)
                else:
@@ -83,20 +90,24 @@ class CMap(CMapBase):
        copy(self.code2cid, cmap.code2cid)
        return

-    def decode(self, code):
+    def decode(self, code: bytes) -> Iterator[int]:
        log.debug('decode: %r, %r', self, code)
        d = self.code2cid
        for i in iter(code):
            if i in d:
-                d = d[i]
-                if isinstance(d, int):
-                    yield d
+                x = d[i]
+                if isinstance(x, int):
+                    yield x
                    d = self.code2cid
+                else:
+                    d = cast(Dict[int, object], x)
            else:
                d = self.code2cid
        return

-    def dump(self, out=sys.stdout, code2cid=None, code=None):
+    def dump(self, out: TextIO = sys.stdout,
+             code2cid: Optional[Dict[int, object]] = None,
+             code: Tuple[int, ...] = ()) -> None:
        if code2cid is None:
            code2cid = self.code2cid
            code = ()
@@ -105,13 +116,13 @@ class CMap(CMapBase):
            if isinstance(v, int):
                out.write('code %r = cid %d\n' % (c, v))
            else:
-                self.dump(out=out, code2cid=v, code=c)
+                self.dump(out=out, code2cid=cast(Dict[int, object], v), code=c)
        return


 class IdentityCMap(CMapBase):

-    def decode(self, code):
+    def decode(self, code: bytes) -> Tuple[int, ...]:
        n = len(code)//2
        if n:
            return struct.unpack('>%dH' % n, code)
@@ -121,7 +132,7 @@ class IdentityCMap(CMapBase):

 class IdentityCMapByte(IdentityCMap):

-    def decode(self, code):
+    def decode(self, code: bytes) -> Tuple[int, ...]:
        n = len(code)
        if n:
            return struct.unpack('>%dB' % n, code)
@@ -131,19 +142,19 @@ class IdentityCMapByte(IdentityCMap):

 class UnicodeMap(CMapBase):

-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs: Union[str, int]) -> None:
        CMapBase.__init__(self, **kwargs)
-        self.cid2unichr = {}
+        self.cid2unichr: Dict[int, str] = {}
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<UnicodeMap: %s>' % self.attrs.get('CMapName')

-    def get_unichr(self, cid):
+    def get_unichr(self, cid: int) -> str:
        log.debug('get_unichr: %r, %r', self, cid)
        return self.cid2unichr[cid]

-    def dump(self, out=sys.stdout):
+    def dump(self, out: TextIO = sys.stdout) -> None:
        for (k, v) in sorted(self.cid2unichr.items()):
            out.write('cid %d = unicode %r\n' % (k, v))
        return
@@ -151,29 +162,31 @@ class UnicodeMap(CMapBase):

 class FileCMap(CMap):

-    def add_code2cid(self, code, cid):
+    def add_code2cid(self, code: str, cid: int) -> None:
        assert isinstance(code, str) and isinstance(cid, int),\
            str((type(code), type(cid)))
        d = self.code2cid
        for c in code[:-1]:
-            c = ord(c)
-            if c in d:
-                d = d[c]
+            ci = ord(c)
+            if ci in d:
+                d = cast(Dict[int, object], d[ci])
            else:
-                t = {}
-                d[c] = t
+                t: Dict[int, object] = {}
+                d[ci] = t
                d = t
-        c = ord(code[-1])
-        d[c] = cid
+        ci = ord(code[-1])
+        d[ci] = cid
        return


 class FileUnicodeMap(UnicodeMap):

-    def add_cid2unichr(self, cid, code):
+    def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int]
+                       ) -> None:
        assert isinstance(cid, int), str(type(cid))
        if isinstance(code, PSLiteral):
            # Interpret as an Adobe glyph name.
+            assert isinstance(code.name, str)
            self.cid2unichr[cid] = name2unicode(code.name)
        elif isinstance(code, bytes):
            # Interpret as UTF-16BE.
@@ -187,8 +200,8 @@ class FileUnicodeMap(UnicodeMap):

 class PyCMap(CMap):

-    def __init__(self, name, module):
-        CMap.__init__(self, CMapName=name)
+    def __init__(self, name: str, module: Any) -> None:
+        super().__init__(CMapName=name)
        self.code2cid = module.CODE2CID
        if module.IS_VERTICAL:
            self.attrs['WMode'] = 1
@@ -197,8 +210,8 @@ class PyCMap(CMap):

 class PyUnicodeMap(UnicodeMap):

-    def __init__(self, name, module, vertical):
-        UnicodeMap.__init__(self, CMapName=name)
+    def __init__(self, name: str, module: Any, vertical: bool) -> None:
+        super().__init__(CMapName=name)
        if vertical:
            self.cid2unichr = module.CID2UNICHR_V
            self.attrs['WMode'] = 1
@@ -209,14 +222,14 @@ class PyUnicodeMap(UnicodeMap):

 class CMapDB:

-    _cmap_cache = {}
-    _umap_cache = {}
+    _cmap_cache: Dict[str, PyCMap] = {}
+    _umap_cache: Dict[str, List[PyUnicodeMap]] = {}

    class CMapNotFound(CMapError):
        pass

    @classmethod
-    def _load_data(cls, name):
+    def _load_data(cls, name: str) -> Any:
        name = name.replace("\0", "")
        filename = '%s.pickle.gz' % name
        log.info('loading: %r', name)
@@ -234,7 +247,7 @@ class CMapDB:
            raise CMapDB.CMapNotFound(name)

    @classmethod
-    def get_cmap(cls, name):
+    def get_cmap(cls, name: str) -> CMapBase:
        if name == 'Identity-H':
            return IdentityCMap(WMode=0)
        elif name == 'Identity-V':
@@ -252,7 +265,7 @@ class CMapDB:
        return cmap

    @classmethod
-    def get_unicode_map(cls, name, vertical=False):
+    def get_unicode_map(cls, name: str, vertical: bool = False) -> UnicodeMap:
        try:
            return cls._umap_cache[name][vertical]
        except KeyError:
@@ -263,16 +276,16 @@ class CMapDB:
        return cls._umap_cache[name][vertical]


-class CMapParser(PSStackParser):
+class CMapParser(PSStackParser[PSKeyword]):

-    def __init__(self, cmap, fp):
+    def __init__(self, cmap: CMapBase, fp: BinaryIO) -> None:
        PSStackParser.__init__(self, fp)
        self.cmap = cmap
        # some ToUnicode maps don't have "begincmap" keyword.
        self._in_cmap = True
        return

-    def run(self):
+    def run(self) -> None:
        try:
            self.nextobject()
        except PSEOF:
@@ -296,7 +309,7 @@ class CMapParser(PSStackParser):
    KEYWORD_BEGINNOTDEFRANGE = KWD(b'beginnotdefrange')
    KEYWORD_ENDNOTDEFRANGE = KWD(b'endnotdefrange')

-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        if token is self.KEYWORD_BEGINCMAP:
            self._in_cmap = True
            self.popall()
@@ -380,6 +393,7 @@ class CMapParser(PSStackParser):
                    for i in range(e1-s1+1):
                        self.cmap.add_cid2unichr(s1+i, code[i])
                else:
+                    assert isinstance(code, bytes)
                    var = code[-4:]
                    base = nunpack(var)
                    prefix = code[:-4]
@@ -410,7 +424,7 @@ class CMapParser(PSStackParser):
        return


-def main(argv):
+def main(argv: List[str]) -> None:
    args = argv[1:]
    for fname in args:
        fp = open(fname, 'rb')
@@ -422,4 +436,4 @@ def main(argv):


 if __name__ == '__main__':
-    sys.exit(main(sys.argv))
+    main(sys.argv)
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -1,13 +1,19 @@
 import io
 import logging
+from pdfminer.pdfcolor import PDFColorSpace
+from typing import (BinaryIO, Dict, Generic, List, Optional, Sequence, TextIO,
+                    Tuple, TypeVar, Union, cast)
 import re

 from . import utils
+from .layout import LAParams, LTComponent, TextGroupElement
 from .layout import LTChar
 from .layout import LTContainer
 from .layout import LTCurve
 from .layout import LTFigure
 from .layout import LTImage
+from .layout import LTItem
+from .layout import LTLayoutContainer
 from .layout import LTLine
 from .layout import LTPage
 from .layout import LTRect
@ -17,25 +23,38 @@ from .layout import LTTextBoxVertical
 from .layout import LTTextGroup
 from .layout import LTTextLine
 from .pdfdevice import PDFTextDevice
+from .pdffont import PDFFont
 from .pdffont import PDFUnicodeNotDefined
+from .pdfinterp import PDFGraphicState, PDFResourceManager
+from .pdfpage import PDFPage
+from .pdftypes import PDFStream
+from .utils import AnyIO, Point, Matrix, Rect, PathSegment
 from .utils import apply_matrix_pt
 from .utils import bbox2str
 from .utils import enc
 from .utils import mult_matrix
+from .image import ImageWriter

 log = logging.getLogger(__name__)


 class PDFLayoutAnalyzer(PDFTextDevice):
+    cur_item: LTLayoutContainer
+    ctm: Matrix

-    def __init__(self, rsrcmgr, pageno=1, laparams=None):
+    def __init__(
+        self,
+        rsrcmgr: PDFResourceManager,
+        pageno: int = 1,
+        laparams: Optional[LAParams] = None
+    ) -> None:
        PDFTextDevice.__init__(self, rsrcmgr)
        self.pageno = pageno
        self.laparams = laparams
-        self._stack = []
+        self._stack: List[LTLayoutContainer] = []
        return

-    def begin_page(self, page, ctm):
+    def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
        (x0, y0, x1, y1) = page.mediabox
        (x0, y0) = apply_matrix_pt(ctm, (x0, y0))
        (x1, y1) = apply_matrix_pt(ctm, (x1, y1))
@ -43,7 +62,7 @@ class PDFLayoutAnalyzer(PDFTextDevice):
        self.cur_item = LTPage(self.pageno, mediabox)
        return

-    def end_page(self, page):
+    def end_page(self, page: PDFPage) -> None:
        assert not self._stack, str(len(self._stack))
        assert isinstance(self.cur_item, LTPage), str(type(self.cur_item))
        if self.laparams is not None:
@ -52,19 +71,19 @@ class PDFLayoutAnalyzer(PDFTextDevice):
        self.receive_layout(self.cur_item)
        return

-    def begin_figure(self, name, bbox, matrix):
+    def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None:
        self._stack.append(self.cur_item)
        self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm))
        return

-    def end_figure(self, _):
+    def end_figure(self, _: str) -> None:
        fig = self.cur_item
        assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
        self.cur_item = self._stack.pop()
        self.cur_item.add(fig)
        return

-    def render_image(self, name, stream):
+    def render_image(self, name: str, stream: PDFStream) -> None:
        assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
        item = LTImage(name, stream,
                       (self.cur_item.x0, self.cur_item.y0,
@ -72,7 +91,14 @@ class PDFLayoutAnalyzer(PDFTextDevice):
        self.cur_item.add(item)
        return

-    def paint_path(self, gstate, stroke, fill, evenodd, path):
+    def paint_path(
+        self,
+        gstate: PDFGraphicState,
+        stroke: bool,
+        fill: bool,
+        evenodd: bool,
+        path: Sequence[PathSegment]
+    ) -> None:
        """Paint paths described in section 4.4 of the PDF reference manual"""
        shape = ''.join(x[0] for x in path)

@ -90,7 +116,8 @@ class PDFLayoutAnalyzer(PDFTextDevice):
            # And, per Section 4.4's Table 4.9, all other path commands place
            # their point-position in their final two arguments. (Any preceding
            # arguments represent control points on Bézier curves.)
-            raw_pts = [p[-2:] if p[0] != 'h' else path[0][-2:] for p in path]
+            raw_pts = [cast(Point, p[-2:] if p[0] != 'h' else path[0][-2:])
+                       for p in path]
            pts = [apply_matrix_pt(self.ctm, pt) for pt in raw_pts]

            if shape in {'mlh', 'ml'}:
@ -123,8 +150,17 @@ class PDFLayoutAnalyzer(PDFTextDevice):
                                gstate.scolor, gstate.ncolor)
                self.cur_item.add(curve)

-    def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
-                    graphicstate):
+    def render_char(
+        self,
+        matrix: Matrix,
+        font: PDFFont,
+        fontsize: float,
+        scaling: float,
+        rise: float,
+        cid: int,
+        ncs: PDFColorSpace,
+        graphicstate: PDFGraphicState
+    ) -> float:
        try:
            text = font.to_unichr(cid)
            assert isinstance(text, str), str(type(text))
@ -137,40 +173,56 @@ class PDFLayoutAnalyzer(PDFTextDevice):
        self.cur_item.add(item)
        return item.adv

-    def handle_undefined_char(self, font, cid):
+    def handle_undefined_char(self, font: PDFFont, cid: int) -> str:
        log.info('undefined: %r, %r', font, cid)
        return '(cid:%d)' % cid

-    def receive_layout(self, ltpage):
+    def receive_layout(self, ltpage: LTPage) -> None:
        return


 class PDFPageAggregator(PDFLayoutAnalyzer):
-    def __init__(self, rsrcmgr, pageno=1, laparams=None):
+    def __init__(
+        self,
+        rsrcmgr: PDFResourceManager,
+        pageno: int = 1,
+        laparams: Optional[LAParams] = None
+    ) -> None:
        PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
                                   laparams=laparams)
-        self.result = None
+        self.result: Optional[LTPage] = None
        return

-    def receive_layout(self, ltpage):
+    def receive_layout(self, ltpage: LTPage) -> None:
        self.result = ltpage
        return

-    def get_result(self):
+    def get_result(self) -> LTPage:
+        assert self.result is not None
        return self.result


-class PDFConverter(PDFLayoutAnalyzer):
-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1,
-                 laparams=None):
+# Some PDFConverter children support only binary I/O
+IOType = TypeVar('IOType', TextIO, BinaryIO, AnyIO)
+
+
+class PDFConverter(PDFLayoutAnalyzer, Generic[IOType]):
+    def __init__(
+        self,
+        rsrcmgr: PDFResourceManager,
+        outfp: IOType,
+        codec: str = 'utf-8',
+        pageno: int = 1,
+        laparams: Optional[LAParams] = None
+    ) -> None:
        PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno,
                                   laparams=laparams)
-        self.outfp = outfp
+        self.outfp: IOType = outfp
        self.codec = codec
        self.outfp_binary = self._is_binary_stream(self.outfp)

    @staticmethod
-    def _is_binary_stream(outfp):
+    def _is_binary_stream(outfp: AnyIO) -> bool:
        """Test if an stream is binary or not"""
        if 'b' in getattr(outfp, 'mode', ''):
            return True
@ -187,24 +239,33 @@ class PDFConverter(PDFLayoutAnalyzer):
        return True


-class TextConverter(PDFConverter):
-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
-                 showpageno=False, imagewriter=None):
-        PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
+class TextConverter(PDFConverter[AnyIO]):
+    def __init__(
+        self,
+        rsrcmgr: PDFResourceManager,
+        outfp: AnyIO,
+        codec: str = 'utf-8',
+        pageno: int = 1,
+        laparams: Optional[LAParams] = None,
+        showpageno: bool = False,
+        imagewriter: Optional[ImageWriter] = None
+    ) -> None:
+        super().__init__(rsrcmgr, outfp, codec=codec, pageno=pageno,
                         laparams=laparams)
        self.showpageno = showpageno
        self.imagewriter = imagewriter
        return

-    def write_text(self, text):
+    def write_text(self, text: str) -> None:
        text = utils.compatible_encode_method(text, self.codec, 'ignore')
        if self.outfp_binary:
-            text = text.encode()
-        self.outfp.write(text)
+            cast(BinaryIO, self.outfp).write(text.encode())
+        else:
+            cast(TextIO, self.outfp).write(text)
        return

-    def receive_layout(self, ltpage):
-        def render(item):
+    def receive_layout(self, ltpage: LTPage) -> None:
+        def render(item: LTItem) -> None:
            if isinstance(item, LTContainer):
                for child in item:
                    render(child)
@ -224,17 +285,24 @@ class TextConverter(PDFConverter):
    # Some dummy functions to save memory/CPU when all that is wanted
    # is text.  This stops all the image and drawing output from being
    # recorded and taking up RAM.
-    def render_image(self, name, stream):
+    def render_image(self, name: str, stream: PDFStream) -> None:
        if self.imagewriter is None:
            return
        PDFConverter.render_image(self, name, stream)
        return

-    def paint_path(self, gstate, stroke, fill, evenodd, path):
+    def paint_path(
+        self,
+        gstate: PDFGraphicState,
+        stroke: bool,
+        fill: bool,
+        evenodd: bool,
+        path: Sequence[PathSegment]
+    ) -> None:
        return


-class HTMLConverter(PDFConverter):
+class HTMLConverter(PDFConverter[AnyIO]):
    RECT_COLORS = {
        'figure': 'yellow',
        'textline': 'magenta',
@ -249,12 +317,30 @@ class HTMLConverter(PDFConverter):
        'char': 'black',
    }

-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
-                 scale=1, fontscale=1.0, layoutmode='normal', showpageno=True,
-                 pagemargin=50, imagewriter=None, debug=0, rect_colors=None,
-                 text_colors=None):
+    def __init__(
+        self,
+        rsrcmgr: PDFResourceManager,
+        outfp: AnyIO,
+        codec: str = 'utf-8',
+        pageno: int = 1,
+        laparams: Optional[LAParams] = None,
+        scale: float = 1,
+        fontscale: float = 1.0,
+        layoutmode: str = 'normal',
+        showpageno: bool = True,
+        pagemargin: int = 50,
+        imagewriter: Optional[ImageWriter] = None,
+        debug: int = 0,
+        rect_colors: Optional[Dict[str, str]] = None,
+        text_colors: Optional[Dict[str, str]] = None
+    ) -> None:
        PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
                              laparams=laparams)
+
+        # write() assumes a codec for binary I/O, or no codec for text I/O.
+        if self.outfp_binary == (not self.codec):
+            raise ValueError("Codec is required for a binary I/O output")
+
        if text_colors is None:
            text_colors = {'char': 'black'}
        if rect_colors is None:
@ -271,19 +357,20 @@ class HTMLConverter(PDFConverter):
        if debug:
            self.rect_colors.update(self.RECT_COLORS)
            self.text_colors.update(self.TEXT_COLORS)
-        self._yoffset = self.pagemargin
-        self._font = None
-        self._fontstack = []
+        self._yoffset: float = self.pagemargin
+        self._font: Optional[Tuple[str, float]] = None
+        self._fontstack: List[Optional[Tuple[str, float]]] = []
        self.write_header()
        return

-    def write(self, text):
+    def write(self, text: str) -> None:
        if self.codec:
-            text = text.encode(self.codec)
-        self.outfp.write(text)
+            cast(BinaryIO, self.outfp).write(text.encode(self.codec))
+        else:
+            cast(TextIO, self.outfp).write(text)
        return

-    def write_header(self):
+    def write_header(self) -> None:
        self.write('<html><head>\n')
        if self.codec:
            s = '<meta http-equiv="Content-Type" content="text/html; ' \
@ -294,7 +381,7 @@ class HTMLConverter(PDFConverter):
        self.write('</head><body>\n')
        return

-    def write_footer(self):
+    def write_footer(self) -> None:
        page_links = ['<a href="#{}">{}</a>'.format(i, i)
                      for i in range(1, self.pageno)]
        s = '<div style="position:absolute; top:0px;">Page: %s</div>\n' % \
@ -303,28 +390,49 @@ class HTMLConverter(PDFConverter):
        self.write('</body></html>\n')
        return

-    def write_text(self, text):
+    def write_text(self, text: str) -> None:
        self.write(enc(text))
        return

-    def place_rect(self, color, borderwidth, x, y, w, h):
-        color = self.rect_colors.get(color)
-        if color is not None:
+    def place_rect(
+        self,
+        color: str,
+        borderwidth: int,
+        x: float,
+        y: float,
+        w: float,
+        h: float
+    ) -> None:
+        color2 = self.rect_colors.get(color)
+        if color2 is not None:
            s = '<span style="position:absolute; border: %s %dpx solid; ' \
                'left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>\n' % \
-                (color, borderwidth, x * self.scale,
+                (color2, borderwidth, x * self.scale,
                 (self._yoffset - y) * self.scale, w * self.scale,
                 h * self.scale)
            self.write(
                s)
        return

-    def place_border(self, color, borderwidth, item):
+    def place_border(
+        self,
+        color: str,
+        borderwidth: int,
+        item: LTComponent
+    ) -> None:
        self.place_rect(color, borderwidth, item.x0, item.y1, item.width,
                        item.height)
        return

-    def place_image(self, item, borderwidth, x, y, w, h):
+    def place_image(
+        self,
+        item: LTImage,
+        borderwidth: int,
+        x: float,
+        y: float,
+        w: float,
+        h: float
+    ) -> None:
        if self.imagewriter is not None:
            name = self.imagewriter.export_image(item)
            s = '<img src="%s" border="%d" style="position:absolute; ' \
@ -335,19 +443,35 @@ class HTMLConverter(PDFConverter):
            self.write(s)
        return

-    def place_text(self, color, text, x, y, size):
-        color = self.text_colors.get(color)
-        if color is not None:
+    def place_text(
+        self,
+        color: str,
+        text: str,
+        x: float,
+        y: float,
+        size: float
+    ) -> None:
+        color2 = self.text_colors.get(color)
+        if color2 is not None:
            s = '<span style="position:absolute; color:%s; left:%dpx; ' \
                'top:%dpx; font-size:%dpx;">' % \
-                (color, x * self.scale, (self._yoffset - y) * self.scale,
+                (color2, x * self.scale, (self._yoffset - y) * self.scale,
                 size * self.scale * self.fontscale)
            self.write(s)
            self.write_text(text)
            self.write('</span>\n')
        return

-    def begin_div(self, color, borderwidth, x, y, w, h, writing_mode=False):
+    def begin_div(
+        self,
+        color: str,
+        borderwidth: int,
+        x: float,
+        y: float,
+        w: float,
+        h: float,
+        writing_mode: str = 'False'
+    ) -> None:
        self._fontstack.append(self._font)
        self._font = None
        s = '<div style="position:absolute; border: %s %dpx solid; ' \
@ -358,14 +482,14 @@ class HTMLConverter(PDFConverter):
        self.write(s)
        return

-    def end_div(self, color):
+    def end_div(self, color: str) -> None:
        if self._font is not None:
            self.write('</span>')
        self._font = self._fontstack.pop()
        self.write('</div>')
        return

-    def put_text(self, text, fontname, fontsize):
+    def put_text(self, text: str, fontname: str, fontsize: float) -> None:
        font = (fontname, fontsize)
        if font != self._font:
            if self._font is not None:
@ -379,19 +503,20 @@ class HTMLConverter(PDFConverter):
        self.write_text(text)
        return

-    def put_newline(self):
+    def put_newline(self) -> None:
        self.write('<br>')
        return

-    def receive_layout(self, ltpage):
-        def show_group(item):
+    def receive_layout(self, ltpage: LTPage) -> None:
+        def show_group(item: Union[LTTextGroup, TextGroupElement]) -> None:
            if isinstance(item, LTTextGroup):
                self.place_border('textgroup', 1, item)
                for child in item:
                    show_group(child)
            return

-        def render(item):
+        def render(item: LTItem) -> None:
+            child: LTItem
            if isinstance(item, LTPage):
                self._yoffset += item.y1
                self.place_border('page', 1, item)
@ -455,31 +580,45 @@ class HTMLConverter(PDFConverter):
        self._yoffset += self.pagemargin
        return

-    def close(self):
+    def close(self) -> None:
        self.write_footer()
        return


-class XMLConverter(PDFConverter):
+class XMLConverter(PDFConverter[AnyIO]):

    CONTROL = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')

-    def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
-                 imagewriter=None, stripcontrol=False):
+    def __init__(
+        self,
+        rsrcmgr: PDFResourceManager,
+        outfp: AnyIO,
+        codec: str = 'utf-8',
+        pageno: int = 1,
+        laparams: Optional[LAParams] = None,
+        imagewriter: Optional[ImageWriter] = None,
+        stripcontrol: bool = False
+    ) -> None:
        PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno,
                              laparams=laparams)
+
+        # write() assumes a codec for binary I/O, or no codec for text I/O.
+        if self.outfp_binary == (not self.codec):
+            raise ValueError("Codec is required for a binary I/O output")
+
        self.imagewriter = imagewriter
        self.stripcontrol = stripcontrol
        self.write_header()
        return

-    def write(self, text):
+    def write(self, text: str) -> None:
        if self.codec:
-            text = text.encode(self.codec)
-        self.outfp.write(text)
+            cast(BinaryIO, self.outfp).write(text.encode(self.codec))
+        else:
+            cast(TextIO, self.outfp).write(text)
        return

-    def write_header(self):
+    def write_header(self) -> None:
        if self.codec:
            self.write('<?xml version="1.0" encoding="%s" ?>\n' % self.codec)
        else:
@ -487,18 +626,18 @@ class XMLConverter(PDFConverter):
        self.write('<pages>\n')
        return

-    def write_footer(self):
+    def write_footer(self) -> None:
        self.write('</pages>\n')
        return

-    def write_text(self, text):
+    def write_text(self, text: str) -> None:
        if self.stripcontrol:
            text = self.CONTROL.sub('', text)
        self.write(enc(text))
        return

-    def receive_layout(self, ltpage):
-        def show_group(item):
+    def receive_layout(self, ltpage: LTPage) -> None:
+        def show_group(item: LTItem) -> None:
            if isinstance(item, LTTextBox):
                self.write('<textbox id="%d" bbox="%s" />\n' %
                           (item.index, bbox2str(item.bbox)))
@ -509,7 +648,8 @@ class XMLConverter(PDFConverter):
                self.write('</textgroup>\n')
            return

-        def render(item):
+        def render(item: LTItem) -> None:
+            child: LTItem
            if isinstance(item, LTPage):
                s = '<page id="%s" bbox="%s" rotate="%d">\n' % \
                    (item.pageid, bbox2str(item.bbox), item.rotate)
@ -580,6 +720,6 @@ class XMLConverter(PDFConverter):
        render(ltpage)
        return

-    def close(self):
+    def close(self) -> None:
        self.write_footer()
        return
--- a/pdfminer/encodingdb.py
+++ b/pdfminer/encodingdb.py
@ -1,5 +1,6 @@
 import logging
 import re
+from typing import Dict, Iterable, Optional, cast

 from .glyphlist import glyphname2unicode
 from .latin_enc import ENCODING
@ -10,7 +11,7 @@ HEXADECIMAL = re.compile(r'[0-9a-fA-F]+')
 log = logging.getLogger(__name__)


-def name2unicode(name):
+def name2unicode(name: str) -> str:
    """Converts Adobe glyph names to Unicode numbers.

    In contrast to the specification, this raises a KeyError instead of return
@ -32,7 +33,7 @@ def name2unicode(name):

    else:
        if name in glyphname2unicode:
-            return glyphname2unicode.get(name)
+            return glyphname2unicode[name]

        elif name.startswith('uni'):
            name_without_uni = name.strip('uni')
@ -59,7 +60,7 @@ def name2unicode(name):
                   'it does not match specification' % name)


-def raise_key_error_for_invalid_unicode(unicode_digit):
+def raise_key_error_for_invalid_unicode(unicode_digit: int) -> None:
    """Unicode values should not be in the range D800 through DFFF because
    that is used for surrogate pairs in UTF-16

@ -72,10 +73,10 @@ def raise_key_error_for_invalid_unicode(unicode_digit):

 class EncodingDB:

-    std2unicode = {}
-    mac2unicode = {}
-    win2unicode = {}
-    pdf2unicode = {}
+    std2unicode: Dict[int, str] = {}
+    mac2unicode: Dict[int, str] = {}
+    win2unicode: Dict[int, str] = {}
+    pdf2unicode: Dict[int, str] = {}
    for (name, std, mac, win, pdf) in ENCODING:
        c = name2unicode(name)
        if std:
@ -95,7 +96,11 @@ class EncodingDB:
    }

    @classmethod
-    def get_encoding(cls, name, diff=None):
+    def get_encoding(
+        cls,
+        name: str,
+        diff: Optional[Iterable[object]] = None
+    ) -> Dict[int, str]:
        cid2unicode = cls.encodings.get(name, cls.std2unicode)
        if diff:
            cid2unicode = cid2unicode.copy()
@ -105,7 +110,7 @@ class EncodingDB:
                    cid = x
                elif isinstance(x, PSLiteral):
                    try:
-                        cid2unicode[cid] = name2unicode(x.name)
+                        cid2unicode[cid] = name2unicode(cast(str, x.name))
                    except (KeyError, ValueError) as e:
                        log.debug(str(e))
                    cid += 1
--- a/pdfminer/high_level.py
+++ b/pdfminer/high_level.py
@ -3,22 +3,36 @@
 import logging
 import sys
 from io import StringIO
+from typing import Any, BinaryIO, Container, Iterator, Optional, cast

 from .converter import XMLConverter, HTMLConverter, TextConverter, \
    PDFPageAggregator
 from .image import ImageWriter
-from .layout import LAParams
-from .pdfdevice import TagExtractor
+from .layout import LAParams, LTPage
+from .pdfdevice import PDFDevice, TagExtractor
 from .pdfinterp import PDFResourceManager, PDFPageInterpreter
 from .pdfpage import PDFPage
-from .utils import open_filename
+from .utils import open_filename, FileOrName, AnyIO


-def extract_text_to_fp(inf, outfp, output_type='text', codec='utf-8',
-                       laparams=None, maxpages=0, page_numbers=None,
-                       password="", scale=1.0, rotation=0, layoutmode='normal',
-                       output_dir=None, strip_control=False, debug=False,
-                       disable_caching=False, **kwargs):
+def extract_text_to_fp(
+    inf: BinaryIO,
+    outfp: AnyIO,
+    output_type: str = 'text',
+    codec: str = 'utf-8',
+    laparams: Optional[LAParams] = None,
+    maxpages: int = 0,
+    page_numbers: Optional[Container[int]] = None,
+    password: str = "",
+    scale: float = 1.0,
+    rotation: int = 0,
+    layoutmode: str = 'normal',
+    output_dir: Optional[str] = None,
+    strip_control: bool = False,
+    debug: bool = False,
+    disable_caching: bool = False,
+    **kwargs: Any
+) -> None:
    """Parses text from inf-file and writes to outfp file-like object.

    Takes loads of optional arguments but the defaults are somewhat sane.
@ -56,7 +70,7 @@ def extract_text_to_fp(inf, outfp, output_type='text', codec='utf-8',
        imagewriter = ImageWriter(output_dir)

    rsrcmgr = PDFResourceManager(caching=not disable_caching)
-    device = None
+    device: Optional[PDFDevice] = None

    if output_type != 'text' and outfp == sys.stdout:
        outfp = sys.stdout.buffer
@ -76,13 +90,15 @@ def extract_text_to_fp(inf, outfp, output_type='text', codec='utf-8',
                               imagewriter=imagewriter)

    elif output_type == 'tag':
-        device = TagExtractor(rsrcmgr, outfp, codec=codec)
+        # Binary I/O is required, but we have no good way to test it here.
+        device = TagExtractor(rsrcmgr, cast(BinaryIO, outfp), codec=codec)

    else:
        msg = f"Output type can be text, html, xml or tag but is " \
              f"{output_type}"
        raise ValueError(msg)

+    assert device is not None
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    for page in PDFPage.get_pages(inf,
                                  page_numbers,
@ -95,8 +111,15 @@ def extract_text_to_fp(inf, outfp, output_type='text', codec='utf-8',
    device.close()


-def extract_text(pdf_file, password='', page_numbers=None, maxpages=0,
-                 caching=True, codec='utf-8', laparams=None):
+def extract_text(
+    pdf_file: FileOrName,
+    password: str = '',
+    page_numbers: Optional[Container[int]] = None,
+    maxpages: int = 0,
+    caching: bool = True,
+    codec: str = 'utf-8',
+    laparams: Optional[LAParams] = None
+) -> str:
    """Parse and return the text contained in a PDF file.

    :param pdf_file: Either a file path or a file-like object for the PDF file
@ -114,6 +137,7 @@ def extract_text(pdf_file, password='', page_numbers=None, maxpages=0,
        laparams = LAParams()

    with open_filename(pdf_file, "rb") as fp, StringIO() as output_string:
+        fp = cast(BinaryIO, fp)  # we opened in binary mode
        rsrcmgr = PDFResourceManager(caching=caching)
        device = TextConverter(rsrcmgr, output_string, codec=codec,
                               laparams=laparams)
@ -131,8 +155,14 @@ def extract_text(pdf_file, password='', page_numbers=None, maxpages=0,
        return output_string.getvalue()


-def extract_pages(pdf_file, password='', page_numbers=None, maxpages=0,
-                  caching=True, laparams=None):
+def extract_pages(
+    pdf_file: FileOrName,
+    password: str = '',
+    page_numbers: Optional[Container[int]] = None,
+    maxpages: int = 0,
+    caching: bool = True,
+    laparams: Optional[LAParams] = None
+) -> Iterator[LTPage]:
    """Extract and yield LTPage objects

    :param pdf_file: Either a file path or a file-like object for the PDF file
@ -149,6 +179,7 @@ def extract_pages(pdf_file, password='', page_numbers=None, maxpages=0,
        laparams = LAParams()

    with open_filename(pdf_file, "rb") as fp:
+        fp = cast(BinaryIO, fp)  # we opened in binary mode
        resource_manager = PDFResourceManager(caching=caching)
        device = PDFPageAggregator(resource_manager, laparams=laparams)
        interpreter = PDFPageInterpreter(resource_manager, device)
--- a/pdfminer/image.py
+++ b/pdfminer/image.py
@ -2,20 +2,28 @@ import os
 import os.path
 import struct
 from io import BytesIO
+from typing import BinaryIO, Tuple

 from .jbig2 import JBIG2StreamReader, JBIG2StreamWriter
+from .layout import LTImage
 from .pdfcolor import LITERAL_DEVICE_CMYK
 from .pdfcolor import LITERAL_DEVICE_GRAY
 from .pdfcolor import LITERAL_DEVICE_RGB
 from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE


-def align32(x):
+def align32(x: int) -> int:
    return ((x+3)//4)*4


 class BMPWriter:
-    def __init__(self, fp, bits, width, height):
+    def __init__(
+        self,
+        fp: BinaryIO,
+        bits: int,
+        width: int,
+        height: int
+    ) -> None:
        self.fp = fp
        self.bits = bits
        self.width = width
@ -51,7 +59,7 @@ class BMPWriter:
        self.pos1 = self.pos0 + self.datasize
        return

-    def write_line(self, y, data):
+    def write_line(self, y: int, data: bytes) -> None:
        self.fp.seek(self.pos1 - (y+1)*self.linesize)
        self.fp.write(data)
        return
@ -63,13 +71,13 @@ class ImageWriter:
    Supports various image types: JPEG, JBIG2 and bitmaps
    """

-    def __init__(self, outdir):
+    def __init__(self, outdir: str) -> None:
        self.outdir = outdir
        if not os.path.exists(self.outdir):
            os.makedirs(self.outdir)
        return

-    def export_image(self, image):
+    def export_image(self, image: LTImage) -> str:
        (width, height) = image.srcsize

        is_jbig2 = self.is_jbig2_image(image)
@ -80,8 +88,9 @@ class ImageWriter:
        fp = open(path, 'wb')
        if ext == '.jpg':
            raw_data = image.stream.get_rawdata()
+            assert raw_data is not None
            if LITERAL_DEVICE_CMYK in image.colorspace:
-                from PIL import Image
+                from PIL import Image  # type: ignore[import]
                from PIL import ImageChops
                ifp = BytesIO(raw_data)
                i = Image.open(ifp)
@ -128,7 +137,7 @@ class ImageWriter:
        return name

    @staticmethod
-    def is_jbig2_image(image):
+    def is_jbig2_image(image: LTImage) -> bool:
        filters = image.stream.get_filters()
        is_jbig2 = False
        for filter_name, params in filters:
@ -138,7 +147,12 @@ class ImageWriter:
        return is_jbig2

    @staticmethod
-    def _get_image_extension(image, width, height, is_jbig2):
+    def _get_image_extension(
+        image: LTImage,
+        width: int,
+        height: int,
+        is_jbig2: bool
+    ) -> str:
        filters = image.stream.get_filters()
        if len(filters) == 1 and filters[0][0] in LITERALS_DCT_DECODE:
            ext = '.jpg'
@ -154,7 +168,11 @@ class ImageWriter:
        return ext

    @staticmethod
-    def _create_unique_image_name(dirname, image_name, ext):
+    def _create_unique_image_name(
+        dirname: str,
+        image_name: str,
+        ext: str
+    ) -> Tuple[str, str]:
        name = image_name + ext
        path = os.path.join(dirname, name)
        img_index = 0
--- a/pdfminer/jbig2.py
+++ b/pdfminer/jbig2.py
@ -1,6 +1,7 @@
 import math
 import os
 from struct import pack, unpack, calcsize
+from typing import BinaryIO, Dict, Iterable, List, Optional, Tuple, Union, cast

 # segment structure base
 SEG_STRUCT = [
@ -34,15 +35,15 @@ FILE_HEAD_FLAG_SEQUENTIAL = 0b00000001
 FILE_HEAD_FLAG_PAGES_UNKNOWN = 0b00000010


-def bit_set(bit_pos, value):
+def bit_set(bit_pos: int, value: int) -> bool:
    return bool((value >> bit_pos) & 1)


-def check_flag(flag, value):
+def check_flag(flag: int, value: int) -> bool:
    return bool(flag & value)


-def masked_value(mask, value):
+def masked_value(mask: int, value: int) -> int:
    for bit_pos in range(0, 31):
        if bit_set(bit_pos, mask):
            return (value & mask) >> bit_pos
@ -50,7 +51,7 @@ def masked_value(mask, value):
    raise Exception("Invalid mask or value")


-def mask_value(mask, value):
+def mask_value(mask: int, value: int) -> int:
    for bit_pos in range(0, 31):
        if bit_set(bit_pos, mask):
            return (value & (mask >> bit_pos)) << bit_pos
@ -58,25 +59,34 @@ def mask_value(mask, value):
    raise Exception("Invalid mask or value")


+def unpack_int(format: str, buffer: bytes) -> int:
+    assert format in {">B", ">I", ">L"}
+    [result] = cast(Tuple[int], unpack(format, buffer))
+    return result
+
+
+JBIG2SegmentFlags = Dict[str, Union[int, bool]]
+JBIG2RetentionFlags = Dict[str, Union[int, List[int], List[bool]]]
+JBIG2Segment = Dict[str, Union[bool, int, bytes, JBIG2SegmentFlags,
+                               JBIG2RetentionFlags]]
+
+
 class JBIG2StreamReader:
    """Read segments from a JBIG2 byte stream"""
-
-    def __init__(self, stream):
+    def __init__(self, stream: BinaryIO) -> None:
        self.stream = stream

-    def get_segments(self):
-        segments = []
+    def get_segments(self) -> List[JBIG2Segment]:
+        segments: List[JBIG2Segment] = []
        while not self.is_eof():
-            segment = {}
+            segment: JBIG2Segment = {}
            for field_format, name in SEG_STRUCT:
                field_len = calcsize(field_format)
                field = self.stream.read(field_len)
                if len(field) < field_len:
                    segment["_error"] = True
                    break
-                value = unpack(field_format, field)
-                if len(value) == 1:
-                    [value] = value
+                value = unpack_int(field_format, field)
                parser = getattr(self, "parse_%s" % name, None)
                if callable(parser):
                    value = parser(segment, value, field)
@ -86,21 +96,31 @@ class JBIG2StreamReader:
                segments.append(segment)
        return segments

-    def is_eof(self):
+    def is_eof(self) -> bool:
        if self.stream.read(1) == b'':
            return True
        else:
            self.stream.seek(-1, os.SEEK_CUR)
            return False

-    def parse_flags(self, segment, flags, field):
+    def parse_flags(
+        self,
+        segment: JBIG2Segment,
+        flags: int,
+        field: bytes
+    ) -> JBIG2SegmentFlags:
        return {
            "deferred": check_flag(HEADER_FLAG_DEFERRED, flags),
            "page_assoc_long": check_flag(HEADER_FLAG_PAGE_ASSOC_LONG, flags),
            "type": masked_value(SEG_TYPE_MASK, flags)
        }

-    def parse_retention_flags(self, segment, flags, field):
+    def parse_retention_flags(
+        self,
+        segment: JBIG2Segment,
+        flags: int,
+        field: bytes
+    ) -> JBIG2RetentionFlags:
        ref_count = masked_value(REF_COUNT_SHORT_MASK, flags)
        retain_segments = []
        ref_segments = []
@ -110,15 +130,16 @@ class JBIG2StreamReader:
                retain_segments.append(bit_set(bit_pos, flags))
        else:
            field += self.stream.read(3)
-            [ref_count] = unpack(">L", field)
+            ref_count = unpack_int(">L", field)
            ref_count = masked_value(REF_COUNT_LONG_MASK, ref_count)
            ret_bytes_count = int(math.ceil((ref_count + 1) / 8))
            for ret_byte_index in range(ret_bytes_count):
-                [ret_byte] = unpack(">B", self.stream.read(1))
+                ret_byte = unpack_int(">B", self.stream.read(1))
                for bit_pos in range(7):
                    retain_segments.append(bit_set(bit_pos, ret_byte))

        seg_num = segment["number"]
+        assert isinstance(seg_num, int)
        if seg_num <= 256:
            ref_format = ">B"
        elif seg_num <= 65536:
@ -129,8 +150,8 @@ class JBIG2StreamReader:
        ref_size = calcsize(ref_format)

        for ref_index in range(ref_count):
-            ref = self.stream.read(ref_size)
-            [ref] = unpack(ref_format, ref)
+            ref_data = self.stream.read(ref_size)
+            ref = unpack_int(ref_format, ref_data)
            ref_segments.append(ref)

        return {
@ -139,15 +160,26 @@ class JBIG2StreamReader:
            "ref_segments": ref_segments,
        }

-    def parse_page_assoc(self, segment, page, field):
-        if segment["flags"]["page_assoc_long"]:
+    def parse_page_assoc(
+        self,
+        segment: JBIG2Segment,
+        page: int,
+        field: bytes
+    ) -> int:
+        if cast(JBIG2SegmentFlags, segment["flags"])["page_assoc_long"]:
            field += self.stream.read(3)
-            [page] = unpack(">L", field)
+            page = unpack_int(">L", field)
        return page

-    def parse_data_length(self, segment, length, field):
+    def parse_data_length(
+        self,
+        segment: JBIG2Segment,
+        length: int,
+        field: bytes
+    ) -> int:
        if length:
-            if (segment["flags"]["type"] == SEG_TYPE_IMMEDIATE_GEN_REGION) \
+            if (cast(JBIG2SegmentFlags, segment["flags"])["type"] ==
+                    SEG_TYPE_IMMEDIATE_GEN_REGION) \
                    and (length == DATA_LEN_UNKNOWN):

                raise NotImplementedError(
@ -163,25 +195,36 @@ class JBIG2StreamReader:
 class JBIG2StreamWriter:
    """Write JBIG2 segments to a file in JBIG2 format"""

-    def __init__(self, stream):
+    EMPTY_RETENTION_FLAGS: JBIG2RetentionFlags = {
+        'ref_count': 0,
+        'ref_segments': cast(List[int], []),
+        'retain_segments': cast(List[bool], [])
+    }
+
+    def __init__(self, stream: BinaryIO) -> None:
        self.stream = stream

-    def write_segments(self, segments, fix_last_page=True):
+    def write_segments(
+        self,
+        segments: Iterable[JBIG2Segment],
+        fix_last_page: bool = True
+    ) -> int:
        data_len = 0
-        current_page = None
-        seg_num = None
+        current_page: Optional[int] = None
+        seg_num: Optional[int] = None

        for segment in segments:
            data = self.encode_segment(segment)
            self.stream.write(data)
            data_len += len(data)

-            seg_num = segment["number"]
+            seg_num = cast(Optional[int], segment["number"])

            if fix_last_page:
-                seg_page = segment.get("page_assoc")
+                seg_page = cast(int, segment.get("page_assoc"))

-                if segment["flags"]["type"] == SEG_TYPE_END_OF_PAGE:
+                if cast(JBIG2SegmentFlags, segment["flags"])["type"] == \
+                        SEG_TYPE_END_OF_PAGE:
                    current_page = None
                elif seg_page:
                    current_page = seg_page
@ -194,7 +237,11 @@ class JBIG2StreamWriter:

        return data_len

-    def write_file(self, segments, fix_last_page=True):
+    def write_file(
+        self,
+        segments: Iterable[JBIG2Segment],
+        fix_last_page: bool = True
+    ) -> int:
        header = FILE_HEADER_ID
        header_flags = FILE_HEAD_FLAG_SEQUENTIAL | FILE_HEAD_FLAG_PAGES_UNKNOWN
        header += pack(">B", header_flags)
@ -205,7 +252,7 @@ class JBIG2StreamWriter:

        seg_num = 0
        for segment in segments:
-            seg_num = segment["number"]
+            seg_num = cast(int, segment["number"])

        eof_segment = self.get_eof_segment(seg_num + 1)
        data = self.encode_segment(eof_segment)
@ -215,7 +262,7 @@ class JBIG2StreamWriter:

        return data_len

-    def encode_segment(self, segment):
+    def encode_segment(self, segment: JBIG2Segment) -> bytes:
        data = b''
        for field_format, name in SEG_STRUCT:
            value = segment.get(name)
@ -227,7 +274,8 @@ class JBIG2StreamWriter:
            data += field
        return data

-    def encode_flags(self, value, segment):
+    def encode_flags(self, value: JBIG2SegmentFlags, segment: JBIG2Segment
+                     ) -> bytes:
        flags = 0
        if value.get("deferred"):
            flags |= HEADER_FLAG_DEFERRED
@ -237,17 +285,22 @@ class JBIG2StreamWriter:
                if value["page_assoc_long"] else flags
        else:
            flags |= HEADER_FLAG_PAGE_ASSOC_LONG \
-                if segment.get("page", 0) > 255 else flags
+                if cast(int, segment.get("page", 0)) > 255 else flags

        flags |= mask_value(SEG_TYPE_MASK, value["type"])

        return pack(">B", flags)

-    def encode_retention_flags(self, value, segment):
+    def encode_retention_flags(
+        self,
+        value: JBIG2RetentionFlags,
+        segment: JBIG2Segment
+    ) -> bytes:
        flags = []
        flags_format = ">B"
        ref_count = value["ref_count"]
-        retain_segments = value.get("retain_segments", [])
+        assert isinstance(ref_count, int)
+        retain_segments = cast(List[bool], value.get("retain_segments", []))

        if ref_count <= 4:
            flags_byte = mask_value(REF_COUNT_SHORT_MASK, ref_count)
@ -271,9 +324,9 @@ class JBIG2StreamWriter:

                flags.append(ret_byte)

-        ref_segments = value.get("ref_segments", [])
+        ref_segments = cast(List[int], value.get("ref_segments", []))

-        seg_num = segment["number"]
+        seg_num = cast(int, segment["number"])
        if seg_num <= 256:
            ref_format = "B"
        elif seg_num <= 65536:
@ -287,35 +340,31 @@ class JBIG2StreamWriter:

        return pack(flags_format, *flags)

-    def encode_data_length(self, value, segment):
+    def encode_data_length(self, value: int, segment: JBIG2Segment) -> bytes:
        data = pack(">L", value)
-        data += segment["raw_data"]
+        data += cast(bytes, segment["raw_data"])
        return data

-    def get_eop_segment(self, seg_number, page_number):
+    def get_eop_segment(
+        self,
+        seg_number: int,
+        page_number: int
+    ) -> JBIG2Segment:
        return {
            'data_length': 0,
            'flags': {'deferred': False, 'type': SEG_TYPE_END_OF_PAGE},
            'number': seg_number,
            'page_assoc': page_number,
            'raw_data': b'',
-            'retention_flags': {
-                'ref_count': 0,
-                'ref_segments': [],
-                'retain_segments': []
-            }
+            'retention_flags': JBIG2StreamWriter.EMPTY_RETENTION_FLAGS
        }

-    def get_eof_segment(self, seg_number):
+    def get_eof_segment(self, seg_number: int) -> JBIG2Segment:
        return {
            'data_length': 0,
            'flags': {'deferred': False, 'type': SEG_TYPE_END_OF_FILE},
            'number': seg_number,
            'page_assoc': 0,
            'raw_data': b'',
-            'retention_flags': {
-                'ref_count': 0,
-                'ref_segments': [],
-                'retain_segments': []
-            }
+            'retention_flags': JBIG2StreamWriter.EMPTY_RETENTION_FLAGS
        }
--- a/pdfminer/latin_enc.py
+++ b/pdfminer/latin_enc.py
@ -5,7 +5,12 @@ This table is extracted from PDF Reference Manual 1.6, pp.925

 """

-ENCODING = [
+from typing import List, Optional, Tuple
+
+EncodingRow = \
+    Tuple[str, Optional[int], Optional[int], Optional[int], Optional[int]]
+
+ENCODING: List[EncodingRow] = [
  # (name, std, mac, win, pdf)
  ('A', 65, 65, 65, 65),
  ('AE', 225, 174, 198, 198),
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@ -1,25 +1,36 @@
 import heapq
 import logging
+from typing import (Dict, Generic, Iterable, Iterator, List, Optional,
+                    Sequence, Set, Tuple, TypeVar, Union, cast)

 from .utils import INF
+from .utils import LTComponentT
+from .utils import Matrix
 from .utils import Plane
+from .utils import Point
+from .utils import Rect
 from .utils import apply_matrix_pt
 from .utils import bbox2str
 from .utils import fsplit
 from .utils import get_bound
 from .utils import matrix2str
 from .utils import uniq
+from .pdfcolor import PDFColorSpace
+from .pdftypes import PDFStream
+from .pdfinterp import Color
+from .pdfinterp import PDFGraphicState
+from .pdffont import PDFFont

 logger = logging.getLogger(__name__)


 class IndexAssigner:

-    def __init__(self, index=0):
+    def __init__(self, index: int = 0) -> None:
        self.index = index
        return

-    def run(self, obj):
+    def run(self, obj: "LTItem") -> None:
        if isinstance(obj, LTTextBox):
            obj.index = self.index
            self.index += 1
@ -57,14 +68,16 @@ class LAParams:
        figures.
    """

-    def __init__(self,
-                 line_overlap=0.5,
-                 char_margin=2.0,
-                 line_margin=0.5,
-                 word_margin=0.1,
-                 boxes_flow=0.5,
-                 detect_vertical=False,
-                 all_texts=False):
+    def __init__(
+        self,
+        line_overlap: float = 0.5,
+        char_margin: float = 2.0,
+        line_margin: float = 0.5,
+        word_margin: float = 0.1,
+        boxes_flow: Optional[float] = 0.5,
+        detect_vertical: bool = False,
+        all_texts: bool = False
+    ) -> None:
        self.line_overlap = line_overlap
        self.char_margin = char_margin
        self.line_margin = line_margin
@ -76,7 +89,7 @@ class LAParams:
        self._validate()
        return

-    def _validate(self):
+    def _validate(self) -> None:
        if self.boxes_flow is not None:
            boxes_flow_err_msg = ("LAParam boxes_flow should be None, or a "
                                  "number between -1 and +1")
@ -86,7 +99,7 @@ class LAParams:
            if not -1 <= self.boxes_flow <= 1:
                raise ValueError(boxes_flow_err_msg)

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<LAParams: char_margin=%.1f, line_margin=%.1f, ' \
               'word_margin=%.1f all_texts=%r>' % \
               (self.char_margin, self.line_margin, self.word_margin,
@ -96,7 +109,7 @@ class LAParams:
 class LTItem:
    """Interface for things that can be analyzed"""

-    def analyze(self, laparams):
+    def analyze(self, laparams: LAParams) -> None:
        """Perform the layout analysis."""
        return

@ -104,11 +117,11 @@ class LTItem:
 class LTText:
    """Interface for things that have text"""

-    def __repr__(self):
+    def __repr__(self) -> str:
        return ('<%s %r>' %
                (self.__class__.__name__, self.get_text()))

-    def get_text(self):
+    def get_text(self) -> str:
        """Text contained in this object"""
        raise NotImplementedError

@ -116,29 +129,29 @@ class LTText:
 class LTComponent(LTItem):
    """Object with a bounding box"""

-    def __init__(self, bbox):
+    def __init__(self, bbox: Rect) -> None:
        LTItem.__init__(self)
        self.set_bbox(bbox)
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return ('<%s %s>' %
                (self.__class__.__name__, bbox2str(self.bbox)))

    # Disable comparison.
-    def __lt__(self, _):
+    def __lt__(self, _: object) -> bool:
        raise ValueError

-    def __le__(self, _):
+    def __le__(self, _: object) -> bool:
        raise ValueError

-    def __gt__(self, _):
+    def __gt__(self, _: object) -> bool:
        raise ValueError

-    def __ge__(self, _):
+    def __ge__(self, _: object) -> bool:
        raise ValueError

-    def set_bbox(self, bbox):
+    def set_bbox(self, bbox: Rect) -> None:
        (x0, y0, x1, y1) = bbox
        self.x0 = x0
        self.y0 = y0
@ -149,39 +162,39 @@ class LTComponent(LTItem):
        self.bbox = bbox
        return

-    def is_empty(self):
+    def is_empty(self) -> bool:
        return self.width <= 0 or self.height <= 0

-    def is_hoverlap(self, obj):
+    def is_hoverlap(self, obj: "LTComponent") -> bool:
        assert isinstance(obj, LTComponent), str(type(obj))
        return obj.x0 <= self.x1 and self.x0 <= obj.x1

-    def hdistance(self, obj):
+    def hdistance(self, obj: "LTComponent") -> float:
        assert isinstance(obj, LTComponent), str(type(obj))
        if self.is_hoverlap(obj):
            return 0
        else:
            return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))

-    def hoverlap(self, obj):
+    def hoverlap(self, obj: "LTComponent") -> float:
        assert isinstance(obj, LTComponent), str(type(obj))
        if self.is_hoverlap(obj):
            return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
        else:
            return 0

-    def is_voverlap(self, obj):
+    def is_voverlap(self, obj: "LTComponent") -> bool:
        assert isinstance(obj, LTComponent), str(type(obj))
        return obj.y0 <= self.y1 and self.y0 <= obj.y1

-    def vdistance(self, obj):
+    def vdistance(self, obj: "LTComponent") -> float:
        assert isinstance(obj, LTComponent), str(type(obj))
        if self.is_voverlap(obj):
            return 0
        else:
            return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))

-    def voverlap(self, obj):
+    def voverlap(self, obj: "LTComponent") -> float:
        assert isinstance(obj, LTComponent), str(type(obj))
        if self.is_voverlap(obj):
            return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
@ -192,8 +205,16 @@ class LTComponent(LTItem):
 class LTCurve(LTComponent):
    """A generic Bezier curve"""

-    def __init__(self, linewidth, pts, stroke=False, fill=False, evenodd=False,
-                 stroking_color=None, non_stroking_color=None):
+    def __init__(
+        self,
+        linewidth: float,
+        pts: List[Point],
+        stroke: bool = False,
+        fill: bool = False,
+        evenodd: bool = False,
+        stroking_color: Optional[Color] = None,
+        non_stroking_color: Optional[Color] = None
+    ) -> None:
        LTComponent.__init__(self, get_bound(pts))
        self.pts = pts
        self.linewidth = linewidth
@ -204,7 +225,7 @@ class LTCurve(LTComponent):
        self.non_stroking_color = non_stroking_color
        return

-    def get_pts(self):
+    def get_pts(self) -> str:
        return ','.join('%.3f,%.3f' % p for p in self.pts)


@ -214,8 +235,17 @@ class LTLine(LTCurve):
    Could be used for separating text or figures.
    """

-    def __init__(self, linewidth, p0, p1, stroke=False, fill=False,
-                 evenodd=False, stroking_color=None, non_stroking_color=None):
+    def __init__(
+        self,
+        linewidth: float,
+        p0: Point,
+        p1: Point,
+        stroke: bool = False,
+        fill: bool = False,
+        evenodd: bool = False,
+        stroking_color: Optional[Color] = None,
+        non_stroking_color: Optional[Color] = None
+    ) -> None:
        LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd,
                         stroking_color, non_stroking_color)
        return
@ -227,8 +257,16 @@ class LTRect(LTCurve):
    Could be used for framing another pictures or figures.
    """

-    def __init__(self, linewidth, bbox, stroke=False, fill=False,
-                 evenodd=False, stroking_color=None,  non_stroking_color=None):
+    def __init__(
+        self,
+        linewidth: float,
+        bbox: Rect,
+        stroke: bool = False,
+        fill: bool = False,
+        evenodd: bool = False,
+        stroking_color: Optional[Color] = None,
+        non_stroking_color: Optional[Color] = None
+    ) -> None:
        (x0, y0, x1, y1) = bbox
        LTCurve.__init__(self, linewidth,
                         [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke,
@ -242,7 +280,7 @@ class LTImage(LTComponent):
    Embedded images can be in JPEG, Bitmap or JBIG2.
    """

-    def __init__(self, name, stream, bbox):
+    def __init__(self, name: str, stream: PDFStream, bbox: Rect) -> None:
        LTComponent.__init__(self, bbox)
        self.name = name
        self.stream = stream
@ -255,7 +293,7 @@ class LTImage(LTComponent):
            self.colorspace = [self.colorspace]
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return ('<%s(%s) %s %r>' %
                (self.__class__.__name__, self.name,
                 bbox2str(self.bbox), self.srcsize))
@ -269,19 +307,30 @@ class LTAnno(LTItem, LTText):
    according to the relationship between two characters (e.g. a space).
    """

-    def __init__(self, text):
+    def __init__(self, text: str) -> None:
        self._text = text
        return

-    def get_text(self):
+    def get_text(self) -> str:
        return self._text


 class LTChar(LTComponent, LTText):
    """Actual letter in the text as a Unicode string."""

-    def __init__(self, matrix, font, fontsize, scaling, rise,
-                 text, textwidth, textdisp, ncs, graphicstate):
+    def __init__(
+        self,
+        matrix: Matrix,
+        font: PDFFont,
+        fontsize: float,
+        scaling: float,
+        rise: float,
+        text: str,
+        textwidth: float,
+        textdisp: Union[float, Tuple[Optional[float], float]],
+        ncs: PDFColorSpace,
+        graphicstate: PDFGraphicState
+    ) -> None:
        LTText.__init__(self)
        self._text = text
        self.matrix = matrix
@ -292,6 +341,7 @@ class LTChar(LTComponent, LTText):
        # compute the boundary rectangle.
        if font.is_vertical():
            # vertical
+            assert isinstance(textdisp, tuple)
            (vx, vy) = textdisp
            if vx is None:
                vx = fontsize * 0.5
@ -320,114 +370,129 @@ class LTChar(LTComponent, LTText):
            self.size = self.height
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
                (self.__class__.__name__, bbox2str(self.bbox),
                 matrix2str(self.matrix), self.fontname, self.adv,
                 self.get_text()))

-    def get_text(self):
+    def get_text(self) -> str:
        return self._text

-    def is_compatible(self, obj):
+    def is_compatible(self, obj: object) -> bool:
        """Returns True if two characters can coexist in the same line."""
        return True


-class LTContainer(LTComponent):
+LTItemT = TypeVar('LTItemT', bound=LTItem)
+
+
+class LTContainer(LTComponent, Generic[LTItemT]):
    """Object that can be extended and analyzed"""

-    def __init__(self, bbox):
+    def __init__(self, bbox: Rect) -> None:
        LTComponent.__init__(self, bbox)
-        self._objs = []
+        self._objs: List[LTItemT] = []
        return

-    def __iter__(self):
+    def __iter__(self) -> Iterator[LTItemT]:
        return iter(self._objs)

-    def __len__(self):
+    def __len__(self) -> int:
        return len(self._objs)

-    def add(self, obj):
+    def add(self, obj: LTItemT) -> None:
        self._objs.append(obj)
        return

-    def extend(self, objs):
+    def extend(self, objs: Iterable[LTItemT]) -> None:
        for obj in objs:
            self.add(obj)
        return

-    def analyze(self, laparams):
+    def analyze(self, laparams: LAParams) -> None:
        for obj in self._objs:
            obj.analyze(laparams)
        return


-class LTExpandableContainer(LTContainer):
-    def __init__(self):
+class LTExpandableContainer(LTContainer[LTItemT]):
+    def __init__(self) -> None:
        LTContainer.__init__(self, (+INF, +INF, -INF, -INF))
        return

-    def add(self, obj):
-        LTContainer.add(self, obj)
+    # Incompatible override: we take an LTComponent (with bounding box), but
+    # the LTContainer base class only considers LTItem (no bounding box).
+    def add(self, obj: LTComponent) -> None:  # type: ignore[override]
+        LTContainer.add(self, cast(LTItemT, obj))
        self.set_bbox((min(self.x0, obj.x0), min(self.y0, obj.y0),
                       max(self.x1, obj.x1), max(self.y1, obj.y1)))
        return


-class LTTextContainer(LTExpandableContainer, LTText):
-    def __init__(self):
+class LTTextContainer(LTExpandableContainer[LTItemT], LTText):
+    def __init__(self) -> None:
        LTText.__init__(self)
        LTExpandableContainer.__init__(self)
        return

-    def get_text(self):
-        return ''.join(obj.get_text() for obj in self
+    def get_text(self) -> str:
+        return ''.join(cast(LTText, obj).get_text() for obj in self
                       if isinstance(obj, LTText))


-class LTTextLine(LTTextContainer):
+TextLineElement = Union[LTChar, LTAnno]
+
+
+class LTTextLine(LTTextContainer[TextLineElement]):
    """Contains a list of LTChar objects that represent a single text line.

    The characters are aligned either horizontally or vertically, depending on
    the text's writing mode.
    """

-    def __init__(self, word_margin):
-        LTTextContainer.__init__(self)
+    def __init__(self, word_margin: float) -> None:
+        super().__init__()
        self.word_margin = word_margin
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return ('<%s %s %r>' %
                (self.__class__.__name__, bbox2str(self.bbox),
                 self.get_text()))

-    def analyze(self, laparams):
+    def analyze(self, laparams: LAParams) -> None:
        LTTextContainer.analyze(self, laparams)
        LTContainer.add(self, LTAnno('\n'))
        return

-    def find_neighbors(self, plane, ratio):
+    def find_neighbors(self, plane: Plane[LTComponentT], ratio: float
+                       ) -> List["LTTextLine"]:
        raise NotImplementedError


 class LTTextLineHorizontal(LTTextLine):
-    def __init__(self, word_margin):
+    def __init__(self, word_margin: float) -> None:
        LTTextLine.__init__(self, word_margin)
-        self._x1 = +INF
+        self._x1: float = +INF
        return

-    def add(self, obj):
+    # Incompatible override: we take an LTComponent (with bounding box), but
+    # LTContainer only considers LTItem (no bounding box).
+    def add(self, obj: LTComponent) -> None:  # type: ignore[override]
        if isinstance(obj, LTChar) and self.word_margin:
            margin = self.word_margin * max(obj.width, obj.height)
            if self._x1 < obj.x0 - margin:
                LTContainer.add(self, LTAnno(' '))
        self._x1 = obj.x1
-        LTTextLine.add(self, obj)
+        super().add(obj)
        return

-    def find_neighbors(self, plane, ratio):
+    def find_neighbors(
+        self,
+        plane: Plane[LTComponentT],
+        ratio: float
+    ) -> List[LTTextLine]:
        """
        Finds neighboring LTTextLineHorizontals in the plane.

@ -445,45 +510,67 @@ class LTTextLineHorizontal(LTTextLine):
                     self._is_right_aligned_with(obj, tolerance=d) or
                     self._is_centrally_aligned_with(obj, tolerance=d)))]

-    def _is_left_aligned_with(self, other, tolerance=0):
+    def _is_left_aligned_with(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
        """
        Whether the left-hand edge of `other` is within `tolerance`.
        """
        return abs(other.x0 - self.x0) <= tolerance

-    def _is_right_aligned_with(self, other, tolerance=0):
+    def _is_right_aligned_with(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
        """
        Whether the right-hand edge of `other` is within `tolerance`.
        """
        return abs(other.x1 - self.x1) <= tolerance

-    def _is_centrally_aligned_with(self, other, tolerance=0):
+    def _is_centrally_aligned_with(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
        """
        Whether the horizontal center of `other` is within `tolerance`.
        """
        return abs(
            (other.x0 + other.x1) / 2 - (self.x0 + self.x1) / 2) <= tolerance

-    def _is_same_height_as(self, other, tolerance):
+    def _is_same_height_as(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
        return abs(other.height - self.height) <= tolerance


 class LTTextLineVertical(LTTextLine):
-    def __init__(self, word_margin):
+    def __init__(self, word_margin: float) -> None:
        LTTextLine.__init__(self, word_margin)
-        self._y0 = -INF
+        self._y0: float = -INF
        return

-    def add(self, obj):
+    # Incompatible override: we take an LTComponent (with bounding box), but
+    # LTContainer only considers LTItem (no bounding box).
+    def add(self, obj: LTComponent) -> None:  # type: ignore[override]
        if isinstance(obj, LTChar) and self.word_margin:
            margin = self.word_margin * max(obj.width, obj.height)
            if obj.y1 + margin < self._y0:
                LTContainer.add(self, LTAnno(' '))
        self._y0 = obj.y0
-        LTTextLine.add(self, obj)
+        super().add(obj)
        return

-    def find_neighbors(self, plane, ratio):
+    def find_neighbors(
+        self,
+        plane: Plane[LTComponentT],
+        ratio: float
+    ) -> List[LTTextLine]:
        """
        Finds neighboring LTTextLineVerticals in the plane.

@ -501,30 +588,42 @@ class LTTextLineVertical(LTTextLine):
                     self._is_upper_aligned_with(obj, tolerance=d) or
                     self._is_centrally_aligned_with(obj, tolerance=d)))]

-    def _is_lower_aligned_with(self, other, tolerance=0):
+    def _is_lower_aligned_with(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
        """
        Whether the lower edge of `other` is within `tolerance`.
        """
        return abs(other.y0 - self.y0) <= tolerance

-    def _is_upper_aligned_with(self, other, tolerance=0):
+    def _is_upper_aligned_with(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
        """
        Whether the upper edge of `other` is within `tolerance`.
        """
        return abs(other.y1 - self.y1) <= tolerance

-    def _is_centrally_aligned_with(self, other, tolerance=0):
+    def _is_centrally_aligned_with(
+        self,
+        other: LTComponent,
+        tolerance: float = 0
+    ) -> bool:
        """
        Whether the vertical center of `other` is within `tolerance`.
        """
        return abs(
            (other.y0 + other.y1) / 2 - (self.y0 + self.y1) / 2) <= tolerance

-    def _is_same_width_as(self, other, tolerance):
+    def _is_same_width_as(self, other: LTComponent, tolerance: float) -> bool:
        return abs(other.width - self.width) <= tolerance


-class LTTextBox(LTTextContainer):
+class LTTextBox(LTTextContainer[LTTextLine]):
    """Represents a group of text chunks in a rectangular area.

    Note that this box is created by geometric analysis and does not
@ -532,72 +631,86 @@ class LTTextBox(LTTextContainer):
    of LTTextLine objects.
    """

-    def __init__(self):
+    def __init__(self) -> None:
        LTTextContainer.__init__(self)
-        self.index = -1
+        self.index: int = -1
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return ('<%s(%s) %s %r>' %
                (self.__class__.__name__,
                 self.index, bbox2str(self.bbox), self.get_text()))

+    def get_writing_mode(self) -> str:
+        raise NotImplementedError
+

 class LTTextBoxHorizontal(LTTextBox):
-    def analyze(self, laparams):
-        LTTextBox.analyze(self, laparams)
+    def analyze(self, laparams: LAParams) -> None:
+        super().analyze(laparams)
        self._objs.sort(key=lambda obj: -obj.y1)
        return

-    def get_writing_mode(self):
+    def get_writing_mode(self) -> str:
        return 'lr-tb'


 class LTTextBoxVertical(LTTextBox):
-    def analyze(self, laparams):
-        LTTextBox.analyze(self, laparams)
+    def analyze(self, laparams: LAParams) -> None:
+        super().analyze(laparams)
        self._objs.sort(key=lambda obj: -obj.x1)
        return

-    def get_writing_mode(self):
+    def get_writing_mode(self) -> str:
        return 'tb-rl'


-class LTTextGroup(LTTextContainer):
-    def __init__(self, objs):
-        LTTextContainer.__init__(self)
+TextGroupElement = Union[LTTextBox, "LTTextGroup"]
+
+
+class LTTextGroup(LTTextContainer[TextGroupElement]):
+    def __init__(self, objs: Iterable[TextGroupElement]) -> None:
+        super().__init__()
        self.extend(objs)
        return


 class LTTextGroupLRTB(LTTextGroup):
-    def analyze(self, laparams):
-        LTTextGroup.analyze(self, laparams)
+    def analyze(self, laparams: LAParams) -> None:
+        super().analyze(laparams)
+        assert laparams.boxes_flow is not None
+        boxes_flow = laparams.boxes_flow
        # reorder the objects from top-left to bottom-right.
        self._objs.sort(
-            key=lambda obj: (1 - laparams.boxes_flow) * obj.x0
-            - (1 + laparams.boxes_flow) * (obj.y0 + obj.y1))
+            key=lambda obj: (1 - boxes_flow) * obj.x0
+            - (1 + boxes_flow) * (obj.y0 + obj.y1))
        return


 class LTTextGroupTBRL(LTTextGroup):
-    def analyze(self, laparams):
-        LTTextGroup.analyze(self, laparams)
+    def analyze(self, laparams: LAParams) -> None:
+        super().analyze(laparams)
+        assert laparams.boxes_flow is not None
+        boxes_flow = laparams.boxes_flow
        # reorder the objects from top-right to bottom-left.
        self._objs.sort(
-            key=lambda obj: - (1 + laparams.boxes_flow) * (obj.x0 + obj.x1)
-                            - (1 - laparams.boxes_flow) * obj.y1)
+            key=lambda obj: - (1 + boxes_flow) * (obj.x0 + obj.x1)
+                            - (1 - boxes_flow) * obj.y1)
        return


-class LTLayoutContainer(LTContainer):
-    def __init__(self, bbox):
+class LTLayoutContainer(LTContainer[LTComponent]):
+    def __init__(self, bbox: Rect) -> None:
        LTContainer.__init__(self, bbox)
-        self.groups = None
+        self.groups: Optional[List[LTTextGroup]] = None
        return

    # group_objects: group text object to textlines.
-    def group_objects(self, laparams, objs):
+    def group_objects(
+        self,
+        laparams: LAParams,
+        objs: Iterable[LTComponent]
+    ) -> Iterator[LTTextLine]:
        obj0 = None
        line = None
        for obj1 in objs:
@ -667,15 +780,20 @@ class LTLayoutContainer(LTContainer):
            obj0 = obj1
        if line is None:
            line = LTTextLineHorizontal(laparams.word_margin)
+            assert obj0 is not None
            line.add(obj0)
        yield line
        return

-    def group_textlines(self, laparams, lines):
+    def group_textlines(
+        self,
+        laparams: LAParams,
+        lines: Iterable[LTTextLine]
+    ) -> Iterator[LTTextBox]:
        """Group neighboring lines to textboxes"""
-        plane = Plane(self.bbox)
+        plane: Plane[LTTextLine] = Plane(self.bbox)
        plane.extend(lines)
-        boxes = {}
+        boxes: Dict[LTTextLine, LTTextBox] = {}
        for line in lines:
            neighbors = line.find_neighbors(plane, laparams.line_margin)
            members = [line]
@ -684,7 +802,7 @@ class LTLayoutContainer(LTContainer):
                if obj1 in boxes:
                    members.extend(boxes.pop(obj1))
            if isinstance(line, LTTextLineHorizontal):
-                box = LTTextBoxHorizontal()
+                box: LTTextBox = LTTextBoxHorizontal()
            else:
                box = LTTextBoxVertical()
            for obj in uniq(members):
@ -702,7 +820,11 @@ class LTLayoutContainer(LTContainer):
                yield box
        return

-    def group_textboxes(self, laparams, boxes):
+    def group_textboxes(
+        self,
+        laparams: LAParams,
+        boxes: Sequence[LTTextBox]
+    ) -> List[LTTextGroup]:
        """Group textboxes hierarchically.

        Get pair-wise distances, via dist func defined below, and then merge
@ -718,10 +840,13 @@ class LTLayoutContainer(LTContainer):

        :param laparams: LAParams object.
        :param boxes: All textbox objects to be grouped.
-        :return: a list that has only one element, the final top level textbox.
+        :return: a list that has only one element, the final top level group.
        """

-        def dist(obj1, obj2):
+        ElementT = Union[LTTextBox, LTTextGroup]
+        plane: Plane[ElementT] = Plane(self.bbox)
+
+        def dist(obj1: LTComponent, obj2: LTComponent) -> float:
            """A distance function between two TextBoxes.

            Consider the bounding rectangle for obj1 and obj2.
@ -740,7 +865,7 @@ class LTLayoutContainer(LTContainer):
            return (x1 - x0) * (y1 - y0) \
                - obj1.width*obj1.height - obj2.width*obj2.height

-        def isany(obj1, obj2):
+        def isany(obj1: ElementT, obj2: ElementT) -> Set[ElementT]:
            """Check if there's any other object between obj1 and obj2."""
            x0 = min(obj1.x0, obj2.x0)
            y0 = min(obj1.y0, obj2.y0)
@ -749,16 +874,15 @@ class LTLayoutContainer(LTContainer):
            objs = set(plane.find((x0, y0, x1, y1)))
            return objs.difference((obj1, obj2))

-        dists = []
+        dists: List[Tuple[bool, float, int, int, ElementT, ElementT]] = []
        for i in range(len(boxes)):
-            obj1 = boxes[i]
+            box1 = boxes[i]
            for j in range(i+1, len(boxes)):
-                obj2 = boxes[j]
-                dists.append((False, dist(obj1, obj2), id(obj1), id(obj2),
-                              obj1, obj2))
+                box2 = boxes[j]
+                dists.append((False, dist(box1, box2), id(box1), id(box2),
+                              box1, box2))
        heapq.heapify(dists)

-        plane = Plane(self.bbox)
        plane.extend(boxes)
        done = set()
        while len(dists) > 0:
@ -770,7 +894,7 @@ class LTLayoutContainer(LTContainer):
                    continue
                if isinstance(obj1, (LTTextBoxVertical, LTTextGroupTBRL)) or \
                        isinstance(obj2, (LTTextBoxVertical, LTTextGroupTBRL)):
-                    group = LTTextGroupTBRL([obj1, obj2])
+                    group: LTTextGroup = LTTextGroupTBRL([obj1, obj2])
                else:
                    group = LTTextGroupLRTB([obj1, obj2])
                plane.remove(obj1)
@ -781,9 +905,10 @@ class LTLayoutContainer(LTContainer):
                    heapq.heappush(dists, (False, dist(group, other),
                                           id(group), id(other), group, other))
                plane.add(group)
-        return list(plane)
+        # By now only groups are in the plane
+        return list(cast(LTTextGroup, g) for g in plane)

-    def analyze(self, laparams):
+    def analyze(self, laparams: LAParams) -> None:
        # textobjs is a list of LTChar objects, i.e.
        # it has all the individual characters in the page.
        (textobjs, otherobjs) = fsplit(lambda obj: isinstance(obj, LTChar),
@ -801,7 +926,7 @@ class LTLayoutContainer(LTContainer):
            for textbox in textboxes:
                textbox.analyze(laparams)

-            def getkey(box):
+            def getkey(box: LTTextBox) -> Tuple[int, float, float]:
                if isinstance(box, LTTextBoxVertical):
                    return (0, -box.x1, -box.y0)
                else:
@ -814,7 +939,8 @@ class LTLayoutContainer(LTContainer):
                group.analyze(laparams)
                assigner.run(group)
            textboxes.sort(key=lambda box: box.index)
-        self._objs = textboxes + otherobjs + empties
+        self._objs = (cast(List[LTComponent], textboxes) + otherobjs
+                      + cast(List[LTComponent], empties))
        return


@ -826,7 +952,7 @@ class LTFigure(LTLayoutContainer):
    recursively.
    """

-    def __init__(self, name, bbox, matrix):
+    def __init__(self, name: str, bbox: Rect, matrix: Matrix) -> None:
        self.name = name
        self.matrix = matrix
        (x, y, w, h) = bbox
@ -835,12 +961,12 @@ class LTFigure(LTLayoutContainer):
        LTLayoutContainer.__init__(self, bbox)
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return ('<%s(%s) %s matrix=%s>' %
                (self.__class__.__name__, self.name,
                 bbox2str(self.bbox), matrix2str(self.matrix)))

-    def analyze(self, laparams):
+    def analyze(self, laparams: LAParams) -> None:
        if not laparams.all_texts:
            return
        LTLayoutContainer.analyze(self, laparams)
@ -854,13 +980,13 @@ class LTPage(LTLayoutContainer):
    LTCurve and LTLine.
    """

-    def __init__(self, pageid, bbox, rotate=0):
+    def __init__(self, pageid: int, bbox: Rect, rotate: float = 0) -> None:
        LTLayoutContainer.__init__(self, bbox)
        self.pageid = pageid
        self.rotate = rotate
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return ('<%s(%r) %s rotate=%r>' %
                (self.__class__.__name__, self.pageid,
                 bbox2str(self.bbox), self.rotate))
--- a/pdfminer/lzw.py
+++ b/pdfminer/lzw.py
@ -1,5 +1,6 @@
 from io import BytesIO
 import logging
+from typing import BinaryIO, Iterator, List, Optional, cast


 logger = logging.getLogger(__name__)
@ -11,16 +12,17 @@ class CorruptDataError(Exception):

 class LZWDecoder:

-    def __init__(self, fp):
+    def __init__(self, fp: BinaryIO) -> None:
        self.fp = fp
        self.buff = 0
        self.bpos = 8
        self.nbits = 9
-        self.table = None
-        self.prevbuf = None
+        # NB: self.table stores None only in indices 256 and 257
+        self.table: Optional[List[Optional[bytes]]] = None
+        self.prevbuf: Optional[bytes] = None
        return

-    def readbits(self, bits):
+    def readbits(self, bits: int) -> int:
        v = 0
        while 1:
            # the number of remaining bits we can get from the current buffer.
@ -45,7 +47,7 @@ class LZWDecoder:
                self.bpos = 0
        return v

-    def feed(self, code):
+    def feed(self, code: int) -> bytes:
        x = b''
        if code == 256:
            self.table = [bytes((c,)) for c in range(256)]  # 0-255
@ -56,14 +58,16 @@ class LZWDecoder:
        elif code == 257:
            pass
        elif not self.prevbuf:
-            x = self.prevbuf = self.table[code]
+            assert self.table is not None
+            x = self.prevbuf = cast(bytes, self.table[code])  # assume not None
        else:
+            assert self.table is not None
            if code < len(self.table):
-                x = self.table[code]
+                x = cast(bytes, self.table[code])  # assume not None
                self.table.append(self.prevbuf+x[:1])
            elif code == len(self.table):
                self.table.append(self.prevbuf+self.prevbuf[:1])
-                x = self.table[code]
+                x = cast(bytes, self.table[code])
            else:
                raise CorruptDataError
            table_length = len(self.table)
@ -76,7 +80,7 @@ class LZWDecoder:
            self.prevbuf = x
        return x

-    def run(self):
+    def run(self) -> Iterator[bytes]:
        while 1:
            try:
                code = self.readbits(self.nbits)
@ -88,12 +92,13 @@ class LZWDecoder:
                # just ignore corrupt data and stop yielding there
                break
            yield x
+            assert self.table is not None
            logger.debug('nbits=%d, code=%d, output=%r, table=%r'
                         % (self.nbits, code, x, self.table[258:]))
        return


-def lzwdecode(data):
+def lzwdecode(data: bytes) -> bytes:
    fp = BytesIO(data)
    s = LZWDecoder(fp).run()
    return b''.join(s)
--- a/pdfminer/pdfcolor.py
+++ b/pdfminer/pdfcolor.py
@ -1,4 +1,5 @@
 import collections
+from typing import Dict
 from .psparser import LIT


@ -9,17 +10,17 @@ LITERAL_DEVICE_CMYK = LIT('DeviceCMYK')

 class PDFColorSpace:

-    def __init__(self, name, ncomponents):
+    def __init__(self, name: str, ncomponents: int) -> None:
        self.name = name
        self.ncomponents = ncomponents
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFColorSpace: %s, ncomponents=%d>' % \
               (self.name, self.ncomponents)


-PREDEFINED_COLORSPACE = collections.OrderedDict()
+PREDEFINED_COLORSPACE: Dict[str, PDFColorSpace] = collections.OrderedDict()

 for (name, n) in [
    ('DeviceGray', 1),  # default value first
--- a/pdfminer/pdfdevice.py
+++ b/pdfminer/pdfdevice.py
@ -1,66 +1,116 @@
+from pdfminer.psparser import PSLiteral
+from typing import (BinaryIO, Iterable, List, Optional, Sequence,
+                    TYPE_CHECKING, Union, cast)
 from . import utils
+from .utils import Matrix, Point, Rect, PathSegment
+from .pdfcolor import PDFColorSpace
+from .pdffont import PDFFont
 from .pdffont import PDFUnicodeNotDefined
+from .pdfpage import PDFPage
+from .pdftypes import PDFStream
+
+if TYPE_CHECKING:
+    from .pdfinterp import PDFGraphicState
+    from .pdfinterp import PDFResourceManager
+    from .pdfinterp import PDFTextState
+    from .pdfinterp import PDFStackT
+
+
+PDFTextSeq = Iterable[Union[int, float, bytes]]


 class PDFDevice:
    """Translate the output of PDFPageInterpreter to the output that is needed
    """

-    def __init__(self, rsrcmgr):
+    def __init__(self, rsrcmgr: "PDFResourceManager") -> None:
        self.rsrcmgr = rsrcmgr
-        self.ctm = None
+        self.ctm: Optional[Matrix] = None
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFDevice>'

-    def __enter__(self):
+    def __enter__(self) -> "PDFDevice":
        return self

-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(
+        self,
+        exc_type: object,
+        exc_val: object,
+        exc_tb: object
+    ) -> None:
        self.close()

-    def close(self):
+    def close(self) -> None:
        return

-    def set_ctm(self, ctm):
+    def set_ctm(self, ctm: Matrix) -> None:
        self.ctm = ctm
        return

-    def begin_tag(self, tag, props=None):
+    def begin_tag(
+        self,
+        tag: PSLiteral,
+        props: Optional["PDFStackT"] = None
+    ) -> None:
        return

-    def end_tag(self):
+    def end_tag(self) -> None:
        return

-    def do_tag(self, tag, props=None):
+    def do_tag(
+        self,
+        tag: PSLiteral,
+        props: Optional["PDFStackT"] = None
+    ) -> None:
        return

-    def begin_page(self, page, ctm):
+    def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
        return

-    def end_page(self, page):
+    def end_page(self, page: PDFPage) -> None:
        return

-    def begin_figure(self, name, bbox, matrix):
+    def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None:
        return

-    def end_figure(self, name):
+    def end_figure(self, name: str) -> None:
        return

-    def paint_path(self, graphicstate, stroke, fill, evenodd, path):
+    def paint_path(
+        self,
+        graphicstate: "PDFGraphicState",
+        stroke: bool,
+        fill: bool,
+        evenodd: bool,
+        path: Sequence[PathSegment]
+    ) -> None:
        return

-    def render_image(self, name, stream):
+    def render_image(self, name: str, stream: PDFStream) -> None:
        return

-    def render_string(self, textstate, seq, ncs, graphicstate):
+    def render_string(
+        self,
+        textstate: "PDFTextState",
+        seq: PDFTextSeq,
+        ncs: PDFColorSpace,
+        graphicstate: "PDFGraphicState"
+    ) -> None:
        return


 class PDFTextDevice(PDFDevice):

-    def render_string(self, textstate, seq, ncs, graphicstate):
+    def render_string(
+        self,
+        textstate: "PDFTextState",
+        seq: PDFTextSeq,
+        ncs: PDFColorSpace,
+        graphicstate: "PDFGraphicState"
+    ) -> None:
+        assert self.ctm is not None
        matrix = utils.mult_matrix(textstate.matrix, self.ctm)
        font = textstate.font
        fontsize = textstate.fontsize
@ -68,6 +118,7 @@ class PDFTextDevice(PDFDevice):
        charspace = textstate.charspace * scaling
        wordspace = textstate.wordspace * scaling
        rise = textstate.rise
+        assert font is not None
        if font.is_multibyte():
            wordspace = 0
        dxscale = .001 * fontsize * scaling
@ -83,13 +134,25 @@ class PDFTextDevice(PDFDevice):
                graphicstate)
        return

-    def render_string_horizontal(self, seq, matrix, pos,
-                                 font, fontsize, scaling, charspace, wordspace,
-                                 rise, dxscale, ncs, graphicstate):
+    def render_string_horizontal(
+        self,
+        seq: PDFTextSeq,
+        matrix: Matrix,
+        pos: Point,
+        font: PDFFont,
+        fontsize: float,
+        scaling: float,
+        charspace: float,
+        wordspace: float,
+        rise: float,
+        dxscale: float,
+        ncs: PDFColorSpace,
+        graphicstate: "PDFGraphicState"
+    ) -> Point:
        (x, y) = pos
        needcharspace = False
        for obj in seq:
-            if utils.isnumber(obj):
+            if isinstance(obj, (int, float)):
                x -= obj*dxscale
                needcharspace = True
            else:
@ -104,13 +167,25 @@ class PDFTextDevice(PDFDevice):
                    needcharspace = True
        return (x, y)

-    def render_string_vertical(self, seq, matrix, pos,
-                               font, fontsize, scaling, charspace, wordspace,
-                               rise, dxscale, ncs, graphicstate):
+    def render_string_vertical(
+        self,
+        seq: PDFTextSeq,
+        matrix: Matrix,
+        pos: Point,
+        font: PDFFont,
+        fontsize: float,
+        scaling: float,
+        charspace: float,
+        wordspace: float,
+        rise: float,
+        dxscale: float,
+        ncs: PDFColorSpace,
+        graphicstate: "PDFGraphicState"
+    ) -> Point:
        (x, y) = pos
        needcharspace = False
        for obj in seq:
-            if utils.isnumber(obj):
+            if isinstance(obj, (int, float)):
                y -= obj*dxscale
                needcharspace = True
            else:
@ -125,23 +200,44 @@ class PDFTextDevice(PDFDevice):
                    needcharspace = True
        return (x, y)

-    def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
-                    graphicstate):
+    def render_char(
+        self,
+        matrix: Matrix,
+        font: PDFFont,
+        fontsize: float,
+        scaling: float,
+        rise: float,
+        cid: int,
+        ncs: PDFColorSpace,
+        graphicstate: "PDFGraphicState"
+    ) -> float:
        return 0


 class TagExtractor(PDFDevice):

-    def __init__(self, rsrcmgr, outfp, codec='utf-8'):
+    def __init__(
+        self,
+        rsrcmgr: "PDFResourceManager",
+        outfp: BinaryIO,
+        codec: str = 'utf-8'
+    ) -> None:
        PDFDevice.__init__(self, rsrcmgr)
        self.outfp = outfp
        self.codec = codec
        self.pageno = 0
-        self._stack = []
+        self._stack: List[PSLiteral] = []
        return

-    def render_string(self, textstate, seq, ncs, graphicstate):
+    def render_string(
+        self,
+        textstate: "PDFTextState",
+        seq: PDFTextSeq,
+        ncs: PDFColorSpace,
+        graphicstate: "PDFGraphicState"
+    ) -> None:
        font = textstate.font
+        assert font is not None
        text = ''
        for obj in seq:
            if isinstance(obj, str):
@ -158,40 +254,42 @@ class TagExtractor(PDFDevice):
        self._write(utils.enc(text))
        return

-    def begin_page(self, page, ctm):
+    def begin_page(self, page: PDFPage, ctm: Matrix) -> None:
        output = '<page id="%s" bbox="%s" rotate="%d">' %\
                 (self.pageno, utils.bbox2str(page.mediabox), page.rotate)
        self._write(output)
        return

-    def end_page(self, page):
+    def end_page(self, page: PDFPage) -> None:
        self._write('</page>\n')
        self.pageno += 1
        return

-    def begin_tag(self, tag, props=None):
+    def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None
+                  ) -> None:
        s = ''
        if isinstance(props, dict):
            s = ''.join([
                ' {}="{}"'.format(utils.enc(k), utils.make_compat_str(v))
                for (k, v) in sorted(props.items())
            ])
-        out_s = '<{}{}>'.format(utils.enc(tag.name), s)
+        out_s = '<{}{}>'.format(utils.enc(cast(str, tag.name)), s)
        self._write(out_s)
        self._stack.append(tag)
        return

-    def end_tag(self):
+    def end_tag(self) -> None:
        assert self._stack, str(self.pageno)
        tag = self._stack.pop(-1)
-        out_s = '</%s>' % utils.enc(tag.name)
+        out_s = '</%s>' % utils.enc(cast(str, tag.name))
        self._write(out_s)
        return

-    def do_tag(self, tag, props=None):
+    def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None
+               ) -> None:
        self.begin_tag(tag, props)
        self._stack.pop(-1)
        return

-    def _write(self, s: str):
+    def _write(self, s: str) -> None:
        self.outfp.write(s.encode(self.codec))
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@ -2,16 +2,18 @@ import logging
 import re
 import struct
 from hashlib import sha256, md5, sha384, sha512
+from typing import (Any, Callable, Dict, Iterable, Iterator, KeysView, List,
+                    Optional, Sequence, Tuple, Type, Union, cast)

 from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

 from . import settings
 from .arcfour import Arcfour
-from .pdfparser import PDFSyntaxError, PDFStreamParser
-from .pdftypes import PDFException, uint_value, PDFTypeError, PDFStream, \
+from .pdfparser import PDFSyntaxError, PDFParser, PDFStreamParser
+from .pdftypes import DecipherCallable, PDFException, PDFTypeError, PDFStream,\
    PDFObjectNotFound, decipher_all, int_value, str_value, list_value, \
-    dict_value, stream_value
+    uint_value, dict_value, stream_value
 from .psparser import PSEOF, literal_name, LIT, KWD
 from .utils import choplist, nunpack, decode_text

@ -51,7 +53,7 @@ class PDFTextExtractionNotAllowed(PDFEncryptionError):


 class PDFTextExtractionNotAllowedError(PDFTextExtractionNotAllowed):
-    def __init__(self, *args):
+    def __init__(self, *args: object) -> None:
        from warnings import warn
        warn('PDFTextExtractionNotAllowedError will be removed in the future. '
             'Use PDFTextExtractionNotAllowed instead.', DeprecationWarning)
@ -65,31 +67,33 @@ LITERAL_CATALOG = LIT('Catalog')


 class PDFBaseXRef:
-
-    def get_trailer(self):
+    def get_trailer(self) -> Dict[str, Any]:
        raise NotImplementedError

-    def get_objids(self):
+    def get_objids(self) -> Iterable[int]:
        return []

    # Must return
    #     (strmid, index, genno)
    #  or (None, pos, genno)
-    def get_pos(self, objid):
+    def get_pos(self, objid: int) -> Tuple[Optional[int], int, int]:
        raise KeyError(objid)

+    def load(self, parser: PDFParser) -> None:
+        raise NotImplementedError
+

 class PDFXRef(PDFBaseXRef):

-    def __init__(self):
-        self.offsets = {}
-        self.trailer = {}
+    def __init__(self) -> None:
+        self.offsets: Dict[int, Tuple[Optional[int], int, int]] = {}
+        self.trailer: Dict[str, Any] = {}
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFXRef: offsets=%r>' % (self.offsets.keys())

-    def load(self, parser):
+    def load(self, parser: PDFParser) -> None:
        while True:
            try:
                (pos, line) = parser.nextline()
@ -123,15 +127,15 @@ class PDFXRef(PDFBaseXRef):
                    error_msg = 'Invalid XRef format: {!r}, line={!r}'\
                        .format(parser, line)
                    raise PDFNoValidXRef(error_msg)
-                (pos, genno, use) = f
-                if use != b'n':
+                (pos_b, genno_b, use_b) = f
+                if use_b != b'n':
                    continue
-                self.offsets[objid] = (None, int(pos), int(genno))
+                self.offsets[objid] = (None, int(pos_b), int(genno_b))
        log.info('xref objects: %r', self.offsets)
        self.load_trailer(parser)
        return

-    def load_trailer(self, parser):
+    def load_trailer(self, parser: PDFParser) -> None:
        try:
            (_, kwd) = parser.nexttoken()
            assert kwd is KWD(b'trailer'), str(kwd)
@ -145,13 +149,13 @@ class PDFXRef(PDFBaseXRef):
        log.debug('trailer=%r', self.trailer)
        return

-    def get_trailer(self):
+    def get_trailer(self) -> Dict[str, Any]:
        return self.trailer

-    def get_objids(self):
+    def get_objids(self) -> KeysView[int]:
        return self.offsets.keys()

-    def get_pos(self, objid):
+    def get_pos(self, objid: int) -> Tuple[Optional[int], int, int]:
        try:
            return self.offsets[objid]
        except KeyError:
@ -160,30 +164,30 @@ class PDFXRef(PDFBaseXRef):

 class PDFXRefFallback(PDFXRef):

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFXRefFallback: offsets=%r>' % (self.offsets.keys())

    PDFOBJ_CUE = re.compile(r'^(\d+)\s+(\d+)\s+obj\b')

-    def load(self, parser):
+    def load(self, parser: PDFParser) -> None:
        parser.seek(0)
        while 1:
            try:
-                (pos, line) = parser.nextline()
+                (pos, line_bytes) = parser.nextline()
            except PSEOF:
                break
-            if line.startswith(b'trailer'):
+            if line_bytes.startswith(b'trailer'):
                parser.seek(pos)
                self.load_trailer(parser)
                log.info('trailer: %r', self.trailer)
                break
-            line = line.decode('latin-1')  # default pdf encoding
+            line = line_bytes.decode('latin-1')  # default pdf encoding
            m = self.PDFOBJ_CUE.match(line)
            if not m:
                continue
-            (objid, genno) = m.groups()
-            objid = int(objid)
-            genno = int(genno)
+            (objid_s, genno_s) = m.groups()
+            objid = int(objid_s)
+            genno = int(genno_s)
            self.offsets[objid] = (None, pos, genno)
            # expand ObjStm.
            parser.seek(pos)
@ -198,11 +202,11 @@ class PDFXRefFallback(PDFXRef):
                        raise PDFSyntaxError('N is not defined: %r' % stream)
                    n = 0
                parser1 = PDFStreamParser(stream.get_data())
-                objs = []
+                objs: List[int] = []
                try:
                    while 1:
                        (_, obj) = parser1.nextobject()
-                        objs.append(obj)
+                        objs.append(cast(int, obj))
                except PSEOF:
                    pass
                n = min(n, len(objs)//2)
@ -214,17 +218,19 @@ class PDFXRefFallback(PDFXRef):

 class PDFXRefStream(PDFBaseXRef):

-    def __init__(self):
-        self.data = None
-        self.entlen = None
-        self.fl1 = self.fl2 = self.fl3 = None
-        self.ranges = []
+    def __init__(self) -> None:
+        self.data: Optional[bytes] = None
+        self.entlen: Optional[int] = None
+        self.fl1: Optional[int] = None
+        self.fl2: Optional[int] = None
+        self.fl3: Optional[int] = None
+        self.ranges: List[Tuple[int, int]] = []
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFXRefStream: ranges=%r>' % (self.ranges)

-    def load(self, parser):
+    def load(self, parser: PDFParser) -> None:
        (_, objid) = parser.nexttoken()  # ignored
        (_, genno) = parser.nexttoken()  # ignored
        (_, kwd) = parser.nexttoken()
@ -236,8 +242,11 @@ class PDFXRefStream(PDFBaseXRef):
        index_array = stream.get('Index', (0, size))
        if len(index_array) % 2 != 0:
            raise PDFSyntaxError('Invalid index number')
-        self.ranges.extend(choplist(2, index_array))
+        self.ranges.extend(cast(Iterator[Tuple[int, int]],
+                                choplist(2, index_array)))
        (self.fl1, self.fl2, self.fl3) = stream['W']
+        assert (self.fl1 is not None and self.fl2 is not None
+                and self.fl3 is not None)
        self.data = stream.get_data()
        self.entlen = self.fl1+self.fl2+self.fl3
        self.trailer = stream.attrs
@ -246,12 +255,14 @@ class PDFXRefStream(PDFBaseXRef):
                 self.fl1, self.fl2, self.fl3)
        return

-    def get_trailer(self):
+    def get_trailer(self) -> Dict[str, Any]:
        return self.trailer

-    def get_objids(self):
+    def get_objids(self) -> Iterator[int]:
        for (start, nobjs) in self.ranges:
            for i in range(nobjs):
+                assert self.entlen is not None
+                assert self.data is not None
                offset = self.entlen * i
                ent = self.data[offset:offset+self.entlen]
                f1 = nunpack(ent[:self.fl1], 1)
@ -259,7 +270,7 @@ class PDFXRefStream(PDFBaseXRef):
                    yield start+i
        return

-    def get_pos(self, objid):
+    def get_pos(self, objid: int) -> Tuple[Optional[int], int, int]:
        index = 0
        for (start, nobjs) in self.ranges:
            if start <= objid and objid < start+nobjs:
@ -269,6 +280,10 @@ class PDFXRefStream(PDFBaseXRef):
                index += nobjs
        else:
            raise KeyError(objid)
+        assert self.entlen is not None
+        assert self.data is not None
+        assert (self.fl1 is not None and self.fl2 is not None
+                and self.fl3 is not None)
        offset = self.entlen * index
        ent = self.data[offset:offset+self.entlen]
        f1 = nunpack(ent[:self.fl1], 1)
@ -287,16 +302,21 @@ class PDFStandardSecurityHandler:

    PASSWORD_PADDING = (b'(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08'
                        b'..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz')
-    supported_revisions = (2, 3)
+    supported_revisions: Tuple[int, ...] = (2, 3)

-    def __init__(self, docid, param, password=''):
+    def __init__(
+        self,
+        docid: Sequence[bytes],
+        param: Dict[str, Any],
+        password: str = ''
+    ) -> None:
        self.docid = docid
        self.param = param
        self.password = password
        self.init()
        return

-    def init(self):
+    def init(self) -> None:
        self.init_params()
        if self.r not in self.supported_revisions:
            error_msg = 'Unsupported revision: param=%r' % self.param
@ -304,7 +324,7 @@ class PDFStandardSecurityHandler:
        self.init_key()
        return

-    def init_params(self):
+    def init_params(self) -> None:
        self.v = int_value(self.param.get('V', 0))
        self.r = int_value(self.param['R'])
        self.p = uint_value(self.param['P'], 32)
@ -313,22 +333,22 @@ class PDFStandardSecurityHandler:
        self.length = int_value(self.param.get('Length', 40))
        return

-    def init_key(self):
+    def init_key(self) -> None:
        self.key = self.authenticate(self.password)
        if self.key is None:
            raise PDFPasswordIncorrect
        return

-    def is_printable(self):
+    def is_printable(self) -> bool:
        return bool(self.p & 4)

-    def is_modifiable(self):
+    def is_modifiable(self) -> bool:
        return bool(self.p & 8)

-    def is_extractable(self):
+    def is_extractable(self) -> bool:
        return bool(self.p & 16)

-    def compute_u(self, key):
+    def compute_u(self, key: bytes) -> bytes:
        if self.r == 2:
            # Algorithm 3.4
            return Arcfour(key).encrypt(self.PASSWORD_PADDING)  # 2
@ -343,7 +363,7 @@ class PDFStandardSecurityHandler:
            result += result  # 6
            return result

-    def compute_encryption_key(self, password):
+    def compute_encryption_key(self, password: bytes) -> bytes:
        # Algorithm 3.2
        password = (password + self.PASSWORD_PADDING)[:32]  # 1
        hash = md5(password)  # 2
@ -352,7 +372,7 @@ class PDFStandardSecurityHandler:
        hash.update(struct.pack('<L', self.p))  # 4
        hash.update(self.docid[0])  # 5
        if self.r >= 4:
-            if not self.encrypt_metadata:
+            if not cast(PDFStandardSecurityHandlerV4, self).encrypt_metadata:
                hash.update(b'\xff\xff\xff\xff')
        result = hash.digest()
        n = 5
@ -362,28 +382,28 @@ class PDFStandardSecurityHandler:
                result = md5(result[:n]).digest()
        return result[:n]

-    def authenticate(self, password):
-        password = password.encode("latin1")
-        key = self.authenticate_user_password(password)
+    def authenticate(self, password: str) -> Optional[bytes]:
+        password_bytes = password.encode("latin1")
+        key = self.authenticate_user_password(password_bytes)
        if key is None:
-            key = self.authenticate_owner_password(password)
+            key = self.authenticate_owner_password(password_bytes)
        return key

-    def authenticate_user_password(self, password):
+    def authenticate_user_password(self, password: bytes) -> Optional[bytes]:
        key = self.compute_encryption_key(password)
        if self.verify_encryption_key(key):
            return key
        else:
            return None

-    def verify_encryption_key(self, key):
+    def verify_encryption_key(self, key: bytes) -> bool:
        # Algorithm 3.6
        u = self.compute_u(key)
        if self.r == 2:
            return u == self.u
        return u[:16] == self.u[:16]

-    def authenticate_owner_password(self, password):
+    def authenticate_owner_password(self, password: bytes) -> Optional[bytes]:
        # Algorithm 3.7
        password = (password + self.PASSWORD_PADDING)[:32]
        hash = md5(password)
@ -403,10 +423,17 @@ class PDFStandardSecurityHandler:
                user_password = Arcfour(k).decrypt(user_password)
        return self.authenticate_user_password(user_password)

-    def decrypt(self, objid, genno, data, attrs=None):
+    def decrypt(
+        self,
+        objid: int,
+        genno: int,
+        data: bytes,
+        attrs: Optional[Dict[str, Any]] = None
+    ) -> bytes:
        return self.decrypt_rc4(objid, genno, data)

-    def decrypt_rc4(self, objid, genno, data):
+    def decrypt_rc4(self, objid: int, genno: int, data: bytes) -> bytes:
+        assert self.key is not None
        key = self.key + struct.pack('<L', objid)[:3] \
            + struct.pack('<L', genno)[:2]
        hash = md5(key)
@ -416,9 +443,9 @@ class PDFStandardSecurityHandler:

 class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):

-    supported_revisions = (4,)
+    supported_revisions: Tuple[int, ...] = (4,)

-    def init_params(self):
+    def init_params(self) -> None:
        super().init_params()
        self.length = 128
        self.cf = dict_value(self.param.get('CF'))
@ -442,7 +469,10 @@ class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):
            raise PDFEncryptionError(error_msg)
        return

-    def get_cfm(self, name):
+    def get_cfm(
+        self,
+        name: str
+    ) -> Optional[Callable[[int, int, bytes], bytes]]:
        if name == 'V2':
            return self.decrypt_rc4
        elif name == 'AESV2':
@ -450,7 +480,14 @@ class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):
        else:
            return None

-    def decrypt(self, objid, genno, data, attrs=None, name=None):
+    def decrypt(
+        self,
+        objid: int,
+        genno: int,
+        data: bytes,
+        attrs: Optional[Dict[str, Any]] = None,
+        name: Optional[str] = None
+    ) -> bytes:
        if not self.encrypt_metadata and attrs is not None:
            t = attrs.get('Type')
            if t is not None and literal_name(t) == 'Metadata':
@ -459,10 +496,11 @@ class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):
            name = self.strf
        return self.cfm[name](objid, genno, data)

-    def decrypt_identity(self, objid, genno, data):
+    def decrypt_identity(self, objid: int, genno: int, data: bytes) -> bytes:
        return data

-    def decrypt_aes128(self, objid, genno, data):
+    def decrypt_aes128(self, objid: int, genno: int, data: bytes) -> bytes:
+        assert self.key is not None
        key = self.key + struct.pack('<L', objid)[:3] \
            + struct.pack('<L', genno)[:2] + b'sAlT'
        hash = md5(key)
@ -471,15 +509,15 @@ class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):
        ciphertext = data[16:]
        cipher = Cipher(algorithms.AES(key),
                        modes.CBC(initialization_vector),
-                        backend=default_backend())
-        return cipher.decryptor().update(ciphertext)
+                        backend=default_backend())  # type: ignore
+        return cipher.decryptor().update(ciphertext)  # type: ignore


 class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):

    supported_revisions = (5, 6)

-    def init_params(self):
+    def init_params(self) -> None:
        super().init_params()
        self.length = 256
        self.oe = str_value(self.param['OE'])
@ -492,31 +530,34 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
        self.u_key_salt = self.u[40:]
        return

-    def get_cfm(self, name):
+    def get_cfm(
+        self,
+        name: str
+    ) -> Optional[Callable[[int, int, bytes], bytes]]:
        if name == 'AESV3':
            return self.decrypt_aes256
        else:
            return None

-    def authenticate(self, password):
-        password = self._normalize_password(password)
-        hash = self._password_hash(password, self.o_validation_salt, self.u)
+    def authenticate(self, password: str) -> Optional[bytes]:
+        password_b = self._normalize_password(password)
+        hash = self._password_hash(password_b, self.o_validation_salt, self.u)
        if hash == self.o_hash:
-            hash = self._password_hash(password, self.o_key_salt, self.u)
+            hash = self._password_hash(password_b, self.o_key_salt, self.u)
            cipher = Cipher(algorithms.AES(hash),
                            modes.CBC(b'\0' * 16),
-                            backend=default_backend())
-            return cipher.decryptor().update(self.oe)
-        hash = self._password_hash(password, self.u_validation_salt)
+                            backend=default_backend())  # type: ignore
+            return cipher.decryptor().update(self.oe)  # type: ignore
+        hash = self._password_hash(password_b, self.u_validation_salt)
        if hash == self.u_hash:
-            hash = self._password_hash(password, self.u_key_salt)
+            hash = self._password_hash(password_b, self.u_key_salt)
            cipher = Cipher(algorithms.AES(hash),
                            modes.CBC(b'\0' * 16),
-                            backend=default_backend())
-            return cipher.decryptor().update(self.ue)
+                            backend=default_backend())  # type: ignore
+            return cipher.decryptor().update(self.ue)  # type: ignore
        return None

-    def _normalize_password(self, password):
+    def _normalize_password(self, password: str) -> bytes:
        if self.r == 6:
            # saslprep expects non-empty strings, apparently
            if not password:
@ -525,7 +566,12 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
            password = saslprep(password)
        return password.encode('utf-8')[:127]

-    def _password_hash(self, password, salt, vector=None):
+    def _password_hash(
+        self,
+        password: bytes,
+        salt: bytes,
+        vector: Optional[bytes] = None
+    ) -> bytes:
        """
        Compute password hash depending on revision number
        """
@ -533,7 +579,12 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
            return self._r5_password(password, salt, vector)
        return self._r6_password(password, salt[0:8], vector)

-    def _r5_password(self, password, salt, vector):
+    def _r5_password(
+        self,
+        password: bytes,
+        salt: bytes,
+        vector: Optional[bytes] = None
+    ) -> bytes:
        """
        Compute the password for revision 5
        """
@ -543,7 +594,12 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
            hash.update(vector)
        return hash.digest()

-    def _r6_password(self, password, salt, vector):
+    def _r6_password(
+        self,
+        password: bytes,
+        salt: bytes,
+        vector: Optional[bytes] = None
+    ) -> bytes:
        """
        Compute the password for revision 6
        """
@ -568,22 +624,28 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
        return k[:32]

    @staticmethod
-    def _bytes_mod_3(input_bytes):
+    def _bytes_mod_3(input_bytes: bytes) -> int:
        # 256 is 1 mod 3, so we can just sum 'em
        return sum(b % 3 for b in input_bytes) % 3

-    def _aes_cbc_encrypt(self, key, iv, data):
+    def _aes_cbc_encrypt(
+        self,
+        key: bytes,
+        iv: bytes,
+        data: bytes
+    ) -> bytes:
        cipher = Cipher(algorithms.AES(key), modes.CBC(iv))
-        encryptor = cipher.encryptor()
-        return encryptor.update(data) + encryptor.finalize()
+        encryptor = cipher.encryptor()  # type: ignore
+        return encryptor.update(data) + encryptor.finalize()  # type: ignore

-    def decrypt_aes256(self, objid, genno, data):
+    def decrypt_aes256(self, objid: int, genno: int, data: bytes) -> bytes:
        initialization_vector = data[:16]
        ciphertext = data[16:]
+        assert self.key is not None
        cipher = Cipher(algorithms.AES(self.key),
                        modes.CBC(initialization_vector),
-                        backend=default_backend())
-        return cipher.decryptor().update(ciphertext)
+                        backend=default_backend())  # type: ignore
+        return cipher.decryptor().update(ciphertext)  # type: ignore


 class PDFDocument:
@ -599,24 +661,30 @@ class PDFDocument:

    """

-    security_handler_registry = {
+    security_handler_registry: Dict[int, Type[PDFStandardSecurityHandler]] = {
        1: PDFStandardSecurityHandler,
        2: PDFStandardSecurityHandler,
        4: PDFStandardSecurityHandlerV4,
        5: PDFStandardSecurityHandlerV5,
    }

-    def __init__(self, parser, password='', caching=True, fallback=True):
+    def __init__(
+        self,
+        parser: PDFParser,
+        password: str = '',
+        caching: bool = True,
+        fallback: bool = True
+    ) -> None:
        "Set the document to use a given PDFParser object."
        self.caching = caching
-        self.xrefs = []
+        self.xrefs: List[PDFBaseXRef] = []
        self.info = []
-        self.catalog = None
-        self.encryption = None
-        self.decipher = None
+        self.catalog: Dict[str, Any] = {}
+        self.encryption: Optional[Tuple[Any, Any]] = None
+        self.decipher: Optional[DecipherCallable] = None
        self._parser = None
-        self._cached_objs = {}
-        self._parsed_objs = {}
+        self._cached_objs: Dict[int, Tuple[object, int]] = {}
+        self._parsed_objs: Dict[int, Tuple[List[object], int]] = {}
        self._parser = parser
        self._parser.set_document(self)
        self.is_printable = self.is_modifiable = self.is_extractable = True
@ -629,9 +697,9 @@ class PDFDocument:
            pass  # fallback = True
        if fallback:
            parser.fallback = True
-            xref = PDFXRefFallback()
-            xref.load(parser)
-            self.xrefs.append(xref)
+            newxref = PDFXRefFallback()
+            newxref.load(parser)
+            self.xrefs.append(newxref)
        for xref in self.xrefs:
            trailer = xref.get_trailer()
            if not trailer:
@ -665,7 +733,8 @@ class PDFDocument:

    # _initialize_password(password=b'')
    #   Perform the initialization with a given password.
-    def _initialize_password(self, password=''):
+    def _initialize_password(self, password: str = '') -> None:
+        assert self.encryption is not None
        (docid, param) = self.encryption
        if literal_name(param.get('Filter')) != 'Standard':
            raise PDFEncryptionError('Unknown filter: param=%r' % param)
@ -678,15 +747,22 @@ class PDFDocument:
        self.is_printable = handler.is_printable()
        self.is_modifiable = handler.is_modifiable()
        self.is_extractable = handler.is_extractable()
+        assert self._parser is not None
        self._parser.fallback = False  # need to read streams with exact length
        return

-    def _getobj_objstm(self, stream, index, objid):
+    def _getobj_objstm(
+        self,
+        stream: PDFStream,
+        index: int,
+        objid: int
+    ) -> object:
        if stream.objid in self._parsed_objs:
            (objs, n) = self._parsed_objs[stream.objid]
        else:
            (objs, n) = self._get_objects(stream)
            if self.caching:
+                assert stream.objid is not None
                self._parsed_objs[stream.objid] = (objs, n)
        i = n*2+index
        try:
@ -695,19 +771,19 @@ class PDFDocument:
            raise PDFSyntaxError('index too big: %r' % index)
        return obj

-    def _get_objects(self, stream):
+    def _get_objects(self, stream: PDFStream) -> Tuple[List[object], int]:
        if stream.get('Type') is not LITERAL_OBJSTM:
            if settings.STRICT:
                raise PDFSyntaxError('Not a stream object: %r' % stream)
        try:
-            n = stream['N']
+            n = cast(int, stream['N'])
        except KeyError:
            if settings.STRICT:
                raise PDFSyntaxError('N is not defined: %r' % stream)
            n = 0
        parser = PDFStreamParser(stream.get_data())
        parser.set_document(self)
-        objs = []
+        objs: List[object] = []
        try:
            while 1:
                (_, obj) = parser.nextobject()
@ -716,7 +792,8 @@ class PDFDocument:
            pass
        return (objs, n)

-    def _getobj_parse(self, pos, objid):
+    def _getobj_parse(self, pos: int, objid: int) -> object:
+        assert self._parser is not None
        self._parser.seek(pos)
        (_, objid1) = self._parser.nexttoken()  # objid
        (_, genno) = self._parser.nexttoken()  # genno
@ -744,7 +821,7 @@ class PDFDocument:
        return obj

    # can raise PDFObjectNotFound
-    def getobj(self, objid):
+    def getobj(self, objid: int) -> object:
        """Get object from PDF

        :raises PDFException if PDFDocument is not initialized
@ -783,11 +860,14 @@ class PDFDocument:
                self._cached_objs[objid] = (obj, genno)
        return obj

-    def get_outlines(self):
+    OutlineType = Tuple[Any, Any, Any, Any, Any]
+
+    def get_outlines(self) -> Iterator[OutlineType]:
        if 'Outlines' not in self.catalog:
            raise PDFNoOutlines

-        def search(entry, level):
+        def search(entry: object, level: int
+                   ) -> Iterator[PDFDocument.OutlineType]:
            entry = dict_value(entry)
            if 'Title' in entry:
                if 'A' in entry or 'Dest' in entry:
@ -803,7 +883,11 @@ class PDFDocument:
            return
        return search(self.catalog['Outlines'], 0)

-    def lookup_name(self, cat, key):
+    def lookup_name(
+        self,
+        cat: str,
+        key: Union[str, bytes]
+    ) -> Any:
        try:
            names = dict_value(self.catalog['Names'])
        except (PDFTypeError, KeyError):
@ -811,14 +895,15 @@ class PDFDocument:
        # may raise KeyError
        d0 = dict_value(names[cat])

-        def lookup(d):
+        def lookup(d: Dict[str, Any]) -> Any:
            if 'Limits' in d:
                (k1, k2) = list_value(d['Limits'])
                if key < k1 or k2 < key:
                    return None
            if 'Names' in d:
                objs = list_value(d['Names'])
-                names = dict(choplist(2, objs))
+                names = dict(cast(Iterator[Tuple[Union[str, bytes], Any]],
+                                  choplist(2, objs)))
                return names[key]
            if 'Kids' in d:
                for c in list_value(d['Kids']):
@ -828,7 +913,7 @@ class PDFDocument:
            raise KeyError((cat, key))
        return lookup(d0)

-    def get_dest(self, name):
+    def get_dest(self, name: Union[str, bytes]) -> Any:
        try:
            # PDF-1.2 or later
            obj = self.lookup_name('Dests', name)
@ -843,7 +928,7 @@ class PDFDocument:
        return obj

    # find_xref
-    def find_xref(self, parser):
+    def find_xref(self, parser: PDFParser) -> int:
        """Internal function used to locate the first XRef."""
        # search the last xref table by scanning the file backwards.
        prev = None
@ -857,10 +942,16 @@ class PDFDocument:
        else:
            raise PDFNoValidXRef('Unexpected EOF')
        log.info('xref found: pos=%r', prev)
+        assert prev is not None
        return int(prev)

    # read xref table
-    def read_xref_from(self, parser, start, xrefs):
+    def read_xref_from(
+        self,
+        parser: PDFParser,
+        start: int,
+        xrefs: List[PDFBaseXRef]
+    ) -> None:
        """Reads XRefs from the given location."""
        parser.seek(start)
        parser.reset()
@ -873,7 +964,7 @@ class PDFDocument:
            # XRefStream: PDF-1.5
            parser.seek(pos)
            parser.reset()
-            xref = PDFXRefStream()
+            xref: PDFBaseXRef = PDFXRefStream()
            xref.load(parser)
        else:
            if token is parser.KEYWORD_XREF:
--- a/pdfminer/pdffont.py
+++ b/pdfminer/pdffont.py
@ -2,11 +2,15 @@ import logging
 import struct
 import sys
 from io import BytesIO
+from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List, Mapping,
+                    Optional, Tuple, Union, cast, TYPE_CHECKING)

 from . import settings
 from .cmapdb import CMap
+from .cmapdb import CMapBase
 from .cmapdb import CMapDB
 from .cmapdb import CMapParser
+from .cmapdb import UnicodeMap
 from .cmapdb import FileUnicodeMap
 from .encodingdb import EncodingDB
 from .encodingdb import name2unicode
@ -22,52 +26,59 @@ from .pdftypes import stream_value
 from .psparser import KWD
 from .psparser import LIT
 from .psparser import PSEOF
+from .psparser import PSKeyword
 from .psparser import PSLiteral
 from .psparser import PSStackParser
 from .psparser import literal_name
+from .utils import Matrix, Point
+from .utils import Rect
 from .utils import apply_matrix_norm
 from .utils import choplist
-from .utils import isnumber
 from .utils import nunpack

+if TYPE_CHECKING:
+    from .pdfinterp import PDFResourceManager
+
 log = logging.getLogger(__name__)


-def get_widths(seq):
-    widths = {}
-    r = []
+def get_widths(seq: Iterable[object]) -> Dict[int, float]:
+    """Build a mapping of character widths for horizontal writing."""
+    widths: Dict[int, float] = {}
+    r: List[float] = []
    for v in seq:
        if isinstance(v, list):
            if r:
                char1 = r[-1]
                for (i, w) in enumerate(v):
-                    widths[char1+i] = w
+                    widths[cast(int, char1) + i] = w
                r = []
-        elif isnumber(v):
+        elif isinstance(v, (int, float)):  # == utils.isnumber(v)
            r.append(v)
            if len(r) == 3:
                (char1, char2, w) = r
-                for i in range(char1, char2+1):
+                for i in range(cast(int, char1), cast(int, char2) + 1):
                    widths[i] = w
                r = []
    return widths


-def get_widths2(seq):
-    widths = {}
-    r = []
+def get_widths2(seq: Iterable[object]) -> Dict[int, Tuple[float, Point]]:
+    """Build a mapping of character widths for vertical writing."""
+    widths: Dict[int, Tuple[float, Point]] = {}
+    r: List[float] = []
    for v in seq:
        if isinstance(v, list):
            if r:
                char1 = r[-1]
                for (i, (w, vx, vy)) in enumerate(choplist(3, v)):
-                    widths[char1+i] = (w, (vx, vy))
+                    widths[cast(int, char1) + i] = (w, (vx, vy))
                r = []
-        elif isnumber(v):
+        elif isinstance(v, (int, float)):  # == utils.isnumber(v)
            r.append(v)
            if len(r) == 5:
                (char1, char2, w, vx, vy) = r
-                for i in range(char1, char2+1):
+                for i in range(cast(int, char1), cast(int, char2) + 1):
                    widths[i] = (w, (vx, vy))
                r = []
    return widths
@ -76,11 +87,13 @@ def get_widths2(seq):
 class FontMetricsDB:

    @classmethod
-    def get_metrics(cls, fontname):
+    def get_metrics(cls, fontname: str
+                    ) -> Tuple[Dict[str, object], Dict[str, int]]:
        return FONT_METRICS[fontname]


-class Type1FontHeaderParser(PSStackParser):
+# int here means that we're not extending PSStackParser with additional types.
+class Type1FontHeaderParser(PSStackParser[int]):

    KEYWORD_BEGIN = KWD(b'begin')
    KEYWORD_END = KWD(b'end')
@ -91,12 +104,12 @@ class Type1FontHeaderParser(PSStackParser):
    KEYWORD_READONLY = KWD(b'readonly')
    KEYWORD_FOR = KWD(b'for')

-    def __init__(self, data):
+    def __init__(self, data: BinaryIO) -> None:
        PSStackParser.__init__(self, data)
-        self._cid2unicode = {}
+        self._cid2unicode: Dict[int, str] = {}
        return

-    def get_encoding(self):
+    def get_encoding(self) -> Dict[int, str]:
        """Parse the font encoding.

        The Type1 font encoding maps character codes to character names. These
@ -116,12 +129,12 @@ class Type1FontHeaderParser(PSStackParser):
            except PSEOF:
                break
            try:
-                self._cid2unicode[cid] = name2unicode(name)
+                self._cid2unicode[cid] = name2unicode(cast(str, name))
            except KeyError as e:
                log.debug(str(e))
        return self._cid2unicode

-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        if token is self.KEYWORD_PUT:
            ((_, key), (_, value)) = self.pop(2)
            if (isinstance(key, int) and isinstance(value, PSLiteral)):
@ -140,10 +153,10 @@ IDENTITY_ENCODER = {
 }


-def getdict(data):
-    d = {}
+def getdict(data: bytes) -> Dict[int, List[Union[float, int]]]:
+    d: Dict[int, List[Union[float, int]]] = {}
    fp = BytesIO(data)
-    stack = []
+    stack: List[Union[float, int]] = []
    while 1:
        c = fp.read(1)
        if not c:
@ -162,7 +175,9 @@ def getdict(data):
                    if n == 15:
                        loop = False
                    else:
-                        s += NIBBLES[n]
+                        nibble = NIBBLES[n]
+                        assert nibble is not None
+                        s += nibble
            value = float(s)
        elif 32 <= b0 and b0 <= 246:
            value = b0-139
@ -270,9 +285,9 @@ class CFFFont:

    class INDEX:

-        def __init__(self, fp):
+        def __init__(self, fp: BinaryIO) -> None:
            self.fp = fp
-            self.offsets = []
+            self.offsets: List[int] = []
            (count, offsize) = struct.unpack('>HB', self.fp.read(3))
            for i in range(count+1):
                self.offsets.append(nunpack(self.fp.read(offsize)))
@ -280,20 +295,20 @@ class CFFFont:
            self.fp.seek(self.base+self.offsets[-1])
            return

-        def __repr__(self):
+        def __repr__(self) -> str:
            return '<INDEX: size=%d>' % len(self)

-        def __len__(self):
+        def __len__(self) -> int:
            return len(self.offsets)-1

-        def __getitem__(self, i):
+        def __getitem__(self, i: int) -> bytes:
            self.fp.seek(self.base+self.offsets[i])
            return self.fp.read(self.offsets[i+1]-self.offsets[i])

-        def __iter__(self):
+        def __iter__(self) -> Iterator[bytes]:
            return iter(self[i] for i in range(len(self)))

-    def __init__(self, name, fp):
+    def __init__(self, name: str, fp: BinaryIO) -> None:
        self.name = name
        self.fp = fp
        # Header
@ -314,13 +329,13 @@ class CFFFont:
        (encoding_pos,) = self.top_dict.get(16, [0])
        (charstring_pos,) = self.top_dict.get(17, [0])
        # CharStrings
-        self.fp.seek(charstring_pos)
+        self.fp.seek(cast(int, charstring_pos))
        self.charstring = self.INDEX(self.fp)
        self.nglyphs = len(self.charstring)
        # Encodings
        self.code2gid = {}
        self.gid2code = {}
-        self.fp.seek(encoding_pos)
+        self.fp.seek(cast(int, encoding_pos))
        format = self.fp.read(1)
        if format == b'\x00':
            # Format 0
@ -344,17 +359,18 @@ class CFFFont:
        # Charsets
        self.name2gid = {}
        self.gid2name = {}
-        self.fp.seek(charset_pos)
+        self.fp.seek(cast(int, charset_pos))
        format = self.fp.read(1)
        if format == b'\x00':
            # Format 0
            n = self.nglyphs-1
-            for (gid, sid) in enumerate(struct.unpack('>'+'H'*n,
-                                                      self.fp.read(2*n))):
+            for (gid, sid) in enumerate(
+                    cast(Tuple[int, ...],
+                         struct.unpack('>' + 'H' * n, self.fp.read(2 * n)))):
                gid += 1
-                name = self.getstr(sid)
-                self.name2gid[name] = gid
-                self.gid2name[gid] = name
+                sidname = self.getstr(sid)
+                self.name2gid[sidname] = gid
+                self.gid2name[gid] = sidname
        elif format == b'\x01':
            # Format 1
            (n,) = struct.unpack('B', self.fp.read(1))
@ -362,9 +378,9 @@ class CFFFont:
            for i in range(n):
                (first, nleft) = struct.unpack('BB', self.fp.read(2))
                for gid in range(first, first+nleft+1):
-                    name = self.getstr(sid)
-                    self.name2gid[name] = gid
-                    self.gid2name[gid] = name
+                    sidname = self.getstr(sid)
+                    self.name2gid[sidname] = gid
+                    self.gid2name[gid] = sidname
                    sid += 1
        elif format == b'\x02':
            # Format 2
@ -373,7 +389,9 @@ class CFFFont:
            raise ValueError('unsupported charset format: %r' % format)
        return

-    def getstr(self, sid):
+    def getstr(self, sid: int) -> Union[str, bytes]:
+        # This returns str for one of the STANDARD_STRINGS but bytes otherwise,
+        # and appears to be a needless source of type complexity.
        if sid < len(self.STANDARD_STRINGS):
            return self.STANDARD_STRINGS[sid]
        return self.string_index[sid-len(self.STANDARD_STRINGS)]
@ -384,17 +402,19 @@ class TrueTypeFont:
    class CMapNotFound(Exception):
        pass

-    def __init__(self, name, fp):
+    def __init__(self, name: str, fp: BinaryIO) -> None:
        self.name = name
        self.fp = fp
-        self.tables = {}
+        self.tables: Dict[bytes, Tuple[int, int]] = {}
        self.fonttype = fp.read(4)
        try:
-            (ntables, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8))
+            (ntables, _1, _2, _3) = cast(Tuple[int, int, int, int],
+                                         struct.unpack('>HHHH', fp.read(8)))
            for _ in range(ntables):
-                (name, tsum, offset, length) = struct.unpack('>4sLLL',
-                                                             fp.read(16))
-                self.tables[name] = (offset, length)
+                (name_bytes, tsum, offset, length) = \
+                    cast(Tuple[bytes, int, int, int],
+                         struct.unpack('>4sLLL', fp.read(16)))
+                self.tables[name_bytes] = (offset, length)
        except struct.error:
            # Do not fail if there are not enough bytes to read. Even for
            # corrupted PDFs we would like to get as much information as
@ -402,34 +422,40 @@ class TrueTypeFont:
            pass
        return

-    def create_unicode_map(self):
+    def create_unicode_map(self) -> FileUnicodeMap:
        if b'cmap' not in self.tables:
            raise TrueTypeFont.CMapNotFound
        (base_offset, length) = self.tables[b'cmap']
        fp = self.fp
        fp.seek(base_offset)
-        (version, nsubtables) = struct.unpack('>HH', fp.read(4))
-        subtables = []
+        (version, nsubtables) = \
+            cast(Tuple[int, int], struct.unpack('>HH', fp.read(4)))
+        subtables: List[Tuple[int, int, int]] = []
        for i in range(nsubtables):
-            subtables.append(struct.unpack('>HHL', fp.read(8)))
-        char2gid = {}
+            subtables.append(
+                cast(Tuple[int, int, int], struct.unpack('>HHL', fp.read(8))))
+        char2gid: Dict[int, int] = {}
        # Only supports subtable type 0, 2 and 4.
        for (_1, _2, st_offset) in subtables:
            fp.seek(base_offset+st_offset)
-            (fmttype, fmtlen, fmtlang) = struct.unpack('>HHH', fp.read(6))
+            (fmttype, fmtlen, fmtlang) = \
+                cast(Tuple[int, int, int], struct.unpack('>HHH', fp.read(6)))
            if fmttype == 0:
-                char2gid.update(enumerate(struct.unpack('>256B',
-                                                        fp.read(256))))
+                char2gid.update(enumerate(
+                    cast(Tuple[int, ...],
+                         struct.unpack('>256B', fp.read(256)))))
            elif fmttype == 2:
-                subheaderkeys = struct.unpack('>256H', fp.read(512))
+                subheaderkeys = cast(Tuple[int, ...],
+                                     struct.unpack('>256H', fp.read(512)))
                firstbytes = [0]*8192
                for (i, k) in enumerate(subheaderkeys):
                    firstbytes[k//8] = i
                nhdrs = max(subheaderkeys)//8 + 1
-                hdrs = []
+                hdrs: List[Tuple[int, int, int, int, int]] = []
                for i in range(nhdrs):
                    (firstcode, entcount, delta, offset) = \
-                        struct.unpack('>HHhH', fp.read(8))
+                        cast(Tuple[int, int, int, int],
+                             struct.unpack('>HHhH', fp.read(8)))
                    hdrs.append((i, firstcode, entcount, delta,
                                 fp.tell()-2+offset))
                for (i, firstcode, entcount, delta, pos) in hdrs:
@ -438,24 +464,36 @@ class TrueTypeFont:
                    first = firstcode + (firstbytes[i] << 8)
                    fp.seek(pos)
                    for c in range(entcount):
-                        gid = struct.unpack('>H', fp.read(2))
+                        gid = cast(Tuple[int],
+                                   struct.unpack('>H', fp.read(2)))[0]
                        if gid:
                            gid += delta
                        char2gid[first+c] = gid
            elif fmttype == 4:
-                (segcount, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8))
+                (segcount, _1, _2, _3) = \
+                    cast(Tuple[int, int, int, int],
+                         struct.unpack('>HHHH', fp.read(8)))
                segcount //= 2
-                ecs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
+                ecs = cast(Tuple[int, ...],
+                           struct.unpack('>%dH' % segcount,
+                                         fp.read(2*segcount)))
                fp.read(2)
-                scs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
-                idds = struct.unpack('>%dh' % segcount, fp.read(2*segcount))
+                scs = cast(Tuple[int, ...],
+                           struct.unpack('>%dH' % segcount,
+                                         fp.read(2*segcount)))
+                idds = cast(Tuple[int, ...],
+                            struct.unpack('>%dh' % segcount,
+                                          fp.read(2*segcount)))
                pos = fp.tell()
-                idrs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
+                idrs = cast(Tuple[int, ...],
+                            struct.unpack('>%dH' % segcount,
+                                          fp.read(2*segcount)))
                for (ec, sc, idd, idr) in zip(ecs, scs, idds, idrs):
                    if idr:
                        fp.seek(pos+idr)
                        for c in range(sc, ec+1):
-                            b = struct.unpack('>H', fp.read(2))[0]
+                            b = cast(Tuple[int],
+                                     struct.unpack('>H', fp.read(2)))[0]
                            char2gid[c] = (b + idd) & 0xffff
                    else:
                        for c in range(sc, ec+1):
@ -480,12 +518,21 @@ class PDFUnicodeNotDefined(PDFFontError):
 LITERAL_STANDARD_ENCODING = LIT('StandardEncoding')
 LITERAL_TYPE1C = LIT('Type1C')

+# Font widths are maintained in a dict type that maps from *either* unicode
+# chars or integer character IDs.
+FontWidthDict = Union[Dict[int, float], Dict[str, float]]
+

 class PDFFont:

-    def __init__(self, descriptor, widths, default_width=None):
+    def __init__(
+        self,
+        descriptor: Mapping[str, Any],
+        widths: FontWidthDict,
+        default_width: Optional[float] = None
+    ) -> None:
        self.descriptor = descriptor
-        self.widths = resolve_all(widths)
+        self.widths: FontWidthDict = resolve_all(widths)
        self.fontname = resolve1(descriptor.get('FontName', 'unknown'))
        if isinstance(self.fontname, PSLiteral):
            self.fontname = literal_name(self.fontname)
@ -498,8 +545,8 @@ class PDFFont:
        else:
            self.default_width = default_width
        self.leading = num_value(descriptor.get('Leading', 0))
-        self.bbox = list_value(resolve_all(descriptor.get('FontBBox',
-                                                          (0, 0, 0, 0))))
+        self.bbox = cast(Rect, list_value(
+            resolve_all(descriptor.get('FontBBox', (0, 0, 0, 0)))))
        self.hscale = self.vscale = .001

        # PDF RM 9.8.1 specifies /Descent should always be a negative number.
@ -510,57 +557,72 @@ class PDFFont:
            self.descent = -self.descent
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFFont>'

-    def is_vertical(self):
+    def is_vertical(self) -> bool:
        return False

-    def is_multibyte(self):
+    def is_multibyte(self) -> bool:
        return False

-    def decode(self, bytes):
+    def decode(self, bytes: bytes) -> Iterable[int]:
        return bytearray(bytes)  # map(ord, bytes)

-    def get_ascent(self):
+    def get_ascent(self) -> float:
        """Ascent above the baseline, in text space units"""
        return self.ascent * self.vscale

-    def get_descent(self):
+    def get_descent(self) -> float:
        """Descent below the baseline, in text space units; always negative"""
        return self.descent * self.vscale

-    def get_width(self):
+    def get_width(self) -> float:
        w = self.bbox[2]-self.bbox[0]
        if w == 0:
            w = -self.default_width
        return w * self.hscale

-    def get_height(self):
+    def get_height(self) -> float:
        h = self.bbox[3]-self.bbox[1]
        if h == 0:
            h = self.ascent - self.descent
        return h * self.vscale

-    def char_width(self, cid):
+    def char_width(self, cid: int) -> float:
+        # Because the widths dict may be keyed by either IDs or strings,
+        # we try to look up the character ID first, then its str equivalent.
        try:
-            return self.widths[cid] * self.hscale
+            return cast(Dict[int, float], self.widths)[cid] * self.hscale
        except KeyError:
+            str_widths = cast(Dict[str, float], self.widths)
            try:
-                return self.widths[self.to_unichr(cid)] * self.hscale
+                return str_widths[self.to_unichr(cid)] * self.hscale
            except (KeyError, PDFUnicodeNotDefined):
                return self.default_width * self.hscale

-    def char_disp(self, cid):
+    def char_disp(
+        self,
+        cid: int
+    ) -> Union[float, Tuple[Optional[float], float]]:
+        "Returns an integer for horizontal fonts, a tuple for vertical fonts."
        return 0

-    def string_width(self, s):
+    def string_width(self, s: bytes) -> float:
        return sum(self.char_width(cid) for cid in self.decode(s))

+    def to_unichr(self, cid: int) -> str:
+        raise NotImplementedError
+

 class PDFSimpleFont(PDFFont):

-    def __init__(self, descriptor, widths, spec):
+    def __init__(
+        self,
+        descriptor: Mapping[str, Any],
+        widths: FontWidthDict,
+        spec: Mapping[str, Any]
+    ) -> None:
        # Font encoding is specified either by a name of
        # built-in encoding or a dictionary that describes
        # the differences.
@ -575,7 +637,7 @@ class PDFSimpleFont(PDFFont):
            self.cid2unicode = EncodingDB.get_encoding(name, diff)
        else:
            self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
-        self.unicode_map = None
+        self.unicode_map: Optional[UnicodeMap] = None
        if 'ToUnicode' in spec:
            strm = stream_value(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
@ -583,7 +645,7 @@ class PDFSimpleFont(PDFFont):
        PDFFont.__init__(self, descriptor, widths)
        return

-    def to_unichr(self, cid):
+    def to_unichr(self, cid: int) -> str:
        if self.unicode_map:
            try:
                return self.unicode_map.get_unichr(cid)
@ -597,21 +659,28 @@ class PDFSimpleFont(PDFFont):

 class PDFType1Font(PDFSimpleFont):

-    def __init__(self, rsrcmgr, spec):
+    def __init__(
+        self,
+        rsrcmgr: "PDFResourceManager",
+        spec: Mapping[str, Any]
+    ) -> None:
        try:
            self.basefont = literal_name(spec['BaseFont'])
        except KeyError:
            if settings.STRICT:
                raise PDFFontError('BaseFont is missing')
            self.basefont = 'unknown'
+
+        widths: FontWidthDict
        try:
-            (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
+            (descriptor, int_widths) = FontMetricsDB.get_metrics(self.basefont)
+            widths = cast(Dict[str, float], int_widths)  # implicit int->float
        except KeyError:
            descriptor = dict_value(spec.get('FontDescriptor', {}))
            firstchar = int_value(spec.get('FirstChar', 0))
            # lastchar = int_value(spec.get('LastChar', 255))
-            widths = list_value(spec.get('Widths', [0]*256))
-            widths = {i+firstchar: w for (i, w) in enumerate(widths)}
+            width_list = list_value(spec.get('Widths', [0]*256))
+            widths = {i+firstchar: w for (i, w) in enumerate(width_list)}
        PDFSimpleFont.__init__(self, descriptor, widths, spec)
        if 'Encoding' not in spec and 'FontFile' in descriptor:
            # try to recover the missing encoding info from the font file.
@ -622,41 +691,51 @@ class PDFType1Font(PDFSimpleFont):
            self.cid2unicode = parser.get_encoding()
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFType1Font: basefont=%r>' % self.basefont


 class PDFTrueTypeFont(PDFType1Font):

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFTrueTypeFont: basefont=%r>' % self.basefont


 class PDFType3Font(PDFSimpleFont):

-    def __init__(self, rsrcmgr, spec):
+    def __init__(
+        self,
+        rsrcmgr: "PDFResourceManager",
+        spec: Mapping[str, Any]
+    ) -> None:
        firstchar = int_value(spec.get('FirstChar', 0))
        # lastchar = int_value(spec.get('LastChar', 0))
-        widths = list_value(spec.get('Widths', [0]*256))
-        widths = {i+firstchar: w for (i, w) in enumerate(widths)}
+        width_list = list_value(spec.get('Widths', [0]*256))
+        widths = {i+firstchar: w for (i, w) in enumerate(width_list)}
        if 'FontDescriptor' in spec:
            descriptor = dict_value(spec['FontDescriptor'])
        else:
            descriptor = {'Ascent': 0, 'Descent': 0,
                          'FontBBox': spec['FontBBox']}
        PDFSimpleFont.__init__(self, descriptor, widths, spec)
-        self.matrix = tuple(list_value(spec.get('FontMatrix')))
+        self.matrix = cast(Matrix, tuple(list_value(spec.get('FontMatrix'))))
        (_, self.descent, _, self.ascent) = self.bbox
        (self.hscale, self.vscale) = apply_matrix_norm(self.matrix, (1, 1))
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFType3Font>'


 class PDFCIDFont(PDFFont):
+    default_disp: Union[float, Tuple[Optional[float], float]]

-    def __init__(self, rsrcmgr, spec, strict=settings.STRICT):
+    def __init__(
+        self,
+        rsrcmgr: "PDFResourceManager",
+        spec: Mapping[str, Any],
+        strict: bool = settings.STRICT
+    ) -> None:
        try:
            self.basefont = literal_name(spec['BaseFont'])
        except KeyError:
@ -669,7 +748,7 @@ class PDFCIDFont(PDFFont):
        cid_ordering = resolve1(
            self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1")
        self.cidcoding = '{}-{}'.format(cid_registry, cid_ordering)
-        self.cmap = self.get_cmap_from_spec(spec, strict)
+        self.cmap: CMapBase = self.get_cmap_from_spec(spec, strict)

        try:
            descriptor = dict_value(spec['FontDescriptor'])
@ -682,7 +761,7 @@ class PDFCIDFont(PDFFont):
            self.fontfile = stream_value(descriptor.get('FontFile2'))
            ttf = TrueTypeFont(self.basefont,
                               BytesIO(self.fontfile.get_data()))
-        self.unicode_map = None
+        self.unicode_map: Optional[UnicodeMap] = None
        if 'ToUnicode' in spec:
            strm = stream_value(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
@ -703,12 +782,12 @@ class PDFCIDFont(PDFFont):
        self.vertical = self.cmap.is_vertical()
        if self.vertical:
            # writing mode: vertical
-            widths = get_widths2(list_value(spec.get('W2', [])))
+            widths2 = get_widths2(list_value(spec.get('W2', [])))
            self.disps = {cid: (vx, vy)
-                          for (cid, (_, (vx, vy))) in widths.items()}
+                          for (cid, (_, (vx, vy))) in widths2.items()}
            (vy, w) = resolve1(spec.get('DW2', [880, -1000]))
            self.default_disp = (None, vy)
-            widths = {cid: w for (cid, (w, _)) in widths.items()}
+            widths = {cid: w for (cid, (w, _)) in widths2.items()}
            default_width = w
        else:
            # writing mode: horizontal
@ -719,7 +798,11 @@ class PDFCIDFont(PDFFont):
        PDFFont.__init__(self, descriptor, widths, default_width=default_width)
        return

-    def get_cmap_from_spec(self, spec, strict):
+    def get_cmap_from_spec(
+        self,
+        spec: Mapping[str, Any],
+        strict: bool
+    ) -> CMapBase:
        """Get cmap from font specification

        For certain PDFs, Encoding Type isn't mentioned as an attribute of
@ -738,7 +821,7 @@ class PDFCIDFont(PDFFont):
            return CMap()

    @staticmethod
-    def _get_cmap_name(spec, strict):
+    def _get_cmap_name(spec: Mapping[str, Any], strict: bool) -> str:
        """Get cmap name from font specification"""
        cmap_name = 'unknown'  # default value

@ -752,34 +835,37 @@ class PDFCIDFont(PDFFont):
            if strict:
                raise PDFFontError('Encoding is unspecified')

-        if type(cmap_name) is PDFStream:
-            if 'CMapName' in cmap_name:
-                cmap_name = cmap_name.get('CMapName').name
+        if type(cmap_name) is PDFStream:  # type: ignore[comparison-overlap]
+            cmap_name_stream: PDFStream = cast(PDFStream, cmap_name)
+            if 'CMapName' in cmap_name_stream:
+                cmap_name = cmap_name_stream.get('CMapName').name
            else:
                if strict:
                    raise PDFFontError('CMapName unspecified for encoding')

-        cmap_name = IDENTITY_ENCODER.get(cmap_name, cmap_name)
-        return cmap_name
+        return IDENTITY_ENCODER.get(cmap_name, cmap_name)

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFCIDFont: basefont={!r}, cidcoding={!r}>'\
            .format(self.basefont, self.cidcoding)

-    def is_vertical(self):
+    def is_vertical(self) -> bool:
        return self.vertical

-    def is_multibyte(self):
+    def is_multibyte(self) -> bool:
        return True

-    def decode(self, bytes):
+    def decode(self, bytes: bytes) -> Iterable[int]:
        return self.cmap.decode(bytes)

-    def char_disp(self, cid):
+    def char_disp(
+        self,
+        cid: int
+    ) -> Union[float, Tuple[Optional[float], float]]:
        "Returns an integer for horizontal fonts, a tuple for vertical fonts."
        return self.disps.get(cid, self.default_disp)

-    def to_unichr(self, cid):
+    def to_unichr(self, cid: int) -> str:
        try:
            if not self.unicode_map:
                raise KeyError(cid)
@ -788,7 +874,7 @@ class PDFCIDFont(PDFFont):
            raise PDFUnicodeNotDefined(self.cidcoding, cid)


-def main(argv):
+def main(argv: List[str]) -> None:
    for fname in argv[1:]:
        fp = open(fname, 'rb')
        font = CFFFont(fname, fp)
@ -798,4 +884,4 @@ def main(argv):


 if __name__ == '__main__':
-    sys.exit(main(sys.argv))
+    main(sys.argv)
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@ -1,9 +1,12 @@
 import re
 import logging
+from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast
 from io import BytesIO
 from .cmapdb import CMapDB
 from .cmapdb import CMap
-from .psparser import PSTypeError
+from .cmapdb import CMapBase
+from .psparser import PSLiteral, PSTypeError
+from .psparser import PSStackType
 from .psparser import PSEOF
 from .psparser import PSKeyword
 from .psparser import literal_name
@ -12,6 +15,9 @@ from .psparser import PSStackParser
 from .psparser import LIT
 from .psparser import KWD
 from . import settings
+from .pdfdevice import PDFDevice
+from .pdfdevice import PDFTextSeq
+from .pdfpage import PDFPage
 from .pdftypes import PDFException
 from .pdftypes import PDFStream
 from .pdftypes import PDFObjRef
@ -19,6 +25,7 @@ from .pdftypes import resolve1
 from .pdftypes import list_value
 from .pdftypes import dict_value
 from .pdftypes import stream_value
+from .pdffont import PDFFont
 from .pdffont import PDFFontError
 from .pdffont import PDFType1Font
 from .pdffont import PDFTrueTypeFont
@ -26,6 +33,7 @@ from .pdffont import PDFType3Font
 from .pdffont import PDFCIDFont
 from .pdfcolor import PDFColorSpace
 from .pdfcolor import PREDEFINED_COLORSPACE
+from .utils import Matrix, Point, PathSegment, Rect
 from .utils import choplist
 from .utils import mult_matrix
 from .utils import MATRIX_IDENTITY
@ -50,22 +58,24 @@ LITERAL_IMAGE = LIT('Image')


 class PDFTextState:
+    matrix: Matrix
+    linematrix: Point

-    def __init__(self):
-        self.font = None
-        self.fontsize = 0
-        self.charspace = 0
-        self.wordspace = 0
-        self.scaling = 100
-        self.leading = 0
-        self.render = 0
-        self.rise = 0
+    def __init__(self) -> None:
+        self.font: Optional[PDFFont] = None
+        self.fontsize: float = 0
+        self.charspace: float = 0
+        self.wordspace: float = 0
+        self.scaling: float = 100
+        self.leading: float = 0
+        self.render: int = 0
+        self.rise: float = 0
        self.reset()
        # self.matrix is set
        # self.linematrix is set
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFTextState: font=%r, fontsize=%r, charspace=%r, ' \
               'wordspace=%r, scaling=%r, leading=%r, render=%r, rise=%r, ' \
               'matrix=%r, linematrix=%r>' \
@ -73,7 +83,7 @@ class PDFTextState:
                  self.scaling, self.leading, self.render, self.rise,
                  self.matrix, self.linematrix)

-    def copy(self):
+    def copy(self) -> "PDFTextState":
        obj = PDFTextState()
        obj.font = self.font
        obj.fontsize = self.fontsize
@ -87,31 +97,37 @@ class PDFTextState:
        obj.linematrix = self.linematrix
        return obj

-    def reset(self):
+    def reset(self) -> None:
        self.matrix = MATRIX_IDENTITY
        self.linematrix = (0, 0)
        return


+Color = Union[
+    float,                              # Greyscale
+    Tuple[float, float, float],         # R, G, B
+    Tuple[float, float, float, float]]  # C, M, Y, K
+
+
 class PDFGraphicState:

-    def __init__(self):
-        self.linewidth = 0
-        self.linecap = None
-        self.linejoin = None
-        self.miterlimit = None
-        self.dash = None
-        self.intent = None
-        self.flatness = None
+    def __init__(self) -> None:
+        self.linewidth: float = 0
+        self.linecap: Optional[object] = None
+        self.linejoin: Optional[object] = None
+        self.miterlimit: Optional[object] = None
+        self.dash: Optional[Tuple[object, object]] = None
+        self.intent: Optional[object] = None
+        self.flatness: Optional[object] = None

        # stroking color
-        self.scolor = None
+        self.scolor: Optional[Color] = None

        # non stroking color
-        self.ncolor = None
+        self.ncolor: Optional[Color] = None
        return

-    def copy(self):
+    def copy(self) -> "PDFGraphicState":
        obj = PDFGraphicState()
        obj.linewidth = self.linewidth
        obj.linecap = self.linecap
@ -124,7 +140,7 @@ class PDFGraphicState:
        obj.ncolor = self.ncolor
        return obj

-    def __repr__(self):
+    def __repr__(self) -> str:
        return ('<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, '
                ' miterlimit=%r, dash=%r, intent=%r, flatness=%r, '
                ' stroking color=%r, non stroking color=%r>' %
@ -141,12 +157,12 @@ class PDFResourceManager:
    allocated multiple times.
    """

-    def __init__(self, caching=True):
+    def __init__(self, caching: bool = True) -> None:
        self.caching = caching
-        self._cached_fonts = {}
+        self._cached_fonts: Dict[object, PDFFont] = {}
        return

-    def get_procset(self, procs):
+    def get_procset(self, procs: Sequence[object]) -> None:
        for proc in procs:
            if proc is LITERAL_PDF:
                pass
@ -156,7 +172,7 @@ class PDFResourceManager:
                pass
        return

-    def get_cmap(self, cmapname, strict=False):
+    def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase:
        try:
            return CMapDB.get_cmap(cmapname)
        except CMapDB.CMapNotFound:
@ -164,7 +180,7 @@ class PDFResourceManager:
                raise
            return CMap()

-    def get_font(self, objid, spec):
+    def get_font(self, objid: object, spec: Mapping[str, object]) -> PDFFont:
        if objid and objid in self._cached_fonts:
            font = self._cached_fonts[objid]
        else:
@ -209,15 +225,18 @@ class PDFResourceManager:
        return font


-class PDFContentParser(PSStackParser):
+class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]):

-    def __init__(self, streams):
+    def __init__(self, streams: Sequence[object]) -> None:
        self.streams = streams
        self.istream = 0
-        PSStackParser.__init__(self, None)
+        # PSStackParser.__init__(fp=None) is safe only because we've overridden
+        # all the methods that would attempt to access self.fp without first
+        # calling self.fillfp().
+        PSStackParser.__init__(self, None)  # type: ignore[arg-type]
        return

-    def fillfp(self):
+    def fillfp(self) -> None:
        if not self.fp:
            if self.istream < len(self.streams):
                strm = stream_value(self.streams[self.istream])
@ -227,12 +246,12 @@ class PDFContentParser(PSStackParser):
            self.fp = BytesIO(strm.get_data())
        return

-    def seek(self, pos):
+    def seek(self, pos: int) -> None:
        self.fillfp()
        PSStackParser.seek(self, pos)
        return

-    def fillbuf(self):
+    def fillbuf(self) -> None:
        if self.charpos < len(self.buf):
            return
        while 1:
@ -241,19 +260,23 @@ class PDFContentParser(PSStackParser):
            self.buf = self.fp.read(self.BUFSIZ)
            if self.buf:
                break
-            self.fp = None
+            self.fp = None  # type: ignore[assignment]
        self.charpos = 0
        return

-    def get_inline_data(self, pos, target=b'EI'):
+    def get_inline_data(
+        self,
+        pos: int,
+        target: bytes = b'EI'
+    ) -> Tuple[int, bytes]:
        self.seek(pos)
        i = 0
        data = b''
        while i <= len(target):
            self.fillbuf()
            if i:
-                c = self.buf[self.charpos]
-                c = bytes((c,))
+                ci = self.buf[self.charpos]
+                c = bytes((ci,))
                data += c
                self.charpos += 1
                if len(target) <= i and c.isspace():
@ -275,7 +298,7 @@ class PDFContentParser(PSStackParser):
        data = re.sub(br'(\x0d\x0a|[\x0d\x0a])$', b'', data)
        return (pos, data)

-    def flush(self):
+    def flush(self) -> None:
        self.add_results(*self.popall())
        return

@ -283,7 +306,7 @@ class PDFContentParser(PSStackParser):
    KEYWORD_ID = KWD(b'ID')
    KEYWORD_EI = KWD(b'EI')

-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        if token is self.KEYWORD_BI:
            # inline image within a content stream
            self.start_type(pos, 'inline')
@ -307,30 +330,34 @@ class PDFContentParser(PSStackParser):
        return


+PDFStackT = PSStackType[PDFStream]
+"""Types that may appear on the PDF argument stack."""
+
+
 class PDFPageInterpreter:
    """Processor for the content of a PDF page

    Reference: PDF Reference, Appendix A, Operator Summary
    """

-    def __init__(self, rsrcmgr, device):
+    def __init__(self, rsrcmgr: PDFResourceManager, device: PDFDevice) -> None:
        self.rsrcmgr = rsrcmgr
        self.device = device
        return

-    def dup(self):
+    def dup(self) -> "PDFPageInterpreter":
        return self.__class__(self.rsrcmgr, self.device)

-    def init_resources(self, resources):
+    def init_resources(self, resources: Dict[object, object]) -> None:
        """Prepare the fonts and XObjects listed in the Resource attribute."""
        self.resources = resources
-        self.fontmap = {}
+        self.fontmap: Dict[object, PDFFont] = {}
        self.xobjmap = {}
-        self.csmap = PREDEFINED_COLORSPACE.copy()
+        self.csmap: Dict[str, PDFColorSpace] = PREDEFINED_COLORSPACE.copy()
        if not resources:
            return

-        def get_colorspace(spec):
+        def get_colorspace(spec: object) -> Optional[PDFColorSpace]:
            if isinstance(spec, list):
                name = literal_name(spec[0])
            else:
@ -343,6 +370,7 @@ class PDFPageInterpreter:
                return PDFColorSpace(name, len(list_value(spec[1])))
            else:
                return PREDEFINED_COLORSPACE.get(name)
+
        for (k, v) in dict_value(resources).items():
            log.debug('Resource: %r: %r', k, v)
            if k == 'Font':
@ -354,7 +382,9 @@ class PDFPageInterpreter:
                    self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec)
            elif k == 'ColorSpace':
                for (csid, spec) in dict_value(v).items():
-                    self.csmap[csid] = get_colorspace(resolve1(spec))
+                    colorspace = get_colorspace(resolve1(spec))
+                    if colorspace is not None:
+                        self.csmap[csid] = colorspace
            elif k == 'ProcSet':
                self.rsrcmgr.get_procset(list_value(v))
            elif k == 'XObject':
@ -362,130 +392,180 @@ class PDFPageInterpreter:
                    self.xobjmap[xobjid] = xobjstrm
        return

-    def init_state(self, ctm):
+    def init_state(self, ctm: Matrix) -> None:
        """Initialize the text and graphic states for rendering a page."""
-        self.gstack = []  # stack for graphical states.
+        # gstack: stack for graphical states.
+        self.gstack: List[Tuple[Matrix, PDFTextState, PDFGraphicState]] = []
        self.ctm = ctm
        self.device.set_ctm(self.ctm)
        self.textstate = PDFTextState()
        self.graphicstate = PDFGraphicState()
-        self.curpath = []
+        self.curpath: List[PathSegment] = []
        # argstack: stack for command arguments.
-        self.argstack = []
+        self.argstack: List[PDFStackT] = []
        # set some global states.
-        self.scs = self.ncs = None
+        self.scs: Optional[PDFColorSpace] = None
+        self.ncs: Optional[PDFColorSpace] = None
        if self.csmap:
            self.scs = self.ncs = next(iter(self.csmap.values()))
        return

-    def push(self, obj):
+    def push(self, obj: PDFStackT) -> None:
        self.argstack.append(obj)
        return

-    def pop(self, n):
+    def pop(self, n: int) -> List[PDFStackT]:
        if n == 0:
            return []
        x = self.argstack[-n:]
        self.argstack = self.argstack[:-n]
        return x

-    def get_current_state(self):
+    def get_current_state(
+        self
+    ) -> Tuple[Matrix, PDFTextState, PDFGraphicState]:
        return (self.ctm, self.textstate.copy(), self.graphicstate.copy())

-    def set_current_state(self, state):
+    def set_current_state(
+        self,
+        state: Tuple[Matrix, PDFTextState, PDFGraphicState]
+    ) -> None:
        (self.ctm, self.textstate, self.graphicstate) = state
        self.device.set_ctm(self.ctm)
        return

-    def do_q(self):
+    def do_q(self) -> None:
        """Save graphics state"""
        self.gstack.append(self.get_current_state())
        return

-    def do_Q(self):
+    def do_Q(self) -> None:
        """Restore graphics state"""
        if self.gstack:
            self.set_current_state(self.gstack.pop())
        return

-    def do_cm(self, a1, b1, c1, d1, e1, f1):
+    def do_cm(
+        self,
+        a1: PDFStackT,
+        b1: PDFStackT,
+        c1: PDFStackT,
+        d1: PDFStackT,
+        e1: PDFStackT,
+        f1: PDFStackT
+    ) -> None:
        """Concatenate matrix to current transformation matrix"""
-        self.ctm = mult_matrix((a1, b1, c1, d1, e1, f1), self.ctm)
+        self.ctm = \
+            mult_matrix(cast(Matrix, (a1, b1, c1, d1, e1, f1)), self.ctm)
        self.device.set_ctm(self.ctm)
        return

-    def do_w(self, linewidth):
+    def do_w(self, linewidth: PDFStackT) -> None:
        """Set line width"""
-        self.graphicstate.linewidth = linewidth
+        self.graphicstate.linewidth = cast(float, linewidth)
        return

-    def do_J(self, linecap):
+    def do_J(self, linecap: PDFStackT) -> None:
        """Set line cap style"""
        self.graphicstate.linecap = linecap
        return

-    def do_j(self, linejoin):
+    def do_j(self, linejoin: PDFStackT) -> None:
        """Set line join style"""
        self.graphicstate.linejoin = linejoin
        return

-    def do_M(self, miterlimit):
+    def do_M(self, miterlimit: PDFStackT) -> None:
        """Set miter limit"""
        self.graphicstate.miterlimit = miterlimit
        return

-    def do_d(self, dash, phase):
+    def do_d(self, dash: PDFStackT, phase: PDFStackT) -> None:
        """Set line dash pattern"""
        self.graphicstate.dash = (dash, phase)
        return

-    def do_ri(self, intent):
+    def do_ri(self, intent: PDFStackT) -> None:
        """Set color rendering intent"""
        self.graphicstate.intent = intent
        return

-    def do_i(self, flatness):
+    def do_i(self, flatness: PDFStackT) -> None:
        """Set flatness tolerance"""
        self.graphicstate.flatness = flatness
        return

-    def do_gs(self, name):
+    def do_gs(self, name: PDFStackT) -> None:
        """Set parameters from graphics state parameter dictionary"""
        # todo
        return

-    def do_m(self, x, y):
+    def do_m(self, x: PDFStackT, y: PDFStackT) -> None:
        """Begin new subpath"""
-        self.curpath.append(('m', x, y))
+        self.curpath.append(('m', cast(float, x), cast(float, y)))
        return

-    def do_l(self, x, y):
+    def do_l(self, x: PDFStackT, y: PDFStackT) -> None:
        """Append straight line segment to path"""
-        self.curpath.append(('l', x, y))
+        self.curpath.append(('l', cast(float, x), cast(float, y)))
        return

-    def do_c(self, x1, y1, x2, y2, x3, y3):
+    def do_c(
+        self,
+        x1: PDFStackT,
+        y1: PDFStackT,
+        x2: PDFStackT,
+        y2: PDFStackT,
+        x3: PDFStackT,
+        y3: PDFStackT
+    ) -> None:
        """Append curved segment to path (three control points)"""
-        self.curpath.append(('c', x1, y1, x2, y2, x3, y3))
+        self.curpath.append(('c', cast(float, x1), cast(float, y1),
+                             cast(float, x2), cast(float, y2),
+                             cast(float, x3), cast(float, y3)))
        return

-    def do_v(self, x2, y2, x3, y3):
+    def do_v(
+        self,
+        x2: PDFStackT,
+        y2: PDFStackT,
+        x3: PDFStackT,
+        y3: PDFStackT
+    ) -> None:
        """Append curved segment to path (initial point replicated)"""
-        self.curpath.append(('v', x2, y2, x3, y3))
+        self.curpath.append(('v', cast(float, x2), cast(float, y2),
+                             cast(float, x3), cast(float, y3)))
        return

-    def do_y(self, x1, y1, x3, y3):
+    def do_y(
+        self,
+        x1: PDFStackT,
+        y1: PDFStackT,
+        x3: PDFStackT,
+        y3: PDFStackT
+    ) -> None:
        """Append curved segment to path (final point replicated)"""
-        self.curpath.append(('y', x1, y1, x3, y3))
+        self.curpath.append(('y', cast(float, x1), cast(float, y1),
+                             cast(float, x3), cast(float, y3)))
        return

-    def do_h(self):
+    def do_h(self) -> None:
        """Close subpath"""
        self.curpath.append(('h',))
        return

-    def do_re(self, x, y, w, h):
+    def do_re(
+        self,
+        x: PDFStackT,
+        y: PDFStackT,
+        w: PDFStackT,
+        h: PDFStackT
+    ) -> None:
        """Append rectangle to path"""
+        x = cast(float, x)
+        y = cast(float, y)
+        w = cast(float, w)
+        h = cast(float, h)
        self.curpath.append(('m', x, y))
        self.curpath.append(('l', x+w, y))
        self.curpath.append(('l', x+w, y+h))
@ -493,77 +573,77 @@ class PDFPageInterpreter:
        self.curpath.append(('h',))
        return

-    def do_S(self):
+    def do_S(self) -> None:
        """Stroke path"""
        self.device.paint_path(self.graphicstate, True, False, False,
                               self.curpath)
        self.curpath = []
        return

-    def do_s(self):
+    def do_s(self) -> None:
        """Close and stroke path"""
        self.do_h()
        self.do_S()
        return

-    def do_f(self):
+    def do_f(self) -> None:
        """Fill path using nonzero winding number rule"""
        self.device.paint_path(self.graphicstate, False, True, False,
                               self.curpath)
        self.curpath = []
        return

-    def do_F(self):
+    def do_F(self) -> None:
        """Fill path using nonzero winding number rule (obsolete)"""
        return self.do_f()

-    def do_f_a(self):
+    def do_f_a(self) -> None:
        """Fill path using even-odd rule"""
        self.device.paint_path(self.graphicstate, False, True, True,
                               self.curpath)
        self.curpath = []
        return

-    def do_B(self):
+    def do_B(self) -> None:
        """Fill and stroke path using nonzero winding number rule"""
        self.device.paint_path(self.graphicstate, True, True, False,
                               self.curpath)
        self.curpath = []
        return

-    def do_B_a(self):
+    def do_B_a(self) -> None:
        """Fill and stroke path using even-odd rule"""
        self.device.paint_path(self.graphicstate, True, True, True,
                               self.curpath)
        self.curpath = []
        return

-    def do_b(self):
+    def do_b(self) -> None:
        """Close, fill, and stroke path using nonzero winding number rule"""
        self.do_h()
        self.do_B()
        return

-    def do_b_a(self):
+    def do_b_a(self) -> None:
        """Close, fill, and stroke path using even-odd rule"""
        self.do_h()
        self.do_B_a()
        return

-    def do_n(self):
+    def do_n(self) -> None:
        """End path without filling or stroking"""
        self.curpath = []
        return

-    def do_W(self):
+    def do_W(self) -> None:
        """Set clipping path using nonzero winding number rule"""
        return

-    def do_W_a(self):
+    def do_W_a(self) -> None:
        """Set clipping path using even-odd rule"""
        return

-    def do_CS(self, name):
+    def do_CS(self, name: PDFStackT) -> None:
        """Set color space for stroking operations

        Introduced in PDF 1.1
@ -575,7 +655,7 @@ class PDFPageInterpreter:
                raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
        return

-    def do_cs(self, name):
+    def do_cs(self, name: PDFStackT) -> None:
        """Set color space for nonstroking operations"""
        try:
            self.ncs = self.csmap[literal_name(name)]
@ -584,37 +664,53 @@ class PDFPageInterpreter:
                raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
        return

-    def do_G(self, gray):
+    def do_G(self, gray: PDFStackT) -> None:
        """Set gray level for stroking operations"""
-        self.graphicstate.scolor = gray
+        self.graphicstate.scolor = cast(float, gray)
        return

-    def do_g(self, gray):
+    def do_g(self, gray: PDFStackT) -> None:
        """Set gray level for nonstroking operations"""
-        self.graphicstate.ncolor = gray
+        self.graphicstate.ncolor = cast(float, gray)
        return

-    def do_RG(self, r, g, b):
+    def do_RG(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
        """Set RGB color for stroking operations"""
-        self.graphicstate.scolor = (r, g, b)
+        self.graphicstate.scolor = \
+            (cast(float, r), cast(float, g), cast(float, b))
        return

-    def do_rg(self, r, g, b):
+    def do_rg(self, r: PDFStackT, g: PDFStackT, b: PDFStackT) -> None:
        """Set RGB color for nonstroking operations"""
-        self.graphicstate.ncolor = (r, g, b)
+        self.graphicstate.ncolor = \
+            (cast(float, r), cast(float, g), cast(float, b))
        return

-    def do_K(self, c, m, y, k):
+    def do_K(
+        self,
+        c: PDFStackT,
+        m: PDFStackT,
+        y: PDFStackT,
+        k: PDFStackT
+    ) -> None:
        """Set CMYK color for stroking operations"""
-        self.graphicstate.scolor = (c, m, y, k)
+        self.graphicstate.scolor = \
+            (cast(float, c), cast(float, m), cast(float, y), cast(float, k))
        return

-    def do_k(self, c, m, y, k):
+    def do_k(
+        self,
+        c: PDFStackT,
+        m: PDFStackT,
+        y: PDFStackT,
+        k: PDFStackT
+    ) -> None:
        """Set CMYK color for nonstroking operations"""
-        self.graphicstate.ncolor = (c, m, y, k)
+        self.graphicstate.ncolor = \
+            (cast(float, c), cast(float, m), cast(float, y), cast(float, k))
        return

-    def do_SCN(self):
+    def do_SCN(self) -> None:
        """Set color for stroking operations."""
        if self.scs:
            n = self.scs.ncomponents
@ -622,10 +718,10 @@ class PDFPageInterpreter:
            if settings.STRICT:
                raise PDFInterpreterError('No colorspace specified!')
            n = 1
-        self.graphicstate.scolor = self.pop(n)
+        self.graphicstate.scolor = cast(Color, self.pop(n))
        return

-    def do_scn(self):
+    def do_scn(self) -> None:
        """Set color for nonstroking operations"""
        if self.ncs:
            n = self.ncs.ncomponents
@ -633,24 +729,24 @@ class PDFPageInterpreter:
            if settings.STRICT:
                raise PDFInterpreterError('No colorspace specified!')
            n = 1
-        self.graphicstate.ncolor = self.pop(n)
+        self.graphicstate.ncolor = cast(Color, self.pop(n))
        return

-    def do_SC(self):
+    def do_SC(self) -> None:
        """Set color for stroking operations"""
        self.do_SCN()
        return

-    def do_sc(self):
+    def do_sc(self) -> None:
        """Set color for nonstroking operations"""
        self.do_scn()
        return

-    def do_sh(self, name):
+    def do_sh(self, name: object) -> None:
        """Paint area defined by shading pattern"""
        return

-    def do_BT(self):
+    def do_BT(self) -> None:
        """Begin text object

        Initializing the text matrix, Tm, and the text line matrix, Tlm, to
@ -660,82 +756,82 @@ class PDFPageInterpreter:
        self.textstate.reset()
        return

-    def do_ET(self):
+    def do_ET(self) -> None:
        """End a text object"""
        return

-    def do_BX(self):
+    def do_BX(self) -> None:
        """Begin compatibility section"""
        return

-    def do_EX(self):
+    def do_EX(self) -> None:
        """End compatibility section"""
        return

-    def do_MP(self, tag):
+    def do_MP(self, tag: PDFStackT) -> None:
        """Define marked-content point"""
-        self.device.do_tag(tag)
+        self.device.do_tag(cast(PSLiteral, tag))
        return

-    def do_DP(self, tag, props):
+    def do_DP(self, tag: PDFStackT, props: PDFStackT) -> None:
        """Define marked-content point with property list"""
-        self.device.do_tag(tag, props)
+        self.device.do_tag(cast(PSLiteral, tag), props)
        return

-    def do_BMC(self, tag):
+    def do_BMC(self, tag: PDFStackT) -> None:
        """Begin marked-content sequence"""
-        self.device.begin_tag(tag)
+        self.device.begin_tag(cast(PSLiteral, tag))
        return

-    def do_BDC(self, tag, props):
+    def do_BDC(self, tag: PDFStackT, props: PDFStackT) -> None:
        """Begin marked-content sequence with property list"""
-        self.device.begin_tag(tag, props)
+        self.device.begin_tag(cast(PSLiteral, tag), props)
        return

-    def do_EMC(self):
+    def do_EMC(self) -> None:
        """End marked-content sequence"""
        self.device.end_tag()
        return

-    def do_Tc(self, space):
+    def do_Tc(self, space: PDFStackT) -> None:
        """Set character spacing.

        Character spacing is used by the Tj, TJ, and ' operators.

        :param space: a number expressed in unscaled text space units.
        """
-        self.textstate.charspace = space
+        self.textstate.charspace = cast(float, space)
        return

-    def do_Tw(self, space):
+    def do_Tw(self, space: PDFStackT) -> None:
        """Set the word spacing.

        Word spacing is used by the Tj, TJ, and ' operators.

        :param space: a number expressed in unscaled text space units
        """
-        self.textstate.wordspace = space
+        self.textstate.wordspace = cast(float, space)
        return

-    def do_Tz(self, scale):
+    def do_Tz(self, scale: PDFStackT) -> None:
        """Set the horizontal scaling.

        :param scale: is a number specifying the percentage of the normal width
        """
-        self.textstate.scaling = scale
+        self.textstate.scaling = cast(float, scale)
        return

-    def do_TL(self, leading):
+    def do_TL(self, leading: PDFStackT) -> None:
        """Set the text leading.

        Text leading is used only by the T*, ', and " operators.

        :param leading: a number expressed in unscaled text space units
        """
-        self.textstate.leading = -leading
+        self.textstate.leading = -cast(float, leading)
        return

-    def do_Tf(self, fontid, fontsize):
+    def do_Tf(self, fontid: PDFStackT, fontsize: PDFStackT) -> None:
        """Set the text font

        :param fontid: the name of a font resource in the Font subdictionary
@ -748,44 +844,56 @@ class PDFPageInterpreter:
            if settings.STRICT:
                raise PDFInterpreterError('Undefined Font id: %r' % fontid)
            self.textstate.font = self.rsrcmgr.get_font(None, {})
-        self.textstate.fontsize = fontsize
+        self.textstate.fontsize = cast(float, fontsize)
        return

-    def do_Tr(self, render):
+    def do_Tr(self, render: PDFStackT) -> None:
        """Set the text rendering mode"""
-        self.textstate.render = render
+        self.textstate.render = cast(int, render)
        return

-    def do_Ts(self, rise):
+    def do_Ts(self, rise: PDFStackT) -> None:
        """Set the text rise

        :param rise: a number expressed in unscaled text space units
        """
-        self.textstate.rise = rise
+        self.textstate.rise = cast(float, rise)
        return

-    def do_Td(self, tx, ty):
+    def do_Td(self, tx: PDFStackT, ty: PDFStackT) -> None:
        """Move text position"""
+        tx = cast(float, tx)
+        ty = cast(float, ty)
        (a, b, c, d, e, f) = self.textstate.matrix
        self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f)
        self.textstate.linematrix = (0, 0)
        return

-    def do_TD(self, tx, ty):
+    def do_TD(self, tx: PDFStackT, ty: PDFStackT) -> None:
        """Move text position and set leading"""
+        tx = cast(float, tx)
+        ty = cast(float, ty)
        (a, b, c, d, e, f) = self.textstate.matrix
        self.textstate.matrix = (a, b, c, d, tx*a+ty*c+e, tx*b+ty*d+f)
        self.textstate.leading = ty
        self.textstate.linematrix = (0, 0)
        return

-    def do_Tm(self, a, b, c, d, e, f):
+    def do_Tm(
+        self,
+        a: PDFStackT,
+        b: PDFStackT,
+        c: PDFStackT,
+        d: PDFStackT,
+        e: PDFStackT,
+        f: PDFStackT
+    ) -> None:
        """Set text matrix and text line matrix"""
-        self.textstate.matrix = (a, b, c, d, e, f)
+        self.textstate.matrix = cast(Matrix, (a, b, c, d, e, f))
        self.textstate.linematrix = (0, 0)
        return

-    def do_T_a(self):
+    def do_T_a(self) -> None:
        """Move to start of next text line"""
        (a, b, c, d, e, f) = self.textstate.matrix
        self.textstate.matrix = (a, b, c, d, self.textstate.leading*c+e,
@ -793,22 +901,23 @@ class PDFPageInterpreter:
        self.textstate.linematrix = (0, 0)
        return

-    def do_TJ(self, seq):
+    def do_TJ(self, seq: PDFStackT) -> None:
        """Show text, allowing individual glyph positioning"""
        if self.textstate.font is None:
            if settings.STRICT:
                raise PDFInterpreterError('No font specified!')
            return
-        self.device.render_string(self.textstate, seq, self.ncs,
-                                  self.graphicstate.copy())
+        assert self.ncs is not None
+        self.device.render_string(self.textstate, cast(PDFTextSeq, seq),
+                                  self.ncs, self.graphicstate.copy())
        return

-    def do_Tj(self, s):
+    def do_Tj(self, s: PDFStackT) -> None:
        """Show text"""
        self.do_TJ([s])
        return

-    def do__q(self, s):
+    def do__q(self, s: PDFStackT) -> None:
        """Move to next line and show text

        The ' (single quote) operator.
@ -817,7 +926,7 @@ class PDFPageInterpreter:
        self.do_TJ([s])
        return

-    def do__w(self, aw, ac, s):
+    def do__w(self, aw: PDFStackT, ac: PDFStackT, s: PDFStackT) -> None:
        """Set word and character spacing, move to next line, and show text

        The " (double quote) operator.
@ -827,15 +936,15 @@ class PDFPageInterpreter:
        self.do_TJ([s])
        return

-    def do_BI(self):
+    def do_BI(self) -> None:
        """Begin inline image object"""
        return

-    def do_ID(self):
+    def do_ID(self) -> None:
        """Begin inline image data"""
        return

-    def do_EI(self, obj):
+    def do_EI(self, obj: PDFStackT) -> None:
        """End inline image object"""
        if isinstance(obj, PDFStream) and 'W' in obj and 'H' in obj:
            iobjid = str(id(obj))
@ -844,9 +953,9 @@ class PDFPageInterpreter:
            self.device.end_figure(iobjid)
        return

-    def do_Do(self, xobjid):
+    def do_Do(self, xobjid_arg: PDFStackT) -> None:
        """Invoke named XObject"""
-        xobjid = literal_name(xobjid)
+        xobjid = cast(str, literal_name(xobjid_arg))
        try:
            xobj = stream_value(self.xobjmap[xobjid])
        except KeyError:
@ -857,8 +966,9 @@ class PDFPageInterpreter:
        subtype = xobj.get('Subtype')
        if subtype is LITERAL_FORM and 'BBox' in xobj:
            interpreter = self.dup()
-            bbox = list_value(xobj['BBox'])
-            matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
+            bbox = cast(Rect, list_value(xobj['BBox']))
+            matrix = cast(Matrix, list_value(
+                xobj.get('Matrix', MATRIX_IDENTITY)))
            # According to PDF reference 1.7 section 4.9.1, XObjects in
            # earlier PDFs (prior to v1.2) use the page's Resources entry
            # instead of having their own Resources entry.
@ -880,7 +990,7 @@ class PDFPageInterpreter:
            pass
        return

-    def process_page(self, page):
+    def process_page(self, page: PDFPage) -> None:
        log.info('Processing page: %r', page)
        (x0, y0, x1, y1) = page.mediabox
        if page.rotate == 90:
@ -896,7 +1006,12 @@ class PDFPageInterpreter:
        self.device.end_page(page)
        return

-    def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY):
+    def render_contents(
+        self,
+        resources: Dict[object, object],
+        streams: Sequence[object],
+        ctm: Matrix = MATRIX_IDENTITY
+    ) -> None:
        """Render the content streams.

        This method may be called recursively.
@ -908,7 +1023,7 @@ class PDFPageInterpreter:
        self.execute(list_value(streams))
        return

-    def execute(self, streams):
+    def execute(self, streams: Sequence[object]) -> None:
        try:
            parser = PDFContentParser(streams)
        except PSEOF:
--- a/pdfminer/pdfpage.py
+++ b/pdfminer/pdfpage.py
@ -1,4 +1,6 @@
 import logging
+from pdfminer.utils import Rect
+from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple
 import warnings
 from . import settings
 from .psparser import LIT
@ -32,7 +34,7 @@ class PDFPage:
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
-      resources: a list of resources used by the page.
+      resources: a dictionary of resources used by the page.
      mediabox: the physical size of the page.
      cropbox: the crop rectangle of the page.
      rotate: the page rotation (in degree).
@ -40,7 +42,12 @@ class PDFPage:
      beads: a chain that represents natural reading order.
    """

-    def __init__(self, doc, pageid, attrs):
+    def __init__(
+        self,
+        doc: PDFDocument,
+        pageid: object,
+        attrs: object
+    ) -> None:
        """Initialize a page object.

        doc: a PDFDocument object.
@ -51,10 +58,11 @@ class PDFPage:
        self.pageid = pageid
        self.attrs = dict_value(attrs)
        self.lastmod = resolve1(self.attrs.get('LastModified'))
-        self.resources = resolve1(self.attrs.get('Resources', dict()))
-        self.mediabox = resolve1(self.attrs['MediaBox'])
+        self.resources: Dict[object, object] = \
+            resolve1(self.attrs.get('Resources', dict()))
+        self.mediabox: Rect = resolve1(self.attrs['MediaBox'])
        if 'CropBox' in self.attrs:
-            self.cropbox = resolve1(self.attrs['CropBox'])
+            self.cropbox: Rect = resolve1(self.attrs['CropBox'])
        else:
            self.cropbox = self.mediabox
        self.rotate = (int_value(self.attrs.get('Rotate', 0))+360) % 360
@ -66,23 +74,28 @@ class PDFPage:
            contents = []
        if not isinstance(contents, list):
            contents = [contents]
-        self.contents = contents
+        self.contents: List[object] = contents
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFPage: Resources={!r}, MediaBox={!r}>'\
            .format(self.resources, self.mediabox)

    INHERITABLE_ATTRS = {'Resources', 'MediaBox', 'CropBox', 'Rotate'}

    @classmethod
-    def create_pages(cls, document):
-        def search(obj, parent):
+    def create_pages(cls, document: PDFDocument) -> Iterator["PDFPage"]:
+        def search(
+            obj: object,
+            parent: Dict[str, object]
+        ) -> Iterator[Tuple[int, Dict[object, Dict[object, object]]]]:
            if isinstance(obj, int):
                objid = obj
                tree = dict_value(document.getobj(objid)).copy()
            else:
-                objid = obj.objid
+                # This looks broken. obj.objid means obj could be either
+                # PDFObjRef or PDFStream, but neither is valid for dict_value.
+                objid = obj.objid  # type: ignore[attr-defined]
                tree = dict_value(obj).copy()
            for (k, v) in parent.items():
                if k in cls.INHERITABLE_ATTRS and k not in tree:
@ -119,9 +132,15 @@ class PDFPage:
        return

    @classmethod
-    def get_pages(cls, fp,
-                  pagenos=None, maxpages=0, password='',
-                  caching=True, check_extractable=False):
+    def get_pages(
+        cls,
+        fp: BinaryIO,
+        pagenos: Optional[Container[int]] = None,
+        maxpages: int = 0,
+        password: str = '',
+        caching: bool = True,
+        check_extractable: bool = False
+    ) -> Iterator["PDFPage"]:
        # Create a PDF parser object associated with the file object.
        parser = PDFParser(fp)
        # Create a PDF document object that stores the document structure.
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@ -1,6 +1,8 @@
 import logging
 from io import BytesIO
+from typing import BinaryIO, TYPE_CHECKING, Optional, Union
 from .psparser import PSStackParser
+from .psparser import PSKeyword
 from .psparser import PSSyntaxError
 from .psparser import PSEOF
 from .psparser import KWD
@ -11,6 +13,9 @@ from .pdftypes import PDFObjRef
 from .pdftypes import int_value
 from .pdftypes import dict_value

+if TYPE_CHECKING:
+    from .pdfdocument import PDFDocument
+
 log = logging.getLogger(__name__)


@ -18,7 +23,8 @@ class PDFSyntaxError(PDFException):
    pass


-class PDFParser(PSStackParser):
+# PDFParser stack holds all the base types plus PDFStream, PDFObjRef, and None
+class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]):
    """
    PDFParser fetch PDF objects from a file stream.
    It can handle indirect references by referring to
@ -35,13 +41,13 @@ class PDFParser(PSStackParser):

    """

-    def __init__(self, fp):
+    def __init__(self, fp: BinaryIO) -> None:
        PSStackParser.__init__(self, fp)
-        self.doc = None
+        self.doc: Optional["PDFDocument"] = None
        self.fallback = False
        return

-    def set_document(self, doc):
+    def set_document(self, doc: "PDFDocument") -> None:
        """Associates the parser with a PDFDocument object."""
        self.doc = doc
        return
@ -53,7 +59,7 @@ class PDFParser(PSStackParser):
    KEYWORD_XREF = KWD(b'xref')
    KEYWORD_STARTXREF = KWD(b'startxref')

-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        """Handles PDF-related keywords."""

        if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
@ -71,7 +77,9 @@ class PDFParser(PSStackParser):
            if len(self.curstack) >= 2:
                try:
                    ((_, objid), (_, genno)) = self.pop(2)
-                    (objid, genno) = (int(objid), int(genno))
+                    (objid, genno) = (
+                        int(objid), int(genno))  # type: ignore[arg-type]
+                    assert self.doc is not None
                    obj = PDFObjRef(self.doc, objid, genno)
                    self.push((pos, obj))
                except PSSyntaxError:
@ -114,13 +122,13 @@ class PDFParser(PSStackParser):
                objlen += len(line)
                if self.fallback:
                    data += line
-            data = bytes(data)
            self.seek(pos+objlen)
            # XXX limit objlen not to exceed object boundary
            log.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos,
                      objlen, dic, data[:10])
-            obj = PDFStream(dic, data, self.doc.decipher)
-            self.push((pos, obj))
+            assert self.doc is not None
+            stream = PDFStream(dic, bytes(data), self.doc.decipher)
+            self.push((pos, stream))

        else:
            # others
@ -138,22 +146,23 @@ class PDFStreamParser(PDFParser):
    indirect references to other objects in the same document.
    """

-    def __init__(self, data):
+    def __init__(self, data: bytes) -> None:
        PDFParser.__init__(self, BytesIO(data))
        return

-    def flush(self):
+    def flush(self) -> None:
        self.add_results(*self.popall())
        return

    KEYWORD_OBJ = KWD(b'obj')

-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        if token is self.KEYWORD_R:
            # reference to indirect object
            try:
                ((_, objid), (_, genno)) = self.pop(2)
-                (objid, genno) = (int(objid), int(genno))
+                (objid, genno) = (
+                    int(objid), int(genno))  # type: ignore[arg-type]
                obj = PDFObjRef(self.doc, objid, genno)
                self.push((pos, obj))
            except PSSyntaxError:
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@ -1,5 +1,8 @@
 import zlib
 import logging
+import sys
+from typing import (TYPE_CHECKING, Any, Dict, Iterable, Optional, Union, List,
+                    Tuple, cast)
 from .lzw import lzwdecode
 from .ascii85 import ascii85decode
 from .ascii85 import asciihexdecode
@ -10,7 +13,9 @@ from .psparser import PSObject
 from .psparser import LIT
 from . import settings
 from .utils import apply_png_predictor
-from .utils import isnumber
+
+if TYPE_CHECKING:
+    from .pdfdocument import PDFDocument


 log = logging.getLogger(__name__)
@ -28,6 +33,21 @@ LITERALS_DCT_DECODE = (LIT('DCTDecode'), LIT('DCT'))
 LITERALS_JBIG2_DECODE = (LIT('JBIG2Decode'),)


+if sys.version_info >= (3, 8):
+    from typing import Protocol
+
+    class DecipherCallable(Protocol):
+        """Fully typed a decipher callback, with optional parameter."""
+        def __call__(self, objid: int, genno: int, data: bytes,
+                     attrs: Optional[Dict[str, Any]] = None) -> bytes:
+            raise NotImplementedError
+
+else:  # Fallback for older Python
+    from typing import Callable
+
+    DecipherCallable = Callable[..., bytes]
+
+
 class PDFObject(PSObject):
    pass

@ -54,7 +74,12 @@ class PDFNotImplementedError(PDFException):

 class PDFObjRef(PDFObject):

-    def __init__(self, doc, objid, _):
+    def __init__(
+        self,
+        doc: Optional["PDFDocument"],
+        objid: int,
+        _: object
+    ) -> None:
        if objid == 0:
            if settings.STRICT:
                raise PDFValueError('PDF object id cannot be 0.')
@ -62,17 +87,18 @@ class PDFObjRef(PDFObject):
        self.objid = objid
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<PDFObjRef:%d>' % (self.objid)

-    def resolve(self, default=None):
+    def resolve(self, default: object = None) -> Any:
+        assert self.doc is not None
        try:
            return self.doc.getobj(self.objid)
        except PDFObjectNotFound:
            return default


-def resolve1(x, default=None):
+def resolve1(x: object, default: object = None) -> Any:
    """Resolves an object.

    If this is an array or dictionary, it may still contains
@ -83,7 +109,7 @@ def resolve1(x, default=None):
    return x


-def resolve_all(x, default=None):
+def resolve_all(x: object, default: object = None) -> Any:
    """Recursively resolves the given object and all the internals.

    Make sure there is no indirect reference within the nested object.
@ -99,7 +125,12 @@ def resolve_all(x, default=None):
    return x


-def decipher_all(decipher, objid, genno, x):
+def decipher_all(
+    decipher: DecipherCallable,
+    objid: int,
+    genno: int,
+    x: object
+) -> Any:
    """Recursively deciphers the given object.
    """
    if isinstance(x, bytes):
@ -112,7 +143,7 @@ def decipher_all(decipher, objid, genno, x):
    return x


-def int_value(x):
+def int_value(x: object) -> int:
    x = resolve1(x)
    if not isinstance(x, int):
        if settings.STRICT:
@ -121,7 +152,7 @@ def int_value(x):
    return x


-def float_value(x):
+def float_value(x: object) -> float:
    x = resolve1(x)
    if not isinstance(x, float):
        if settings.STRICT:
@ -130,34 +161,34 @@ def float_value(x):
    return x


-def num_value(x):
+def num_value(x: object) -> float:
    x = resolve1(x)
-    if not isnumber(x):
+    if not isinstance(x, (int, float)):  # == utils.isnumber(x)
        if settings.STRICT:
            raise PDFTypeError('Int or Float required: %r' % x)
        return 0
    return x


-def uint_value(x, n_bits):
+def uint_value(x: object, n_bits: int) -> int:
    """Resolve number and interpret it as a two's-complement unsigned number"""
-    x = int_value(x)
-    if x > 0:
-        return x
+    xi = int_value(x)
+    if xi > 0:
+        return xi
    else:
-        return x + 2**n_bits
+        return xi + cast(int, 2**n_bits)


-def str_value(x):
+def str_value(x: object) -> bytes:
    x = resolve1(x)
    if not isinstance(x, bytes):
        if settings.STRICT:
            raise PDFTypeError('String required: %r' % x)
-        return ''
+        return b''
    return x


-def list_value(x):
+def list_value(x: object) -> Union[List[Any], Tuple[Any, ...]]:
    x = resolve1(x)
    if not isinstance(x, (list, tuple)):
        if settings.STRICT:
@ -166,7 +197,7 @@ def list_value(x):
    return x


-def dict_value(x):
+def dict_value(x: object) -> Dict[Any, Any]:
    x = resolve1(x)
    if not isinstance(x, dict):
        if settings.STRICT:
@ -176,7 +207,7 @@ def dict_value(x):
    return x


-def stream_value(x):
+def stream_value(x: object) -> "PDFStream":
    x = resolve1(x)
    if not isinstance(x, PDFStream):
        if settings.STRICT:
@ -187,22 +218,27 @@ def stream_value(x):

 class PDFStream(PDFObject):

-    def __init__(self, attrs, rawdata, decipher=None):
+    def __init__(
+        self,
+        attrs: Dict[str, Any],
+        rawdata: bytes,
+        decipher: Optional[DecipherCallable] = None
+    ) -> None:
        assert isinstance(attrs, dict), str(type(attrs))
        self.attrs = attrs
-        self.rawdata = rawdata
+        self.rawdata: Optional[bytes] = rawdata
        self.decipher = decipher
-        self.data = None
-        self.objid = None
-        self.genno = None
+        self.data: Optional[bytes] = None
+        self.objid: Optional[int] = None
+        self.genno: Optional[int] = None
        return

-    def set_objid(self, objid, genno):
+    def set_objid(self, objid: int, genno: int) -> None:
        self.objid = objid
        self.genno = genno
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        if self.data is None:
            assert self.rawdata is not None
            return '<PDFStream(%r): raw=%d, %r>' % \
@ -212,22 +248,22 @@ class PDFStream(PDFObject):
            return '<PDFStream(%r): len=%d, %r>' % \
                   (self.objid, len(self.data), self.attrs)

-    def __contains__(self, name):
+    def __contains__(self, name: object) -> bool:
        return name in self.attrs

-    def __getitem__(self, name):
+    def __getitem__(self, name: str) -> Any:
        return self.attrs[name]

-    def get(self, name, default=None):
+    def get(self, name: str, default: object = None) -> Any:
        return self.attrs.get(name, default)

-    def get_any(self, names, default=None):
+    def get_any(self, names: Iterable[str], default: object = None) -> Any:
        for name in names:
            if name in self.attrs:
                return self.attrs[name]
        return default

-    def get_filters(self):
+    def get_filters(self) -> List[Tuple[Any, Any]]:
        filters = self.get_any(('F', 'Filter'))
        params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
        if not filters:
@ -248,12 +284,14 @@ class PDFStream(PDFObject):
        # return list solves https://github.com/pdfminer/pdfminer.six/issues/15
        return list(zip(_filters, params))

-    def decode(self):
+    def decode(self) -> None:
        assert self.data is None \
               and self.rawdata is not None, str((self.data, self.rawdata))
        data = self.rawdata
        if self.decipher:
            # Handle encryption
+            assert self.objid is not None
+            assert self.genno is not None
            data = self.decipher(self.objid, self.genno, data, self.attrs)
        filters = self.get_filters()
        if not filters:
@ -314,10 +352,11 @@ class PDFStream(PDFObject):
        self.rawdata = None
        return

-    def get_data(self):
+    def get_data(self) -> bytes:
        if self.data is None:
            self.decode()
+            assert self.data is not None
        return self.data

-    def get_rawdata(self):
+    def get_rawdata(self) -> Optional[bytes]:
        return self.rawdata
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@ -4,7 +4,8 @@

 import re
 import logging
-
+from typing import (Any, BinaryIO, Dict, Generic, Iterator, List,
+                    Optional, Tuple, Type, TypeVar, Union)

 from . import settings
 from .utils import choplist
@ -51,10 +52,12 @@ class PSLiteral(PSObject):
    Always use PSLiteralTable.intern().
    """

-    def __init__(self, name):
+    NameType = Union[str, bytes]
+
+    def __init__(self, name: NameType) -> None:
        self.name = name

-    def __repr__(self):
+    def __repr__(self) -> str:
        name = self.name
        return '/%r' % name

@ -71,31 +74,36 @@ class PSKeyword(PSObject):
    Always use PSKeywordTable.intern().
    """

-    def __init__(self, name):
+    def __init__(self, name: bytes) -> None:
        self.name = name
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        name = self.name
        return '/%r' % name


-class PSSymbolTable:
+_SymbolT = TypeVar('_SymbolT', PSLiteral, PSKeyword)
+
+
+class PSSymbolTable(Generic[_SymbolT]):
    """A utility class for storing PSLiteral/PSKeyword objects.

    Interned objects can be checked its identity with "is" operator.
    """

-    def __init__(self, klass):
-        self.dict = {}
-        self.klass = klass
+    def __init__(self, klass: Type[_SymbolT]) -> None:
+        self.dict: Dict[PSLiteral.NameType, _SymbolT] = {}
+        self.klass: Type[_SymbolT] = klass
        return

-    def intern(self, name):
+    def intern(self, name: PSLiteral.NameType) -> _SymbolT:
        if name in self.dict:
            lit = self.dict[name]
        else:
-            lit = self.klass(name)
+            # Type confusion issue: PSKeyword always takes bytes as name
+            #                       PSLiteral uses either str or bytes
+            lit = self.klass(name)  # type: ignore[arg-type]
            self.dict[name] = lit
        return lit

@ -112,7 +120,7 @@ KEYWORD_DICT_BEGIN = KWD(b'<<')
 KEYWORD_DICT_END = KWD(b'>>')


-def literal_name(x):
+def literal_name(x: object) -> Any:
    if not isinstance(x, PSLiteral):
        if settings.STRICT:
            raise PSTypeError('Literal required: {!r}'.format(x))
@ -120,6 +128,7 @@ def literal_name(x):
            name = x
    else:
        name = x.name
+        if not isinstance(name, str):
            try:
                name = str(name, 'utf-8')
            except Exception:
@ -127,7 +136,7 @@ def literal_name(x):
    return name


-def keyword_name(x):
+def keyword_name(x: object) -> Any:
    if not isinstance(x, PSKeyword):
        if settings.STRICT:
            raise PSTypeError('Keyword required: %r' % x)
@ -161,32 +170,35 @@ ESC_STRING = {
 }


+PSBaseParserToken = Union[float, bool, PSLiteral, PSKeyword, bytes]
+
+
 class PSBaseParser:

    """Most basic PostScript parser that performs only tokenization.
    """
    BUFSIZ = 4096

-    def __init__(self, fp):
+    def __init__(self, fp: BinaryIO) -> None:
        self.fp = fp
        self.seek(0)
        return

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<%s: %r, bufpos=%d>' % (self.__class__.__name__, self.fp,
                                        self.bufpos)

-    def flush(self):
+    def flush(self) -> None:
        return

-    def close(self):
+    def close(self) -> None:
        self.flush()
        return

-    def tell(self):
+    def tell(self) -> int:
        return self.bufpos+self.charpos

-    def poll(self, pos=None, n=80):
+    def poll(self, pos: Optional[int] = None, n: int = 80) -> None:
        pos0 = self.fp.tell()
        if not pos:
            pos = self.bufpos+self.charpos
@ -195,7 +207,7 @@ class PSBaseParser:
        self.fp.seek(pos0)
        return

-    def seek(self, pos):
+    def seek(self, pos: int) -> None:
        """Seeks the parser to the given position.
        """
        log.debug('seek: %r', pos)
@ -208,10 +220,10 @@ class PSBaseParser:
        self._parse1 = self._parse_main
        self._curtoken = b''
        self._curtokenpos = 0
-        self._tokens = []
+        self._tokens: List[Tuple[int, PSBaseParserToken]] = []
        return

-    def fillbuf(self):
+    def fillbuf(self) -> None:
        if self.charpos < len(self.buf):
            return
        # fetch next chunk.
@ -222,7 +234,7 @@ class PSBaseParser:
        self.charpos = 0
        return

-    def nextline(self):
+    def nextline(self) -> Tuple[int, bytes]:
        """Fetches a next line that ends either with \\r or \\n.
        """
        linebuf = b''
@ -252,7 +264,7 @@ class PSBaseParser:

        return (linepos, linebuf)

-    def revreadlines(self):
+    def revreadlines(self) -> Iterator[bytes]:
        """Fetches a next line backword.

        This is used to locate the trailers at the end of a file.
@ -277,7 +289,7 @@ class PSBaseParser:
                buf = b''
        return

-    def _parse_main(self, s, i):
+    def _parse_main(self, s: bytes, i: int) -> int:
        m = NONSPC.search(s, i)
        if not m:
            return len(s)
@ -321,11 +333,11 @@ class PSBaseParser:
            self._add_token(KWD(c))
            return j+1

-    def _add_token(self, obj):
+    def _add_token(self, obj: PSBaseParserToken) -> None:
        self._tokens.append((self._curtokenpos, obj))
        return

-    def _parse_comment(self, s, i):
+    def _parse_comment(self, s: bytes, i: int) -> int:
        m = EOL.search(s, i)
        if not m:
            self._curtoken += s[i:]
@ -337,7 +349,7 @@ class PSBaseParser:
        # self._tokens.append(self._curtoken)
        return j

-    def _parse_literal(self, s, i):
+    def _parse_literal(self, s: bytes, i: int) -> int:
        m = END_LITERAL.search(s, i)
        if not m:
            self._curtoken += s[i:]
@ -350,14 +362,14 @@ class PSBaseParser:
            self._parse1 = self._parse_literal_hex
            return j+1
        try:
-            self._curtoken = str(self._curtoken, 'utf-8')
+            name: Union[str, bytes] = str(self._curtoken, 'utf-8')
        except Exception:
-            pass
-        self._add_token(LIT(self._curtoken))
+            name = self._curtoken
+        self._add_token(LIT(name))
        self._parse1 = self._parse_main
        return j

-    def _parse_literal_hex(self, s, i):
+    def _parse_literal_hex(self, s: bytes, i: int) -> int:
        c = s[i:i+1]
        if HEX.match(c) and len(self.hex) < 2:
            self.hex += c
@ -367,7 +379,7 @@ class PSBaseParser:
        self._parse1 = self._parse_literal
        return i

-    def _parse_number(self, s, i):
+    def _parse_number(self, s: bytes, i: int) -> int:
        m = END_NUMBER.search(s, i)
        if not m:
            self._curtoken += s[i:]
@ -386,7 +398,7 @@ class PSBaseParser:
        self._parse1 = self._parse_main
        return j

-    def _parse_float(self, s, i):
+    def _parse_float(self, s: bytes, i: int) -> int:
        m = END_NUMBER.search(s, i)
        if not m:
            self._curtoken += s[i:]
@ -400,7 +412,7 @@ class PSBaseParser:
        self._parse1 = self._parse_main
        return j

-    def _parse_keyword(self, s, i):
+    def _parse_keyword(self, s: bytes, i: int) -> int:
        m = END_KEYWORD.search(s, i)
        if not m:
            self._curtoken += s[i:]
@ -408,7 +420,7 @@ class PSBaseParser:
        j = m.start(0)
        self._curtoken += s[i:j]
        if self._curtoken == b'true':
-            token = True
+            token: Union[bool, PSKeyword] = True
        elif self._curtoken == b'false':
            token = False
        else:
@ -417,7 +429,7 @@ class PSBaseParser:
        self._parse1 = self._parse_main
        return j

-    def _parse_string(self, s, i):
+    def _parse_string(self, s: bytes, i: int) -> int:
        m = END_STRING.search(s, i)
        if not m:
            self._curtoken += s[i:]
@ -443,7 +455,7 @@ class PSBaseParser:
        self._parse1 = self._parse_main
        return j+1

-    def _parse_string_1(self, s, i):
+    def _parse_string_1(self, s: bytes, i: int) -> int:
        """Parse literal strings

        PDF Reference 3.2.3
@ -470,7 +482,7 @@ class PSBaseParser:
        self._parse1 = self._parse_string
        return i+1

-    def _parse_wopen(self, s, i):
+    def _parse_wopen(self, s: bytes, i: int) -> int:
        c = s[i:i+1]
        if c == b'<':
            self._add_token(KEYWORD_DICT_BEGIN)
@ -480,7 +492,7 @@ class PSBaseParser:
            self._parse1 = self._parse_hexstring
        return i

-    def _parse_wclose(self, s, i):
+    def _parse_wclose(self, s: bytes, i: int) -> int:
        c = s[i:i+1]
        if c == b'>':
            self._add_token(KEYWORD_DICT_END)
@ -488,7 +500,7 @@ class PSBaseParser:
        self._parse1 = self._parse_main
        return i

-    def _parse_hexstring(self, s, i):
+    def _parse_hexstring(self, s: bytes, i: int) -> int:
        m = END_HEX_STRING.search(s, i)
        if not m:
            self._curtoken += s[i:]
@ -501,7 +513,7 @@ class PSBaseParser:
        self._parse1 = self._parse_main
        return j

-    def nexttoken(self):
+    def nexttoken(self) -> Tuple[int, PSBaseParserToken]:
        while not self._tokens:
            self.fillbuf()
            self.charpos = self._parse1(self.buf, self.charpos)
@ -510,39 +522,51 @@ class PSBaseParser:
        return token


-class PSStackParser(PSBaseParser):
-    def __init__(self, fp):
+# Stack slots may be occupied by any of:
+#  * the PSBaseParserToken types
+#  * list (via KEYWORD_ARRAY)
+#  * dict (via KEYWORD_DICT)
+#  * subclass-specific extensions (e.g. PDFStream, PDFObjRef) via ExtraT
+ExtraT = TypeVar("ExtraT")
+PSStackType = Union[float, bool, PSLiteral, bytes, List, Dict, ExtraT]
+PSStackEntry = Tuple[int, PSStackType[ExtraT]]
+
+
+class PSStackParser(PSBaseParser, Generic[ExtraT]):
+
+    def __init__(self, fp: BinaryIO) -> None:
        PSBaseParser.__init__(self, fp)
        self.reset()
        return

-    def reset(self):
-        self.context = []
-        self.curtype = None
-        self.curstack = []
-        self.results = []
+    def reset(self) -> None:
+        self.context: List[Tuple[int, Optional[str],
+                           List[PSStackEntry[ExtraT]]]] = []
+        self.curtype: Optional[str] = None
+        self.curstack: List[PSStackEntry[ExtraT]] = []
+        self.results: List[PSStackEntry[ExtraT]] = []
        return

-    def seek(self, pos):
+    def seek(self, pos: int) -> None:
        PSBaseParser.seek(self, pos)
        self.reset()
        return

-    def push(self, *objs):
+    def push(self, *objs: PSStackEntry[ExtraT]) -> None:
        self.curstack.extend(objs)
        return

-    def pop(self, n):
+    def pop(self, n: int) -> List[PSStackEntry[ExtraT]]:
        objs = self.curstack[-n:]
        self.curstack[-n:] = []
        return objs

-    def popall(self):
+    def popall(self) -> List[PSStackEntry[ExtraT]]:
        objs = self.curstack
        self.curstack = []
        return objs

-    def add_results(self, *objs):
+    def add_results(self, *objs: PSStackEntry[ExtraT]) -> None:
        try:
            log.debug('add_results: %r', objs)
        except Exception:
@ -550,13 +574,13 @@ class PSStackParser(PSBaseParser):
        self.results.extend(objs)
        return

-    def start_type(self, pos, type):
+    def start_type(self, pos: int, type: str) -> None:
        self.context.append((pos, self.curtype, self.curstack))
        (self.curtype, self.curstack) = (type, [])
        log.debug('start_type: pos=%r, type=%r', pos, type)
        return

-    def end_type(self, type):
+    def end_type(self, type: str) -> Tuple[int, List[PSStackType[ExtraT]]]:
        if self.curtype != type:
            raise PSTypeError('Type mismatch: {!r} != {!r}'
                              .format(self.curtype, type))
@ -565,10 +589,10 @@ class PSStackParser(PSBaseParser):
        log.debug('end_type: pos=%r, type=%r, objs=%r', pos, type, objs)
        return (pos, objs)

-    def do_keyword(self, pos, token):
+    def do_keyword(self, pos: int, token: PSKeyword) -> None:
        return

-    def nextobject(self):
+    def nextobject(self) -> PSStackEntry[ExtraT]:
        """Yields a list of objects.

        Arrays and dictionaries are represented as Python lists and
--- a/pdfminer/runlength.py
+++ b/pdfminer/runlength.py
@ -6,7 +6,7 @@
 #


-def rldecode(data):
+def rldecode(data: bytes) -> bytes:
    """
    RunLength decoder (Adobe version) implementation based on PDF Reference
    version 1.4 section 3.3.4:
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@ -4,8 +4,15 @@ Miscellaneous Routines.
 import io
 import pathlib
 import struct
+from typing import (Any, BinaryIO, Callable, Dict, Generic, Iterable, Iterator,
+                    List, Optional, Set, TextIO, Tuple, TypeVar, Union,
+                    TYPE_CHECKING, cast)
+from typing_extensions import Literal
 from html import escape

+if TYPE_CHECKING:
+    from .layout import LTComponent
+
 import chardet  # For str encoding detection

 # from sys import maxint as INF doesn't work anymore under Python3, but PDF
@ -13,40 +20,54 @@ import chardet  # For str encoding detection
 INF = (1 << 31) - 1


+FileOrName = Union[pathlib.PurePath, str, io.IOBase]
+AnyIO = Union[TextIO, BinaryIO]
+
+
 class open_filename(object):
    """
    Context manager that allows opening a filename
    (str or pathlib.PurePath type is supported) and closes it on exit,
    (just like `open`), but does nothing for file-like objects.
    """
-    def __init__(self, filename, *args, **kwargs):
+    def __init__(
+        self,
+        filename: FileOrName,
+        *args: Any,
+        **kwargs: Any
+    ) -> None:
        if isinstance(filename, pathlib.PurePath):
            filename = str(filename)
        if isinstance(filename, str):
-            self.file_handler = open(filename, *args, **kwargs)
+            self.file_handler: AnyIO = open(filename, *args, **kwargs)
            self.closing = True
        elif isinstance(filename, io.IOBase):
-            self.file_handler = filename
+            self.file_handler = cast(AnyIO, filename)
            self.closing = False
        else:
            raise TypeError('Unsupported input type: %s' % type(filename))

-    def __enter__(self):
+    def __enter__(self) -> AnyIO:
        return self.file_handler

-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(
+        self,
+        exc_type: object,
+        exc_val: object,
+        exc_tb: object
+    ) -> Literal[False]:
        if self.closing:
            self.file_handler.close()
        return False


-def make_compat_bytes(in_str):
+def make_compat_bytes(in_str: str) -> bytes:
    "Converts to bytes, encoding to unicode."
    assert isinstance(in_str, str), str(type(in_str))
    return in_str.encode()


-def make_compat_str(o):
+def make_compat_str(o: object) -> str:
    """Converts everything to string, if bytes guessing the encoding."""
    if isinstance(o, bytes):
        enc = chardet.detect(o)
@ -55,7 +76,7 @@ def make_compat_str(o):
        return str(o)


-def shorten_str(s, size):
+def shorten_str(s: str, size: int) -> str:
    if size < 7:
        return s[:size]
    if len(s) > size:
@ -65,8 +86,11 @@ def shorten_str(s, size):
        return s


-def compatible_encode_method(bytesorstring, encoding='utf-8',
-                             erraction='ignore'):
+def compatible_encode_method(
+    bytesorstring: Union[bytes, str],
+    encoding: str = 'utf-8',
+    erraction: str = 'ignore'
+) -> str:
    """When Py2 str.encode is called, it often means bytes.encode in Py3.

     This does either.
@ -77,7 +101,7 @@ def compatible_encode_method(bytesorstring, encoding='utf-8',
    return bytesorstring.decode(encoding, erraction)


-def paeth_predictor(left, above, upper_left):
+def paeth_predictor(left: int, above: int, upper_left: int) -> int:
    # From http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html
    # Initial estimate
    p = left + above - upper_left
@ -95,7 +119,13 @@ def paeth_predictor(left, above, upper_left):
        return upper_left


-def apply_png_predictor(pred, colors, columns, bitspercomponent, data):
+def apply_png_predictor(
+    pred: int,
+    colors: int,
+    columns: int,
+    bitspercomponent: int,
+    data: bytes
+) -> bytes:
    """Reverse the effect of the PNG predictor

    Documentation: http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html
@ -190,11 +220,20 @@ def apply_png_predictor(pred, colors, columns, bitspercomponent, data):
    return buf


+Point = Tuple[float, float]
+Rect = Tuple[float, float, float, float]
+Matrix = Tuple[float, float, float, float, float, float]
+PathSegment = Union[
+    Tuple[str],                                             # Literal['h']
+    Tuple[str, float, float],                               # Literal['m', 'l']
+    Tuple[str, float, float, float, float],                 # Literal['v', 'y']
+    Tuple[str, float, float, float, float, float, float]]   # Literal['c']
+
 #  Matrix operations
-MATRIX_IDENTITY = (1, 0, 0, 1, 0, 0)
+MATRIX_IDENTITY: Matrix = (1, 0, 0, 1, 0, 0)


-def mult_matrix(m1, m0):
+def mult_matrix(m1: Matrix, m0: Matrix) -> Matrix:
    (a1, b1, c1, d1, e1, f1) = m1
    (a0, b0, c0, d0, e0, f0) = m0
    """Returns the multiplication of two matrices."""
@ -203,21 +242,21 @@ def mult_matrix(m1, m0):
            a0 * e1 + c0 * f1 + e0, b0 * e1 + d0 * f1 + f0)


-def translate_matrix(m, v):
+def translate_matrix(m: Matrix, v: Point) -> Matrix:
    """Translates a matrix by (x, y)."""
    (a, b, c, d, e, f) = m
    (x, y) = v
    return a, b, c, d, x * a + y * c + e, x * b + y * d + f


-def apply_matrix_pt(m, v):
+def apply_matrix_pt(m: Matrix, v: Point) -> Point:
    (a, b, c, d, e, f) = m
    (x, y) = v
    """Applies a matrix to a point."""
    return a * x + c * y + e, b * x + d * y + f


-def apply_matrix_norm(m, v):
+def apply_matrix_norm(m: Matrix, v: Point) -> Point:
    """Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0))"""
    (a, b, c, d, e, f) = m
    (p, q) = v
@ -226,11 +265,14 @@ def apply_matrix_norm(m, v):

 #  Utility functions

-def isnumber(x):
+def isnumber(x: object) -> bool:
    return isinstance(x, (int, float))


-def uniq(objs):
+_T = TypeVar('_T')
+
+
+def uniq(objs: Iterable[_T]) -> Iterator[_T]:
    """Eliminates duplicated elements."""
    done = set()
    for obj in objs:
@ -241,7 +283,10 @@ def uniq(objs):
    return


-def fsplit(pred, objs):
+def fsplit(
+    pred: Callable[[_T], bool],
+    objs: Iterable[_T]
+) -> Tuple[List[_T], List[_T]]:
    """Split a list into two classes according to the predicate."""
    t = []
    f = []
@ -253,14 +298,15 @@ def fsplit(pred, objs):
    return t, f


-def drange(v0, v1, d):
+def drange(v0: float, v1: float, d: int) -> range:
    """Returns a discrete range."""
    return range(int(v0) // d, int(v1 + d) // d)


-def get_bound(pts):
+def get_bound(pts: Iterable[Point]) -> Rect:
    """Compute a minimal rectangle that covers all the points."""
-    (x0, y0, x1, y1) = (INF, INF, -INF, -INF)
+    limit: Rect = (INF, INF, -INF, -INF)
+    (x0, y0, x1, y1) = limit
    for (x, y) in pts:
        x0 = min(x0, x)
        y0 = min(y0, y)
@ -269,7 +315,11 @@ def get_bound(pts):
    return x0, y0, x1, y1


-def pick(seq, func, maxobj=None):
+def pick(
+    seq: Iterable[_T],
+    func: Callable[[_T], float],
+    maxobj: Optional[_T] = None
+) -> Optional[_T]:
    """Picks the object obj where func(obj) has the highest value."""
    maxscore = None
    for obj in seq:
@ -279,7 +329,7 @@ def pick(seq, func, maxobj=None):
    return maxobj


-def choplist(n, seq):
+def choplist(n: int, seq: Iterable[_T]) -> Iterator[Tuple[_T, ...]]:
    """Groups every n elements of the list."""
    r = []
    for x in seq:
@ -290,7 +340,7 @@ def choplist(n, seq):
    return


-def nunpack(s, default=0):
+def nunpack(s: bytes, default: int = 0) -> int:
    """Unpacks 1 to 4 or 8 byte integers (big endian)."""
    length = len(s)
    if not length:
@ -298,13 +348,13 @@ def nunpack(s, default=0):
    elif length == 1:
        return ord(s)
    elif length == 2:
-        return struct.unpack('>H', s)[0]
+        return cast(int, struct.unpack('>H', s)[0])
    elif length == 3:
-        return struct.unpack('>L', b'\x00' + s)[0]
+        return cast(int, struct.unpack('>L', b'\x00' + s)[0])
    elif length == 4:
-        return struct.unpack('>L', s)[0]
+        return cast(int, struct.unpack('>L', s)[0])
    elif length == 8:
-        return struct.unpack('>Q', s)[0]
+        return cast(int, struct.unpack('>Q', s)[0])
    else:
        raise TypeError('invalid length: %d' % length)

@ -345,7 +395,7 @@ PDFDocEncoding = ''.join(chr(x) for x in (
 ))


-def decode_text(s):
+def decode_text(s: bytes) -> str:
    """Decodes a PDFDocEncoding string to Unicode."""
    if s.startswith(b'\xfe\xff'):
        return str(s[2:], 'utf-16be', 'ignore')
@ -353,25 +403,25 @@ def decode_text(s):
        return ''.join(PDFDocEncoding[c] for c in s)


-def enc(x):
+def enc(x: str) -> str:
    """Encodes a string for SGML/XML/HTML"""
    if isinstance(x, bytes):
        return ''
    return escape(x)


-def bbox2str(bbox):
+def bbox2str(bbox: Rect) -> str:
    (x0, y0, x1, y1) = bbox
    return '{:.3f},{:.3f},{:.3f},{:.3f}'.format(x0, y0, x1, y1)


-def matrix2str(m):
+def matrix2str(m: Matrix) -> str:
    (a, b, c, d, e, f) = m
    return '[{:.2f},{:.2f},{:.2f},{:.2f}, ({:.2f},{:.2f})]'\
        .format(a, b, c, d, e, f)


-def vecBetweenBoxes(obj1, obj2):
+def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point:
    """A distance function between two TextBoxes.

    Consider the bounding rectangle for obj1 and obj2.
@ -397,7 +447,10 @@ def vecBetweenBoxes(obj1, obj2):
        return max(0, iw), max(0, ih)


-class Plane:
+LTComponentT = TypeVar('LTComponentT', bound='LTComponent')
+
+
+class Plane(Generic[LTComponentT]):
    """A set-like data structure for objects placed on a plane.

    Can efficiently find objects in a certain rectangular area.
@ -405,26 +458,26 @@ class Plane:
    which is sorted by its x or y coordinate.
    """

-    def __init__(self, bbox, gridsize=50):
-        self._seq = []  # preserve the object order.
-        self._objs = set()
-        self._grid = {}
+    def __init__(self, bbox: Rect, gridsize: int = 50) -> None:
+        self._seq: List[LTComponentT] = []  # preserve the object order.
+        self._objs: Set[LTComponentT] = set()
+        self._grid: Dict[Point, List[LTComponentT]] = {}
        self.gridsize = gridsize
        (self.x0, self.y0, self.x1, self.y1) = bbox

-    def __repr__(self):
+    def __repr__(self) -> str:
        return '<Plane objs=%r>' % list(self)

-    def __iter__(self):
+    def __iter__(self) -> Iterator[LTComponentT]:
        return (obj for obj in self._seq if obj in self._objs)

-    def __len__(self):
+    def __len__(self) -> int:
        return len(self._objs)

-    def __contains__(self, obj):
+    def __contains__(self, obj: object) -> bool:
        return obj in self._objs

-    def _getrange(self, bbox):
+    def _getrange(self, bbox: Rect) -> Iterator[Point]:
        (x0, y0, x1, y1) = bbox
        if x1 <= self.x0 or self.x1 <= x0 or y1 <= self.y0 or self.y1 <= y0:
            return
@ -436,15 +489,15 @@ class Plane:
            for grid_x in drange(x0, x1, self.gridsize):
                yield (grid_x, grid_y)

-    def extend(self, objs):
+    def extend(self, objs: Iterable[LTComponentT]) -> None:
        for obj in objs:
            self.add(obj)

-    def add(self, obj):
+    def add(self, obj: LTComponentT) -> None:
        """place an object."""
        for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
            if k not in self._grid:
-                r = []
+                r: List[LTComponentT] = []
                self._grid[k] = r
            else:
                r = self._grid[k]
@ -452,7 +505,7 @@ class Plane:
        self._seq.append(obj)
        self._objs.add(obj)

-    def remove(self, obj):
+    def remove(self, obj: LTComponentT) -> None:
        """displace an object."""
        for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
            try:
@ -461,7 +514,7 @@ class Plane:
                pass
        self._objs.remove(obj)

-    def find(self, bbox):
+    def find(self, bbox: Rect) -> Iterator[LTComponentT]:
        """finds objects that are in a certain area."""
        (x0, y0, x1, y1) = bbox
        done = set()
--- a/setup.py
+++ b/setup.py
@ -17,7 +17,7 @@ setup(
        'cryptography',
    ],
    extras_require={
-        "dev": ["nose", "tox"],
+        "dev": ["nose", "tox", "mypy == 0.910"],
        "docs": ["sphinx", "sphinx-argparse"],
    },
    description='PDF parser and analyzer',
--- a/tests/test_tools_dumppdf.py
+++ b/tests/test_tools_dumppdf.py
@ -1,5 +1,5 @@
 import warnings
-
+from nose.tools import raises
 from helpers import absolute_sample_path
 from tempfilepath import TemporaryFilePath
 from pdfminer.pdfdocument import PDFNoValidXRefWarning
@ -51,3 +51,13 @@ class TestDumpPDF():

    def test_6(self):
        run('nonfree/naacl06-shinyama.pdf', '-t -a')
+
+    @raises(TypeError)
+    def test_simple1_raw(self):
+        """Known issue: crash in dumpxml writing binary to text stream."""
+        run('simple1.pdf', '-r -a')
+
+    @raises(TypeError)
+    def test_simple1_binary(self):
+        """Known issue: crash in dumpxml writing binary to text stream."""
+        run('simple1.pdf', '-b -a')
--- a/tools/conv_afm.py
+++ b/tools/conv_afm.py
@ -42,4 +42,4 @@ def main(argv):


 if __name__ == '__main__':
-    sys.exit(main(sys.argv))
+    sys.exit(main(sys.argv))  # type: ignore[no-untyped-call]
--- a/tools/conv_cmap.py
+++ b/tools/conv_cmap.py
@ -199,4 +199,4 @@ def main(argv):


 if __name__ == '__main__':
-    sys.exit(main(sys.argv))
+    sys.exit(main(sys.argv))  # type: ignore[no-untyped-call]
--- a/tools/conv_glyphlist.py
+++ b/tools/conv_glyphlist.py
@ -24,4 +24,4 @@ def main(argv):


 if __name__ == '__main__':
-    sys.exit(main(sys.argv))
+    sys.exit(main(sys.argv))  # type: ignore[no-untyped-call]
--- a/tools/dumppdf.py
+++ b/tools/dumppdf.py
@ -4,6 +4,8 @@ import logging
 import os.path
 import re
 import sys
+from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, \
+    Union, cast
 import warnings
 from argparse import ArgumentParser

@ -22,13 +24,15 @@ logging.basicConfig()
 ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')


-def escape(s):
+def escape(s: Union[str, bytes]) -> str:
    if isinstance(s, bytes):
-        s = str(s, 'latin-1')
-    return ESC_PAT.sub(lambda m: '&#%d;' % ord(m.group(0)), s)
+        us = str(s, 'latin-1')
+    else:
+        us = s
+    return ESC_PAT.sub(lambda m: '&#%d;' % ord(m.group(0)), us)


-def dumpxml(out, obj, codec=None):
+def dumpxml(out: TextIO, obj: object, codec: Optional[str] = None) -> None:
    if obj is None:
        out.write('<null />')
        return
@ -51,15 +55,17 @@ def dumpxml(out, obj, codec=None):
        out.write('</list>')
        return

-    if isinstance(obj, ((str,), bytes)):
+    if isinstance(obj, (str, bytes)):
        out.write('<string size="%d">%s</string>' % (len(obj), escape(obj)))
        return

    if isinstance(obj, PDFStream):
        if codec == 'raw':
-            out.write(obj.get_rawdata())
+            # Bug: writing bytes to text I/O. This will raise TypeError.
+            out.write(obj.get_rawdata())  # type: ignore [arg-type]
        elif codec == 'binary':
-            out.write(obj.get_data())
+            # Bug: writing bytes to text I/O. This will raise TypeError.
+            out.write(obj.get_data())  # type: ignore [arg-type]
        else:
            out.write('<stream>\n<props>\n')
            dumpxml(out, obj.attrs)
@ -76,11 +82,15 @@ def dumpxml(out, obj, codec=None):
        return

    if isinstance(obj, PSKeyword):
-        out.write('<keyword>%s</keyword>' % obj.name)
+        # Likely bug: obj.name is bytes, not str
+        out.write('<keyword>%s</keyword>'
+                  % obj.name)  # type: ignore [str-bytes-safe]
        return

    if isinstance(obj, PSLiteral):
-        out.write('<literal>%s</literal>' % obj.name)
+        # Likely bug: obj.name may be bytes, not str
+        out.write('<literal>%s</literal>'
+                  % obj.name)  # type: ignore [str-bytes-safe]
        return

    if isnumber(obj):
@ -90,11 +100,15 @@ def dumpxml(out, obj, codec=None):
    raise TypeError(obj)


-def dumptrailers(out, doc, show_fallback_xref=False):
+def dumptrailers(
+    out: TextIO,
+    doc: PDFDocument,
+    show_fallback_xref: bool = False
+) -> None:
    for xref in doc.xrefs:
        if not isinstance(xref, PDFXRefFallback) or show_fallback_xref:
            out.write('<trailer>\n')
-            dumpxml(out, xref.trailer)
+            dumpxml(out, xref.get_trailer())
            out.write('\n</trailer>\n\n')
    no_xrefs = all(isinstance(xref, PDFXRefFallback) for xref in doc.xrefs)
    if no_xrefs and not show_fallback_xref:
@ -105,7 +119,12 @@ def dumptrailers(out, doc, show_fallback_xref=False):
    return


-def dumpallobjs(out, doc, codec=None, show_fallback_xref=False):
+def dumpallobjs(
+    out: TextIO,
+    doc: PDFDocument,
+    codec: Optional[str] = None,
+    show_fallback_xref: bool = False
+) -> None:
    visited = set()
    out.write('<pdf>')
    for xref in doc.xrefs:
@ -127,15 +146,23 @@ def dumpallobjs(out, doc, codec=None, show_fallback_xref=False):
    return


-def dumpoutline(outfp, fname, objids, pagenos, password='',
-                dumpall=False, codec=None, extractdir=None):
+def dumpoutline(
+    outfp: TextIO,
+    fname: str,
+    objids: Any,
+    pagenos: Container[int],
+    password: str = '',
+    dumpall: bool = False,
+    codec: Optional[str] = None,
+    extractdir: Optional[str] = None
+) -> None:
    fp = open(fname, 'rb')
    parser = PDFParser(fp)
    doc = PDFDocument(parser, password)
    pages = {page.pageid: pageno for (pageno, page)
             in enumerate(PDFPage.create_pages(doc), 1)}

-    def resolve_dest(dest):
+    def resolve_dest(dest: object) -> Any:
        if isinstance(dest, (str, bytes)):
            dest = resolve1(doc.get_dest(dest))
        elif isinstance(dest, PSLiteral):
@ -183,10 +210,10 @@ LITERAL_FILESPEC = LIT('Filespec')
 LITERAL_EMBEDDEDFILE = LIT('EmbeddedFile')


-def extractembedded(outfp, fname, objids, pagenos, password='',
-                    dumpall=False, codec=None, extractdir=None):
-    def extract1(objid, obj):
-        filename = os.path.basename(obj.get('UF') or obj.get('F').decode())
+def extractembedded(fname: str, password: str, extractdir: str) -> None:
+    def extract1(objid: int, obj: Dict[str, Any]) -> None:
+        filename = os.path.basename(obj.get('UF') or
+                                    cast(bytes, obj.get('F')).decode())
        fileref = obj['EF'].get('UF') or obj['EF'].get('F')
        fileobj = doc.getobj(fileref.objid)
        if not isinstance(fileobj, PDFStream):
@ -221,8 +248,17 @@ def extractembedded(outfp, fname, objids, pagenos, password='',
    return


-def dumppdf(outfp, fname, objids, pagenos, password='', dumpall=False,
-            codec=None, extractdir=None, show_fallback_xref=False):
+def dumppdf(
+    outfp: TextIO,
+    fname: str,
+    objids: Iterable[int],
+    pagenos: Container[int],
+    password: str = '',
+    dumpall: bool = False,
+    codec: Optional[str] = None,
+    extractdir: Optional[str] = None,
+    show_fallback_xref: bool = False
+) -> None:
    fp = open(fname, 'rb')
    parser = PDFParser(fp)
    doc = PDFDocument(parser, password)
@ -249,7 +285,7 @@ def dumppdf(outfp, fname, objids, pagenos, password='', dumpall=False,
    return


-def create_parser():
+def create_parser() -> ArgumentParser:
    parser = ArgumentParser(description=__doc__, add_help=True)
    parser.add_argument('files', type=str, default=None, nargs='+',
                        help='One or more paths to PDF files.')
@ -313,7 +349,7 @@ def create_parser():
    return parser


-def main(argv=None):
+def main(argv: Optional[List[str]] = None) -> None:
    parser = create_parser()
    args = parser.parse_args(args=argv)

@ -340,7 +376,7 @@ def main(argv=None):
    password = args.password

    if args.raw_stream:
-        codec = 'raw'
+        codec: Optional[str] = 'raw'
    elif args.binary_stream:
        codec = 'binary'
    elif args.text_stream:
@ -356,8 +392,7 @@ def main(argv=None):
            )
        elif args.extract_embedded:
            extractembedded(
-                outfp, fname, objids, pagenos, password=password,
-                dumpall=args.all, codec=codec, extractdir=args.extract_embedded
+                fname, password=password, extractdir=args.extract_embedded
            )
        else:
            dumppdf(
@ -370,4 +405,4 @@ def main(argv=None):


 if __name__ == '__main__':
-    sys.exit(main())
+    main()
--- a/tools/pdf2txt.py
+++ b/tools/pdf2txt.py
@ -4,9 +4,12 @@ output it to plain text, html, xml or tags."""
 import argparse
 import logging
 import sys
+from typing import Any, Container, Iterable, List, Optional, Union
+from typing_extensions import Literal

 import pdfminer.high_level
-import pdfminer.layout
+from pdfminer.layout import LAParams
+from pdfminer.utils import AnyIO

 logging.basicConfig()

@ -15,24 +18,42 @@ OUTPUT_TYPES = ((".htm", "html"),
                (".xml", "xml"),
                (".tag", "tag"))

+FloatOrDisabled = Union[float, Literal["disabled"]]

-def float_or_disabled(x):
+
+def float_or_disabled(x: str) -> FloatOrDisabled:
    if x.lower().strip() == "disabled":
-        return x
+        return "disabled"
    try:
-        x = float(x)
+        return float(x)
    except ValueError:
        raise argparse.ArgumentTypeError("invalid float value: {}".format(x))


-def extract_text(files=[], outfile='-',
-                 no_laparams=False, all_texts=None, detect_vertical=None,
-                 word_margin=None, char_margin=None, line_margin=None,
-                 boxes_flow=None, output_type='text', codec='utf-8',
-                 strip_control=False, maxpages=0, page_numbers=None,
-                 password="", scale=1.0, rotation=0, layoutmode='normal',
-                 output_dir=None, debug=False, disable_caching=False,
-                 **kwargs):
+def extract_text(
+    files: Iterable[str] = [],
+    outfile: str = '-',
+    no_laparams: bool = False,
+    all_texts: Optional[bool] = None,
+    detect_vertical: Optional[bool] = None,
+    word_margin: Optional[float] = None,
+    char_margin: Optional[float] = None,
+    line_margin: Optional[float] = None,
+    boxes_flow: Optional[FloatOrDisabled] = None,
+    output_type: str = 'text',
+    codec: str = 'utf-8',
+    strip_control: bool = False,
+    maxpages: int = 0,
+    page_numbers: Optional[Container[int]] = None,
+    password: str = "",
+    scale: float = 1.0,
+    rotation: int = 0,
+    layoutmode: str = 'normal',
+    output_dir: Optional[str] = None,
+    debug: bool = False,
+    disable_caching: bool = False,
+    **kwargs: Any
+) -> AnyIO:
    if not files:
        raise ValueError("Must provide files to work upon!")

@ -40,7 +61,7 @@ def extract_text(files=[], outfile='-',
    # create an LAParams object and
    # populate with given args. Otherwise, set it to None.
    if not no_laparams:
-        laparams = pdfminer.layout.LAParams()
+        laparams: Optional[LAParams] = LAParams()
        for param in ("all_texts", "detect_vertical", "word_margin",
                      "char_margin", "line_margin", "boxes_flow"):
            paramv = locals().get(param, None)
@ -55,8 +76,8 @@ def extract_text(files=[], outfile='-',
                output_type = alttype

    if outfile == "-":
-        outfp = sys.stdout
-        if outfp.encoding is not None:
+        outfp: AnyIO = sys.stdout
+        if sys.stdout.encoding is not None:
            codec = 'utf-8'
    else:
        outfp = open(outfile, "wb")
@ -67,7 +88,7 @@ def extract_text(files=[], outfile='-',
    return outfp


-def maketheparser():
+def maketheparser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description=__doc__, add_help=True)
    parser.add_argument(
        "files", type=str, default=None, nargs="+",
@ -180,7 +201,7 @@ def maketheparser():
 # main


-def main(args=None):
+def main(args: Optional[List[str]] = None) -> int:

    P = maketheparser()
    A = P.parse_args(args=args)
--- a/tools/pdfdiff.py
+++ b/tools/pdfdiff.py
@ -6,6 +6,7 @@ compares two pdf files.
 import io
 import logging
 import sys
+from typing import Any, Iterable, List, Optional

 import pdfminer.settings
 from pdfminer import high_level, layout
@ -16,7 +17,7 @@ pdfminer.settings.STRICT = False
 logging.basicConfig()


-def compare(file1, file2, **kwargs):
+def compare(file1: str, file2: str, **kwargs: Any) -> Iterable[str]:
    # If any LAParams group arguments were passed,
    # create an LAParams object and
    # populate with given args. Otherwise, set it to None.
@ -26,7 +27,7 @@ def compare(file1, file2, **kwargs):
                      "char_margin", "line_margin", "boxes_flow"):
            paramv = kwargs.get(param, None)
            if paramv is not None:
-                laparams[param] = paramv
+                setattr(laparams, param, paramv)
        kwargs['laparams'] = laparams

    s1 = io.StringIO()
@ -40,20 +41,20 @@ def compare(file1, file2, **kwargs):
    import difflib
    s1.seek(0)
    s2.seek(0)
-    s1, s2 = s1.readlines(), s2.readlines()
+    s1_lines, s2_lines = s1.readlines(), s2.readlines()

    import os.path
    try:
        extension = os.path.splitext(kwargs['outfile'])[1][1:4]
        if extension.lower() == 'htm':
-            return difflib.HtmlDiff().make_file(s1, s2)
+            return difflib.HtmlDiff().make_file(s1_lines, s2_lines)
    except KeyError:
        pass
-    return difflib.unified_diff(s1, s2, n=kwargs['context_lines'])
+    return difflib.unified_diff(s1_lines, s2_lines, n=kwargs['context_lines'])


 # main
-def main(args=None):
+def main(args: Optional[List[str]] = None) -> int:
    import argparse
    P = argparse.ArgumentParser(description=__doc__)
    P.add_argument("file1", type=str, default=None, help="File 1 to compare.")
--- a/tools/pdfstats.py
+++ b/tools/pdfstats.py
@ -7,10 +7,11 @@
 import sys
 import os
 import collections
+from typing import Any, Counter, Iterator, List

 from pdfminer.pdfparser import PDFParser
-from pdfminer.pdfdocument import PDFDocument
-from pdfminer.pdfpage import PDFPage, PDFTextExtractionNotAllowed
+from pdfminer.pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
+from pdfminer.pdfpage import PDFPage
 from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
 from pdfminer.converter import PDFPageAggregator
 from pdfminer.layout import LAParams, LTContainer
@ -19,18 +20,18 @@ from pdfminer.layout import LAParams, LTContainer
 _, SCRIPT = os.path.split(__file__)


-def msg(*args, **kwargs):
+def msg(*args: object, **kwargs: Any) -> None:
    print(' '.join(map(str, args)), **kwargs)  # noqa E999


-def flat_iter(obj):
+def flat_iter(obj: object) -> Iterator[object]:
    yield obj
    if isinstance(obj, LTContainer):
        for ob in obj:
            yield from flat_iter(ob)


-def main(args):
+def main(args: List[str]) -> int:
    msg(SCRIPT, args)

    if len(args) != 1:
@ -40,7 +41,7 @@ def main(args):

    infilename, = args

-    lt_types = collections.Counter()
+    lt_types: Counter[str] = collections.Counter()

    with open(infilename, 'rb') as pdf_file:

@ -77,6 +78,8 @@ def main(args):
    msg('page_count', page_count)
    msg('lt_types:', ' '.join('{}:{}'.format(*tc) for tc in lt_types.items()))

+    return 0
+

 if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
--- a/tools/prof.py
+++ b/tools/prof.py
@ -1,11 +1,12 @@
 #!/usr/bin/env python3
 import sys
+from typing import List


-def prof_main(argv):
-    import hotshot.stats
+def prof_main(argv: List[str]) -> int:
+    import hotshot.stats  # type: ignore[import]

-    def usage():
+    def usage() -> int:
        print('usage: %s module.function [args ...]' % argv[0])
        return 100
    args = argv[1:]
@ -15,19 +16,24 @@ def prof_main(argv):
    prof = name+'.prof'
    i = name.rindex('.')
    (modname, funcname) = (name[:i], name[i+1:])
-    module = __import__(modname, fromlist=1)
+
+    # Type error: fromlist expects a sequence of strings. Presumably the
+    # intent is to get the named submodule back rather than its top-level
+    # package, which __import__ does only when fromlist is non-empty.
+    module = __import__(modname, fromlist=1)  # type: ignore[arg-type]
+
    func = getattr(module, funcname)
    if args:
        args.insert(0, argv[0])
-        prof = hotshot.Profile(prof)
-        prof.runcall(lambda: func(args))
-        prof.close()
+        profile = hotshot.Profile(prof)
+        profile.runcall(lambda: func(args))
+        profile.close()
    else:
        stats = hotshot.stats.load(prof)
        stats.strip_dirs()
        stats.sort_stats('time', 'calls')
        stats.print_stats(1000)
-    return
+    return 0


 if __name__ == '__main__':
--- a/tox.ini
+++ b/tox.ini
@ -9,6 +9,7 @@ whitelist_externals =
    flake8
 commands =
    flake8 pdfminer/ tools/ tests/ --count --statistics
+    mypy --install-types --non-interactive --show-error-codes .
    nosetests --nologcapture
    python -m sphinx -b html docs/source docs/build/html
    python -m sphinx -b doctest docs/source docs/build/doctest