Update development tools: travis ci to github actions, tox to nox, nose to pytest (#704)

* Replace tox with nox
* Replace travis with github actions
* Fix pytest, mypy and flake8 errors
* Add pytest.
* Run on all commits
* Remove nose
* Speedup slow tests to save GitHub actions minutes
* Added line to CHANGELOG.md
* Fix line too long in pdfdocument.py
* Update .github/workflows/actions.yml

  Co-authored-by: Jake Stockwin <jstockwin@gmail.com>
* Improve actions.yml
* Fix error with nox name for mypy
* Add names for jobs
* Replace nose.raises with pytest.raises

Co-authored-by: Jake Stockwin <jstockwin@gmail.com>
2022-02-02 22:24:32 +01:00 · 2022-02-02 22:24:32 +01:00 · b84cfc98e0
parent 1d1602e0c5
commit b84cfc98e0
27 changed files with 435 additions and 312 deletions
--- a/.github/workflows/actions.yml
+++ b/.github/workflows/actions.yml
@ -0,0 +1,94 @@
 # Continuous integration: lint, type-check, test and build the docs via nox.
 name: Continuous integration
 on:
  # Triggered by every push to any branch.
  push:
 env:
  # Interpreter used by the jobs that do not run on the version matrix.
  default-python: "3.10"
 jobs:
  check-coding-style:
    name: Check coding style
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python ${{ env.default-python }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.default-python }}
      - name: Upgrade pip and install nox
        run: |
          python -m pip install --upgrade pip
          python -m pip install nox
      - name: Check coding style
        run: |
          nox --error-on-missing-interpreters --non-interactive --session lint
  check-static-types:
    name: Check static types
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python ${{ env.default-python }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.default-python }}
      - name: Upgrade pip and install nox
        run: |
          python -m pip install --upgrade pip
          python -m pip install nox
      - name: Check static types
        run: |
          nox --error-on-missing-interpreters --non-interactive --session types
  tests:
    name: Run tests
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ ubuntu-latest ]
        # Must stay in sync with PYTHON_ALL_VERSIONS in noxfile.py, because the
        # nox session name below is derived from this value.
        # NOTE(review): confirm every version listed here is still installable
        # by setup-python on ubuntu-latest.
        python-version: [ "3.6", "3.7", "3.8", "3.9", "3.10" ]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Determine pip cache directory
        id: pip-cache
        # Step outputs are written to the $GITHUB_OUTPUT file; the older
        # "::set-output" workflow command is deprecated.
        run: |
          echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT"
      - name: Cache pip cache
        uses: actions/cache@v4
        with:
          path: ${{ steps.pip-cache.outputs.dir }}
          # Include a hash of setup.py so the cache is rebuilt when the
          # dependencies change; fall back to the newest cache for this
          # interpreter otherwise.
          key: ${{ runner.os }}-pip${{ matrix.python-version }}-${{ hashFiles('setup.py') }}
          restore-keys: |
            ${{ runner.os }}-pip${{ matrix.python-version }}-
      - name: Upgrade pip and install nox
        run: |
          python -m pip install --upgrade pip
          python -m pip install nox
      - name: Run tests
        # Fail instead of skipping when the requested interpreter is missing,
        # consistent with the other jobs.
        run: |
          nox --error-on-missing-interpreters --non-interactive --session tests-${{ matrix.python-version }}
  build-docs:
    name: Test building docs
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python ${{ env.default-python }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.default-python }}
      - name: Upgrade pip and install nox
        run: |
          python -m pip install --upgrade pip
          python -m pip install nox
      - name: Build docs
        run: |
          nox --error-on-missing-interpreters --non-interactive --session docs
--- a/.gitignore
+++ b/.gitignore
@ -17,6 +17,7 @@ tests/*.xml
 tests/*.txt
 .idea/
 .tox/
 .nox/
 # python venv management tools
 Pipfile
--- a/.travis.yml
+++ b/.travis.yml
@ -1,11 +0,0 @@
-dist: focal
-language: python
-python:
-  - "3.6"
-  - "3.7"
-  - "3.8"
-  - "3.9"
-install:
-  - pip install tox tox-travis
-script:
-  - tox -r
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -24,6 +24,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Removed
 - Unnecessary return statements without argument at the end of functions ([#707](https://github.com/pdfminer/pdfminer.six/pull/707))
 ### Changed
 - Switched from nose to pytest, from tox to nox and from Travis CI to GitHub Actions ([#704](https://github.com/pdfminer/pdfminer.six/pull/704))
 ## [20211012]
 ### Added
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -60,11 +60,11 @@ Any contribution is appreciated! You might want to:
    On all Python versions:
    ```sh
-    tox
+    nox
   ```
   Or on a single Python version:
   ```sh
-    tox -e py36
+    nox -e tests-3.6
    ```
--- a/Makefile
+++ b/Makefile
@ -53,6 +53,3 @@ $(CMAPDST)/to-unicode-Adobe-Japan1.pickle.gz: $(CMAPDST)
 $(CMAPDST)/to-unicode-Adobe-Korea1.pickle.gz: $(CMAPDST)
 	$(CONV_CMAP) -c KSC-EUC=euc-kr -c KSC-Johab=johab -c KSCms-UHC=cp949 -c UniKS-UTF8=utf-8 \
 		$(CMAPDST) Adobe-Korea1 $(CMAPSRC)/cid2code_Adobe_Korea1.txt
-test: cmap
-	nosetests
--- a/mypy.ini
+++ b/mypy.ini
@ -20,8 +20,11 @@ disallow_untyped_defs = True
 [mypy-cryptography.hazmat.*]
 ignore_missing_imports = True
-[mypy-nose.*]
+[mypy-pytest.*]
 ignore_missing_imports = True
 [mypy-setuptools]
 ignore_missing_imports = True
 [mypy-nox]
 ignore_missing_imports = True
--- a/noxfile.py
+++ b/noxfile.py
@ -0,0 +1,58 @@
 import nox
 # Python versions the parametrized "tests" session below is run against.
 PYTHON_ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"]
@nox.session
def lint(session):
    """Run flake8 over the package, the tools and the tests."""
    session.install('flake8')
    # Directories to check, followed by the reporting flags.
    targets = ('pdfminer/', 'tools/', 'tests/')
    flags = ('--count', '--statistics')
    session.run('flake8', *targets, *flags)
@nox.session
def types(session):
    """Run the mypy static type checker over the whole repository.

    mypy is pinned to the same version as the "dev" extra in setup.py, so
    this session and a local `pip install -e .[dev]` check the code with
    the same type checker and a new mypy release cannot change the outcome
    unannounced. Keep both pins in sync when upgrading.
    """
    session.install('mypy == 0.931')
    session.run(
        'mypy',
        '--install-types',
        '--non-interactive',
        '--show-error-codes',
        '.'
    )
@nox.session(python=PYTHON_ALL_VERSIONS)
def tests(session):
    """Install the package with its "dev" extra and run pytest.

    Parametrized over PYTHON_ALL_VERSIONS via the decorator.
    """
    editable_install = ("-e", ".[dev]")
    session.install(*editable_install)
    session.run('pytest')
@nox.session
def docs(session):
    """Install the "docs" extra, then run sphinx with the html and doctest builders."""
    session.install("-e", ".[docs]")
    # (builder name, output directory) pairs, run in this order.
    builds = (
        ('html', 'docs/build/html'),
        ('doctest', 'docs/build/doctest'),
    )
    for builder, out_dir in builds:
        session.run(
            'python', '-m', 'sphinx',
            '-b', builder,
            'docs/source',
            out_dir
        )
--- a/pdfminer/data_structures.py
+++ b/pdfminer/data_structures.py
@ -1,5 +1,4 @@
 import functools
-from typing import Any, Dict, Iterable, List, Optional, Tuple
+from typing import Any, Iterable, List, Optional, Tuple
 from pdfminer import settings
 from pdfminer.pdfparser import PDFSyntaxError
@ -26,21 +25,20 @@ class NumberTree:
            self.limits = list_value(self._obj['Limits'])
    def _parse(self) -> List[Tuple[int, Any]]:
-        l = []
+        items = []
        if self.nums:  # Leaf node
            for k, v in choplist(2, self.nums):
-                l.append((int_value(k), v))
+                items.append((int_value(k), v))
        if self.kids:  # Root or intermediate node
            for child_ref in self.kids:
-                l += NumberTree(child_ref)._parse()
+                items += NumberTree(child_ref)._parse()
-        return l
+        return items
    values: List[Tuple[int, Any]]  # workaround decorators unsupported by mypy
-    @property  # type: ignore [no-redef,misc]
+    @property  # type: ignore[no-redef,misc]
    @functools.lru_cache
    def values(self) -> List[Tuple[int, Any]]:
        values = self._parse()
--- a/pdfminer/image.py
+++ b/pdfminer/image.py
@ -2,7 +2,7 @@ import os
 import os.path
 import struct
 from io import BytesIO
-from typing import BinaryIO, Tuple
+from typing import BinaryIO, Tuple, List, Any
 from .jbig2 import JBIG2StreamReader, JBIG2StreamWriter
 from .layout import LTImage
@ -104,6 +104,7 @@ class ImageWriter:
            # seems to be easily opened by other programs
            from PIL import Image
            raw_data = image.stream.get_rawdata()
            assert raw_data is not None
            ifp = BytesIO(raw_data)
            i = Image.open(ifp)
            i.save(fp, 'JPEG2000')
@ -162,7 +163,7 @@ class ImageWriter:
        return is_jbig2
    @staticmethod
-    def jbig2_global(image):
+    def jbig2_global(image: LTImage) -> List[Any]:
        global_streams = []
        filters = image.stream.get_filters()
        for filter_name, params in filters:
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@ -13,9 +13,9 @@ from . import settings
 from .arcfour import Arcfour
 from .data_structures import NumberTree
 from .pdfparser import PDFSyntaxError, PDFParser, PDFStreamParser
-from .pdftypes import DecipherCallable, PDFException, PDFTypeError, PDFStream, \
+from .pdftypes import DecipherCallable, PDFException, PDFTypeError, \
-    PDFObjectNotFound, decipher_all, int_value, str_value, list_value, \
+    PDFStream, PDFObjectNotFound, decipher_all, int_value, str_value, \
-    uint_value, dict_value, stream_value
+    list_value, uint_value, dict_value, stream_value
 from .psparser import PSEOF, literal_name, LIT, KWD
 from .utils import choplist, decode_text, nunpack, format_int_roman, \
    format_int_alpha
@ -51,6 +51,10 @@ class PDFEncryptionError(PDFException):
    pass
 class PDFPasswordIncorrect(PDFEncryptionError):
    pass
 class PDFEncryptionWarning(UserWarning):
    """Legacy warning for failed decryption.
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@ -217,7 +217,7 @@ def stream_value(x: object) -> "PDFStream":
    return x
-def decompress_corrupted(data):
+def decompress_corrupted(data: bytes) -> bytes:
    """Called on some data that can't be properly decoded because of CRC checksum
    error. Attempt to decode it skipping the CRC.
    """
--- a/setup.py
+++ b/setup.py
@ -1,6 +1,10 @@
 import sys
 from pathlib import Path
 from setuptools import setup
 from os import path
 sys.path.append(str(Path(__file__).parent))
 import pdfminer as package
@ -17,7 +21,7 @@ setup(
        'cryptography',
    ],
    extras_require={
-        "dev": ["nose", "tox", "mypy == 0.910"],
+        "dev": ["pytest", "nox", "mypy == 0.931"],
        "docs": ["sphinx", "sphinx-argparse"],
    },
    description='PDF parser and analyzer',
--- a/tests/test_converter.py
+++ b/tests/test_converter.py
@ -1,28 +1,26 @@
 import io
 from tempfile import TemporaryFile
-from nose.tools import assert_equal, assert_false, assert_true
 from pdfminer.converter import PDFLayoutAnalyzer, PDFConverter
 from pdfminer.high_level import extract_pages
 from pdfminer.layout import LTContainer, LTRect, LTLine, LTCurve
 from pdfminer.pdfinterp import PDFGraphicState
-class TestPaintPath():
+class TestPaintPath:
    def test_paint_path(self):
        path = [('m', 6, 7), ('l', 7, 7)]
        analyzer = self._get_analyzer()
        analyzer.cur_item = LTContainer([0, 100, 0, 100])
        analyzer.paint_path(PDFGraphicState(), False, False, False, path)
-        assert_equal(len(analyzer.cur_item._objs), 1)
+        assert len(analyzer.cur_item._objs) == 1
    def test_paint_path_mlllh(self):
-        path = [('m', 6, 7), ('l', 7, 7), ('l', 7, 91),  ('l', 6, 91), ('h',)]
+        path = [('m', 6, 7), ('l', 7, 7), ('l', 7, 91), ('l', 6, 91), ('h',)]
        analyzer = self._get_analyzer()
        analyzer.cur_item = LTContainer([0, 100, 0, 100])
        analyzer.paint_path(PDFGraphicState(), False, False, False, path)
-        assert_equal(len(analyzer.cur_item), 1)
+        assert len(analyzer.cur_item) == 1
    def test_paint_path_multiple_mlllh(self):
        """Path from samples/contrib/issue-00369-excel.pdf"""
@ -34,7 +32,7 @@ class TestPaintPath():
        analyzer = self._get_analyzer()
        analyzer.cur_item = LTContainer([0, 100, 0, 100])
        analyzer.paint_path(PDFGraphicState(), False, False, False, path)
-        assert_equal(len(analyzer.cur_item._objs), 3)
+        assert len(analyzer.cur_item._objs) == 3
    def test_paint_path_quadrilaterals(self):
        """via https://github.com/pdfminer/pdfminer.six/issues/473"""
@ -49,98 +47,114 @@ class TestPaintPath():
            return list(map(type, parse(path)))
        # Standard rect
-        assert_equal(get_types([
+        assert get_types(
-            ("m", 10, 90),
+            [
-            ("l", 90, 90),
+                ("m", 10, 90),
-            ("l", 90, 10),
+                ("l", 90, 90),
-            ("l", 10, 10),
+                ("l", 90, 10),
-            ("h",),
+                ("l", 10, 10),
-        ]), [LTRect])
+                ("h",),
            ]
        ) == [LTRect]
        # Same but mllll variation
-        assert_equal(get_types([
+        assert get_types(
-            ("m", 10, 90),
+            [
-            ("l", 90, 90),
+                ("m", 10, 90),
-            ("l", 90, 10),
+                ("l", 90, 90),
-            ("l", 10, 10),
+                ("l", 90, 10),
-            ("l", 10, 90),
+                ("l", 10, 10),
-        ]), [LTRect])
+                ("l", 10, 90),
            ]
        ) == [LTRect]
        # Bowtie shape
-        assert_equal(get_types([
+        assert get_types(
-            ("m", 110, 90),
+            [
-            ("l", 190, 10),
+                ("m", 110, 90),
-            ("l", 190, 90),
+                ("l", 190, 10),
-            ("l", 110, 10),
+                ("l", 190, 90),
-            ("h",),
+                ("l", 110, 10),
-        ]), [LTCurve])
+                ("h",),
            ]
        ) == [LTCurve]
        # Quadrilateral with one slanted side
-        assert_equal(get_types([
+        assert get_types(
-            ("m", 210, 90),
+            [
-            ("l", 290, 60),
+                ("m", 210, 90),
-            ("l", 290, 10),
+                ("l", 290, 60),
-            ("l", 210, 10),
+                ("l", 290, 10),
-            ("h",),
+                ("l", 210, 10),
-        ]), [LTCurve])
+                ("h",),
            ]
        ) == [LTCurve]
        # Path with two rect subpaths
-        assert_equal(get_types([
+        assert get_types(
-            ("m", 310, 90),
+            [
-            ("l", 350, 90),
+                ("m", 310, 90),
-            ("l", 350, 10),
+                ("l", 350, 90),
-            ("l", 310, 10),
+                ("l", 350, 10),
-            ("h",),
+                ("l", 310, 10),
-            ("m", 350, 90),
+                ("h",),
-            ("l", 390, 90),
+                ("m", 350, 90),
-            ("l", 390, 10),
+                ("l", 390, 90),
-            ("l", 350, 10),
+                ("l", 390, 10),
-            ("h",),
+                ("l", 350, 10),
-        ]), [LTRect, LTRect])
+                ("h",),
            ]
        ) == [LTRect, LTRect]
        # Path with one rect subpath and one pentagon
-        assert_equal(get_types([
+        assert get_types(
-            ("m", 410, 90),
+            [
-            ("l", 445, 90),
+                ("m", 410, 90),
-            ("l", 445, 10),
+                ("l", 445, 90),
-            ("l", 410, 10),
+                ("l", 445, 10),
-            ("h",),
+                ("l", 410, 10),
-            ("m", 455, 70),
+                ("h",),
-            ("l", 475, 90),
+                ("m", 455, 70),
-            ("l", 490, 70),
+                ("l", 475, 90),
-            ("l", 490, 10),
+                ("l", 490, 70),
-            ("l", 455, 10),
+                ("l", 490, 10),
-            ("h",),
+                ("l", 455, 10),
-        ]), [LTRect, LTCurve])
+                ("h",),
            ]
        ) == [LTRect, LTCurve]
        # Three types of simple lines
-        assert_equal(get_types([
+        assert get_types(
-            # Vertical line
+            [
-            ("m", 10, 30),
+                # Vertical line
-            ("l", 10, 40),
+                ("m", 10, 30),
-            ("h",),
+                ("l", 10, 40),
-            # Horizontal line
+                ("h",),
-            ("m", 10, 50),
+                # Horizontal line
-            ("l", 70, 50),
+                ("m", 10, 50),
-            ("h",),
+                ("l", 70, 50),
-            # Diagonal line
+                ("h",),
-            ("m", 10, 10),
+                # Diagonal line
-            ("l", 30, 30),
+                ("m", 10, 10),
-            ("h",),
+                ("l", 30, 30),
-        ]), [LTLine, LTLine, LTLine])
+                ("h",),
            ]
        ) == [LTLine, LTLine, LTLine]
        # Same as above, but 'ml' variation
-        assert_equal(get_types([
+        assert get_types(
-            # Vertical line
+            [
-            ("m", 10, 30),
+                # Vertical line
-            ("l", 10, 40),
+                ("m", 10, 30),
-            # Horizontal line
+                ("l", 10, 40),
-            ("m", 10, 50),
+                # Horizontal line
-            ("l", 70, 50),
+                ("m", 10, 50),
-            # Diagonal line
+                ("l", 70, 50),
-            ("m", 10, 10),
+                # Diagonal line
-            ("l", 30, 30),
+                ("m", 10, 10),
-        ]), [LTLine, LTLine, LTLine])
+                ("l", 30, 30),
            ]
        ) == [LTLine, LTLine, LTLine]
        # There are six lines in this one-page PDF;
        # they all have shape 'ml' not 'mlh'
@ -192,21 +206,21 @@ class TestPaintPath():
 class TestBinaryDetector():
    def test_stringio(self):
-        assert_false(PDFConverter._is_binary_stream(io.StringIO()))
+        assert not PDFConverter._is_binary_stream(io.StringIO())
    def test_bytesio(self):
-        assert_true(PDFConverter._is_binary_stream(io.BytesIO()))
+        assert PDFConverter._is_binary_stream(io.BytesIO())
    def test_tmpfile(self):
        with TemporaryFile(mode='w') as f:
-            assert_false(PDFConverter._is_binary_stream(f))
+            assert not PDFConverter._is_binary_stream(f)
    def test_binary_tmpfile(self):
        with TemporaryFile(mode='wb') as f:
-            assert_true(PDFConverter._is_binary_stream(f))
+            assert PDFConverter._is_binary_stream(f)
    def test_non_file_like_object_defaults_to_binary(self):
-        assert_true(PDFConverter._is_binary_stream(object()))
+        assert PDFConverter._is_binary_stream(object())
    def test_textiowrapper(self):
-        assert_false(PDFConverter._is_binary_stream(io.TextIOBase()))
+        assert not PDFConverter._is_binary_stream(io.TextIOBase())
--- a/tests/test_encodingdb.py
+++ b/tests/test_encodingdb.py
@ -4,7 +4,7 @@ See: https://github.com/adobe-type-tools/agl-specification#2-the-mapping
 While not in the specification, lowercase unicode often occurs in pdf's.
 Therefore lowercase unittest variants are added.
 """
-from nose.tools import assert_raises
+import pytest
 from pdfminer.encodingdb import name2unicode, EncodingDB
 from pdfminer.psparser import PSLiteral
@ -59,7 +59,8 @@ def test_name2unicode_uni_empty_string_long():
    This character can be correctly mapped by using the
    glyph name "u1040C.
    """
-    assert_raises(KeyError, name2unicode, 'uniD801DC0C')
+    with pytest.raises(KeyError):
        name2unicode('uniD801DC0C')
 def test_name2unicode_uni_empty_string_long_lowercase():
@ -71,7 +72,8 @@ def test_name2unicode_uni_empty_string_long_lowercase():
    expressed as D801 DC0C in UTF-16, specifically U+1040C.
    This character can be correctly mapped by using the
    glyph name "u1040C."""
-    assert_raises(KeyError, name2unicode, 'uniD801DC0C')
+    with pytest.raises(KeyError):
        name2unicode('uniD801DC0C')
 def test_name2unicode_uni_pua():
@ -128,13 +130,15 @@ def test_name2unicode_foo():
    """The name 'foo' maps to an empty string,
    because 'foo' is not in AGL,
    and because it does not start with a 'u.'"""
-    assert_raises(KeyError, name2unicode, 'foo')
+    with pytest.raises(KeyError):
        name2unicode('foo')
 def test_name2unicode_notdef():
    """The name ".notdef" is reduced to an empty string (step 1)
    and mapped to an empty string (step 3)"""
-    assert_raises(KeyError, name2unicode, '.notdef')
+    with pytest.raises(KeyError):
        name2unicode('.notdef')
 def test_name2unicode_pua_ogoneksmall():
@ -145,7 +149,8 @@ def test_name2unicode_pua_ogoneksmall():
 def test_name2unicode_overflow_error():
-    assert_raises(KeyError, name2unicode, '226215240241240240240240')
+    with pytest.raises(KeyError):
        name2unicode('226215240241240240240240')
 def test_get_encoding_with_invalid_differences():
--- a/tests/test_pdfdocument.py
+++ b/tests/test_pdfdocument.py
@ -1,6 +1,6 @@
 import itertools
-from nose.tools import assert_equal, raises
+import pytest
 from helpers import absolute_sample_path
 from pdfminer.pdfdocument import PDFDocument, PDFNoPageLabels
@ -10,12 +10,12 @@ from pdfminer.pdftypes import PDFObjectNotFound, dict_value, int_value
 class TestPdfDocument(object):
-    @raises(PDFObjectNotFound)
    def test_get_zero_objid_raises_pdfobjectnotfound(self):
        with open(absolute_sample_path('simple1.pdf'), 'rb') as in_file:
            parser = PDFParser(in_file)
            doc = PDFDocument(parser)
-            doc.getobj(0)
+            with pytest.raises(PDFObjectNotFound):
                doc.getobj(0)
    def test_encrypted_no_id(self):
        # Some documents may be encrypted but not have an /ID key in
@ -25,8 +25,7 @@ class TestPdfDocument(object):
        with open(path, 'rb') as fp:
            parser = PDFParser(fp)
            doc = PDFDocument(parser)
-            assert_equal(doc.info,
+            assert doc.info == [{'Producer': b'European Patent Office'}]
                         [{'Producer': b'European Patent Office'}])
    def test_page_labels(self):
        path = absolute_sample_path('contrib/pagelabels.pdf')
@ -34,14 +33,14 @@ class TestPdfDocument(object):
            parser = PDFParser(fp)
            doc = PDFDocument(parser)
            total_pages = int_value(dict_value(doc.catalog['Pages'])['Count'])
-            assert_equal(
+            assert list(itertools.islice(doc.get_page_labels(), total_pages)) \
-                list(itertools.islice(doc.get_page_labels(), total_pages)),
+                   == ['iii', 'iv', '1', '2', '1']
                ['iii', 'iv', '1', '2', '1'])
-    @raises(PDFNoPageLabels)
    def test_no_page_labels(self):
        path = absolute_sample_path('simple1.pdf')
        with open(path, 'rb') as fp:
            parser = PDFParser(fp)
            doc = PDFDocument(parser)
-            doc.get_page_labels()
+
            with pytest.raises(PDFNoPageLabels):
                doc.get_page_labels()
--- a/tests/test_pdfencoding.py
+++ b/tests/test_pdfencoding.py
@ -2,15 +2,13 @@
 # -*- coding: utf-8 -*-
-import nose
 from pdfminer.cmapdb import IdentityCMap, CMap, IdentityCMapByte
 from pdfminer.pdffont import PDFCIDFont
 from pdfminer.pdftypes import PDFStream
 from pdfminer.psparser import PSLiteral
-class TestPDFEncoding():
+class TestPDFEncoding:
    def test_cmapname_onebyteidentityV(self):
        stream = PDFStream({'CMapName': PSLiteral('OneByteIdentityV')}, '')
@ -107,7 +105,3 @@ class TestPDFEncoding():
    def test_font_without_spec(self):
        font = PDFCIDFont(None, {})
        assert isinstance(font.cmap, CMap)
-if __name__ == '__main__':
-    nose.runmodule()
--- a/tests/test_pdffont.py
+++ b/tests/test_pdffont.py
@ -1,5 +1,3 @@
-from nose.tools import assert_equal, assert_greater
 from pdfminer.pdffont import PDFCIDFont
 from pdfminer.pdfinterp import PDFResourceManager
 from pdfminer.psparser import PSLiteral
@ -17,5 +15,5 @@ def test_get_cmap_from_pickle():
    cmap = font.get_cmap_from_spec(spec, False)
-    assert_equal(cmap.attrs.get('CMapName'), cmap_name)
+    assert cmap.attrs.get('CMapName') == cmap_name
-    assert_greater(len(cmap.code2cid), 0)
+    assert len(cmap.code2cid) > 0
--- a/tests/test_pdfminer_ccitt.py
+++ b/tests/test_pdfminer_ccitt.py
@ -1,5 +1,3 @@
-from nose.tools import assert_equal
 from pdfminer.ccitt import CCITTG4Parser, CCITTFaxDecoder
@ -13,98 +11,98 @@ class TestCCITTG4Parser():
    def test_b1(self):
        parser = self.get_parser('00000')
        parser._do_vertical(0)
-        assert_equal(parser._curpos, 0)
+        assert parser._curpos == 0
        return
    def test_b2(self):
        parser = self.get_parser('10000')
        parser._do_vertical(-1)
-        assert_equal(parser._curpos, 0)
+        assert parser._curpos == 0
        return
    def test_b3(self):
        parser = self.get_parser('000111')
        parser._do_pass()
-        assert_equal(parser._curpos, 3)
+        assert parser._curpos == 3
-        assert_equal(parser._get_bits(), '111')
+        assert parser._get_bits() == '111'
        return
    def test_b4(self):
        parser = self.get_parser('00000')
        parser._do_vertical(+2)
-        assert_equal(parser._curpos, 2)
+        assert parser._curpos == 2
-        assert_equal(parser._get_bits(), '11')
+        assert parser._get_bits() == '11'
        return
    def test_b5(self):
        parser = self.get_parser('11111111100')
        parser._do_horizontal(0, 3)
-        assert_equal(parser._curpos, 3)
+        assert parser._curpos == 3
        parser._do_vertical(1)
-        assert_equal(parser._curpos, 10)
+        assert parser._curpos == 10
-        assert_equal(parser._get_bits(), '0001111111')
+        assert parser._get_bits() == '0001111111'
        return
    def test_e1(self):
        parser = self.get_parser('10000')
        parser._do_vertical(0)
-        assert_equal(parser._curpos, 1)
+        assert parser._curpos == 1
        parser._do_vertical(0)
-        assert_equal(parser._curpos, 5)
+        assert parser._curpos == 5
-        assert_equal(parser._get_bits(), '10000')
+        assert parser._get_bits() == '10000'
        return
    def test_e2(self):
        parser = self.get_parser('10011')
        parser._do_vertical(0)
-        assert_equal(parser._curpos, 1)
+        assert parser._curpos == 1
        parser._do_vertical(2)
-        assert_equal(parser._curpos, 5)
+        assert parser._curpos == 5
-        assert_equal(parser._get_bits(), '10000')
+        assert parser._get_bits() == '10000'
        return
    def test_e3(self):
        parser = self.get_parser('011111')
        parser._color = 0
        parser._do_vertical(0)
-        assert_equal(parser._color, 1)
+        assert parser._color == 1
-        assert_equal(parser._curpos, 1)
+        assert parser._curpos == 1
        parser._do_vertical(-2)
-        assert_equal(parser._color, 0)
+        assert parser._color == 0
-        assert_equal(parser._curpos, 4)
+        assert parser._curpos == 4
        parser._do_vertical(0)
-        assert_equal(parser._curpos, 6)
+        assert parser._curpos == 6
-        assert_equal(parser._get_bits(), '011100')
+        assert parser._get_bits() == '011100'
        return
    def test_e4(self):
        parser = self.get_parser('10000')
        parser._do_vertical(0)
-        assert_equal(parser._curpos, 1)
+        assert parser._curpos == 1
        parser._do_vertical(-2)
-        assert_equal(parser._curpos, 3)
+        assert parser._curpos == 3
        parser._do_vertical(0)
-        assert_equal(parser._curpos, 5)
+        assert parser._curpos == 5
-        assert_equal(parser._get_bits(), '10011')
+        assert parser._get_bits() == '10011'
        return
    def test_e5(self):
        parser = self.get_parser('011000')
        parser._color = 0
        parser._do_vertical(0)
-        assert_equal(parser._curpos, 1)
+        assert parser._curpos == 1
        parser._do_vertical(3)
-        assert_equal(parser._curpos, 6)
+        assert parser._curpos == 6
-        assert_equal(parser._get_bits(), '011111')
+        assert parser._get_bits() == '011111'
        return
    def test_e6(self):
        parser = self.get_parser('11001')
        parser._do_pass()
-        assert_equal(parser._curpos, 4)
+        assert parser._curpos == 4
        parser._do_vertical(0)
-        assert_equal(parser._curpos, 5)
+        assert parser._curpos == 5
-        assert_equal(parser._get_bits(), '11111')
+        assert parser._get_bits() == '11111'
        return
    def test_e7(self):
@ -112,8 +110,8 @@ class TestCCITTG4Parser():
        parser._curpos = 2
        parser._color = 1
        parser._do_horizontal(2, 6)
-        assert_equal(parser._curpos, 10)
+        assert parser._curpos == 10
-        assert_equal(parser._get_bits(), '1111000000')
+        assert parser._get_bits() == '1111000000'
        return
    def test_e8(self):
@ -121,19 +119,19 @@ class TestCCITTG4Parser():
        parser._curpos = 1
        parser._color = 0
        parser._do_vertical(0)
-        assert_equal(parser._curpos, 2)
+        assert parser._curpos == 2
        parser._do_horizontal(7, 0)
-        assert_equal(parser._curpos, 9)
+        assert parser._curpos == 9
-        assert_equal(parser._get_bits(), '101111111')
+        assert parser._get_bits() == '101111111'
        return
    def test_m1(self):
        parser = self.get_parser('10101')
        parser._do_pass()
-        assert_equal(parser._curpos, 2)
+        assert parser._curpos == 2
        parser._do_pass()
-        assert_equal(parser._curpos, 4)
+        assert parser._curpos == 4
-        assert_equal(parser._get_bits(), '1111')
+        assert parser._get_bits() == '1111'
        return
    def test_m2(self):
@ -142,7 +140,7 @@ class TestCCITTG4Parser():
        parser._do_vertical(-1)
        parser._do_vertical(1)
        parser._do_horizontal(1, 1)
-        assert_equal(parser._get_bits(), '011101')
+        assert parser._get_bits() == '011101'
        return
    def test_m3(self):
@ -151,7 +149,7 @@ class TestCCITTG4Parser():
        parser._do_pass()
        parser._do_vertical(1)
        parser._do_vertical(1)
-        assert_equal(parser._get_bits(), '00000001')
+        assert parser._get_bits() == '00000001'
        return
@ -159,5 +157,5 @@ class TestCCITTFaxDecoder:
    def test_b1(self):
        decoder = CCITTFaxDecoder(5)
        decoder.output_line(0, b'0')
-        assert_equal(decoder.close(), b'\x80')
+        assert decoder.close() == b'\x80'
        return
--- a/tests/test_pdfminer_crypto.py
+++ b/tests/test_pdfminer_crypto.py
@ -1,7 +1,6 @@
 """Test of various compression/encoding modules (previously in doctests)
 """
 import binascii
-from nose.tools import assert_equal
 from pdfminer.arcfour import Arcfour
 from pdfminer.ascii85 import asciihexdecode, ascii85decode
@ -23,37 +22,32 @@ class TestAscii85():
    def test_ascii85decode(self):
        """The sample string is taken from:
        http://en.wikipedia.org/w/index.php?title=Ascii85"""
-        assert_equal(ascii85decode(b'9jqo^BlbD-BleB1DJ+*+F(f,q'),
+        assert ascii85decode(b'9jqo^BlbD-BleB1DJ+*+F(f,q') \
-                     b'Man is distinguished')
+               == b'Man is distinguished'
-        assert_equal(ascii85decode(b'E,9)oF*2M7/c~>'),
+        assert ascii85decode(b'E,9)oF*2M7/c~>') == b'pleasure.'
                     b'pleasure.')
    def test_asciihexdecode(self):
-        assert_equal(asciihexdecode(b'61 62 2e6364   65'),
+        assert asciihexdecode(b'61 62 2e6364   65') == b'ab.cde'
-                     b'ab.cde')
+        assert asciihexdecode(b'61 62 2e6364   657>') == b'ab.cdep'
-        assert_equal(asciihexdecode(b'61 62 2e6364   657>'),
+        assert asciihexdecode(b'7>') == b'p'
                     b'ab.cdep')
        assert_equal(asciihexdecode(b'7>'),
                     b'p')
 class TestArcfour():
    def test(self):
-        assert_equal(hex(Arcfour(b'Key').process(b'Plaintext')),
+        assert hex(Arcfour(b'Key').process(b'Plaintext')) \
-                     b'bbf316e8d940af0ad3')
+               == b'bbf316e8d940af0ad3'
-        assert_equal(hex(Arcfour(b'Wiki').process(b'pedia')),
+        assert hex(Arcfour(b'Wiki').process(b'pedia')) == b'1021bf0420'
-                     b'1021bf0420')
+        assert hex(Arcfour(b'Secret').process(b'Attack at dawn')) \
-        assert_equal(hex(Arcfour(b'Secret').process(b'Attack at dawn')),
+               == b'45a01f645fc35b383552544b9bf5'
                     b'45a01f645fc35b383552544b9bf5')
 class TestLzw():
    def test_lzwdecode(self):
-        assert_equal(lzwdecode(b'\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01'),
+        assert lzwdecode(b'\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01') \
-                     b'\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42')
+               == b'\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42'
 class TestRunlength():
    def test_rldecode(self):
-        assert_equal(rldecode(b'\x05123456\xfa7\x04abcde\x80junk'),
+        assert rldecode(b'\x05123456\xfa7\x04abcde\x80junk') \
-                     b'1234567777777abcde')
+               == b'1234567777777abcde'
--- a/tests/test_pdfminer_psparser.py
+++ b/tests/test_pdfminer_psparser.py
@ -1,7 +1,5 @@
 import logging
 from nose.tools import assert_equal
 from pdfminer.psparser import KWD, LIT, PSBaseParser, PSStackParser, PSEOF
 logger = logging.getLogger(__name__)
@ -92,11 +90,11 @@ func/a/b{(c)do*}def
    def test_1(self):
        tokens = self.get_tokens(self.TESTDATA)
        logger.info(tokens)
-        assert_equal(tokens, self.TOKENS)
+        assert tokens == self.TOKENS
        return
    def test_2(self):
        objs = self.get_objects(self.TESTDATA)
        logger.info(objs)
-        assert_equal(objs, self.OBJS)
+        assert objs == self.OBJS
        return
--- a/tests/test_pdfpage.py
+++ b/tests/test_pdfpage.py
@ -1,9 +1,7 @@
 from nose.tools import assert_equal
 from helpers import absolute_sample_path
 from pdfminer.pdfdocument import PDFDocument
 from pdfminer.pdfparser import PDFParser
 from pdfminer.pdfpage import PDFPage
 from pdfminer.pdfparser import PDFParser
 class TestPdfPage(object):
@ -15,4 +13,4 @@ class TestPdfPage(object):
            parser = PDFParser(fp)
            doc = PDFDocument(parser)
            for (i, page) in enumerate(PDFPage.create_pages(doc)):
-                assert_equal(page.label, expected_labels[i])
+                assert page.label == expected_labels[i]
--- a/tests/test_tools_dumppdf.py
+++ b/tests/test_tools_dumppdf.py
@ -1,6 +1,7 @@
 import unittest
-import logging
+
-from nose.tools import raises
+import pytest
 from helpers import absolute_sample_path
 from tempfilepath import TemporaryFilePath
 from tools import dumppdf
@ -46,12 +47,12 @@ class TestDumpPDF(unittest.TestCase):
    def test_6(self):
        run('nonfree/naacl06-shinyama.pdf', '-t -a')
    @raises(TypeError)
    def test_simple1_raw(self):
        """Known issue: crash in dumpxml writing binary to text stream."""
-        run('simple1.pdf', '-r -a')
+        with pytest.raises(TypeError):
            run('simple1.pdf', '-r -a')
    @raises(TypeError)
    def test_simple1_binary(self):
        """Known issue: crash in dumpxml writing binary to text stream."""
-        run('simple1.pdf', '-b -a')
+        with pytest.raises(TypeError):
            run('simple1.pdf', '-b -a')
--- a/tests/test_tools_pdf2txt.py
+++ b/tests/test_tools_pdf2txt.py
@ -46,10 +46,10 @@ class TestPdf2Txt():
        run('nonfree/dmca.pdf')
    def test_nonfree_f1040nr(self):
-        run('nonfree/f1040nr.pdf')
+        run('nonfree/f1040nr.pdf', '-p 1')
    def test_nonfree_i1040nr(self):
-        run('nonfree/i1040nr.pdf')
+        run('nonfree/i1040nr.pdf', '-p 1')
    def test_nonfree_kampo(self):
        run('nonfree/kampo.pdf')
@ -58,7 +58,7 @@ class TestPdf2Txt():
        run('nonfree/naacl06-shinyama.pdf')
    def test_nlp2004slides(self):
-        run('nonfree/nlp2004slides.pdf')
+        run('nonfree/nlp2004slides.pdf', '-p 1')
    def test_contrib_2b(self):
        run('contrib/2b.pdf', '-A -t xml')
@ -116,11 +116,11 @@ class TestPdf2Txt():
 class TestDumpImages:
    @staticmethod
-    def extract_images(input_file):
+    def extract_images(input_file, *args):
        output_dir = mkdtemp()
        with TemporaryFilePath() as output_file_name:
            commands = ['-o', output_file_name, '--output-dir',
-                        output_dir, input_file]
+                        output_dir, input_file, *args]
            pdf2txt.main(commands)
        image_files = os.listdir(output_dir)
        rmtree(output_dir)
@ -132,8 +132,8 @@ class TestDumpImages:
        Regression test for:
        https://github.com/pdfminer/pdfminer.six/issues/131
        """
-        image_files = self.extract_images(
+        filepath = absolute_sample_path('../samples/nonfree/dmca.pdf')
-            absolute_sample_path('../samples/nonfree/dmca.pdf'))
+        image_files = self.extract_images(filepath, '-p', '1')
        assert image_files[0].endswith('bmp')
    def test_nonfree_175(self):
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@ -1,61 +1,63 @@
 from nose.tools import assert_equal, assert_raises
 import pathlib
 import pytest
 from helpers import absolute_sample_path
 from pdfminer.layout import LTComponent
-from pdfminer.utils import (format_int_alpha, format_int_roman, open_filename,
+from pdfminer.utils import open_filename, Plane, shorten_str, \
-                            Plane, shorten_str)
+    format_int_roman, format_int_alpha
 class TestOpenFilename:
    def test_string_input(self):
        filename = absolute_sample_path("simple1.pdf")
        opened = open_filename(filename)
-        assert_equal(opened.closing, True)
+        assert opened.closing
    def test_pathlib_input(self):
        filename = pathlib.Path(absolute_sample_path("simple1.pdf"))
        opened = open_filename(filename)
-        assert_equal(opened.closing, True)
+        assert opened.closing
    def test_file_input(self):
        filename = absolute_sample_path("simple1.pdf")
        with open(filename, "rb") as in_file:
            opened = open_filename(in_file)
-            assert_equal(opened.file_handler, in_file)
+            assert opened.file_handler == in_file
    def test_unsupported_input(self):
-        assert_raises(TypeError, open_filename, 0)
+        with pytest.raises(TypeError):
            open_filename(0)
 class TestPlane:
    def test_find_nothing_in_empty_bbox(self):
        plane, _ = self.given_plane_with_one_object()
        result = list(plane.find((50, 50, 100, 100)))
-        assert_equal(result, [])
+        assert result == []
    def test_find_nothing_after_removing(self):
        plane, obj = self.given_plane_with_one_object()
        plane.remove(obj)
        result = list(plane.find((0, 0, 100, 100)))
-        assert_equal(result, [])
+        assert result == []
    def test_find_object_in_whole_plane(self):
        plane, obj = self.given_plane_with_one_object()
        result = list(plane.find((0, 0, 100, 100)))
-        assert_equal(result, [obj])
+        assert result == [obj]
    def test_find_if_object_is_smaller_than_gridsize(self):
        plane, obj = self.given_plane_with_one_object(object_size=1,
                                                      gridsize=100)
        result = list(plane.find((0, 0, 100, 100)))
-        assert_equal(result, [obj])
+        assert result == [obj]
    def test_find_object_if_much_larger_than_gridsize(self):
        plane, obj = self.given_plane_with_one_object(object_size=100,
                                                      gridsize=10)
        result = list(plane.find((0, 0, 100, 100)))
-        assert_equal(result, [obj])
+        assert result == [obj]
    @staticmethod
    def given_plane_with_one_object(object_size=50, gridsize=50):
@ -69,42 +71,42 @@ class TestPlane:
 class TestFunctions(object):
    def test_shorten_str(self):
        s = shorten_str('Hello there World', 15)
-        assert_equal(s, 'Hello ... World')
+        assert s == 'Hello ... World'
    def test_shorten_short_str_is_same(self):
        s = 'Hello World'
-        assert_equal(s, shorten_str(s, 50))
+        assert shorten_str(s, 50) == s
    def test_shorten_to_really_short(self):
-        assert_equal('Hello', shorten_str('Hello World', 5))
+        assert shorten_str('Hello World', 5) == 'Hello'
    def test_format_int_alpha(self):
-        assert_equal('a', format_int_alpha(1))
+        assert format_int_alpha(1) == 'a'
-        assert_equal('b', format_int_alpha(2))
+        assert format_int_alpha(2) == 'b'
-        assert_equal('z', format_int_alpha(26))
+        assert format_int_alpha(26) == 'z'
-        assert_equal('aa', format_int_alpha(27))
+        assert format_int_alpha(27) == 'aa'
-        assert_equal('ab', format_int_alpha(28))
+        assert format_int_alpha(28) == 'ab'
-        assert_equal('az', format_int_alpha(26*2))
+        assert format_int_alpha(26 * 2) == 'az'
-        assert_equal('ba', format_int_alpha(26*2 + 1))
+        assert format_int_alpha(26 * 2 + 1) == 'ba'
-        assert_equal('zz', format_int_alpha(26*27))
+        assert format_int_alpha(26 * 27) == 'zz'
-        assert_equal('aaa', format_int_alpha(26*27 + 1))
+        assert format_int_alpha(26 * 27 + 1) == 'aaa'
    def test_format_int_roman(self):
-        assert_equal('i', format_int_roman(1))
+        assert format_int_roman(1) == 'i'
-        assert_equal('ii', format_int_roman(2))
+        assert format_int_roman(2) == 'ii'
-        assert_equal('iii', format_int_roman(3))
+        assert format_int_roman(3) == 'iii'
-        assert_equal('iv', format_int_roman(4))
+        assert format_int_roman(4) == 'iv'
-        assert_equal('v', format_int_roman(5))
+        assert format_int_roman(5) == 'v'
-        assert_equal('vi', format_int_roman(6))
+        assert format_int_roman(6) == 'vi'
-        assert_equal('vii', format_int_roman(7))
+        assert format_int_roman(7) == 'vii'
-        assert_equal('viii', format_int_roman(8))
+        assert format_int_roman(8) == 'viii'
-        assert_equal('ix', format_int_roman(9))
+        assert format_int_roman(9) == 'ix'
-        assert_equal('x', format_int_roman(10))
+        assert format_int_roman(10) == 'x'
-        assert_equal('xi', format_int_roman(11))
+        assert format_int_roman(11) == 'xi'
-        assert_equal('xx', format_int_roman(20))
+        assert format_int_roman(20) == 'xx'
-        assert_equal('xl', format_int_roman(40))
+        assert format_int_roman(40) == 'xl'
-        assert_equal('xlv', format_int_roman(45))
+        assert format_int_roman(45) == 'xlv'
-        assert_equal('l', format_int_roman(50))
+        assert format_int_roman(50) == 'l'
-        assert_equal('xc', format_int_roman(90))
+        assert format_int_roman(90) == 'xc'
-        assert_equal('xci', format_int_roman(91))
+        assert format_int_roman(91) == 'xci'
-        assert_equal('c', format_int_roman(100))
+        assert format_int_roman(100) == 'c'
--- a/tools/pdf2txt.py
+++ b/tools/pdf2txt.py
@ -202,7 +202,10 @@ def parse_args(args: Optional[List[str]]) -> argparse.Namespace:
        parsed_args.page_numbers = {x-1 for x in parsed_args.page_numbers}
    if parsed_args.pagenos:
-        parsed_args.page_numbers = {int(x)-1 for x in parsed_args.pagenos.split(",")}
+        parsed_args.page_numbers = {
            int(x) - 1
            for x in parsed_args.pagenos.split(",")
        }
    if parsed_args.output_type == "text" and parsed_args.outfile != "-":
        for override, alttype in OUTPUT_TYPES:
--- a/tox.ini
+++ b/tox.ini
@ -1,33 +0,0 @@
 [tox]
 envlist = py{36,37,38,39}-{nose,flake8,mypy,docs}
 [testenv:py{36,37,38,39}-nose]
 deps =
    nose
 allowlist_externals =
    nosetests
 commands =
    nosetests --nologcapture
 [testenv:py{36,37,38,39}-flake8]
 deps =
    flake8
 allowlist_externals =
    flake8
 commands =
    flake8 pdfminer/ tools/ tests/ --count --statistics
 [testenv:py{36,37,38,39}-mypy]
 deps =
    mypy
 allowlist_externals =
    mypy
 commands =
    mypy --install-types --non-interactive --show-error-codes .
 [testenv:py{36,37,38,39}-docs]
 extras =
    docs
 commands =
    python -m sphinx -b html docs/source docs/build/html
    python -m sphinx -b doctest docs/source docs/build/doctest