2021-09-06 20:00:23 +00:00
|
|
|
# Copyright 2016-present MongoDB, Inc.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
#
|
|
|
|
# Some changes copyright 2021-present Matthias Valvekens,
|
|
|
|
# licensed under the license of the pyHanko project (see LICENSE file).
|
|
|
|
|
|
|
|
|
|
|
|
"""An implementation of RFC4013 SASLprep."""
|
|
|
|
|
|
|
|
__all__ = ['saslprep']
|
|
|
|
|
|
|
|
import stringprep
|
Add type annotations (#661)
Squashed commit of the following:
commit fa229f7b7591c07aea4e5a4545f9e0c34246e1cd
Merge: eaab3c6 c3e3499
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 20:33:06 2021 -0700
Merge branch 'develop' into mypy (and fixed types)
commit eaab3c65e2e3ab5f1f400cfc5186a3834c4ffe34
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 20:00:45 2021 -0700
reformat all multi-line function defs to one-arg-per-line
commit 3fe2b69eed9197009d9da6776462f580ebf0dfa3
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:58:48 2021 -0700
ccitt nit -- avoid casting needlessly
commit 15983d8c1e7162632fde43752c9d1c15938cd980
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:58:36 2021 -0700
tweak CHANGELOG
commit 13dc0babf782938e7d5b5e482d4c5adf92d82702
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:43:46 2021 -0700
add failing tests for dumppdf crash
commit 6b509c517876b8c15ac5a98a963884e23bd2e4d8
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:24:23 2021 -0700
ccitt: apply misc PR feedback
commit feb031ba86d3f22e41cfbbda13f17c039359f1e6
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:18:26 2021 -0700
add missing None return type to all __init__ methods
commit c0d62d6c54c7ec37b40bea54a3f6a7a618ec0ec6
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:13:08 2021 -0700
minor cleanup, remove a few more Any types
commit b52a0594e1998a492c172538a9b35491c5fc5f52
Author: Andrew Baumann <ab@ab.id.au>
Date: Sun Sep 5 22:37:28 2021 -0700
tighten up types, avoid Any in favour of explicit casts
commit e58fd48bd14f31bebd2de8259f12630ac02756d6
Author: Andrew Baumann <ab@ab.id.au>
Date: Sun Sep 5 14:10:49 2021 -0700
annotate ccitt.py, and fix one definite bug (array.tostring was renamed tobytes)
commit 605290633e55595e5e0045840df5c5b1d9de843a
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Sep 4 22:37:38 2021 -0700
python 3.7 back-compat
commit 4dbcf8760f8a1d3e3d99f085476f86e6a043c80c
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Sep 4 22:32:43 2021 -0700
annotate pdfminer.jbig2
commit 0d40b7c03a8028dc44acd3f457eac71abd681827
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Sep 4 22:31:33 2021 -0700
annotate pdf2txt.py
commit 5f82eb4f5646b5d1285252689191e0a14557ec7b
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Sep 4 09:16:31 2021 -0700
cleanup: make Plane generic
commit 624fc92b88473ff36a174760883f34c22109da2b
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 23:16:51 2021 -0700
bluntly ignore calls to cryptography.hazmat
commit 96b20439c169f40dbb114cabba6a582ad1ebe91e
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 23:01:06 2021 -0700
finish annotating, and disallow_untyped_defs for pdfminer.* _except_ ccitt and jbig2
commit 0ab586347861b72b1d16880dc9293f9ad597e20a
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 21:51:56 2021 -0700
annotate pdffont
commit 4b689f1bcbdaf654feb9de81023e318ca310a12e
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 18:30:02 2021 -0700
annotate a couple more scripts; document sketchy code
commit 291981ff3d273952ec9c92ef8ab948473558b787
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 15:02:01 2021 -0700
pacify flake8
commit 45d2ce91ff333f3b7e34322b16e9c52b99b7a972
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 14:31:48 2021 -0700
annotate dumppdf, and comment likely bugs
commit 7278d83851cb336a1be3803a0993b5ec0ad39b4c
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 13:49:58 2021 -0700
enable mypy on tests and tools, fix one implicit reexport bug
commit 4a83166ef4e4733cd2113f43188b585a4fda392b
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 13:25:59 2021 -0700
pdfdocument: per dumppdf.py, get_dest accepts either bytes or str
commit 43701e1bee068df98f378a253c9c2150ee4ad9f7
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 13:25:00 2021 -0700
layout: LAParams.boxes_flow may be None
commit 164f81652f1788e74837466f0ab593e94079bc0f
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 09:45:09 2021 -0700
add whitespace, pacify flake8
commit 893b9fb9ec918032b36a30456fc0b7a217da86d8
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 09:40:33 2021 -0700
support old Python without typing.Protocol
commit dc245084102b7b04c3f5599d75b5d62ba4290787
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 09:12:03 2021 -0700
Move "# type: ignore" comments to fix mypy on Python < 3.8
The placement of these comments got more flexible in 3.8 due to
https://github.com/python/mypy/issues/1032
Satisfying older Python and fitting in flake8's 79-character line
limit was quite a challenge!
commit da03afe7bd2cf3336e611f467f1c901455940ae8
Author: Andrew Baumann <ab@ab.id.au>
Date: Thu Sep 2 22:59:58 2021 -0700
fix text output from HTMLConverter
commit 5401276a2ed3b74a385ebcab5152485224146161
Author: Andrew Baumann <ab@ab.id.au>
Date: Thu Sep 2 22:40:22 2021 -0700
annotate high_level.py and the immediately-reachable internal APIs (mostly converters)
commit cc490513f8f17a7adc0bcbab2e0e86f37e832300
Author: Andrew Baumann <ab@ab.id.au>
Date: Thu Sep 2 17:04:35 2021 -0700
* expand and improve annotations in cmap, encryption/decompression and fonts
* disallow untyped calls; this way, we have a core set of
typed code that can grow over time
(just not for ccitt, because there's a ton of work lurking there)
* expand "typing: none" comments to suppress a specific error code
commit 92df54ba1d53d5dbbd5442757dd85be5b1851f99
Author: Andrew Baumann <ab@ab.id.au>
Date: Wed Sep 1 20:50:59 2021 -0700
update CHANGELOG
commit f72aaead45d0615e472a9b3190c9551a6b67b36e
Merge: ff787a9 8ea9f10
Author: Andrew Baumann <ab@ab.id.au>
Date: Wed Sep 1 20:47:03 2021 -0700
Merge branch 'develop' into mypy
commit ff787a93986c60361536a97182a41774f4a53ac3
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Aug 21 21:46:14 2021 -0700
be more precise about types on ps/pdf stacks, remove most of the Any annotations
commit be1550189e10717f6827dbb7009d6e8c8b3f4c62
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Aug 21 10:13:58 2021 -0700
silence missing imports, (maybe?) hook to tox
commit ff4b6a9bd46b352583d823d39065652c9a6f05f4
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Aug 20 22:49:06 2021 -0700
turn on more strict checks, and untangle the layout mess with generics
Status:
$ mypy pdfminer
pdfminer/ccitt.py:565: error: Cannot find implementation or library stub for module named "pygame"
pdfminer/ccitt.py:565: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
pdfminer/pdfdocument.py:7: error: Skipping analyzing "cryptography.hazmat.backends": found module but no type hints or library stubs
pdfminer/pdfdocument.py:8: error: Skipping analyzing "cryptography.hazmat.primitives.ciphers": found module but no type hints or library stubs
pdfminer/pdfdevice.py:191: error: Argument 1 to "write" of "IO" has incompatible type "str"; expected "bytes"
pdfminer/image.py:84: error: Cannot find implementation or library stub for module named "PIL"
Found 5 errors in 4 files (checked 27 source files)
pdfdevice.py:191 appears to be a real bug
commit 5c9c0b19d26ae391aea0e69c2c819261cc04460c
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Aug 20 17:22:41 2021 -0700
finish annotating layout
commit 0e6871c16abb29df2868ab145b4ce451b4b6c777
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Aug 20 16:54:46 2021 -0700
general progress on annotations
* finish utils
* annotate more of pdfinterp, pdfdevice
* document reason for # type: ignore comments
* fix cyclic imports
* satisfy flake8
commit 17d59f42917fbf9b2b2eb844d3e83a8f2a3f123a
Author: Andrew Baumann <ab@ab.id.au>
Date: Thu Aug 19 21:38:50 2021 -0700
WIP on type annotations
With the possible exception of psparser.py, this is far from complete.
$ mypy pdfminer
pdfminer/ccitt.py:565: error: Cannot find implementation or library stub for module named "pygame"
pdfminer/ccitt.py:565: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
pdfminer/pdfdocument.py:7: error: Skipping analyzing "cryptography.hazmat.backends": found module but no type hints or library stubs
pdfminer/pdfdocument.py:8: error: Skipping analyzing "cryptography.hazmat.primitives.ciphers": found module but no type hints or library stubs
pdfminer/image.py:84: error: Cannot find implementation or library stub for module named "PIL"
2021-10-09 14:23:28 +00:00
|
|
|
from typing import Callable, Tuple
|
2021-09-06 20:00:23 +00:00
|
|
|
import unicodedata
|
|
|
|
|
|
|
|
# RFC4013 section 2.3 prohibited output.
|
Add type annotations (#661)
Squashed commit of the following:
commit fa229f7b7591c07aea4e5a4545f9e0c34246e1cd
Merge: eaab3c6 c3e3499
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 20:33:06 2021 -0700
Merge branch 'develop' into mypy (and fixed types)
commit eaab3c65e2e3ab5f1f400cfc5186a3834c4ffe34
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 20:00:45 2021 -0700
reformat all multi-line function defs to one-arg-per-line
commit 3fe2b69eed9197009d9da6776462f580ebf0dfa3
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:58:48 2021 -0700
ccitt nit -- avoid casting needlessly
commit 15983d8c1e7162632fde43752c9d1c15938cd980
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:58:36 2021 -0700
tweak CHANGELOG
commit 13dc0babf782938e7d5b5e482d4c5adf92d82702
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:43:46 2021 -0700
add failing tests for dumppdf crash
commit 6b509c517876b8c15ac5a98a963884e23bd2e4d8
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:24:23 2021 -0700
ccitt: apply misc PR feedback
commit feb031ba86d3f22e41cfbbda13f17c039359f1e6
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:18:26 2021 -0700
add missing None return type to all __init__ methods
commit c0d62d6c54c7ec37b40bea54a3f6a7a618ec0ec6
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:13:08 2021 -0700
minor cleanup, remove a few more Any types
commit b52a0594e1998a492c172538a9b35491c5fc5f52
Author: Andrew Baumann <ab@ab.id.au>
Date: Sun Sep 5 22:37:28 2021 -0700
tighten up types, avoid Any in favour of explicit casts
commit e58fd48bd14f31bebd2de8259f12630ac02756d6
Author: Andrew Baumann <ab@ab.id.au>
Date: Sun Sep 5 14:10:49 2021 -0700
annotate ccitt.py, and fix one definite bug (array.tostring was renamed tobytes)
commit 605290633e55595e5e0045840df5c5b1d9de843a
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Sep 4 22:37:38 2021 -0700
python 3.7 back-compat
commit 4dbcf8760f8a1d3e3d99f085476f86e6a043c80c
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Sep 4 22:32:43 2021 -0700
annotate pdfminer.jbig2
commit 0d40b7c03a8028dc44acd3f457eac71abd681827
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Sep 4 22:31:33 2021 -0700
annotate pdf2txt.py
commit 5f82eb4f5646b5d1285252689191e0a14557ec7b
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Sep 4 09:16:31 2021 -0700
cleanup: make Plane generic
commit 624fc92b88473ff36a174760883f34c22109da2b
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 23:16:51 2021 -0700
bluntly ignore calls to cryptography.hazmat
commit 96b20439c169f40dbb114cabba6a582ad1ebe91e
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 23:01:06 2021 -0700
finish annotating, and disallow_untyped_defs for pdfminer.* _except_ ccitt and jbig2
commit 0ab586347861b72b1d16880dc9293f9ad597e20a
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 21:51:56 2021 -0700
annotate pdffont
commit 4b689f1bcbdaf654feb9de81023e318ca310a12e
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 18:30:02 2021 -0700
annotate a couple more scripts; document sketchy code
commit 291981ff3d273952ec9c92ef8ab948473558b787
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 15:02:01 2021 -0700
pacify flake8
commit 45d2ce91ff333f3b7e34322b16e9c52b99b7a972
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 14:31:48 2021 -0700
annotate dumppdf, and comment likely bugs
commit 7278d83851cb336a1be3803a0993b5ec0ad39b4c
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 13:49:58 2021 -0700
enable mypy on tests and tools, fix one implicit reexport bug
commit 4a83166ef4e4733cd2113f43188b585a4fda392b
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 13:25:59 2021 -0700
pdfdocument: per dumppdf.py, get_dest accepts either bytes or str
commit 43701e1bee068df98f378a253c9c2150ee4ad9f7
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 13:25:00 2021 -0700
layout: LAParams.boxes_flow may be None
commit 164f81652f1788e74837466f0ab593e94079bc0f
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 09:45:09 2021 -0700
add whitespace, pacify flake8
commit 893b9fb9ec918032b36a30456fc0b7a217da86d8
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 09:40:33 2021 -0700
support old Python without typing.Protocol
commit dc245084102b7b04c3f5599d75b5d62ba4290787
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 09:12:03 2021 -0700
Move "# type: ignore" comments to fix mypy on Python < 3.8
The placement of these comments got more flexible in 3.8 due to
https://github.com/python/mypy/issues/1032
Satisfying older Python and fitting in flake8's 79-character line
limit was quite a challenge!
commit da03afe7bd2cf3336e611f467f1c901455940ae8
Author: Andrew Baumann <ab@ab.id.au>
Date: Thu Sep 2 22:59:58 2021 -0700
fix text output from HTMLConverter
commit 5401276a2ed3b74a385ebcab5152485224146161
Author: Andrew Baumann <ab@ab.id.au>
Date: Thu Sep 2 22:40:22 2021 -0700
annotate high_level.py and the immediately-reachable internal APIs (mostly converters)
commit cc490513f8f17a7adc0bcbab2e0e86f37e832300
Author: Andrew Baumann <ab@ab.id.au>
Date: Thu Sep 2 17:04:35 2021 -0700
* expand and improve annotations in cmap, encryption/decompression and fonts
* disallow untyped calls; this way, we have a core set of
typed code that can grow over time
(just not for ccitt, because there's a ton of work lurking there)
* expand "typing: none" comments to suppress a specific error code
commit 92df54ba1d53d5dbbd5442757dd85be5b1851f99
Author: Andrew Baumann <ab@ab.id.au>
Date: Wed Sep 1 20:50:59 2021 -0700
update CHANGELOG
commit f72aaead45d0615e472a9b3190c9551a6b67b36e
Merge: ff787a9 8ea9f10
Author: Andrew Baumann <ab@ab.id.au>
Date: Wed Sep 1 20:47:03 2021 -0700
Merge branch 'develop' into mypy
commit ff787a93986c60361536a97182a41774f4a53ac3
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Aug 21 21:46:14 2021 -0700
be more precise about types on ps/pdf stacks, remove most of the Any annotations
commit be1550189e10717f6827dbb7009d6e8c8b3f4c62
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Aug 21 10:13:58 2021 -0700
silence missing imports, (maybe?) hook to tox
commit ff4b6a9bd46b352583d823d39065652c9a6f05f4
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Aug 20 22:49:06 2021 -0700
turn on more strict checks, and untangle the layout mess with generics
Status:
$ mypy pdfminer
pdfminer/ccitt.py:565: error: Cannot find implementation or library stub for module named "pygame"
pdfminer/ccitt.py:565: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
pdfminer/pdfdocument.py:7: error: Skipping analyzing "cryptography.hazmat.backends": found module but no type hints or library stubs
pdfminer/pdfdocument.py:8: error: Skipping analyzing "cryptography.hazmat.primitives.ciphers": found module but no type hints or library stubs
pdfminer/pdfdevice.py:191: error: Argument 1 to "write" of "IO" has incompatible type "str"; expected "bytes"
pdfminer/image.py:84: error: Cannot find implementation or library stub for module named "PIL"
Found 5 errors in 4 files (checked 27 source files)
pdfdevice.py:191 appears to be a real bug
commit 5c9c0b19d26ae391aea0e69c2c819261cc04460c
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Aug 20 17:22:41 2021 -0700
finish annotating layout
commit 0e6871c16abb29df2868ab145b4ce451b4b6c777
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Aug 20 16:54:46 2021 -0700
general progress on annotations
* finish utils
* annotate more of pdfinterp, pdfdevice
* document reason for # type: ignore comments
* fix cyclic imports
* satisfy flake8
commit 17d59f42917fbf9b2b2eb844d3e83a8f2a3f123a
Author: Andrew Baumann <ab@ab.id.au>
Date: Thu Aug 19 21:38:50 2021 -0700
WIP on type annotations
With the possible exception of psparser.py, this is far from complete.
$ mypy pdfminer
pdfminer/ccitt.py:565: error: Cannot find implementation or library stub for module named "pygame"
pdfminer/ccitt.py:565: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
pdfminer/pdfdocument.py:7: error: Skipping analyzing "cryptography.hazmat.backends": found module but no type hints or library stubs
pdfminer/pdfdocument.py:8: error: Skipping analyzing "cryptography.hazmat.primitives.ciphers": found module but no type hints or library stubs
pdfminer/image.py:84: error: Cannot find implementation or library stub for module named "PIL"
2021-10-09 14:23:28 +00:00
|
|
|
_PROHIBITED: Tuple[Callable[[str], bool], ...] = (
|
2021-09-06 20:00:23 +00:00
|
|
|
# A strict reading of RFC 4013 requires table c12 here, but
|
|
|
|
# characters from it are mapped to SPACE in the Map step. Can
|
|
|
|
# normalization reintroduce them somehow?
|
|
|
|
stringprep.in_table_c12,
|
|
|
|
stringprep.in_table_c21_c22,
|
|
|
|
stringprep.in_table_c3,
|
|
|
|
stringprep.in_table_c4,
|
|
|
|
stringprep.in_table_c5,
|
|
|
|
stringprep.in_table_c6,
|
|
|
|
stringprep.in_table_c7,
|
|
|
|
stringprep.in_table_c8,
|
|
|
|
stringprep.in_table_c9)
|
|
|
|
|
|
|
|
|
Add type annotations (#661)
Squashed commit of the following:
commit fa229f7b7591c07aea4e5a4545f9e0c34246e1cd
Merge: eaab3c6 c3e3499
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 20:33:06 2021 -0700
Merge branch 'develop' into mypy (and fixed types)
commit eaab3c65e2e3ab5f1f400cfc5186a3834c4ffe34
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 20:00:45 2021 -0700
reformat all multi-line function defs to one-arg-per-line
commit 3fe2b69eed9197009d9da6776462f580ebf0dfa3
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:58:48 2021 -0700
ccitt nit -- avoid casting needlessly
commit 15983d8c1e7162632fde43752c9d1c15938cd980
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:58:36 2021 -0700
tweak CHANGELOG
commit 13dc0babf782938e7d5b5e482d4c5adf92d82702
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:43:46 2021 -0700
add failing tests for dumppdf crash
commit 6b509c517876b8c15ac5a98a963884e23bd2e4d8
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:24:23 2021 -0700
ccitt: apply misc PR feedback
commit feb031ba86d3f22e41cfbbda13f17c039359f1e6
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:18:26 2021 -0700
add missing None return type to all __init__ methods
commit c0d62d6c54c7ec37b40bea54a3f6a7a618ec0ec6
Author: Andrew Baumann <ab@ab.id.au>
Date: Mon Sep 6 15:13:08 2021 -0700
minor cleanup, remove a few more Any types
commit b52a0594e1998a492c172538a9b35491c5fc5f52
Author: Andrew Baumann <ab@ab.id.au>
Date: Sun Sep 5 22:37:28 2021 -0700
tighten up types, avoid Any in favour of explicit casts
commit e58fd48bd14f31bebd2de8259f12630ac02756d6
Author: Andrew Baumann <ab@ab.id.au>
Date: Sun Sep 5 14:10:49 2021 -0700
annotate ccitt.py, and fix one definite bug (array.tostring was renamed tobytes)
commit 605290633e55595e5e0045840df5c5b1d9de843a
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Sep 4 22:37:38 2021 -0700
python 3.7 back-compat
commit 4dbcf8760f8a1d3e3d99f085476f86e6a043c80c
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Sep 4 22:32:43 2021 -0700
annotate pdfminer.jbig2
commit 0d40b7c03a8028dc44acd3f457eac71abd681827
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Sep 4 22:31:33 2021 -0700
annotate pdf2txt.py
commit 5f82eb4f5646b5d1285252689191e0a14557ec7b
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Sep 4 09:16:31 2021 -0700
cleanup: make Plane generic
commit 624fc92b88473ff36a174760883f34c22109da2b
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 23:16:51 2021 -0700
bluntly ignore calls to cryptography.hazmat
commit 96b20439c169f40dbb114cabba6a582ad1ebe91e
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 23:01:06 2021 -0700
finish annotating, and disallow_untyped_defs for pdfminer.* _except_ ccitt and jbig2
commit 0ab586347861b72b1d16880dc9293f9ad597e20a
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 21:51:56 2021 -0700
annotate pdffont
commit 4b689f1bcbdaf654feb9de81023e318ca310a12e
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 18:30:02 2021 -0700
annotate a couple more scripts; document sketchy code
commit 291981ff3d273952ec9c92ef8ab948473558b787
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 15:02:01 2021 -0700
pacify flake8
commit 45d2ce91ff333f3b7e34322b16e9c52b99b7a972
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 14:31:48 2021 -0700
annotate dumppdf, and comment likely bugs
commit 7278d83851cb336a1be3803a0993b5ec0ad39b4c
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 13:49:58 2021 -0700
enable mypy on tests and tools, fix one implicit reexport bug
commit 4a83166ef4e4733cd2113f43188b585a4fda392b
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 13:25:59 2021 -0700
pdfdocument: per dumppdf.py, get_dest accepts either bytes or str
commit 43701e1bee068df98f378a253c9c2150ee4ad9f7
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 13:25:00 2021 -0700
layout: LAParams.boxes_flow may be None
commit 164f81652f1788e74837466f0ab593e94079bc0f
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 09:45:09 2021 -0700
add whitespace, pacify flake8
commit 893b9fb9ec918032b36a30456fc0b7a217da86d8
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 09:40:33 2021 -0700
support old Python without typing.Protocol
commit dc245084102b7b04c3f5599d75b5d62ba4290787
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Sep 3 09:12:03 2021 -0700
Move "# type: ignore" comments to fix mypy on Python < 3.8
The placement of these comments got more flexible in 3.8 due to
https://github.com/python/mypy/issues/1032
Satisfying older Python and fitting in flake8's 79-character line
limit was quite a challenge!
commit da03afe7bd2cf3336e611f467f1c901455940ae8
Author: Andrew Baumann <ab@ab.id.au>
Date: Thu Sep 2 22:59:58 2021 -0700
fix text output from HTMLConverter
commit 5401276a2ed3b74a385ebcab5152485224146161
Author: Andrew Baumann <ab@ab.id.au>
Date: Thu Sep 2 22:40:22 2021 -0700
annotate high_level.py and the immediately-reachable internal APIs (mostly converters)
commit cc490513f8f17a7adc0bcbab2e0e86f37e832300
Author: Andrew Baumann <ab@ab.id.au>
Date: Thu Sep 2 17:04:35 2021 -0700
* expand and improve annotations in cmap, encryption/decompression and fonts
* disallow untyped calls; this way, we have a core set of
typed code that can grow over time
(just not for ccitt, because there's a ton of work lurking there)
* expand "typing: none" comments to suppress a specific error code
commit 92df54ba1d53d5dbbd5442757dd85be5b1851f99
Author: Andrew Baumann <ab@ab.id.au>
Date: Wed Sep 1 20:50:59 2021 -0700
update CHANGELOG
commit f72aaead45d0615e472a9b3190c9551a6b67b36e
Merge: ff787a9 8ea9f10
Author: Andrew Baumann <ab@ab.id.au>
Date: Wed Sep 1 20:47:03 2021 -0700
Merge branch 'develop' into mypy
commit ff787a93986c60361536a97182a41774f4a53ac3
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Aug 21 21:46:14 2021 -0700
be more precise about types on ps/pdf stacks, remove most of the Any annotations
commit be1550189e10717f6827dbb7009d6e8c8b3f4c62
Author: Andrew Baumann <ab@ab.id.au>
Date: Sat Aug 21 10:13:58 2021 -0700
silence missing imports, (maybe?) hook to tox
commit ff4b6a9bd46b352583d823d39065652c9a6f05f4
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Aug 20 22:49:06 2021 -0700
turn on more strict checks, and untangle the layout mess with generics
Status:
$ mypy pdfminer
pdfminer/ccitt.py:565: error: Cannot find implementation or library stub for module named "pygame"
pdfminer/ccitt.py:565: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
pdfminer/pdfdocument.py:7: error: Skipping analyzing "cryptography.hazmat.backends": found module but no type hints or library stubs
pdfminer/pdfdocument.py:8: error: Skipping analyzing "cryptography.hazmat.primitives.ciphers": found module but no type hints or library stubs
pdfminer/pdfdevice.py:191: error: Argument 1 to "write" of "IO" has incompatible type "str"; expected "bytes"
pdfminer/image.py:84: error: Cannot find implementation or library stub for module named "PIL"
Found 5 errors in 4 files (checked 27 source files)
pdfdevice.py:191 appears to be a real bug
commit 5c9c0b19d26ae391aea0e69c2c819261cc04460c
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Aug 20 17:22:41 2021 -0700
finish annotating layout
commit 0e6871c16abb29df2868ab145b4ce451b4b6c777
Author: Andrew Baumann <ab@ab.id.au>
Date: Fri Aug 20 16:54:46 2021 -0700
general progress on annotations
* finish utils
* annotate more of pdfinterp, pdfdevice
* document reason for # type: ignore comments
* fix cyclic imports
* satisfy flake8
commit 17d59f42917fbf9b2b2eb844d3e83a8f2a3f123a
Author: Andrew Baumann <ab@ab.id.au>
Date: Thu Aug 19 21:38:50 2021 -0700
WIP on type annotations
With the possible exception of psparser.py, this is far from complete.
$ mypy pdfminer
pdfminer/ccitt.py:565: error: Cannot find implementation or library stub for module named "pygame"
pdfminer/ccitt.py:565: note: See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
pdfminer/pdfdocument.py:7: error: Skipping analyzing "cryptography.hazmat.backends": found module but no type hints or library stubs
pdfminer/pdfdocument.py:8: error: Skipping analyzing "cryptography.hazmat.primitives.ciphers": found module but no type hints or library stubs
pdfminer/image.py:84: error: Cannot find implementation or library stub for module named "PIL"
2021-10-09 14:23:28 +00:00
|
|
|
def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str:
|
2021-09-06 20:00:23 +00:00
|
|
|
"""An implementation of RFC4013 SASLprep.
|
|
|
|
:param data:
|
|
|
|
The string to SASLprep.
|
|
|
|
:param prohibit_unassigned_code_points:
|
|
|
|
RFC 3454 and RFCs for various SASL mechanisms distinguish between
|
|
|
|
`queries` (unassigned code points allowed) and
|
|
|
|
`stored strings` (unassigned code points prohibited). Defaults
|
|
|
|
to ``True`` (unassigned code points are prohibited).
|
|
|
|
:return: The SASLprep'ed version of `data`.
|
|
|
|
"""
|
|
|
|
if prohibit_unassigned_code_points:
|
|
|
|
prohibited = _PROHIBITED + (stringprep.in_table_a1,)
|
|
|
|
else:
|
|
|
|
prohibited = _PROHIBITED
|
|
|
|
|
|
|
|
# RFC3454 section 2, step 1 - Map
|
|
|
|
# RFC4013 section 2.1 mappings
|
|
|
|
# Map Non-ASCII space characters to SPACE (U+0020). Map
|
|
|
|
# commonly mapped to nothing characters to, well, nothing.
|
|
|
|
in_table_c12 = stringprep.in_table_c12
|
|
|
|
in_table_b1 = stringprep.in_table_b1
|
|
|
|
data = "".join(
|
|
|
|
["\u0020" if in_table_c12(elt) else elt
|
|
|
|
for elt in data if not in_table_b1(elt)])
|
|
|
|
|
|
|
|
# RFC3454 section 2, step 2 - Normalize
|
|
|
|
# RFC4013 section 2.2 normalization
|
|
|
|
data = unicodedata.ucd_3_2_0.normalize('NFKC', data)
|
|
|
|
|
|
|
|
in_table_d1 = stringprep.in_table_d1
|
|
|
|
if in_table_d1(data[0]):
|
|
|
|
if not in_table_d1(data[-1]):
|
|
|
|
# RFC3454, Section 6, #3. If a string contains any
|
|
|
|
# RandALCat character, the first and last characters
|
|
|
|
# MUST be RandALCat characters.
|
|
|
|
raise ValueError("SASLprep: failed bidirectional check")
|
|
|
|
# RFC3454, Section 6, #2. If a string contains any RandALCat
|
|
|
|
# character, it MUST NOT contain any LCat character.
|
|
|
|
prohibited = prohibited + (stringprep.in_table_d2,)
|
|
|
|
else:
|
|
|
|
# RFC3454, Section 6, #3. Following the logic of #3, if
|
|
|
|
# the first character is not a RandALCat, no other character
|
|
|
|
# can be either.
|
|
|
|
prohibited = prohibited + (in_table_d1,)
|
|
|
|
|
|
|
|
# RFC3454 section 2, step 3 and 4 - Prohibit and check bidi
|
|
|
|
for char in data:
|
|
|
|
if any(in_table(char) for in_table in prohibited):
|
|
|
|
raise ValueError(
|
|
|
|
"SASLprep: failed prohibited character check")
|
|
|
|
|
|
|
|
return data
|