Add support for ISO 32000-2 AES256 encryption (#614)
* feat: Add support for ISO 32000-2 AES256 encryption
* feat: Applies review suggestions
pull/661/head
parent
8ea9f1091a
commit
c3e3499a6b
|
@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
- Add support for PDF 2.0 (ISO 32000-2) AES-256 encryption ([#614](https://github.com/pdfminer/pdfminer.six/pull/614))
|
||||||
- Support for Paeth PNG filter compression (predictor value = 4) ([#537](https://github.com/pdfminer/pdfminer.six/pull/537))
|
- Support for Paeth PNG filter compression (predictor value = 4) ([#537](https://github.com/pdfminer/pdfminer.six/pull/537))
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
|
@ -51,3 +51,8 @@ Contributing
|
||||||
------------
|
------------
|
||||||
|
|
||||||
Be sure to read the [contribution guidelines](https://github.com/pdfminer/pdfminer.six/blob/master/CONTRIBUTING.md).
|
Be sure to read the [contribution guidelines](https://github.com/pdfminer/pdfminer.six/blob/master/CONTRIBUTING.md).
|
||||||
|
|
||||||
|
Acknowledgement
|
||||||
|
---------------
|
||||||
|
|
||||||
|
This repository includes code from `pyHanko`; the original license has been included [here](/docs/licenses/LICENSE.pyHanko).
|
|
@ -0,0 +1,23 @@
|
||||||
|
This package contains various elements based on code from the pyHanko project, of which we reproduce the license below.
|
||||||
|
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2020 Matthias Valvekens
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
|
@ -0,0 +1,94 @@
|
||||||
|
# Copyright 2016-present MongoDB, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
# Some changes copyright 2021-present Matthias Valvekens,
|
||||||
|
# licensed under the license of the pyHanko project (see LICENSE file).
|
||||||
|
|
||||||
|
|
||||||
|
"""An implementation of RFC4013 SASLprep."""
|
||||||
|
|
||||||
|
__all__ = ['saslprep']
|
||||||
|
|
||||||
|
import stringprep
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
|
# RFC4013 section 2.3 prohibited output.
_PROHIBITED = (
    # A strict reading of RFC 4013 requires table c12 here, but
    # characters from it are mapped to SPACE in the Map step. Can
    # normalization reintroduce them somehow?
    stringprep.in_table_c12,
    stringprep.in_table_c21_c22,
    stringprep.in_table_c3,
    stringprep.in_table_c4,
    stringprep.in_table_c5,
    stringprep.in_table_c6,
    stringprep.in_table_c7,
    stringprep.in_table_c8,
    stringprep.in_table_c9)


def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str:
    """An implementation of RFC4013 SASLprep.

    :param data:
        The string to SASLprep.
    :param prohibit_unassigned_code_points:
        RFC 3454 and RFCs for various SASL mechanisms distinguish between
        `queries` (unassigned code points allowed) and
        `stored strings` (unassigned code points prohibited). Defaults
        to ``True`` (unassigned code points are prohibited).
    :return: The SASLprep'ed version of `data`. An input that is empty,
        or that becomes empty after the mapping step, yields ``""``.
    :raises ValueError: if the string contains a prohibited character or
        fails the bidirectional check.
    """
    if prohibit_unassigned_code_points:
        prohibited = _PROHIBITED + (stringprep.in_table_a1,)
    else:
        prohibited = _PROHIBITED

    # RFC3454 section 2, step 1 - Map
    # RFC4013 section 2.1 mappings
    # Map Non-ASCII space characters to SPACE (U+0020). Map
    # commonly mapped to nothing characters to, well, nothing.
    in_table_c12 = stringprep.in_table_c12
    in_table_b1 = stringprep.in_table_b1
    data = "".join(
        ["\u0020" if in_table_c12(elt) else elt
         for elt in data if not in_table_b1(elt)])

    # RFC3454 section 2, step 2 - Normalize
    # RFC4013 section 2.2 normalization
    data = unicodedata.ucd_3_2_0.normalize('NFKC', data)

    # Nothing left to check: the input was empty or consisted solely of
    # characters that are mapped to nothing. Returning here avoids indexing
    # into an empty string in the bidirectional check below.
    if not data:
        return data

    in_table_d1 = stringprep.in_table_d1
    if in_table_d1(data[0]):
        if not in_table_d1(data[-1]):
            # RFC3454, Section 6, #3. If a string contains any
            # RandALCat character, the first and last characters
            # MUST be RandALCat characters.
            raise ValueError("SASLprep: failed bidirectional check")
        # RFC3454, Section 6, #2. If a string contains any RandALCat
        # character, it MUST NOT contain any LCat character.
        prohibited = prohibited + (stringprep.in_table_d2,)
    else:
        # RFC3454, Section 6, #3. Following the logic of #3, if
        # the first character is not a RandALCat, no other character
        # can be either.
        prohibited = prohibited + (in_table_d1,)

    # RFC3454 section 2, step 3 and 4 - Prohibit and check bidi
    for char in data:
        if any(in_table(char) for in_table in prohibited):
            raise ValueError(
                "SASLprep: failed prohibited character check")

    return data
|
|
@ -1,7 +1,7 @@
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import struct
|
import struct
|
||||||
from hashlib import sha256, md5
|
from hashlib import sha256, md5, sha384, sha512
|
||||||
|
|
||||||
from cryptography.hazmat.backends import default_backend
|
from cryptography.hazmat.backends import default_backend
|
||||||
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
||||||
|
@ -477,7 +477,7 @@ class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):
|
||||||
|
|
||||||
class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
|
class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
|
||||||
|
|
||||||
supported_revisions = (5,)
|
supported_revisions = (5, 6)
|
||||||
|
|
||||||
def init_params(self):
|
def init_params(self):
|
||||||
super().init_params()
|
super().init_params()
|
||||||
|
@ -499,29 +499,84 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def authenticate(self, password):
|
def authenticate(self, password):
|
||||||
password = password.encode('utf-8')[:127]
|
password = self._normalize_password(password)
|
||||||
hash = sha256(password)
|
hash = self._password_hash(password, self.o_validation_salt, self.u)
|
||||||
hash.update(self.o_validation_salt)
|
if hash == self.o_hash:
|
||||||
hash.update(self.u)
|
hash = self._password_hash(password, self.o_key_salt, self.u)
|
||||||
if hash.digest() == self.o_hash:
|
cipher = Cipher(algorithms.AES(hash),
|
||||||
hash = sha256(password)
|
|
||||||
hash.update(self.o_key_salt)
|
|
||||||
hash.update(self.u)
|
|
||||||
cipher = Cipher(algorithms.AES(hash.digest()),
|
|
||||||
modes.CBC(b'\0' * 16),
|
modes.CBC(b'\0' * 16),
|
||||||
backend=default_backend())
|
backend=default_backend())
|
||||||
return cipher.decryptor().update(self.oe)
|
return cipher.decryptor().update(self.oe)
|
||||||
hash = sha256(password)
|
hash = self._password_hash(password, self.u_validation_salt)
|
||||||
hash.update(self.u_validation_salt)
|
if hash == self.u_hash:
|
||||||
if hash.digest() == self.u_hash:
|
hash = self._password_hash(password, self.u_key_salt)
|
||||||
hash = sha256(password)
|
cipher = Cipher(algorithms.AES(hash),
|
||||||
hash.update(self.u_key_salt)
|
|
||||||
cipher = Cipher(algorithms.AES(hash.digest()),
|
|
||||||
modes.CBC(b'\0' * 16),
|
modes.CBC(b'\0' * 16),
|
||||||
backend=default_backend())
|
backend=default_backend())
|
||||||
return cipher.decryptor().update(self.ue)
|
return cipher.decryptor().update(self.ue)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _normalize_password(self, password):
|
||||||
|
if self.r == 6:
|
||||||
|
# saslprep expects non-empty strings, apparently
|
||||||
|
if not password:
|
||||||
|
return b''
|
||||||
|
from ._saslprep import saslprep
|
||||||
|
password = saslprep(password)
|
||||||
|
return password.encode('utf-8')[:127]
|
||||||
|
|
||||||
|
def _password_hash(self, password, salt, vector=None):
|
||||||
|
"""
|
||||||
|
Compute password hash depending on revision number
|
||||||
|
"""
|
||||||
|
if self.r == 5:
|
||||||
|
return self._r5_password(password, salt, vector)
|
||||||
|
return self._r6_password(password, salt[0:8], vector)
|
||||||
|
|
||||||
|
def _r5_password(self, password, salt, vector):
|
||||||
|
"""
|
||||||
|
Compute the password for revision 5
|
||||||
|
"""
|
||||||
|
hash = sha256(password)
|
||||||
|
hash.update(salt)
|
||||||
|
if vector is not None:
|
||||||
|
hash.update(vector)
|
||||||
|
return hash.digest()
|
||||||
|
|
||||||
|
def _r6_password(self, password, salt, vector):
    """
    Compute the revision 6 password hash.

    Starting from SHA-256(password + salt [+ vector]), each round
    AES-CBC-encrypts 64 repetitions of ``password + k + vector`` (keyed by
    the first 16 bytes of ``k``, with the next 16 bytes as IV) and re-hashes
    the ciphertext with SHA-256, SHA-384 or SHA-512, chosen by the first
    16 ciphertext bytes modulo 3. At least 64 rounds are run; the loop then
    continues while the last ciphertext byte is greater than the round
    counter minus 32. The first 32 bytes of the final hash are returned.
    """
    seed = sha256(password)
    seed.update(salt)
    if vector is not None:
        seed.update(vector)
    k = seed.digest()

    digest_choices = (sha256, sha384, sha512)
    rounds_done = 0
    tail_byte = 0
    while True:
        # Stop once the minimum of 64 rounds is done and the trailing
        # ciphertext byte no longer exceeds the round threshold.
        if rounds_done >= 64 and tail_byte <= rounds_done - 32:
            break
        repeated = (password + k + (vector or b'')) * 64
        e = self._aes_cbc_encrypt(key=k[:16], iv=k[16:32], data=repeated)
        # the first 16 bytes of e, interpreted as an unsigned
        # integer mod 3, select the next hash function
        selector = self._bytes_mod_3(e[:16])
        k = digest_choices[selector](e).digest()
        tail_byte = e[-1]
        rounds_done += 1
    return k[:32]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _bytes_mod_3(input_bytes):
|
||||||
|
# 256 is 1 mod 3, so we can just sum 'em
|
||||||
|
return sum(b % 3 for b in input_bytes) % 3
|
||||||
|
|
||||||
|
def _aes_cbc_encrypt(self, key, iv, data):
    """
    Encrypt ``data`` with AES in CBC mode and return the ciphertext.

    :param key: AES key bytes.
    :param iv: initialization vector for CBC mode.
    :param data: plaintext bytes. No padder is applied here, so the caller
        is presumably expected to pass a whole number of AES blocks.
    :return: the concatenated output of ``update`` and ``finalize``.
    """
    # Pass the backend explicitly, like every other Cipher(...) call in
    # this module; cryptography releases where `backend` is a required
    # argument raise TypeError without it.
    cipher = Cipher(algorithms.AES(key), modes.CBC(iv),
                    backend=default_backend())
    encryptor = cipher.encryptor()
    return encryptor.update(data) + encryptor.finalize()
|
||||||
|
|
||||||
def decrypt_aes256(self, objid, genno, data):
|
def decrypt_aes256(self, objid, genno, data):
|
||||||
initialization_vector = data[:16]
|
initialization_vector = data[:16]
|
||||||
ciphertext = data[16:]
|
ciphertext = data[16:]
|
||||||
|
|
Binary file not shown.
|
@ -96,6 +96,12 @@ class TestPdf2Txt():
|
||||||
def test_encryption_aes256m(self):
|
def test_encryption_aes256m(self):
|
||||||
run('encryption/aes-256-m.pdf', '-P foo')
|
run('encryption/aes-256-m.pdf', '-P foo')
|
||||||
|
|
||||||
|
def test_encryption_aes256_r6_user(self):
|
||||||
|
run('encryption/aes-256-r6.pdf', '-P usersecret')
|
||||||
|
|
||||||
|
def test_encryption_aes256_r6_owner(self):
|
||||||
|
run('encryption/aes-256-r6.pdf', '-P ownersecret')
|
||||||
|
|
||||||
def test_encryption_base(self):
|
def test_encryption_base(self):
|
||||||
run('encryption/base.pdf', '-P foo')
|
run('encryption/base.pdf', '-P foo')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue