Add support for ISO 32000-2 AES256 encryption (#614)

* feat: Add support for ISO 32000-2 AES256 encryption

* feat: Applies review suggestions
pull/661/head
Raphaël Cohen 2021-09-06 22:00:23 +02:00 committed by GitHub
parent 8ea9f1091a
commit c3e3499a6b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 201 additions and 17 deletions

View File

@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [Unreleased]
### Added
- Add support for PDF 2.0 (ISO 32000-2) AES-256 encryption ([#614](https://github.com/pdfminer/pdfminer.six/pull/614))
- Support for Paeth PNG filter compression (predictor value = 4) ([#537](https://github.com/pdfminer/pdfminer.six/pull/537))
### Fixed

View File

@ -51,3 +51,8 @@ Contributing
------------
Be sure to read the [contribution guidelines](https://github.com/pdfminer/pdfminer.six/blob/master/CONTRIBUTING.md).
Acknowledgement
---------------
This repository includes code from `pyHanko`; the original license is included [here](/docs/licenses/LICENSE.pyHanko).

View File

@ -0,0 +1,23 @@
This package contains various elements based on code from the pyHanko project, of which we reproduce the license below.
MIT License
Copyright (c) 2020 Matthias Valvekens
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

94
pdfminer/_saslprep.py Normal file
View File

@ -0,0 +1,94 @@
# Copyright 2016-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Some changes copyright 2021-present Matthias Valvekens,
# licensed under the license of the pyHanko project (see LICENSE file).
"""An implementation of RFC4013 SASLprep."""
__all__ = ['saslprep']
import stringprep
import unicodedata
# RFC4013 section 2.3 prohibited output.
# Each entry is a predicate taking a single character and returning True
# when that character belongs to the corresponding RFC 3454 table.
_PROHIBITED = (
    # A strict reading of RFC 4013 requires table c12 here, but
    # characters from it are mapped to SPACE in the Map step. Can
    # normalization reintroduce them somehow?
    stringprep.in_table_c12,
    stringprep.in_table_c21_c22,
    stringprep.in_table_c3,
    stringprep.in_table_c4,
    stringprep.in_table_c5,
    stringprep.in_table_c6,
    stringprep.in_table_c7,
    stringprep.in_table_c8,
    stringprep.in_table_c9)


def saslprep(data: str, prohibit_unassigned_code_points=True) -> str:
    """An implementation of RFC4013 SASLprep.

    The string is mapped (table C.1.2 characters become U+0020, table B.1
    characters are dropped), normalized with NFKC using the Unicode 3.2
    database, and then checked against the prohibited-output and
    bidirectional rules.

    :param data:
        The string to SASLprep.
    :param prohibit_unassigned_code_points:
        RFC 3454 and RFCs for various SASL mechanisms distinguish between
        `queries` (unassigned code points allowed) and
        `stored strings` (unassigned code points prohibited).  Defaults
        to ``True`` (unassigned code points are prohibited).
    :return: The SASLprep'ed version of `data`.  An input that is empty,
        or becomes empty after the mapping step, yields ``""``.
    :raises ValueError: if the bidirectional check fails or the result
        contains a prohibited character.
    """
    if prohibit_unassigned_code_points:
        prohibited = _PROHIBITED + (stringprep.in_table_a1,)
    else:
        prohibited = _PROHIBITED

    # RFC3454 section 2, step 1 - Map
    # RFC4013 section 2.1 mappings
    # Map Non-ASCII space characters to SPACE (U+0020). Map
    # commonly mapped to nothing characters to, well, nothing.
    in_table_c12 = stringprep.in_table_c12
    in_table_b1 = stringprep.in_table_b1
    data = "".join(
        ["\u0020" if in_table_c12(elt) else elt
         for elt in data if not in_table_b1(elt)])

    # RFC3454 section 2, step 2 - Normalize
    # RFC4013 section 2.2 normalization
    data = unicodedata.ucd_3_2_0.normalize('NFKC', data)

    # Nothing is left to check on an empty string, and the bidirectional
    # test below indexes data[0] / data[-1], which would otherwise raise
    # IndexError instead of returning a result.
    if not data:
        return data

    in_table_d1 = stringprep.in_table_d1
    if in_table_d1(data[0]):
        if not in_table_d1(data[-1]):
            # RFC3454, Section 6, #3. If a string contains any
            # RandALCat character, the first and last characters
            # MUST be RandALCat characters.
            raise ValueError("SASLprep: failed bidirectional check")
        # RFC3454, Section 6, #2. If a string contains any RandALCat
        # character, it MUST NOT contain any LCat character.
        prohibited = prohibited + (stringprep.in_table_d2,)
    else:
        # RFC3454, Section 6, #3. Following the logic of #3, if
        # the first character is not a RandALCat, no other character
        # can be either.
        prohibited = prohibited + (in_table_d1,)

    # RFC3454 section 2, step 3 and 4 - Prohibit and check bidi
    for char in data:
        if any(in_table(char) for in_table in prohibited):
            raise ValueError(
                "SASLprep: failed prohibited character check")

    return data

View File

@ -1,7 +1,7 @@
import logging
import re
import struct
from hashlib import sha256, md5
from hashlib import sha256, md5, sha384, sha512
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
@ -477,7 +477,7 @@ class PDFStandardSecurityHandlerV4(PDFStandardSecurityHandler):
class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
supported_revisions = (5,)
supported_revisions = (5, 6)
def init_params(self):
super().init_params()
@ -499,29 +499,84 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
return None
def authenticate(self, password):
password = password.encode('utf-8')[:127]
hash = sha256(password)
hash.update(self.o_validation_salt)
hash.update(self.u)
if hash.digest() == self.o_hash:
hash = sha256(password)
hash.update(self.o_key_salt)
hash.update(self.u)
cipher = Cipher(algorithms.AES(hash.digest()),
password = self._normalize_password(password)
hash = self._password_hash(password, self.o_validation_salt, self.u)
if hash == self.o_hash:
hash = self._password_hash(password, self.o_key_salt, self.u)
cipher = Cipher(algorithms.AES(hash),
modes.CBC(b'\0' * 16),
backend=default_backend())
return cipher.decryptor().update(self.oe)
hash = sha256(password)
hash.update(self.u_validation_salt)
if hash.digest() == self.u_hash:
hash = sha256(password)
hash.update(self.u_key_salt)
cipher = Cipher(algorithms.AES(hash.digest()),
hash = self._password_hash(password, self.u_validation_salt)
if hash == self.u_hash:
hash = self._password_hash(password, self.u_key_salt)
cipher = Cipher(algorithms.AES(hash),
modes.CBC(b'\0' * 16),
backend=default_backend())
return cipher.decryptor().update(self.ue)
return None
def _normalize_password(self, password):
if self.r == 6:
# saslprep expects non-empty strings, apparently
if not password:
return b''
from ._saslprep import saslprep
password = saslprep(password)
return password.encode('utf-8')[:127]
def _password_hash(self, password, salt, vector=None):
"""
Compute password hash depending on revision number
"""
if self.r == 5:
return self._r5_password(password, salt, vector)
return self._r6_password(password, salt[0:8], vector)
def _r5_password(self, password, salt, vector):
"""
Compute the password for revision 5
"""
hash = sha256(password)
hash.update(salt)
if vector is not None:
hash.update(vector)
return hash.digest()
def _r6_password(self, password, salt, vector):
"""
Compute the password for revision 6
"""
initial_hash = sha256(password)
initial_hash.update(salt)
if vector is not None:
initial_hash.update(vector)
k = initial_hash.digest()
hashes = (sha256, sha384, sha512)
round_no = last_byte_val = 0
while round_no < 64 or last_byte_val > round_no - 32:
k1 = (password + k + (vector or b'')) * 64
e = self._aes_cbc_encrypt(
key=k[:16], iv=k[16:32], data=k1
)
# compute the first 16 bytes of e,
# interpreted as an unsigned integer mod 3
next_hash = hashes[self._bytes_mod_3(e[:16])]
k = next_hash(e).digest()
last_byte_val = e[len(e) - 1]
round_no += 1
return k[:32]
@staticmethod
def _bytes_mod_3(input_bytes):
# 256 is 1 mod 3, so we can just sum 'em
return sum(b % 3 for b in input_bytes) % 3
def _aes_cbc_encrypt(self, key, iv, data):
    """Encrypt ``data`` with AES in CBC mode and return the ciphertext.

    :param key: AES key bytes.
    :param iv: CBC initialization vector bytes.
    :param data: plaintext bytes; this helper applies no padding.
        NOTE(review): presumably callers must pass block-aligned data;
        confirm against the ``cryptography`` documentation.
    :return: the bytes produced by the encryptor's ``update`` followed
        by ``finalize``.
    """
    # Pass the backend explicitly, as the other Cipher constructions in
    # this class do; cryptography releases that still require the
    # argument raise TypeError without it.
    cipher = Cipher(algorithms.AES(key), modes.CBC(iv),
                    backend=default_backend())
    encryptor = cipher.encryptor()
    return encryptor.update(data) + encryptor.finalize()
def decrypt_aes256(self, objid, genno, data):
initialization_vector = data[:16]
ciphertext = data[16:]

Binary file not shown.

View File

@ -96,6 +96,12 @@ class TestPdf2Txt():
def test_encryption_aes256m(self):
    """Run the tool on ``encryption/aes-256-m.pdf`` with ``-P foo``."""
    sample = 'encryption/aes-256-m.pdf'
    run(sample, '-P foo')
def test_encryption_aes256_r6_user(self):
    """Run on ``encryption/aes-256-r6.pdf`` with ``-P usersecret``."""
    sample = 'encryption/aes-256-r6.pdf'
    run(sample, '-P usersecret')
def test_encryption_aes256_r6_owner(self):
    """Run on ``encryption/aes-256-r6.pdf`` with ``-P ownersecret``."""
    sample = 'encryption/aes-256-r6.pdf'
    run(sample, '-P ownersecret')
def test_encryption_base(self):
    """Run the tool on ``encryption/base.pdf`` with ``-P foo``."""
    sample = 'encryption/base.pdf'
    run(sample, '-P foo')