diff --git a/CHANGELOG.md b/CHANGELOG.md index a81cdd2..dcd2350 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [Unreleased] -Nothing here +### Fixed +- Interpret two's complement integer as unsigned integer ([#352](https://github.com/pdfminer/pdfminer.six/pull/352)) ## [20200104] - 2019-01-04 diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 3031a38..d8be5c6 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -1,34 +1,21 @@ +import hashlib as md5 +import logging import re import struct -import logging -import hashlib as md5 + try: - from Crypto.Cipher import ARC4 - from Crypto.Cipher import AES + from Crypto.Cipher import ARC4, AES from Crypto.Hash import SHA256 except ImportError: AES = SHA256 = None from . import arcfour as ARC4 -from .psparser import PSEOF -from .psparser import literal_name -from .psparser import LIT -from .psparser import KWD +from .psparser import PSEOF, literal_name, LIT, KWD from . 
import settings -from .pdftypes import PDFException -from .pdftypes import PDFTypeError -from .pdftypes import PDFStream -from .pdftypes import PDFObjectNotFound -from .pdftypes import decipher_all -from .pdftypes import int_value -from .pdftypes import str_value -from .pdftypes import list_value -from .pdftypes import dict_value -from .pdftypes import stream_value -from .pdfparser import PDFSyntaxError -from .pdfparser import PDFStreamParser -from .utils import choplist -from .utils import nunpack -from .utils import decode_text +from .pdftypes import PDFException, uint_value, PDFTypeError, PDFStream, \ + PDFObjectNotFound, decipher_all, int_value, str_value, list_value, \ + dict_value, stream_value +from .pdfparser import PDFSyntaxError, PDFStreamParser +from .utils import choplist, nunpack, decode_text log = logging.getLogger(__name__) @@ -307,7 +294,7 @@ class PDFStandardSecurityHandler: def init_params(self): self.v = int_value(self.param.get('V', 0)) self.r = int_value(self.param['R']) - self.p = int_value(self.param['P']) + self.p = uint_value(self.param['P'], 32) self.o = str_value(self.param['O']) self.u = str_value(self.param['U']) self.length = int_value(self.param.get('Length', 40)) @@ -348,7 +335,8 @@ class PDFStandardSecurityHandler: password = (password + self.PASSWORD_PADDING)[:32] # 1 hash = md5.md5(password) # 2 hash.update(self.o) # 3 - hash.update(struct.pack('<l', self.p)) # 4 + # See https://github.com/pdfminer/pdfminer.six/issues/186 + hash.update(struct.pack('<L', self.p)) # 4 hash.update(self.docid[0]) # 5 if self.r >= 4: if not self.encrypt_metadata: diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index f1252b1..14c729b 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -139,6 +139,15 @@ def num_value(x): return x +def uint_value(x, n_bits): + """Resolve number and interpret it as a two's-complement unsigned number""" + x = int_value(x) + if x > 0: + return x + else: + return x + 2**n_bits + + def str_value(x): x = resolve1(x) if not isinstance(x, bytes): diff --git a/pdfminer/utils.py b/pdfminer/utils.py index fa4fc52..feaa8a3 100644 --- a/pdfminer/utils.py +++ 
b/pdfminer/utils.py @@ -124,7 +124,7 @@ def apply_matrix_norm(m, v): # Utility functions def isnumber(x): - return isinstance(x, ((int,), float)) + return isinstance(x, (int, float)) def uniq(objs): diff --git a/samples/contrib/issue-00352-hash-twos-complement.pdf b/samples/contrib/issue-00352-hash-twos-complement.pdf new file mode 100644 index 0000000..d6fb340 Binary files /dev/null and b/samples/contrib/issue-00352-hash-twos-complement.pdf differ diff --git a/tests/test_tools_pdf2txt.py b/tests/test_tools_pdf2txt.py index d192f80..30bc3c8 100644 --- a/tests/test_tools_pdf2txt.py +++ b/tests/test_tools_pdf2txt.py @@ -35,7 +35,8 @@ class TestDumpPDF(): def test_nonfree_175(self): """Regression test for: - https://github.com/pdfminer/pdfminer.six/issues/65""" + https://github.com/pdfminer/pdfminer.six/issues/65 + """ run('nonfree/175.pdf') def test_nonfree_dmca(self): @@ -63,6 +64,13 @@ class TestDumpPDF(): """Regression test for # https://github.com/euske/pdfminer/issues/96""" run('scancode/patchelf.pdf') + def test_contrib_hash_two_complement(self): + """Check that unsigned integer is added correctly to encryption hash. + + See https://github.com/pdfminer/pdfminer.six/issues/186 + """ + run('contrib/issue-00352-hash-twos-complement.pdf') + class TestDumpImages: