diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py
index 2a1c5f3..feb8557 100644
--- a/pdfminer/pdffont.py
+++ b/pdfminer/pdffont.py
@@ -383,10 +383,16 @@ class TrueTypeFont(object):
         self.fp = fp
         self.tables = {}
         self.fonttype = fp.read(4)
-        (ntables, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8))
-        for _ in range(ntables):
-            (name, tsum, offset, length) = struct.unpack('>4sLLL', fp.read(16))
-            self.tables[name] = (offset, length)
+        try:
+            (ntables, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8))
+            for _ in range(ntables):
+                (name, tsum, offset, length) = struct.unpack('>4sLLL', fp.read(16))
+                self.tables[name] = (offset, length)
+        except struct.error:
+            # Do not fail if there are not enough bytes to read. Even for
+            # corrupted PDFs we would like to get as much information as
+            # possible, so continue.
+            pass
         return
 
     def create_unicode_map(self):
diff --git a/samples/scancode/patchelf.pdf b/samples/scancode/patchelf.pdf
new file mode 100644
index 0000000..b28f78e
Binary files /dev/null and b/samples/scancode/patchelf.pdf differ
diff --git a/tests/test_tools_pdf2txt.py b/tests/test_tools_pdf2txt.py
index 70e6cf9..18be203 100644
--- a/tests/test_tools_pdf2txt.py
+++ b/tests/test_tools_pdf2txt.py
@@ -47,11 +47,15 @@ class TestDumpPDF():
     def test_7(self):
         run('../samples/contrib/','stamp-no')
     """
-
+    
     def test_8(self):
         run('../samples/contrib/','2b','-A -t xml')
 
     def test_9(self):
         run('../samples/nonfree/','175') # https://github.com/pdfminer/pdfminer.six/issues/65
+
+    def test_10(self):
+        run('../samples/scancode/','patchelf') # https://github.com/euske/pdfminer/issues/96
+
 if __name__ == '__main__':
     nose.runmodule()