From 047a24651287a1003b27eeba83e60cb9914ca2a2 Mon Sep 17 00:00:00 2001 From: estshorter <1430311+estshorter@users.noreply.github.com> Date: Tue, 31 Aug 2021 04:31:32 +0900 Subject: [PATCH] Fix `AttributeError` when dumping a TOC with bytes destinations (#600) * Fix an error when dumping a TOC * Fix a bug that a TOC title variable is a bytes type * Update CHANGELOG.md * Update CHANGELOG.md * Rename e() to escape() and merge two isinstance() checks Co-authored-by: Pieter Marsman --- CHANGELOG.md | 3 ++- tools/dumppdf.py | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f1e1c3..0c55ccb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - `KeyError` when `'Encrypt'` but not `'ID'` present in `trailer` ([#594](https://github.com/pdfminer/pdfminer.six/pull/594)) - Fix issue of ValueError and KeyError rasied in PDFdocument and PDFparser ([#573](https://github.com/pdfminer/pdfminer.six/pull/574)) - Fix issue of TypeError: cannot unpack non-iterable PDFObjRef object, when unpacking the value of 'DW2' ([#529](https://github.com/pdfminer/pdfminer.six/pull/529)) -- `PermissionError` when creating temporary filepaths on windows when running tests ([#469](https://github.com/pdfminer/pdfminer.six/issues/469)) +- Fix `PermissionError` when creating temporary filepaths on windows when running tests ([#484](https://github.com/pdfminer/pdfminer.six/pull/484)) +- Fix `AttributeError` when dumping a TOC with bytes destinations ([#600](https://github.com/pdfminer/pdfminer.six/pull/600)) - Fix issue of some Chinese characters can not be extracted correctly ([#593](https://github.com/pdfminer/pdfminer.six/pull/593)) - Detecting trailer correctly when surrounded with needless whitespace ([#535](https://github.com/pdfminer/pdfminer.six/pull/535)) - Fix `.paint_path` logic for handling single line segments and extracting point-on-curve 
positions of Beziér path commands ([#530](https://github.com/pdfminer/pdfminer.six/pull/530)) diff --git a/tools/dumppdf.py b/tools/dumppdf.py index 0aa7b45..8baddc8 100755 --- a/tools/dumppdf.py +++ b/tools/dumppdf.py @@ -22,7 +22,7 @@ logging.basicConfig() ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]') -def e(s): +def escape(s): if isinstance(s, bytes): s = str(s, 'latin-1') return ESC_PAT.sub(lambda m: '&#%d;' % ord(m.group(0)), s) @@ -52,7 +52,7 @@ def dumpxml(out, obj, codec=None): return if isinstance(obj, ((str,), bytes)): - out.write('%s' % (len(obj), e(obj))) + out.write('%s' % (len(obj), escape(obj))) return if isinstance(obj, PDFStream): @@ -66,7 +66,7 @@ def dumpxml(out, obj, codec=None): out.write('\n\n') if codec == 'text': data = obj.get_data() - out.write('%s\n' % (len(data), e(data))) + out.write('%s\n' % (len(data), escape(data))) out.write('') return @@ -135,7 +135,7 @@ def dumpoutline(outfp, fname, objids, pagenos, password='', in enumerate(PDFPage.create_pages(doc), 1)} def resolve_dest(dest): - if isinstance(dest, str): + if isinstance(dest, (str, bytes)): dest = resolve1(doc.get_dest(dest)) elif isinstance(dest, PSLiteral): dest = resolve1(doc.get_dest(dest.name)) @@ -161,7 +161,7 @@ def dumpoutline(outfp, fname, objids, pagenos, password='', 'D'): dest = resolve_dest(action['D']) pageno = pages[dest[0].objid] - s = e(title).encode('utf-8', 'xmlcharrefreplace') + s = escape(title) outfp.write('\n'.format(level, s)) if dest is not None: outfp.write('')