Fix `AttributeError` when dumping a TOC with bytes destinations (#600)
* Fix an error when dumping a TOC
* Fix a bug where a TOC title variable is a bytes type
* Update CHANGELOG.md
* Update CHANGELOG.md
* Rename e() to escape() and merge two isinstance() checks
Co-authored-by: Pieter Marsman <pietermarsman@gmail.com>
pull/609/head
parent
a70f08818d
commit
047a246512
|
@ -12,7 +12,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||||
- `KeyError` when `'Encrypt'` but not `'ID'` present in `trailer` ([#594](https://github.com/pdfminer/pdfminer.six/pull/594))
|
- `KeyError` when `'Encrypt'` but not `'ID'` present in `trailer` ([#594](https://github.com/pdfminer/pdfminer.six/pull/594))
|
||||||
- Fix issue of ValueError and KeyError raised in PDFdocument and PDFparser ([#573](https://github.com/pdfminer/pdfminer.six/pull/574))
|
- Fix issue of ValueError and KeyError raised in PDFdocument and PDFparser ([#573](https://github.com/pdfminer/pdfminer.six/pull/574))
|
||||||
- Fix issue of TypeError: cannot unpack non-iterable PDFObjRef object, when unpacking the value of 'DW2' ([#529](https://github.com/pdfminer/pdfminer.six/pull/529))
|
- Fix issue of TypeError: cannot unpack non-iterable PDFObjRef object, when unpacking the value of 'DW2' ([#529](https://github.com/pdfminer/pdfminer.six/pull/529))
|
||||||
- `PermissionError` when creating temporary filepaths on windows when running tests ([#469](https://github.com/pdfminer/pdfminer.six/issues/469))
|
- Fix `PermissionError` when creating temporary filepaths on windows when running tests ([#484](https://github.com/pdfminer/pdfminer.six/pull/484))
|
||||||
|
- Fix `AttributeError` when dumping a TOC with bytes destinations ([#600](https://github.com/pdfminer/pdfminer.six/pull/600))
|
||||||
- Fix issue where some Chinese characters cannot be extracted correctly ([#593](https://github.com/pdfminer/pdfminer.six/pull/593))
|
- Fix issue where some Chinese characters cannot be extracted correctly ([#593](https://github.com/pdfminer/pdfminer.six/pull/593))
|
||||||
- Detecting trailer correctly when surrounded with needless whitespace ([#535](https://github.com/pdfminer/pdfminer.six/pull/535))
|
- Detecting trailer correctly when surrounded with needless whitespace ([#535](https://github.com/pdfminer/pdfminer.six/pull/535))
|
||||||
- Fix `.paint_path` logic for handling single line segments and extracting point-on-curve positions of Bézier path commands ([#530](https://github.com/pdfminer/pdfminer.six/pull/530))
|
- Fix `.paint_path` logic for handling single line segments and extracting point-on-curve positions of Bézier path commands ([#530](https://github.com/pdfminer/pdfminer.six/pull/530))
|
||||||
|
|
|
@ -22,7 +22,7 @@ logging.basicConfig()
|
||||||
ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
|
ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
|
||||||
|
|
||||||
|
|
||||||
def e(s):
|
def escape(s):
|
||||||
if isinstance(s, bytes):
|
if isinstance(s, bytes):
|
||||||
s = str(s, 'latin-1')
|
s = str(s, 'latin-1')
|
||||||
return ESC_PAT.sub(lambda m: '&#%d;' % ord(m.group(0)), s)
|
return ESC_PAT.sub(lambda m: '&#%d;' % ord(m.group(0)), s)
|
||||||
|
@ -52,7 +52,7 @@ def dumpxml(out, obj, codec=None):
|
||||||
return
|
return
|
||||||
|
|
||||||
if isinstance(obj, ((str,), bytes)):
|
if isinstance(obj, ((str,), bytes)):
|
||||||
out.write('<string size="%d">%s</string>' % (len(obj), e(obj)))
|
out.write('<string size="%d">%s</string>' % (len(obj), escape(obj)))
|
||||||
return
|
return
|
||||||
|
|
||||||
if isinstance(obj, PDFStream):
|
if isinstance(obj, PDFStream):
|
||||||
|
@ -66,7 +66,7 @@ def dumpxml(out, obj, codec=None):
|
||||||
out.write('\n</props>\n')
|
out.write('\n</props>\n')
|
||||||
if codec == 'text':
|
if codec == 'text':
|
||||||
data = obj.get_data()
|
data = obj.get_data()
|
||||||
out.write('<data size="%d">%s</data>\n' % (len(data), e(data)))
|
out.write('<data size="%d">%s</data>\n' % (len(data), escape(data)))
|
||||||
out.write('</stream>')
|
out.write('</stream>')
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -135,7 +135,7 @@ def dumpoutline(outfp, fname, objids, pagenos, password='',
|
||||||
in enumerate(PDFPage.create_pages(doc), 1)}
|
in enumerate(PDFPage.create_pages(doc), 1)}
|
||||||
|
|
||||||
def resolve_dest(dest):
|
def resolve_dest(dest):
|
||||||
if isinstance(dest, str):
|
if isinstance(dest, (str, bytes)):
|
||||||
dest = resolve1(doc.get_dest(dest))
|
dest = resolve1(doc.get_dest(dest))
|
||||||
elif isinstance(dest, PSLiteral):
|
elif isinstance(dest, PSLiteral):
|
||||||
dest = resolve1(doc.get_dest(dest.name))
|
dest = resolve1(doc.get_dest(dest.name))
|
||||||
|
@ -161,7 +161,7 @@ def dumpoutline(outfp, fname, objids, pagenos, password='',
|
||||||
'D'):
|
'D'):
|
||||||
dest = resolve_dest(action['D'])
|
dest = resolve_dest(action['D'])
|
||||||
pageno = pages[dest[0].objid]
|
pageno = pages[dest[0].objid]
|
||||||
s = e(title).encode('utf-8', 'xmlcharrefreplace')
|
s = escape(title)
|
||||||
outfp.write('<outline level="{!r}" title="{}">\n'.format(level, s))
|
outfp.write('<outline level="{!r}" title="{}">\n'.format(level, s))
|
||||||
if dest is not None:
|
if dest is not None:
|
||||||
outfp.write('<dest>')
|
outfp.write('<dest>')
|
||||||
|
|
Loading…
Reference in New Issue