Convert fontname to str if it is bytes in HTMLConverter (#734)
* Convert fontname to str if it is bytes
* Add CHANGELOG.md
pull/733/head^2
parent
ae7f315746
commit
e27cd54aff
|
@ -1,17 +1,26 @@
|
||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
All notable changes in pdfminer.six will be documented in this file.
|
All notable changes in pdfminer.six will be documented in this file.
|
||||||
|
|
||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||||
|
|
||||||
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- `TypeError` in HTMLConverter when using a bytes fontname ([#734](https://github.com/pdfminer/pdfminer.six/pull/734))
|
||||||
|
|
||||||
## [20220319]
|
## [20220319]
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
||||||
- Export type annotations from pypi package per PEP561 ([#679](https://github.com/pdfminer/pdfminer.six/pull/679))
|
- Export type annotations from pypi package per PEP561 ([#679](https://github.com/pdfminer/pdfminer.six/pull/679))
|
||||||
- Support for identity cmaps ([#626](https://github.com/pdfminer/pdfminer.six/pull/626))
|
- Support for identity cmaps ([#626](https://github.com/pdfminer/pdfminer.six/pull/626))
|
||||||
- Add support for PDF page labels ([#680](https://github.com/pdfminer/pdfminer.six/pull/680))
|
- Add support for PDF page labels ([#680](https://github.com/pdfminer/pdfminer.six/pull/680))
|
||||||
- Installation of Pillow as an optional extra dependency ([#714](https://github.com/pdfminer/pdfminer.six/pull/714))
|
- Installation of Pillow as an optional extra dependency ([#714](https://github.com/pdfminer/pdfminer.six/pull/714))
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
||||||
- Handle decompression error due to CRC checksum error ([#637](https://github.com/pdfminer/pdfminer.six/pull/637))
|
- Handle decompression error due to CRC checksum error ([#637](https://github.com/pdfminer/pdfminer.six/pull/637))
|
||||||
- Regression (since 20191107) in `LTLayoutContainer.group_textboxes` that returned some text lines out of order ([#659](https://github.com/pdfminer/pdfminer.six/pull/659))
|
- Regression (since 20191107) in `LTLayoutContainer.group_textboxes` that returned some text lines out of order ([#659](https://github.com/pdfminer/pdfminer.six/pull/659))
|
||||||
- Add handling of JPXDecode filter to enable extraction of images for some pdfs ([#645](https://github.com/pdfminer/pdfminer.six/pull/645))
|
- Add handling of JPXDecode filter to enable extraction of images for some pdfs ([#645](https://github.com/pdfminer/pdfminer.six/pull/645))
|
||||||
|
|
|
@ -40,7 +40,7 @@ from .pdffont import PDFUnicodeNotDefined
|
||||||
from .pdfinterp import PDFGraphicState, PDFResourceManager
|
from .pdfinterp import PDFGraphicState, PDFResourceManager
|
||||||
from .pdfpage import PDFPage
|
from .pdfpage import PDFPage
|
||||||
from .pdftypes import PDFStream
|
from .pdftypes import PDFStream
|
||||||
from .utils import AnyIO, Point, Matrix, Rect, PathSegment
|
from .utils import AnyIO, Point, Matrix, Rect, PathSegment, make_compat_str
|
||||||
from .utils import apply_matrix_pt
|
from .utils import apply_matrix_pt
|
||||||
from .utils import bbox2str
|
from .utils import bbox2str
|
||||||
from .utils import enc
|
from .utils import enc
|
||||||
|
@ -633,7 +633,8 @@ class HTMLConverter(PDFConverter[AnyIO]):
|
||||||
render(child)
|
render(child)
|
||||||
self.end_div("textbox")
|
self.end_div("textbox")
|
||||||
elif isinstance(item, LTChar):
|
elif isinstance(item, LTChar):
|
||||||
self.put_text(item.get_text(), item.fontname, item.size)
|
fontname = make_compat_str(item.fontname)
|
||||||
|
self.put_text(item.get_text(), fontname, item.size)
|
||||||
elif isinstance(item, LTText):
|
elif isinstance(item, LTText):
|
||||||
self.write_text(item.get_text())
|
self.write_text(item.get_text())
|
||||||
return
|
return
|
||||||
|
|
|
@ -76,7 +76,10 @@ def make_compat_str(o: object) -> str:
|
||||||
"""Converts everything to string, if bytes guessing the encoding."""
|
"""Converts everything to string, if bytes guessing the encoding."""
|
||||||
if isinstance(o, bytes):
|
if isinstance(o, bytes):
|
||||||
enc = chardet.detect(o)
|
enc = chardet.detect(o)
|
||||||
return o.decode(enc["encoding"])
|
try:
|
||||||
|
return o.decode(enc["encoding"])
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
return str(o)
|
||||||
else:
|
else:
|
||||||
return str(o)
|
return str(o)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue