Convert fontname to str if it is bytes in HTMLConverter (#734)
* Convert fontname to str if it is bytes * Add CHANGELOG.md pull/733/head^2
parent
ae7f315746
commit
e27cd54aff
|
@ -1,17 +1,26 @@
|
|||
# Changelog
|
||||
|
||||
All notable changes in pdfminer.six will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Fixed
|
||||
|
||||
- `TypeError` in HTMLConverter when using a bytes fontname ([#734](https://github.com/pdfminer/pdfminer.six/pull/734))
|
||||
|
||||
## [20220319]
|
||||
|
||||
### Added
|
||||
|
||||
- Export type annotations from pypi package per PEP561 ([#679](https://github.com/pdfminer/pdfminer.six/pull/679))
|
||||
- Support for identity cmaps ([#626](https://github.com/pdfminer/pdfminer.six/pull/626))
|
||||
- Add support for PDF page labels ([#680](https://github.com/pdfminer/pdfminer.six/pull/680))
|
||||
- Installation of Pillow as an optional extra dependency ([#714](https://github.com/pdfminer/pdfminer.six/pull/714))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Handle decompression error due to CRC checksum error ([#637](https://github.com/pdfminer/pdfminer.six/pull/637))
|
||||
- Regression (since 20191107) in `LTLayoutContainer.group_textboxes` that returned some text lines out of order ([#659](https://github.com/pdfminer/pdfminer.six/pull/659))
|
||||
- Add handling of JPXDecode filter to enable extraction of images for some PDFs ([#645](https://github.com/pdfminer/pdfminer.six/pull/645))
|
||||
|
|
|
@ -40,7 +40,7 @@ from .pdffont import PDFUnicodeNotDefined
|
|||
from .pdfinterp import PDFGraphicState, PDFResourceManager
|
||||
from .pdfpage import PDFPage
|
||||
from .pdftypes import PDFStream
|
||||
from .utils import AnyIO, Point, Matrix, Rect, PathSegment
|
||||
from .utils import AnyIO, Point, Matrix, Rect, PathSegment, make_compat_str
|
||||
from .utils import apply_matrix_pt
|
||||
from .utils import bbox2str
|
||||
from .utils import enc
|
||||
|
@ -633,7 +633,8 @@ class HTMLConverter(PDFConverter[AnyIO]):
|
|||
render(child)
|
||||
self.end_div("textbox")
|
||||
elif isinstance(item, LTChar):
|
||||
self.put_text(item.get_text(), item.fontname, item.size)
|
||||
fontname = make_compat_str(item.fontname)
|
||||
self.put_text(item.get_text(), fontname, item.size)
|
||||
elif isinstance(item, LTText):
|
||||
self.write_text(item.get_text())
|
||||
return
|
||||
|
|
|
@ -76,7 +76,10 @@ def make_compat_str(o: object) -> str:
|
|||
"""Converts everything to string, if bytes guessing the encoding."""
|
||||
if isinstance(o, bytes):
|
||||
enc = chardet.detect(o)
|
||||
try:
|
||||
return o.decode(enc["encoding"])
|
||||
except UnicodeDecodeError:
|
||||
return str(o)
|
||||
else:
|
||||
return str(o)
|
||||
|
||||
|
|
Loading…
Reference in New Issue