diff --git a/pdflib/layout.py b/pdflib/layout.py index 99651e9..f6395af 100644 --- a/pdflib/layout.py +++ b/pdflib/layout.py @@ -365,7 +365,8 @@ class TextBox(LayoutContainer): s = '' y0 = -INF for obj in line: - margin = abs(obj.fontsize * ratio) + if not isinstance(obj, TextItem): continue + margin = obj.get_margin(ratio) if obj.y1+margin < y0: s += ' ' s += obj.text @@ -376,7 +377,8 @@ class TextBox(LayoutContainer): s = '' x1 = INF for obj in line: - margin = abs(obj.fontsize * ratio) + if not isinstance(obj, TextItem): continue + margin = obj.get_margin(ratio) if x1 < obj.x0-margin: s += ' ' s += obj.text diff --git a/pdflib/pdf2txt.py b/pdflib/pdf2txt.py index d26b2f2..70e39f9 100755 --- a/pdflib/pdf2txt.py +++ b/pdflib/pdf2txt.py @@ -263,7 +263,7 @@ def main(argv): debug = 0 cmapdir = 'CMap' cdbcmapdir = 'CDBCMap' - codec = 'ascii' + codec = 'utf-8' pagenos = set() maxpages = 0 outtype = 'html'