From 90957380569a0dd14c1d9b570bf3844ee295ea8a Mon Sep 17 00:00:00 2001 From: "yusuke.shinyama.dummy" Date: Thu, 14 May 2009 14:25:20 +0000 Subject: [PATCH] git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@97 1aa58f4a-7d42-0410-adbc-911cccaed67c --- pdflib/layout.py | 6 ++++-- pdflib/pdf2txt.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pdflib/layout.py b/pdflib/layout.py index 99651e9..f6395af 100644 --- a/pdflib/layout.py +++ b/pdflib/layout.py @@ -365,7 +365,8 @@ class TextBox(LayoutContainer): s = '' y0 = -INF for obj in line: - margin = abs(obj.fontsize * ratio) + if not isinstance(obj, TextItem): continue + margin = obj.get_margin(ratio) if obj.y1+margin < y0: s += ' ' s += obj.text @@ -376,7 +377,8 @@ class TextBox(LayoutContainer): s = '' x1 = INF for obj in line: - margin = abs(obj.fontsize * ratio) + if not isinstance(obj, TextItem): continue + margin = obj.get_margin(ratio) if x1 < obj.x0-margin: s += ' ' s += obj.text diff --git a/pdflib/pdf2txt.py b/pdflib/pdf2txt.py index d26b2f2..70e39f9 100755 --- a/pdflib/pdf2txt.py +++ b/pdflib/pdf2txt.py @@ -263,7 +263,7 @@ def main(argv): debug = 0 cmapdir = 'CMap' cdbcmapdir = 'CDBCMap' - codec = 'ascii' + codec = 'utf-8' pagenos = set() maxpages = 0 outtype = 'html'