From 0a2d90c0510dfb9de6ed9dbb5de01d34aaef97ee Mon Sep 17 00:00:00 2001
From: cybjit
Date: Sun, 7 Sep 2014 18:34:11 +0200
Subject: [PATCH] pdf2txt: do not double encode stdout

---
 pdfminer/converter.py | 4 +++-
 tools/pdf2txt.py      | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index 285d826..b01616e 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -164,7 +164,9 @@ class TextConverter(PDFConverter):
         return
 
     def write_text(self, text):
-        self.outfp.write(text.encode(self.codec, 'ignore'))
+        if self.codec:
+            text = text.encode(self.codec, 'ignore')
+        self.outfp.write(text)
         return
 
     def receive_layout(self, ltpage):
diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py
index 61d878f..2cf1572 100755
--- a/tools/pdf2txt.py
+++ b/tools/pdf2txt.py
@@ -85,6 +85,8 @@ def main(argv):
         outfp = open(outfile, 'wb')
     else:
         outfp = sys.stdout
+        if outfp.encoding is not None:
+            codec = None
     if outtype == 'text':
         device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
                                imagewriter=imagewriter)