From 65eb0cef826a241787e34332de7994f940b8c9df Mon Sep 17 00:00:00 2001
From: Healthi <hariprasad@healthi.in>
Date: Wed, 20 Jun 2018 17:17:03 +0530
Subject: [PATCH 1/5] decode cid: 160 and 173 to spaces

---
 pdfminer/latin_enc.py | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/pdfminer/latin_enc.py b/pdfminer/latin_enc.py
index 13886c8..ef0f7e8 100644
--- a/pdfminer/latin_enc.py
+++ b/pdfminer/latin_enc.py
@@ -213,6 +213,8 @@ ENCODING = [
   ('six', 54, 54, 54, 54),
   ('slash', 47, 47, 47, 47),
   ('space', 32, 32, 32, 32),
+  ('space', None, 202, 160, None),
+  ('space', None, 202, 173, None),
   ('sterling', 163, 163, 163, 163),
   ('t', 116, 116, 116, 116),
   ('thorn', None, None, 254, 254),

From 95b65536afcaee37f630c33e4564621db652b806 Mon Sep 17 00:00:00 2001
From: Guglielmetti Philippe <philippe.guglielmetti@bobst.com>
Date: Thu, 21 Jun 2018 09:28:55 +0200
Subject: [PATCH 2/5] render_string() now takes 3 parameters, not 5

---
 pdfminer/pdfinterp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index a14f64a..b7a0ca7 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -769,7 +769,7 @@ class PDFPageInterpreter(object):
             if settings.STRICT:
                 raise PDFInterpreterError('No font specified!')
             return
-        self.device.render_string(self.textstate, seq, self.ncs, self.graphicstate.copy())
+        self.device.render_string(self.textstate, seq)#, self.ncs, self.graphicstate.copy())
         return
 
     # show

From 70624a64dd060b995443ab741ef9994502bbdc07 Mon Sep 17 00:00:00 2001
From: Guglielmetti Philippe <philippe.guglielmetti@bobst.com>
Date: Thu, 21 Jun 2018 09:49:45 +0200
Subject: [PATCH 3/5] render_string() now takes 3 parameters, not 5 (reverted
 from commit 95b65536afcaee37f630c33e4564621db652b806)

---
 pdfminer/pdfinterp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index b7a0ca7..a14f64a 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -769,7 +769,7 @@ class PDFPageInterpreter(object):
             if settings.STRICT:
                 raise PDFInterpreterError('No font specified!')
             return
-        self.device.render_string(self.textstate, seq)#, self.ncs, self.graphicstate.copy())
+        self.device.render_string(self.textstate, seq, self.ncs, self.graphicstate.copy())
         return
 
     # show

From 1db260609ecfbf701e260e328c85efc36c03ceb0 Mon Sep 17 00:00:00 2001
From: Goulu <github@goulu.net>
Date: Thu, 21 Jun 2018 10:21:26 +0200
Subject: [PATCH 4/5] render_string must have 5 params in all PDFDevice classes
 (#158)

---
 pdfminer/pdfdevice.py | 394 +++++++++++++++++++++---------------------
 1 file changed, 197 insertions(+), 197 deletions(-)

diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py
index ed54fd2..03a5f0e 100644
--- a/pdfminer/pdfdevice.py
+++ b/pdfminer/pdfdevice.py
@@ -1,197 +1,197 @@
-# -*- coding: utf-8 -*-
-
-import six
-
-from .pdffont import PDFUnicodeNotDefined
-
-from . import utils
-
-##  PDFDevice
-##
-class PDFDevice(object):
-
-    def __init__(self, rsrcmgr):
-        self.rsrcmgr = rsrcmgr
-        self.ctm = None
-        return
-
-    def __repr__(self):
-        return '<PDFDevice>'
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        self.close()
-
-    def close(self):
-        return
-
-    def set_ctm(self, ctm):
-        self.ctm = ctm
-        return
-
-    def begin_tag(self, tag, props=None):
-        return
-
-    def end_tag(self):
-        return
-
-    def do_tag(self, tag, props=None):
-        return
-
-    def begin_page(self, page, ctm):
-        return
-
-    def end_page(self, page):
-        return
-
-    def begin_figure(self, name, bbox, matrix):
-        return
-
-    def end_figure(self, name):
-        return
-
-    def paint_path(self, graphicstate, stroke, fill, evenodd, path):
-        return
-
-    def render_image(self, name, stream):
-        return
-
-    def render_string(self, textstate, seq):
-        return
-
-
-##  PDFTextDevice
-##
-class PDFTextDevice(PDFDevice):
-
-    def render_string(self, textstate, seq, ncs, graphicstate):
-        matrix = utils.mult_matrix(textstate.matrix, self.ctm)
-        font = textstate.font
-        fontsize = textstate.fontsize
-        scaling = textstate.scaling * .01
-        charspace = textstate.charspace * scaling
-        wordspace = textstate.wordspace * scaling
-        rise = textstate.rise
-        if font.is_multibyte():
-            wordspace = 0
-        dxscale = .001 * fontsize * scaling
-        if font.is_vertical():
-            textstate.linematrix = self.render_string_vertical(
-                seq, matrix, textstate.linematrix, font, fontsize,
-                scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate)
-        else:
-            textstate.linematrix = self.render_string_horizontal(
-                seq, matrix, textstate.linematrix, font, fontsize,
-                scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate)
-        return
-
-    def render_string_horizontal(self, seq, matrix, pos,
-                                 font, fontsize, scaling, charspace, wordspace,
-                                 rise, dxscale, ncs, graphicstate):
-        (x, y) = pos
-        needcharspace = False
-        for obj in seq:
-            if utils.isnumber(obj):
-                x -= obj*dxscale
-                needcharspace = True
-            else:
-                for cid in font.decode(obj):
-                    if needcharspace:
-                        x += charspace
-                    x += self.render_char(utils.translate_matrix(matrix, (x, y)),
-                                          font, fontsize, scaling, rise, cid,
-                                          ncs, graphicstate)
-                    if cid == 32 and wordspace:
-                        x += wordspace
-                    needcharspace = True
-        return (x, y)
-
-    def render_string_vertical(self, seq, matrix, pos,
-                               font, fontsize, scaling, charspace, wordspace,
-                               rise, dxscale, ncs, graphicstate):
-        (x, y) = pos
-        needcharspace = False
-        for obj in seq:
-            if utils.isnumber(obj):
-                y -= obj*dxscale
-                needcharspace = True
-            else:
-                for cid in font.decode(obj):
-                    if needcharspace:
-                        y += charspace
-                    y += self.render_char(utils.translate_matrix(matrix, (x, y)),
-                                          font, fontsize, scaling, rise, cid,
-                                          ncs, graphicstate)
-                    if cid == 32 and wordspace:
-                        y += wordspace
-                    needcharspace = True
-        return (x, y)
-
-    def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs, graphicstate):
-        return 0
-
-
-##  TagExtractor
-##
-class TagExtractor(PDFDevice):
-
-    def __init__(self, rsrcmgr, outfp, codec='utf-8'):
-        PDFDevice.__init__(self, rsrcmgr)
-        self.outfp = outfp
-        self.codec = codec
-        self.pageno = 0
-        self._stack = []
-        return
-
-    def render_string(self, textstate, seq):
-        font = textstate.font
-        text = ''
-        for obj in seq:
-            if isinstance(obj, six.text_type):
-                obj = utils.make_compat_bytes(obj)
-            if not isinstance(obj, six.binary_type):
-                continue
-            chars = font.decode(obj)
-            for cid in chars:
-                try:
-                    char = font.to_unichr(cid)
-                    text += char
-                except PDFUnicodeNotDefined:
-                    print(chars)
-                    pass
-        self.outfp.write(utils.enc(text, self.codec))
-        return
-
-    def begin_page(self, page, ctm):
-        output = '<page id="%s" bbox="%s" rotate="%d">' % (self.pageno, utils.bbox2str(page.mediabox), page.rotate)
-        self.outfp.write(utils.make_compat_bytes(output))
-        return
-
-    def end_page(self, page):
-        self.outfp.write(utils.make_compat_bytes('</page>\n'))
-        self.pageno += 1
-        return
-
-    def begin_tag(self, tag, props=None):
-        s = ''
-        if isinstance(props, dict):
-            s = ''.join(' %s="%s"' % (utils.enc(k), utils.enc(str(v))) for (k, v)
-                        in sorted(props.iteritems()))
-        out_s = '<%s%s>' % (utils.enc(tag.name), s)
-        self.outfp.write(utils.make_compat_bytes(out_s))
-        self._stack.append(tag)
-        return
-
-    def end_tag(self):
-        assert self._stack, str(self.pageno)
-        tag = self._stack.pop(-1)
-        out_s = '</%s>' % utils.enc(tag.name)
-        self.outfp.write(utils.make_compat_bytes(out_s))
-        return
-
-    def do_tag(self, tag, props=None):
-        self.begin_tag(tag, props)
-        self._stack.pop(-1)
-        return
+# -*- coding: utf-8 -*-
+
+import six
+
+from .pdffont import PDFUnicodeNotDefined
+
+from . import utils
+
+##  PDFDevice
+##
+class PDFDevice(object):
+
+    def __init__(self, rsrcmgr):
+        self.rsrcmgr = rsrcmgr
+        self.ctm = None
+        return
+
+    def __repr__(self):
+        return '<PDFDevice>'
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def close(self):
+        return
+
+    def set_ctm(self, ctm):
+        self.ctm = ctm
+        return
+
+    def begin_tag(self, tag, props=None):
+        return
+
+    def end_tag(self):
+        return
+
+    def do_tag(self, tag, props=None):
+        return
+
+    def begin_page(self, page, ctm):
+        return
+
+    def end_page(self, page):
+        return
+
+    def begin_figure(self, name, bbox, matrix):
+        return
+
+    def end_figure(self, name):
+        return
+
+    def paint_path(self, graphicstate, stroke, fill, evenodd, path):
+        return
+
+    def render_image(self, name, stream):
+        return
+
+    def render_string(self, textstate, seq, ncs, graphicstate):
+        return
+
+
+##  PDFTextDevice
+##
+class PDFTextDevice(PDFDevice):
+
+    def render_string(self, textstate, seq, ncs, graphicstate):
+        matrix = utils.mult_matrix(textstate.matrix, self.ctm)
+        font = textstate.font
+        fontsize = textstate.fontsize
+        scaling = textstate.scaling * .01
+        charspace = textstate.charspace * scaling
+        wordspace = textstate.wordspace * scaling
+        rise = textstate.rise
+        if font.is_multibyte():
+            wordspace = 0
+        dxscale = .001 * fontsize * scaling
+        if font.is_vertical():
+            textstate.linematrix = self.render_string_vertical(
+                seq, matrix, textstate.linematrix, font, fontsize,
+                scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate)
+        else:
+            textstate.linematrix = self.render_string_horizontal(
+                seq, matrix, textstate.linematrix, font, fontsize,
+                scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate)
+        return
+
+    def render_string_horizontal(self, seq, matrix, pos,
+                                 font, fontsize, scaling, charspace, wordspace,
+                                 rise, dxscale, ncs, graphicstate):
+        (x, y) = pos
+        needcharspace = False
+        for obj in seq:
+            if utils.isnumber(obj):
+                x -= obj*dxscale
+                needcharspace = True
+            else:
+                for cid in font.decode(obj):
+                    if needcharspace:
+                        x += charspace
+                    x += self.render_char(utils.translate_matrix(matrix, (x, y)),
+                                          font, fontsize, scaling, rise, cid,
+                                          ncs, graphicstate)
+                    if cid == 32 and wordspace:
+                        x += wordspace
+                    needcharspace = True
+        return (x, y)
+
+    def render_string_vertical(self, seq, matrix, pos,
+                               font, fontsize, scaling, charspace, wordspace,
+                               rise, dxscale, ncs, graphicstate):
+        (x, y) = pos
+        needcharspace = False
+        for obj in seq:
+            if utils.isnumber(obj):
+                y -= obj*dxscale
+                needcharspace = True
+            else:
+                for cid in font.decode(obj):
+                    if needcharspace:
+                        y += charspace
+                    y += self.render_char(utils.translate_matrix(matrix, (x, y)),
+                                          font, fontsize, scaling, rise, cid,
+                                          ncs, graphicstate)
+                    if cid == 32 and wordspace:
+                        y += wordspace
+                    needcharspace = True
+        return (x, y)
+
+    def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs, graphicstate):
+        return 0
+
+
+##  TagExtractor
+##
+class TagExtractor(PDFDevice):
+
+    def __init__(self, rsrcmgr, outfp, codec='utf-8'):
+        PDFDevice.__init__(self, rsrcmgr)
+        self.outfp = outfp
+        self.codec = codec
+        self.pageno = 0
+        self._stack = []
+        return
+
+    def render_string(self, textstate, seq, ncs, graphicstate):
+        font = textstate.font
+        text = ''
+        for obj in seq:
+            if isinstance(obj, six.text_type):
+                obj = utils.make_compat_bytes(obj)
+            if not isinstance(obj, six.binary_type):
+                continue
+            chars = font.decode(obj)
+            for cid in chars:
+                try:
+                    char = font.to_unichr(cid)
+                    text += char
+                except PDFUnicodeNotDefined:
+                    print(chars)
+                    pass
+        self.outfp.write(utils.enc(text, self.codec))
+        return
+
+    def begin_page(self, page, ctm):
+        output = '<page id="%s" bbox="%s" rotate="%d">' % (self.pageno, utils.bbox2str(page.mediabox), page.rotate)
+        self.outfp.write(utils.make_compat_bytes(output))
+        return
+
+    def end_page(self, page):
+        self.outfp.write(utils.make_compat_bytes('</page>\n'))
+        self.pageno += 1
+        return
+
+    def begin_tag(self, tag, props=None):
+        s = ''
+        if isinstance(props, dict):
+            s = ''.join(' %s="%s"' % (utils.enc(k), utils.enc(str(v))) for (k, v)
+                        in sorted(props.iteritems()))
+        out_s = '<%s%s>' % (utils.enc(tag.name), s)
+        self.outfp.write(utils.make_compat_bytes(out_s))
+        self._stack.append(tag)
+        return
+
+    def end_tag(self):
+        assert self._stack, str(self.pageno)
+        tag = self._stack.pop(-1)
+        out_s = '</%s>' % utils.enc(tag.name)
+        self.outfp.write(utils.make_compat_bytes(out_s))
+        return
+
+    def do_tag(self, tag, props=None):
+        self.begin_tag(tag, props)
+        self._stack.pop(-1)
+        return

From 7b08cdbff9c0ff92691174f92abd0552bd5c2fba Mon Sep 17 00:00:00 2001
From: Charles Reid <charlesreid1@gmail.com>
Date: Thu, 21 Jun 2018 12:19:48 -0700
Subject: [PATCH 5/5] apply dos2unix to files in pdfminer/ and tools/ to remove
 \r\n windows line endings

---
 pdfminer/pdfdevice.py | 394 +++++++++++++++++++++---------------------
 tools/pdf2txt.spec    |  60 +++----
 tools/pdfdiff.py      | 234 ++++++++++++-------------
 tools/pdfdiff.spec    |  58 +++----
 4 files changed, 373 insertions(+), 373 deletions(-)

diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py
index 03a5f0e..0d4c175 100644
--- a/pdfminer/pdfdevice.py
+++ b/pdfminer/pdfdevice.py
@@ -1,197 +1,197 @@
-# -*- coding: utf-8 -*-
-
-import six
-
-from .pdffont import PDFUnicodeNotDefined
-
-from . import utils
-
-##  PDFDevice
-##
-class PDFDevice(object):
-
-    def __init__(self, rsrcmgr):
-        self.rsrcmgr = rsrcmgr
-        self.ctm = None
-        return
-
-    def __repr__(self):
-        return '<PDFDevice>'
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        self.close()
-
-    def close(self):
-        return
-
-    def set_ctm(self, ctm):
-        self.ctm = ctm
-        return
-
-    def begin_tag(self, tag, props=None):
-        return
-
-    def end_tag(self):
-        return
-
-    def do_tag(self, tag, props=None):
-        return
-
-    def begin_page(self, page, ctm):
-        return
-
-    def end_page(self, page):
-        return
-
-    def begin_figure(self, name, bbox, matrix):
-        return
-
-    def end_figure(self, name):
-        return
-
-    def paint_path(self, graphicstate, stroke, fill, evenodd, path):
-        return
-
-    def render_image(self, name, stream):
-        return
-
-    def render_string(self, textstate, seq, ncs, graphicstate):
-        return
-
-
-##  PDFTextDevice
-##
-class PDFTextDevice(PDFDevice):
-
-    def render_string(self, textstate, seq, ncs, graphicstate):
-        matrix = utils.mult_matrix(textstate.matrix, self.ctm)
-        font = textstate.font
-        fontsize = textstate.fontsize
-        scaling = textstate.scaling * .01
-        charspace = textstate.charspace * scaling
-        wordspace = textstate.wordspace * scaling
-        rise = textstate.rise
-        if font.is_multibyte():
-            wordspace = 0
-        dxscale = .001 * fontsize * scaling
-        if font.is_vertical():
-            textstate.linematrix = self.render_string_vertical(
-                seq, matrix, textstate.linematrix, font, fontsize,
-                scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate)
-        else:
-            textstate.linematrix = self.render_string_horizontal(
-                seq, matrix, textstate.linematrix, font, fontsize,
-                scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate)
-        return
-
-    def render_string_horizontal(self, seq, matrix, pos,
-                                 font, fontsize, scaling, charspace, wordspace,
-                                 rise, dxscale, ncs, graphicstate):
-        (x, y) = pos
-        needcharspace = False
-        for obj in seq:
-            if utils.isnumber(obj):
-                x -= obj*dxscale
-                needcharspace = True
-            else:
-                for cid in font.decode(obj):
-                    if needcharspace:
-                        x += charspace
-                    x += self.render_char(utils.translate_matrix(matrix, (x, y)),
-                                          font, fontsize, scaling, rise, cid,
-                                          ncs, graphicstate)
-                    if cid == 32 and wordspace:
-                        x += wordspace
-                    needcharspace = True
-        return (x, y)
-
-    def render_string_vertical(self, seq, matrix, pos,
-                               font, fontsize, scaling, charspace, wordspace,
-                               rise, dxscale, ncs, graphicstate):
-        (x, y) = pos
-        needcharspace = False
-        for obj in seq:
-            if utils.isnumber(obj):
-                y -= obj*dxscale
-                needcharspace = True
-            else:
-                for cid in font.decode(obj):
-                    if needcharspace:
-                        y += charspace
-                    y += self.render_char(utils.translate_matrix(matrix, (x, y)),
-                                          font, fontsize, scaling, rise, cid,
-                                          ncs, graphicstate)
-                    if cid == 32 and wordspace:
-                        y += wordspace
-                    needcharspace = True
-        return (x, y)
-
-    def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs, graphicstate):
-        return 0
-
-
-##  TagExtractor
-##
-class TagExtractor(PDFDevice):
-
-    def __init__(self, rsrcmgr, outfp, codec='utf-8'):
-        PDFDevice.__init__(self, rsrcmgr)
-        self.outfp = outfp
-        self.codec = codec
-        self.pageno = 0
-        self._stack = []
-        return
-
-    def render_string(self, textstate, seq, ncs, graphicstate):
-        font = textstate.font
-        text = ''
-        for obj in seq:
-            if isinstance(obj, six.text_type):
-                obj = utils.make_compat_bytes(obj)
-            if not isinstance(obj, six.binary_type):
-                continue
-            chars = font.decode(obj)
-            for cid in chars:
-                try:
-                    char = font.to_unichr(cid)
-                    text += char
-                except PDFUnicodeNotDefined:
-                    print(chars)
-                    pass
-        self.outfp.write(utils.enc(text, self.codec))
-        return
-
-    def begin_page(self, page, ctm):
-        output = '<page id="%s" bbox="%s" rotate="%d">' % (self.pageno, utils.bbox2str(page.mediabox), page.rotate)
-        self.outfp.write(utils.make_compat_bytes(output))
-        return
-
-    def end_page(self, page):
-        self.outfp.write(utils.make_compat_bytes('</page>\n'))
-        self.pageno += 1
-        return
-
-    def begin_tag(self, tag, props=None):
-        s = ''
-        if isinstance(props, dict):
-            s = ''.join(' %s="%s"' % (utils.enc(k), utils.enc(str(v))) for (k, v)
-                        in sorted(props.iteritems()))
-        out_s = '<%s%s>' % (utils.enc(tag.name), s)
-        self.outfp.write(utils.make_compat_bytes(out_s))
-        self._stack.append(tag)
-        return
-
-    def end_tag(self):
-        assert self._stack, str(self.pageno)
-        tag = self._stack.pop(-1)
-        out_s = '</%s>' % utils.enc(tag.name)
-        self.outfp.write(utils.make_compat_bytes(out_s))
-        return
-
-    def do_tag(self, tag, props=None):
-        self.begin_tag(tag, props)
-        self._stack.pop(-1)
-        return
+# -*- coding: utf-8 -*-
+
+import six
+
+from .pdffont import PDFUnicodeNotDefined
+
+from . import utils
+
+##  PDFDevice
+##
+class PDFDevice(object):
+
+    def __init__(self, rsrcmgr):
+        self.rsrcmgr = rsrcmgr
+        self.ctm = None
+        return
+
+    def __repr__(self):
+        return '<PDFDevice>'
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def close(self):
+        return
+
+    def set_ctm(self, ctm):
+        self.ctm = ctm
+        return
+
+    def begin_tag(self, tag, props=None):
+        return
+
+    def end_tag(self):
+        return
+
+    def do_tag(self, tag, props=None):
+        return
+
+    def begin_page(self, page, ctm):
+        return
+
+    def end_page(self, page):
+        return
+
+    def begin_figure(self, name, bbox, matrix):
+        return
+
+    def end_figure(self, name):
+        return
+
+    def paint_path(self, graphicstate, stroke, fill, evenodd, path):
+        return
+
+    def render_image(self, name, stream):
+        return
+
+    def render_string(self, textstate, seq, ncs, graphicstate):
+        return
+
+
+##  PDFTextDevice
+##
+class PDFTextDevice(PDFDevice):
+
+    def render_string(self, textstate, seq, ncs, graphicstate):
+        matrix = utils.mult_matrix(textstate.matrix, self.ctm)
+        font = textstate.font
+        fontsize = textstate.fontsize
+        scaling = textstate.scaling * .01
+        charspace = textstate.charspace * scaling
+        wordspace = textstate.wordspace * scaling
+        rise = textstate.rise
+        if font.is_multibyte():
+            wordspace = 0
+        dxscale = .001 * fontsize * scaling
+        if font.is_vertical():
+            textstate.linematrix = self.render_string_vertical(
+                seq, matrix, textstate.linematrix, font, fontsize,
+                scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate)
+        else:
+            textstate.linematrix = self.render_string_horizontal(
+                seq, matrix, textstate.linematrix, font, fontsize,
+                scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate)
+        return
+
+    def render_string_horizontal(self, seq, matrix, pos,
+                                 font, fontsize, scaling, charspace, wordspace,
+                                 rise, dxscale, ncs, graphicstate):
+        (x, y) = pos
+        needcharspace = False
+        for obj in seq:
+            if utils.isnumber(obj):
+                x -= obj*dxscale
+                needcharspace = True
+            else:
+                for cid in font.decode(obj):
+                    if needcharspace:
+                        x += charspace
+                    x += self.render_char(utils.translate_matrix(matrix, (x, y)),
+                                          font, fontsize, scaling, rise, cid,
+                                          ncs, graphicstate)
+                    if cid == 32 and wordspace:
+                        x += wordspace
+                    needcharspace = True
+        return (x, y)
+
+    def render_string_vertical(self, seq, matrix, pos,
+                               font, fontsize, scaling, charspace, wordspace,
+                               rise, dxscale, ncs, graphicstate):
+        (x, y) = pos
+        needcharspace = False
+        for obj in seq:
+            if utils.isnumber(obj):
+                y -= obj*dxscale
+                needcharspace = True
+            else:
+                for cid in font.decode(obj):
+                    if needcharspace:
+                        y += charspace
+                    y += self.render_char(utils.translate_matrix(matrix, (x, y)),
+                                          font, fontsize, scaling, rise, cid,
+                                          ncs, graphicstate)
+                    if cid == 32 and wordspace:
+                        y += wordspace
+                    needcharspace = True
+        return (x, y)
+
+    def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs, graphicstate):
+        return 0
+
+
+##  TagExtractor
+##
+class TagExtractor(PDFDevice):
+
+    def __init__(self, rsrcmgr, outfp, codec='utf-8'):
+        PDFDevice.__init__(self, rsrcmgr)
+        self.outfp = outfp
+        self.codec = codec
+        self.pageno = 0
+        self._stack = []
+        return
+
+    def render_string(self, textstate, seq, ncs, graphicstate):
+        font = textstate.font
+        text = ''
+        for obj in seq:
+            if isinstance(obj, six.text_type):
+                obj = utils.make_compat_bytes(obj)
+            if not isinstance(obj, six.binary_type):
+                continue
+            chars = font.decode(obj)
+            for cid in chars:
+                try:
+                    char = font.to_unichr(cid)
+                    text += char
+                except PDFUnicodeNotDefined:
+                    print(chars)
+                    pass
+        self.outfp.write(utils.enc(text, self.codec))
+        return
+
+    def begin_page(self, page, ctm):
+        output = '<page id="%s" bbox="%s" rotate="%d">' % (self.pageno, utils.bbox2str(page.mediabox), page.rotate)
+        self.outfp.write(utils.make_compat_bytes(output))
+        return
+
+    def end_page(self, page):
+        self.outfp.write(utils.make_compat_bytes('</page>\n'))
+        self.pageno += 1
+        return
+
+    def begin_tag(self, tag, props=None):
+        s = ''
+        if isinstance(props, dict):
+            s = ''.join(' %s="%s"' % (utils.enc(k), utils.enc(str(v))) for (k, v)
+                        in sorted(props.iteritems()))
+        out_s = '<%s%s>' % (utils.enc(tag.name), s)
+        self.outfp.write(utils.make_compat_bytes(out_s))
+        self._stack.append(tag)
+        return
+
+    def end_tag(self):
+        assert self._stack, str(self.pageno)
+        tag = self._stack.pop(-1)
+        out_s = '</%s>' % utils.enc(tag.name)
+        self.outfp.write(utils.make_compat_bytes(out_s))
+        return
+
+    def do_tag(self, tag, props=None):
+        self.begin_tag(tag, props)
+        self._stack.pop(-1)
+        return
diff --git a/tools/pdf2txt.spec b/tools/pdf2txt.spec
index 8baeb77..c0073e6 100644
--- a/tools/pdf2txt.spec
+++ b/tools/pdf2txt.spec
@@ -1,30 +1,30 @@
-# -*- mode: python -*-
-
-block_cipher = None
-
-
-a = Analysis(['pdf2txt.py'],
-             pathex=['C:\\Dev\\Python\\pdfminer.six\\tools'],
-             binaries=[],
-             datas=[],
-             hiddenimports=[],
-             hookspath=[],
-             runtime_hooks=[],
-             excludes=['django','matplotlib','PIL','numpy','qt5'],
-             win_no_prefer_redirects=False,
-             win_private_assemblies=False,
-             cipher=block_cipher)
-
-pyz = PYZ(a.pure, a.zipped_data,
-             cipher=block_cipher)
-exe = EXE(pyz,
-          a.scripts,
-          a.binaries,
-          a.zipfiles,
-          a.datas,
-          name='pdf2txt',
-          debug=False,
-          strip=False,
-          upx=True,
-          runtime_tmpdir=None,
-          console=True )
+# -*- mode: python -*-
+
+block_cipher = None
+
+
+a = Analysis(['pdf2txt.py'],
+             pathex=['C:\\Dev\\Python\\pdfminer.six\\tools'],
+             binaries=[],
+             datas=[],
+             hiddenimports=[],
+             hookspath=[],
+             runtime_hooks=[],
+             excludes=['django','matplotlib','PIL','numpy','qt5'],
+             win_no_prefer_redirects=False,
+             win_private_assemblies=False,
+             cipher=block_cipher)
+
+pyz = PYZ(a.pure, a.zipped_data,
+             cipher=block_cipher)
+exe = EXE(pyz,
+          a.scripts,
+          a.binaries,
+          a.zipfiles,
+          a.datas,
+          name='pdf2txt',
+          debug=False,
+          strip=False,
+          upx=True,
+          runtime_tmpdir=None,
+          console=True )
diff --git a/tools/pdfdiff.py b/tools/pdfdiff.py
index b01e2f4..f5b8ac4 100644
--- a/tools/pdfdiff.py
+++ b/tools/pdfdiff.py
@@ -1,117 +1,117 @@
-#!/usr/bin/env python
-
-"""
-compares rwo pdf files.
-"""
-import sys
-import logging
-import six
-import pdfminer.settings
-pdfminer.settings.STRICT = False
-import pdfminer.high_level
-import pdfminer.layout
-
-def compare(file1,file2,**args):
-    if args.get('_py2_no_more_posargs',None) is not None:
-        raise ValueError("Too many positional arguments passed.")
-
-
-    # If any LAParams group arguments were passed, create an LAParams object and
-    # populate with given args. Otherwise, set it to None.
-    if args.get('laparams',None) is None:
-        laparams = pdfminer.layout.LAParams()
-        for param in ("all_texts", "detect_vertical", "word_margin", "char_margin", "line_margin", "boxes_flow"):
-            paramv = args.get(param, None)
-            if paramv is not None:
-                laparams[param]=paramv
-        args['laparams']=laparams
-                
-    s1=six.StringIO()
-    with open(file1, "rb") as fp:
-        pdfminer.high_level.extract_text_to_fp(fp,s1, **args)
-    
-    s2=six.StringIO()
-    with open(file2, "rb") as fp:
-        pdfminer.high_level.extract_text_to_fp(fp,s2, **args)
-    
-    import difflib
-    s1.seek(0)
-    s2.seek(0)
-    s1,s2=s1.readlines(), s2.readlines()
-    
-    import os.path
-    try:
-        extension = os.path.splitext(args['outfile'])[1][1:4]
-        if extension.lower()=='htm':
-            return difflib.HtmlDiff().make_file(s1,s2)
-    except KeyError:
-        pass
-    return difflib.unified_diff(s1,s2,n=args['context_lines'])
-
-
-# main
-def main(args=None):
-    import argparse
-    P = argparse.ArgumentParser(description=__doc__)
-    P.add_argument("file1", type=str, default=None, help="File 1 to compare.")
-    P.add_argument("file2", type=str, default=None, help="File 2 to compare.")
-    P.add_argument("-o", "--outfile", type=str, default="-", 
-        help="Output file (default/'-' is stdout) \
-        if .htm or .html, create an HTML table (or a complete HTML file containing the table) \
-        showing a side by side, line by line comparison of text with inter-line \
-        and intra-line change highlights. \
-        The table can be generated in either full or contextual difference mode."
-    )
-    P.add_argument("-N", "--context-lines", default=3, type=int, help = "context lines shown")
-    P.add_argument("-d", "--debug", default=False, action="store_true", help="Debug output.")
-
-    # params for pdf2txt
-    P.add_argument("-p", "--pagenos", type=str, help="Comma-separated list of page numbers to parse. Included for legacy applications, use --page-numbers for more idiomatic argument entry.")
-    P.add_argument("--page-numbers", type=int, default=None, nargs="+", help="Alternative to --pagenos with space-separated numbers; supercedes --pagenos where it is used.")
-    P.add_argument("-m", "--maxpages", type=int, default=0, help = "Maximum pages to parse")
-    P.add_argument("-P", "--password", type=str, default="", help = "Decryption password for both PDFs")
-    P.add_argument("-t", "--output_type", type=str, default="text", help = "pdf2txt type: text|html|xml|tag (default is text)")
-    P.add_argument("-c", "--codec", type=str, default="utf-8", help = "Text encoding")
-    P.add_argument("-s", "--scale", type=float, default=1.0, help = "Scale")
-    P.add_argument("-A", "--all-texts", default=None, action="store_true", help="LAParams all texts")
-    P.add_argument("-V", "--detect-vertical", default=None, action="store_true", help="LAParams detect vertical")
-    P.add_argument("-W", "--word-margin", type=float, default=None, help = "LAParams word margin")
-    P.add_argument("-M", "--char-margin", type=float, default=None, help = "LAParams char margin")
-    P.add_argument("-L", "--line-margin", type=float, default=None, help = "LAParams line margin")
-    P.add_argument("-F", "--boxes-flow", type=float, default=None, help = "LAParams boxes flow")
-    P.add_argument("-Y", "--layoutmode", default="normal", type=str, help="HTML Layout Mode")
-    P.add_argument("-n", "--no-laparams", default=False, action="store_true", help = "Pass None as LAParams")
-    P.add_argument("-R", "--rotation", default=0, type=int, help = "Rotation")
-    P.add_argument("-O", "--output-dir", default=None, help="Output directory for images")
-    P.add_argument("-C", "--disable-caching", default=False, action="store_true", help="Disable caching")
-    P.add_argument("-S", "--strip-control", default=False, action="store_true", help="Strip control in XML mode")
-    
-
-    A = P.parse_args(args=args)
-
-    if A.page_numbers:
-        A.page_numbers = set([x-1 for x in A.page_numbers])
-    if A.pagenos:
-        A.page_numbers = set([int(x)-1 for x in A.pagenos.split(",")])
-
-    if six.PY2 and sys.stdin.encoding:
-        A.password = A.password.decode(sys.stdin.encoding)
-
-    if A.output_type == "text" and A.outfile != "-":
-        for override, alttype in (  (".htm",  "html"),
-                                    (".html", "html"),
-                                    (".xml",  "xml" ),
-                                    (".tag",  "tag" ) ):
-            if A.outfile.endswith(override):
-                A.output_type = alttype
-
-    if A.outfile == "-":
-        outfp = sys.stdout
-    else:
-        outfp = open(A.outfile, "w", encoding='utf-8')
-    outfp.writelines(compare(**vars(A)))
-    outfp.close()
-    return 0
-
-
-if __name__ == '__main__': sys.exit(main())
+#!/usr/bin/env python
+
+"""
+compares two pdf files.
+"""
+import sys
+import logging
+import six
+import pdfminer.settings
+pdfminer.settings.STRICT = False
+import pdfminer.high_level
+import pdfminer.layout
+
+def compare(file1,file2,**args):
+    if args.get('_py2_no_more_posargs',None) is not None:
+        raise ValueError("Too many positional arguments passed.")
+
+
+    # If any LAParams group arguments were passed, create an LAParams object and
+    # populate with given args. Otherwise, set it to None.
+    if args.get('laparams',None) is None:
+        laparams = pdfminer.layout.LAParams()
+        for param in ("all_texts", "detect_vertical", "word_margin", "char_margin", "line_margin", "boxes_flow"):
+            paramv = args.get(param, None)
+            if paramv is not None:
+                laparams[param]=paramv
+        args['laparams']=laparams
+                
+    s1=six.StringIO()
+    with open(file1, "rb") as fp:
+        pdfminer.high_level.extract_text_to_fp(fp,s1, **args)
+    
+    s2=six.StringIO()
+    with open(file2, "rb") as fp:
+        pdfminer.high_level.extract_text_to_fp(fp,s2, **args)
+    
+    import difflib
+    s1.seek(0)
+    s2.seek(0)
+    s1,s2=s1.readlines(), s2.readlines()
+    
+    import os.path
+    try:
+        extension = os.path.splitext(args['outfile'])[1][1:4]
+        if extension.lower()=='htm':
+            return difflib.HtmlDiff().make_file(s1,s2)
+    except KeyError:
+        pass
+    return difflib.unified_diff(s1,s2,n=args['context_lines'])
+
+
+# main
+def main(args=None):
+    import argparse
+    P = argparse.ArgumentParser(description=__doc__)
+    P.add_argument("file1", type=str, default=None, help="File 1 to compare.")
+    P.add_argument("file2", type=str, default=None, help="File 2 to compare.")
+    P.add_argument("-o", "--outfile", type=str, default="-", 
+        help="Output file (default/'-' is stdout) \
+        if .htm or .html, create an HTML table (or a complete HTML file containing the table) \
+        showing a side by side, line by line comparison of text with inter-line \
+        and intra-line change highlights. \
+        The table can be generated in either full or contextual difference mode."
+    )
+    P.add_argument("-N", "--context-lines", default=3, type=int, help = "context lines shown")
+    P.add_argument("-d", "--debug", default=False, action="store_true", help="Debug output.")
+
+    # params for pdf2txt
+    P.add_argument("-p", "--pagenos", type=str, help="Comma-separated list of page numbers to parse. Included for legacy applications, use --page-numbers for more idiomatic argument entry.")
+    P.add_argument("--page-numbers", type=int, default=None, nargs="+", help="Alternative to --pagenos with space-separated numbers; supercedes --pagenos where it is used.")
+    P.add_argument("-m", "--maxpages", type=int, default=0, help = "Maximum pages to parse")
+    P.add_argument("-P", "--password", type=str, default="", help = "Decryption password for both PDFs")
+    P.add_argument("-t", "--output_type", type=str, default="text", help = "pdf2txt type: text|html|xml|tag (default is text)")
+    P.add_argument("-c", "--codec", type=str, default="utf-8", help = "Text encoding")
+    P.add_argument("-s", "--scale", type=float, default=1.0, help = "Scale")
+    P.add_argument("-A", "--all-texts", default=None, action="store_true", help="LAParams all texts")
+    P.add_argument("-V", "--detect-vertical", default=None, action="store_true", help="LAParams detect vertical")
+    P.add_argument("-W", "--word-margin", type=float, default=None, help = "LAParams word margin")
+    P.add_argument("-M", "--char-margin", type=float, default=None, help = "LAParams char margin")
+    P.add_argument("-L", "--line-margin", type=float, default=None, help = "LAParams line margin")
+    P.add_argument("-F", "--boxes-flow", type=float, default=None, help = "LAParams boxes flow")
+    P.add_argument("-Y", "--layoutmode", default="normal", type=str, help="HTML Layout Mode")
+    P.add_argument("-n", "--no-laparams", default=False, action="store_true", help = "Pass None as LAParams")
+    P.add_argument("-R", "--rotation", default=0, type=int, help = "Rotation")
+    P.add_argument("-O", "--output-dir", default=None, help="Output directory for images")
+    P.add_argument("-C", "--disable-caching", default=False, action="store_true", help="Disable caching")
+    P.add_argument("-S", "--strip-control", default=False, action="store_true", help="Strip control in XML mode")
+    
+
+    A = P.parse_args(args=args)
+
+    if A.page_numbers:
+        A.page_numbers = set([x-1 for x in A.page_numbers])
+    if A.pagenos:
+        A.page_numbers = set([int(x)-1 for x in A.pagenos.split(",")])
+
+    if six.PY2 and sys.stdin.encoding:
+        A.password = A.password.decode(sys.stdin.encoding)
+
+    if A.output_type == "text" and A.outfile != "-":
+        for override, alttype in (  (".htm",  "html"),
+                                    (".html", "html"),
+                                    (".xml",  "xml" ),
+                                    (".tag",  "tag" ) ):
+            if A.outfile.endswith(override):
+                A.output_type = alttype
+
+    if A.outfile == "-":
+        outfp = sys.stdout
+    else:
+        outfp = open(A.outfile, "w", encoding='utf-8')
+    outfp.writelines(compare(**vars(A)))
+    outfp.close()
+    return 0
+
+
+if __name__ == '__main__': sys.exit(main())
diff --git a/tools/pdfdiff.spec b/tools/pdfdiff.spec
index e90a37f..6872b32 100644
--- a/tools/pdfdiff.spec
+++ b/tools/pdfdiff.spec
@@ -1,29 +1,29 @@
-# -*- mode: python -*-
-
-block_cipher = None
-
-
-a = Analysis(['pdfdiff.py'],
-             pathex=['C:\\Dev\\Python\\pdfminer.six\\tools'],
-             binaries=[],
-             datas=[],
-             hiddenimports=[],
-             hookspath=[],
-             runtime_hooks=[],
-             excludes=['django','matplotlib','PIL','numpy','qt5'],
-             win_no_prefer_redirects=False,
-             win_private_assemblies=False,
-             cipher=block_cipher)
-pyz = PYZ(a.pure, a.zipped_data,
-             cipher=block_cipher)
-exe = EXE(pyz,
-          a.scripts,
-          a.binaries,
-          a.zipfiles,
-          a.datas,
-          name='pdfdiff',
-          debug=False,
-          strip=False,
-          upx=True,
-          runtime_tmpdir=None,
-          console=True )
+# -*- mode: python -*-
+
+block_cipher = None
+
+
+a = Analysis(['pdfdiff.py'],
+             pathex=['C:\\Dev\\Python\\pdfminer.six\\tools'],
+             binaries=[],
+             datas=[],
+             hiddenimports=[],
+             hookspath=[],
+             runtime_hooks=[],
+             excludes=['django','matplotlib','PIL','numpy','qt5'],
+             win_no_prefer_redirects=False,
+             win_private_assemblies=False,
+             cipher=block_cipher)
+pyz = PYZ(a.pure, a.zipped_data,
+             cipher=block_cipher)
+exe = EXE(pyz,
+          a.scripts,
+          a.binaries,
+          a.zipfiles,
+          a.datas,
+          name='pdfdiff',
+          debug=False,
+          strip=False,
+          upx=True,
+          runtime_tmpdir=None,
+          console=True )