From 1db260609ecfbf701e260e328c85efc36c03ceb0 Mon Sep 17 00:00:00 2001 From: Goulu Date: Thu, 21 Jun 2018 10:21:26 +0200 Subject: [PATCH] render_string must have 5 params in all PDFDevice classes (#158) --- pdfminer/pdfdevice.py | 394 +++++++++++++++++++++--------------------- 1 file changed, 197 insertions(+), 197 deletions(-) diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index ed54fd2..03a5f0e 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -1,197 +1,197 @@ -# -*- coding: utf-8 -*- - -import six - -from .pdffont import PDFUnicodeNotDefined - -from . import utils - -## PDFDevice -## -class PDFDevice(object): - - def __init__(self, rsrcmgr): - self.rsrcmgr = rsrcmgr - self.ctm = None - return - - def __repr__(self): - return '' - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.close() - - def close(self): - return - - def set_ctm(self, ctm): - self.ctm = ctm - return - - def begin_tag(self, tag, props=None): - return - - def end_tag(self): - return - - def do_tag(self, tag, props=None): - return - - def begin_page(self, page, ctm): - return - - def end_page(self, page): - return - - def begin_figure(self, name, bbox, matrix): - return - - def end_figure(self, name): - return - - def paint_path(self, graphicstate, stroke, fill, evenodd, path): - return - - def render_image(self, name, stream): - return - - def render_string(self, textstate, seq): - return - - -## PDFTextDevice -## -class PDFTextDevice(PDFDevice): - - def render_string(self, textstate, seq, ncs, graphicstate): - matrix = utils.mult_matrix(textstate.matrix, self.ctm) - font = textstate.font - fontsize = textstate.fontsize - scaling = textstate.scaling * .01 - charspace = textstate.charspace * scaling - wordspace = textstate.wordspace * scaling - rise = textstate.rise - if font.is_multibyte(): - wordspace = 0 - dxscale = .001 * fontsize * scaling - if font.is_vertical(): - textstate.linematrix = self.render_string_vertical( - seq, matrix, textstate.linematrix, font, fontsize, - scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate) - else: - textstate.linematrix = self.render_string_horizontal( - seq, matrix, textstate.linematrix, font, fontsize, - scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate) - return - - def render_string_horizontal(self, seq, matrix, pos, - font, fontsize, scaling, charspace, wordspace, - rise, dxscale, ncs, graphicstate): - (x, y) = pos - needcharspace = False - for obj in seq: - if utils.isnumber(obj): - x -= obj*dxscale - needcharspace = True - else: - for cid in font.decode(obj): - if needcharspace: - x += charspace - x += self.render_char(utils.translate_matrix(matrix, (x, y)), - font, fontsize, scaling, rise, cid, - ncs, graphicstate) - if cid == 32 and wordspace: - x += wordspace - needcharspace = True - return (x, y) - - def render_string_vertical(self, seq, matrix, pos, - font, fontsize, scaling, charspace, wordspace, - rise, dxscale, ncs, graphicstate): - (x, y) = pos - needcharspace = False - for obj in seq: - if utils.isnumber(obj): - y -= obj*dxscale - needcharspace = True - else: - for cid in font.decode(obj): - if needcharspace: - y += charspace - y += self.render_char(utils.translate_matrix(matrix, (x, y)), - font, fontsize, scaling, rise, cid, - ncs, graphicstate) - if cid == 32 and wordspace: - y += wordspace - needcharspace = True - return (x, y) - - def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs, graphicstate): - return 0 - - -## TagExtractor -## -class TagExtractor(PDFDevice): - - def __init__(self, rsrcmgr, outfp, codec='utf-8'): - PDFDevice.__init__(self, rsrcmgr) - self.outfp = outfp - self.codec = codec - self.pageno = 0 - self._stack = [] - return - - def render_string(self, textstate, seq): - font = textstate.font - text = '' - for obj in seq: - if isinstance(obj, six.text_type): - obj = utils.make_compat_bytes(obj) - if not isinstance(obj, six.binary_type): - continue - chars = font.decode(obj) - for cid in chars: - try: - char = font.to_unichr(cid) - text += char - except PDFUnicodeNotDefined: - print(chars) - pass - self.outfp.write(utils.enc(text, self.codec)) - return - - def begin_page(self, page, ctm): - output = '' % (self.pageno, utils.bbox2str(page.mediabox), page.rotate) - self.outfp.write(utils.make_compat_bytes(output)) - return - - def end_page(self, page): - self.outfp.write(utils.make_compat_bytes('\n')) - self.pageno += 1 - return - - def begin_tag(self, tag, props=None): - s = '' - if isinstance(props, dict): - s = ''.join(' %s="%s"' % (utils.enc(k), utils.enc(str(v))) for (k, v) - in sorted(props.iteritems())) - out_s = '<%s%s>' % (utils.enc(tag.name), s) - self.outfp.write(utils.make_compat_bytes(out_s)) - self._stack.append(tag) - return - - def end_tag(self): - assert self._stack, str(self.pageno) - tag = self._stack.pop(-1) - out_s = '' % utils.enc(tag.name) - self.outfp.write(utils.make_compat_bytes(out_s)) - return - - def do_tag(self, tag, props=None): - self.begin_tag(tag, props) - self._stack.pop(-1) - return +# -*- coding: utf-8 -*- + +import six + +from .pdffont import PDFUnicodeNotDefined + +from . import utils + +## PDFDevice +## +class PDFDevice(object): + + def __init__(self, rsrcmgr): + self.rsrcmgr = rsrcmgr + self.ctm = None + return + + def __repr__(self): + return '' + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def close(self): + return + + def set_ctm(self, ctm): + self.ctm = ctm + return + + def begin_tag(self, tag, props=None): + return + + def end_tag(self): + return + + def do_tag(self, tag, props=None): + return + + def begin_page(self, page, ctm): + return + + def end_page(self, page): + return + + def begin_figure(self, name, bbox, matrix): + return + + def end_figure(self, name): + return + + def paint_path(self, graphicstate, stroke, fill, evenodd, path): + return + + def render_image(self, name, stream): + return + + def render_string(self, textstate, seq, ncs, graphicstate): + return + + +## PDFTextDevice +## +class PDFTextDevice(PDFDevice): + + def render_string(self, textstate, seq, ncs, graphicstate): + matrix = utils.mult_matrix(textstate.matrix, self.ctm) + font = textstate.font + fontsize = textstate.fontsize + scaling = textstate.scaling * .01 + charspace = textstate.charspace * scaling + wordspace = textstate.wordspace * scaling + rise = textstate.rise + if font.is_multibyte(): + wordspace = 0 + dxscale = .001 * fontsize * scaling + if font.is_vertical(): + textstate.linematrix = self.render_string_vertical( + seq, matrix, textstate.linematrix, font, fontsize, + scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate) + else: + textstate.linematrix = self.render_string_horizontal( + seq, matrix, textstate.linematrix, font, fontsize, + scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate) + return + + def render_string_horizontal(self, seq, matrix, pos, + font, fontsize, scaling, charspace, wordspace, + rise, dxscale, ncs, graphicstate): + (x, y) = pos + needcharspace = False + for obj in seq: + if utils.isnumber(obj): + x -= obj*dxscale + needcharspace = True + else: + for cid in font.decode(obj): + if needcharspace: + x += charspace + x += self.render_char(utils.translate_matrix(matrix, (x, y)), + font, fontsize, scaling, rise, cid, + ncs, graphicstate) + if cid == 32 and wordspace: + x += wordspace + needcharspace = True + return (x, y) + + def render_string_vertical(self, seq, matrix, pos, + font, fontsize, scaling, charspace, wordspace, + rise, dxscale, ncs, graphicstate): + (x, y) = pos + needcharspace = False + for obj in seq: + if utils.isnumber(obj): + y -= obj*dxscale + needcharspace = True + else: + for cid in font.decode(obj): + if needcharspace: + y += charspace + y += self.render_char(utils.translate_matrix(matrix, (x, y)), + font, fontsize, scaling, rise, cid, + ncs, graphicstate) + if cid == 32 and wordspace: + y += wordspace + needcharspace = True + return (x, y) + + def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs, graphicstate): + return 0 + + +## TagExtractor +## +class TagExtractor(PDFDevice): + + def __init__(self, rsrcmgr, outfp, codec='utf-8'): + PDFDevice.__init__(self, rsrcmgr) + self.outfp = outfp + self.codec = codec + self.pageno = 0 + self._stack = [] + return + + def render_string(self, textstate, seq, ncs, graphicstate): + font = textstate.font + text = '' + for obj in seq: + if isinstance(obj, six.text_type): + obj = utils.make_compat_bytes(obj) + if not isinstance(obj, six.binary_type): + continue + chars = font.decode(obj) + for cid in chars: + try: + char = font.to_unichr(cid) + text += char + except PDFUnicodeNotDefined: + print(chars) + pass + self.outfp.write(utils.enc(text, self.codec)) + return + + def begin_page(self, page, ctm): + output = '' % (self.pageno, utils.bbox2str(page.mediabox), page.rotate) + self.outfp.write(utils.make_compat_bytes(output)) + return + + def end_page(self, page): + self.outfp.write(utils.make_compat_bytes('\n')) + self.pageno += 1 + return + + def begin_tag(self, tag, props=None): + s = '' + if isinstance(props, dict): + s = ''.join(' %s="%s"' % (utils.enc(k), utils.enc(str(v))) for (k, v) + in sorted(props.iteritems())) + out_s = '<%s%s>' % (utils.enc(tag.name), s) + self.outfp.write(utils.make_compat_bytes(out_s)) + self._stack.append(tag) + return + + def end_tag(self): + assert self._stack, str(self.pageno) + tag = self._stack.pop(-1) + out_s = '' % utils.enc(tag.name) + self.outfp.write(utils.make_compat_bytes(out_s)) + return + + def do_tag(self, tag, props=None): + self.begin_tag(tag, props) + self._stack.pop(-1) + return