diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 02545e8..af70348 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -112,7 +112,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): evenodd, gstate.scolor, gstate.ncolor)) return - def render_char(self, matrix, font, fontsize, scaling, rise, cid): + def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs, graphicstate): try: text = font.to_unichr(cid) assert isinstance(text, six.text_type), str(type(text)) @@ -120,7 +120,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): text = self.handle_undefined_char(font, cid) textwidth = font.char_width(cid) textdisp = font.char_disp(cid) - item = LTChar(matrix, font, fontsize, scaling, rise, text, textwidth, textdisp) + item = LTChar(matrix, font, fontsize, scaling, rise, text, textwidth, textdisp, ncs, graphicstate) self.cur_item.add(item) return item.adv @@ -520,8 +520,9 @@ class XMLConverter(PDFConverter): render(child) self.write('\n') elif isinstance(item, LTChar): - self.write('' % - (enc(item.fontname, None), bbox2str(item.bbox), item.size)) + self.write('' % + (enc(item.fontname, None), bbox2str(item.bbox), + item.ncs.name, item.graphicstate.ncolor, item.size)) self.write_text(item.get_text()) self.write('\n') elif isinstance(item, LTText): diff --git a/pdfminer/layout.py b/pdfminer/layout.py index 451d4e4..587c221 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -228,11 +228,13 @@ class LTAnno(LTItem, LTText): class LTChar(LTComponent, LTText): def __init__(self, matrix, font, fontsize, scaling, rise, - text, textwidth, textdisp): + text, textwidth, textdisp, ncs, graphicstate): LTText.__init__(self) self._text = text self.matrix = matrix self.fontname = font.fontname + self.ncs = ncs + self.graphicstate = graphicstate self.adv = textwidth * fontsize * scaling # compute the boundary rectangle. if font.is_vertical(): diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index 9435101..a9799ed 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -63,7 +63,7 @@ class PDFDevice(object): ## class PDFTextDevice(PDFDevice): - def render_string(self, textstate, seq): + def render_string(self, textstate, seq, ncs, graphicstate): matrix = utils.mult_matrix(textstate.matrix, self.ctm) font = textstate.font fontsize = textstate.fontsize @@ -77,15 +77,16 @@ class PDFTextDevice(PDFDevice): if font.is_vertical(): textstate.linematrix = self.render_string_vertical( seq, matrix, textstate.linematrix, font, fontsize, - scaling, charspace, wordspace, rise, dxscale) + scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate) else: textstate.linematrix = self.render_string_horizontal( seq, matrix, textstate.linematrix, font, fontsize, - scaling, charspace, wordspace, rise, dxscale) + scaling, charspace, wordspace, rise, dxscale, ncs, graphicstate) return def render_string_horizontal(self, seq, matrix, pos, - font, fontsize, scaling, charspace, wordspace, rise, dxscale): + font, fontsize, scaling, charspace, wordspace, + rise, dxscale, ncs, graphicstate): (x, y) = pos needcharspace = False for obj in seq: @@ -97,14 +98,16 @@ class PDFTextDevice(PDFDevice): if needcharspace: x += charspace x += self.render_char(utils.translate_matrix(matrix, (x, y)), - font, fontsize, scaling, rise, cid) + font, fontsize, scaling, rise, cid, + ncs, graphicstate) if cid == 32 and wordspace: x += wordspace needcharspace = True return (x, y) def render_string_vertical(self, seq, matrix, pos, - font, fontsize, scaling, charspace, wordspace, rise, dxscale): + font, fontsize, scaling, charspace, wordspace, + rise, dxscale, ncs, graphicstate): (x, y) = pos needcharspace = False for obj in seq: @@ -116,13 +119,14 @@ class PDFTextDevice(PDFDevice): if needcharspace: y += charspace y += self.render_char(utils.translate_matrix(matrix, (x, y)), - font, fontsize, scaling, rise, cid) + font, fontsize, scaling, rise, cid, + ncs, graphicstate) if cid == 32 and wordspace: y += wordspace needcharspace = True return (x, y) - def render_char(self, matrix, font, fontsize, scaling, rise, cid): + def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs, graphicstate): return 0 diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 0c2328d..a14f64a 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -586,13 +586,13 @@ class PDFPageInterpreter(object): # setgray-stroking def do_G(self, gray): - self.graphicstate.color = gray + self.graphicstate.scolor = gray #self.do_CS(LITERAL_DEVICE_GRAY) return # setgray-non-stroking def do_g(self, gray): - self.graphicstate.color = gray + self.graphicstate.ncolor = gray #self.do_cs(LITERAL_DEVICE_GRAY) return @@ -769,7 +769,7 @@ class PDFPageInterpreter(object): if settings.STRICT: raise PDFInterpreterError('No font specified!') return - self.device.render_string(self.textstate, seq) + self.device.render_string(self.textstate, seq, self.ncs, self.graphicstate.copy()) return # show