diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index c92f57d..1e15542 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -85,7 +85,8 @@ class PDFLayoutAnalyzer(PDFTextDevice):
             (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
             (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
             if x0 == x1 or y0 == y1:
-                self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1)))
+                self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1),
+                    stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
                 return
         if shape == 'mlllh':
             # rectangle
@@ -99,14 +100,16 @@ class PDFLayoutAnalyzer(PDFTextDevice):
             (x3, y3) = apply_matrix_pt(self.ctm, (x3, y3))
             if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or
                 (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)):
-                self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2)))
+                self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2),
+                    stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
                 return
         # other shapes
         pts = []
         for p in path:
             for i in range(1, len(p), 2):
                 pts.append(apply_matrix_pt(self.ctm, (p[i], p[i+1])))
-        self.cur_item.add(LTCurve(gstate.linewidth, pts))
+        self.cur_item.add(LTCurve(gstate.linewidth, pts, stroke, fill,
+            evenodd, gstate.scolor, gstate.ncolor))
         return
 
     def render_char(self, matrix, font, fontsize, scaling, rise, cid):
diff --git a/pdfminer/layout.py b/pdfminer/layout.py
index f0fe222..88fe370 100644
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@@ -154,10 +154,15 @@ class LTComponent(LTItem):
 ##
 class LTCurve(LTComponent):
 
-    def __init__(self, linewidth, pts):
+    def __init__(self, linewidth, pts, stroke = False, fill = False, evenodd = False, stroking_color = None, non_stroking_color = None):
         LTComponent.__init__(self, get_bound(pts))
         self.pts = pts
         self.linewidth = linewidth
+        self.stroke = stroke
+        self.fill = fill
+        self.evenodd = evenodd
+        self.stroking_color = stroking_color
+        self.non_stroking_color = non_stroking_color
         return
 
     def get_pts(self):
@@ -168,8 +173,8 @@ class LTCurve(LTComponent):
 ##
 class LTLine(LTCurve):
 
-    def __init__(self, linewidth, p0, p1):
-        LTCurve.__init__(self, linewidth, [p0, p1])
+    def __init__(self, linewidth, p0, p1, stroke = False, fill = False, evenodd = False, stroking_color = None, non_stroking_color = None):
+        LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd, stroking_color, non_stroking_color)
         return
 
 
@@ -177,9 +182,9 @@ class LTLine(LTCurve):
 ##
 class LTRect(LTCurve):
 
-    def __init__(self, linewidth, bbox):
+    def __init__(self, linewidth, bbox, stroke = False, fill = False, evenodd = False, stroking_color = None, non_stroking_color = None):
         (x0, y0, x1, y1) = bbox
-        LTCurve.__init__(self, linewidth, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)])
+        LTCurve.__init__(self, linewidth, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke, fill, evenodd, stroking_color, non_stroking_color)
         return
 
 
@@ -515,7 +520,7 @@ class LTLayoutContainer(LTContainer):
                            obj0.voverlap(obj1)) and
                           (obj0.hdistance(obj1) <
                            max(obj0.width, obj1.width) * laparams.char_margin))
-                
+
                 # valign: obj0 and obj1 is vertically aligned.
                 #
                 #   +------+
@@ -537,7 +542,7 @@ class LTLayoutContainer(LTContainer):
                            obj0.hoverlap(obj1)) and
                           (obj0.vdistance(obj1) <
                            max(obj0.height, obj1.height) * laparams.char_margin))
-                
+
                 if ((halign and isinstance(line, LTTextLineHorizontal)) or
                     (valign and isinstance(line, LTTextLineVertical))):
                     line.add(obj1)
@@ -631,7 +636,7 @@ class LTLayoutContainer(LTContainer):
         def key_obj(t):
             (c,d,_,_) = t
             return (c,d)
-        
+
         # XXX this still takes O(n^2)  :(
         dists = []
         for i in range(len(boxes)):
diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index e7ba104..255dd93 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -109,6 +109,12 @@ class PDFGraphicState(object):
         self.dash = None
         self.intent = None
         self.flatness = None
+
+        # stroking color
+        self.scolor = None
+
+        # non stroking color
+        self.ncolor = None
         return
 
     def copy(self):
@@ -120,13 +126,17 @@ class PDFGraphicState(object):
         obj.dash = self.dash
         obj.intent = self.intent
         obj.flatness = self.flatness
+        obj.scolor = self.scolor
+        obj.ncolor = self.ncolor
         return obj
 
     def __repr__(self):
         return ('<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, '
-                ' miterlimit=%r, dash=%r, intent=%r, flatness=%r>' %
+                ' miterlimit=%r, dash=%r, intent=%r, flatness=%r, '
+                ' stroking color=%r, non stroking color=%r>' %
                 (self.linewidth, self.linecap, self.linejoin,
-                 self.miterlimit, self.dash, self.intent, self.flatness))
+                 self.miterlimit, self.dash, self.intent, self.flatness,
+                 self.scolor, self.ncolor))
 
 
 ##  Resource Manager
@@ -576,31 +586,37 @@ class PDFPageInterpreter(object):
 
     # setgray-stroking
     def do_G(self, gray):
+        self.graphicstate.scolor = gray
         #self.do_CS(LITERAL_DEVICE_GRAY)
         return
 
     # setgray-non-stroking
     def do_g(self, gray):
+        self.graphicstate.ncolor = gray
         #self.do_cs(LITERAL_DEVICE_GRAY)
         return
 
     # setrgb-stroking
     def do_RG(self, r, g, b):
+        self.graphicstate.scolor = (r, g, b)
         #self.do_CS(LITERAL_DEVICE_RGB)
         return
 
     # setrgb-non-stroking
     def do_rg(self, r, g, b):
+        self.graphicstate.ncolor = (r, g, b)
         #self.do_cs(LITERAL_DEVICE_RGB)
         return
 
     # setcmyk-stroking
     def do_K(self, c, m, y, k):
+        self.graphicstate.scolor = (c, m, y, k)
         #self.do_CS(LITERAL_DEVICE_CMYK)
         return
 
     # setcmyk-non-stroking
     def do_k(self, c, m, y, k):
+        self.graphicstate.ncolor = (c, m, y, k)
         #self.do_cs(LITERAL_DEVICE_CMYK)
         return
 
@@ -612,7 +628,7 @@ class PDFPageInterpreter(object):
             if settings.STRICT:
                 raise PDFInterpreterError('No colorspace specified!')
             n = 1
-        self.pop(n)
+        self.graphicstate.scolor = self.pop(n)
         return
 
     def do_scn(self):
@@ -622,7 +638,7 @@ class PDFPageInterpreter(object):
             if settings.STRICT:
                 raise PDFInterpreterError('No colorspace specified!')
             n = 1
-        self.pop(n)
+        self.graphicstate.ncolor = self.pop(n)
         return
 
     def do_SC(self):