Added painting information (#37)

* Added color support for the stroking and non-stroking color spaces.
* Extended LTCurve, LTLine and LTRect to store painting information.
* Modified PDFLayoutAnalyzer to populate the shapes with painting information.
2016-11-08 16:01:58 -03:00 · 2016-11-08 16:01:58 -03:00 · e6ad15af79
parent 0fdebc6739
commit e6ad15af79
3 changed files with 39 additions and 15 deletions
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@ -85,7 +85,8 @@ class PDFLayoutAnalyzer(PDFTextDevice):
            (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
            (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
            if x0 == x1 or y0 == y1:
-                self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1)))
+                self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1),
+                    stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
                return
        if shape == 'mlllh':
            # rectangle
@ -99,14 +100,16 @@ class PDFLayoutAnalyzer(PDFTextDevice):
            (x3, y3) = apply_matrix_pt(self.ctm, (x3, y3))
            if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or
                (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)):
-                self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2)))
+                self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2),
+                    stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
                return
        # other shapes
        pts = []
        for p in path:
            for i in range(1, len(p), 2):
                pts.append(apply_matrix_pt(self.ctm, (p[i], p[i+1])))
-        self.cur_item.add(LTCurve(gstate.linewidth, pts))
+        self.cur_item.add(LTCurve(gstate.linewidth, pts, stroke, fill,
+            evenodd, gstate.scolor, gstate.ncolor))
        return

    def render_char(self, matrix, font, fontsize, scaling, rise, cid):
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@ -154,10 +154,15 @@ class LTComponent(LTItem):
 ##
 class LTCurve(LTComponent):

-    def __init__(self, linewidth, pts):
+    def __init__(self, linewidth, pts, stroke = False, fill = False, evenodd = False, stroking_color = None, non_stroking_color = None):
        LTComponent.__init__(self, get_bound(pts))
        self.pts = pts
        self.linewidth = linewidth
+        self.stroke = stroke
+        self.fill = fill
+        self.evenodd = evenodd
+        self.stroking_color = stroking_color
+        self.non_stroking_color = non_stroking_color
        return

    def get_pts(self):
@ -168,8 +173,8 @@ class LTCurve(LTComponent):
 ##
 class LTLine(LTCurve):

-    def __init__(self, linewidth, p0, p1):
-        LTCurve.__init__(self, linewidth, [p0, p1])
+    def __init__(self, linewidth, p0, p1, stroke = False, fill = False, evenodd = False, stroking_color = None, non_stroking_color = None):
+        LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd, stroking_color, non_stroking_color)
        return


@ -177,9 +182,9 @@ class LTLine(LTCurve):
 ##
 class LTRect(LTCurve):

-    def __init__(self, linewidth, bbox):
+    def __init__(self, linewidth, bbox, stroke = False, fill = False, evenodd = False, stroking_color = None, non_stroking_color = None):
        (x0, y0, x1, y1) = bbox
-        LTCurve.__init__(self, linewidth, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)])
+        LTCurve.__init__(self, linewidth, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke, fill, evenodd, stroking_color, non_stroking_color)
        return


@ -515,7 +520,7 @@ class LTLayoutContainer(LTContainer):
                           obj0.voverlap(obj1)) and
                          (obj0.hdistance(obj1) <
                           max(obj0.width, obj1.width) * laparams.char_margin))
-                
+
                # valign: obj0 and obj1 is vertically aligned.
                #
                #   +------+
@ -537,7 +542,7 @@ class LTLayoutContainer(LTContainer):
                           obj0.hoverlap(obj1)) and
                          (obj0.vdistance(obj1) <
                           max(obj0.height, obj1.height) * laparams.char_margin))
-                
+
                if ((halign and isinstance(line, LTTextLineHorizontal)) or
                    (valign and isinstance(line, LTTextLineVertical))):
                    line.add(obj1)
@ -631,7 +636,7 @@ class LTLayoutContainer(LTContainer):
        def key_obj(t):
            (c,d,_,_) = t
            return (c,d)
-        
+
        # XXX this still takes O(n^2)  :(
        dists = []
        for i in range(len(boxes)):
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@ -109,6 +109,12 @@ class PDFGraphicState(object):
        self.dash = None
        self.intent = None
        self.flatness = None
+
+        # stroking color
+        self.scolor = None
+
+        # non stroking color
+        self.ncolor = None
        return

    def copy(self):
@ -120,13 +126,17 @@ class PDFGraphicState(object):
        obj.dash = self.dash
        obj.intent = self.intent
        obj.flatness = self.flatness
+        obj.scolor = self.scolor
+        obj.ncolor = self.ncolor
        return obj

    def __repr__(self):
        return ('<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, '
-                ' miterlimit=%r, dash=%r, intent=%r, flatness=%r>' %
+                ' miterlimit=%r, dash=%r, intent=%r, flatness=%r, '
+                ' stroking color=%r, non stroking color=%r>' %
                (self.linewidth, self.linecap, self.linejoin,
-                 self.miterlimit, self.dash, self.intent, self.flatness))
+                 self.miterlimit, self.dash, self.intent, self.flatness,
+                 self.scolor, self.ncolor))


 ##  Resource Manager
@ -576,31 +586,37 @@ class PDFPageInterpreter(object):

    # setgray-stroking
    def do_G(self, gray):
+        self.graphicstate.scolor = gray  # stroking color; PDFLayoutAnalyzer reads gstate.scolor
        #self.do_CS(LITERAL_DEVICE_GRAY)
        return

    # setgray-non-stroking
    def do_g(self, gray):
+        self.graphicstate.ncolor = gray  # non-stroking color; PDFLayoutAnalyzer reads gstate.ncolor
        #self.do_cs(LITERAL_DEVICE_GRAY)
        return

    # setrgb-stroking
    def do_RG(self, r, g, b):
+        self.graphicstate.scolor = (r, g, b)  # stroking color; PDFLayoutAnalyzer reads gstate.scolor
        #self.do_CS(LITERAL_DEVICE_RGB)
        return

    # setrgb-non-stroking
    def do_rg(self, r, g, b):
+        self.graphicstate.ncolor = (r, g, b)  # non-stroking color; PDFLayoutAnalyzer reads gstate.ncolor
        #self.do_cs(LITERAL_DEVICE_RGB)
        return

    # setcmyk-stroking
    def do_K(self, c, m, y, k):
+        self.graphicstate.scolor = (c, m, y, k)  # stroking color; PDFLayoutAnalyzer reads gstate.scolor
        #self.do_CS(LITERAL_DEVICE_CMYK)
        return

    # setcmyk-non-stroking
    def do_k(self, c, m, y, k):
+        self.graphicstate.ncolor = (c, m, y, k)  # non-stroking color; PDFLayoutAnalyzer reads gstate.ncolor
        #self.do_cs(LITERAL_DEVICE_CMYK)
        return

@ -612,7 +628,7 @@ class PDFPageInterpreter(object):
            if settings.STRICT:
                raise PDFInterpreterError('No colorspace specified!')
            n = 1
-        self.pop(n)
+        self.graphicstate.scolor = self.pop(n)
        return

    def do_scn(self):
@ -622,7 +638,7 @@ class PDFPageInterpreter(object):
            if settings.STRICT:
                raise PDFInterpreterError('No colorspace specified!')
            n = 1
-        self.pop(n)
+        self.graphicstate.ncolor = self.pop(n)
        return

    def do_SC(self):