Added painting information (#37)

* added color support for stroking and non-stroking color spaces

* extended LTCurve, LTLine and LTRect to save painting information

* modified PDFLayoutAnalyzer to populate the shapes with painting information
pull/41/head
Humberto Pereira 2016-11-08 16:01:58 -03:00 committed by Goulu
parent 0fdebc6739
commit e6ad15af79
3 changed files with 39 additions and 15 deletions

View File

@ -85,7 +85,8 @@ class PDFLayoutAnalyzer(PDFTextDevice):
(x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
(x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
if x0 == x1 or y0 == y1:
self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1)))
self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1),
stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
return
if shape == 'mlllh':
# rectangle
@ -99,14 +100,16 @@ class PDFLayoutAnalyzer(PDFTextDevice):
(x3, y3) = apply_matrix_pt(self.ctm, (x3, y3))
if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or
(y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)):
self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2)))
self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2),
stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
return
# other shapes
pts = []
for p in path:
for i in range(1, len(p), 2):
pts.append(apply_matrix_pt(self.ctm, (p[i], p[i+1])))
self.cur_item.add(LTCurve(gstate.linewidth, pts))
self.cur_item.add(LTCurve(gstate.linewidth, pts, stroke, fill,
evenodd, gstate.scolor, gstate.ncolor))
return
def render_char(self, matrix, font, fontsize, scaling, rise, cid):

View File

@ -154,10 +154,15 @@ class LTComponent(LTItem):
##
class LTCurve(LTComponent):
def __init__(self, linewidth, pts):
def __init__(self, linewidth, pts, stroke = False, fill = False, evenodd = False, stroking_color = None, non_stroking_color = None):
LTComponent.__init__(self, get_bound(pts))
self.pts = pts
self.linewidth = linewidth
self.stroke = stroke
self.fill = fill
self.evenodd = evenodd
self.stroking_color = stroking_color
self.non_stroking_color = non_stroking_color
return
def get_pts(self):
@ -168,8 +173,8 @@ class LTCurve(LTComponent):
##
class LTLine(LTCurve):
def __init__(self, linewidth, p0, p1):
LTCurve.__init__(self, linewidth, [p0, p1])
def __init__(self, linewidth, p0, p1, stroke = False, fill = False, evenodd = False, stroking_color = None, non_stroking_color = None):
LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd, stroking_color, non_stroking_color)
return
@ -177,9 +182,9 @@ class LTLine(LTCurve):
##
class LTRect(LTCurve):
def __init__(self, linewidth, bbox):
def __init__(self, linewidth, bbox, stroke = False, fill = False, evenodd = False, stroking_color = None, non_stroking_color = None):
(x0, y0, x1, y1) = bbox
LTCurve.__init__(self, linewidth, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)])
LTCurve.__init__(self, linewidth, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke, fill, evenodd, stroking_color, non_stroking_color)
return
@ -515,7 +520,7 @@ class LTLayoutContainer(LTContainer):
obj0.voverlap(obj1)) and
(obj0.hdistance(obj1) <
max(obj0.width, obj1.width) * laparams.char_margin))
# valign: obj0 and obj1 are vertically aligned.
#
# +------+
@ -537,7 +542,7 @@ class LTLayoutContainer(LTContainer):
obj0.hoverlap(obj1)) and
(obj0.vdistance(obj1) <
max(obj0.height, obj1.height) * laparams.char_margin))
if ((halign and isinstance(line, LTTextLineHorizontal)) or
(valign and isinstance(line, LTTextLineVertical))):
line.add(obj1)
@ -631,7 +636,7 @@ class LTLayoutContainer(LTContainer):
def key_obj(t):
(c,d,_,_) = t
return (c,d)
# XXX this still takes O(n^2) :(
dists = []
for i in range(len(boxes)):

View File

@ -109,6 +109,12 @@ class PDFGraphicState(object):
self.dash = None
self.intent = None
self.flatness = None
# stroking color
self.scolor = None
# non stroking color
self.ncolor = None
return
def copy(self):
@ -120,13 +126,17 @@ class PDFGraphicState(object):
obj.dash = self.dash
obj.intent = self.intent
obj.flatness = self.flatness
obj.scolor = self.scolor
obj.ncolor = self.ncolor
return obj
def __repr__(self):
return ('<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, '
' miterlimit=%r, dash=%r, intent=%r, flatness=%r>' %
' miterlimit=%r, dash=%r, intent=%r, flatness=%r, '
' stroking color=%r, non stroking color=%r>' %
(self.linewidth, self.linecap, self.linejoin,
self.miterlimit, self.dash, self.intent, self.flatness))
self.miterlimit, self.dash, self.intent, self.flatness,
self.scolor, self.ncolor))
## Resource Manager
@ -576,31 +586,37 @@ class PDFPageInterpreter(object):
# setgray-stroking
def do_G(self, gray):
    """Set the stroking color to a gray level (operator ``G``).

    :param gray: gray-level operand taken from the content stream.

    The value is stored in ``graphicstate.scolor``, the stroking-color
    slot that ``PDFGraphicState.copy()`` carries over and that the
    layout analyzer passes to the painted shapes. Writing it to a
    separate ``color`` attribute would leave ``scolor`` untouched.
    """
    self.graphicstate.scolor = gray
    # The accompanying switch of the stroking color space is still disabled:
    #self.do_CS(LITERAL_DEVICE_GRAY)
    return
# setgray-non-stroking
def do_g(self, gray):
    """Set the non-stroking color to a gray level (operator ``g``).

    :param gray: gray-level operand taken from the content stream.

    The value is stored in ``graphicstate.ncolor``, the non-stroking
    slot that ``PDFGraphicState.copy()`` carries over and that the
    layout analyzer passes to the painted shapes. Writing it to a
    separate ``color`` attribute would leave ``ncolor`` untouched.
    """
    self.graphicstate.ncolor = gray
    # The accompanying switch of the non-stroking color space is still disabled:
    #self.do_cs(LITERAL_DEVICE_GRAY)
    return
# setrgb-stroking
def do_RG(self, r, g, b):
    """Set the stroking color to an RGB triple (operator ``RG``).

    :param r: red component operand.
    :param g: green component operand.
    :param b: blue component operand.

    The components are stored as the tuple ``(r, g, b)`` in
    ``graphicstate.scolor``, the stroking-color slot that
    ``PDFGraphicState.copy()`` carries over and that the layout analyzer
    passes to the painted shapes.
    """
    self.graphicstate.scolor = (r, g, b)
    # The accompanying switch of the stroking color space is still disabled:
    #self.do_CS(LITERAL_DEVICE_RGB)
    return
# setrgb-non-stroking
def do_rg(self, r, g, b):
    """Set the non-stroking color to an RGB triple (operator ``rg``).

    :param r: red component operand.
    :param g: green component operand.
    :param b: blue component operand.

    The components are stored as the tuple ``(r, g, b)`` in
    ``graphicstate.ncolor``, the non-stroking slot that
    ``PDFGraphicState.copy()`` carries over and that the layout analyzer
    passes to the painted shapes.
    """
    self.graphicstate.ncolor = (r, g, b)
    # The accompanying switch of the non-stroking color space is still disabled:
    #self.do_cs(LITERAL_DEVICE_RGB)
    return
# setcmyk-stroking
def do_K(self, c, m, y, k):
    """Set the stroking color to a CMYK quadruple (operator ``K``).

    :param c: cyan component operand.
    :param m: magenta component operand.
    :param y: yellow component operand.
    :param k: black (key) component operand.

    The components are stored as the tuple ``(c, m, y, k)`` in
    ``graphicstate.scolor``, the stroking-color slot that
    ``PDFGraphicState.copy()`` carries over and that the layout analyzer
    passes to the painted shapes.
    """
    self.graphicstate.scolor = (c, m, y, k)
    # The accompanying switch of the stroking color space is still disabled:
    #self.do_CS(LITERAL_DEVICE_CMYK)
    return
# setcmyk-non-stroking
def do_k(self, c, m, y, k):
    """Set the non-stroking color to a CMYK quadruple (operator ``k``).

    :param c: cyan component operand.
    :param m: magenta component operand.
    :param y: yellow component operand.
    :param k: black (key) component operand.

    The components are stored as the tuple ``(c, m, y, k)`` in
    ``graphicstate.ncolor``, the non-stroking slot that
    ``PDFGraphicState.copy()`` carries over and that the layout analyzer
    passes to the painted shapes.
    """
    self.graphicstate.ncolor = (c, m, y, k)
    # The accompanying switch of the non-stroking color space is still disabled:
    #self.do_cs(LITERAL_DEVICE_CMYK)
    return
@ -612,7 +628,7 @@ class PDFPageInterpreter(object):
if settings.STRICT:
raise PDFInterpreterError('No colorspace specified!')
n = 1
self.pop(n)
self.graphicstate.scolor = self.pop(n)
return
def do_scn(self):
@ -622,7 +638,7 @@ class PDFPageInterpreter(object):
if settings.STRICT:
raise PDFInterpreterError('No colorspace specified!')
n = 1
self.pop(n)
self.graphicstate.ncolor = self.pop(n)
return
def do_SC(self):