Added painting information (#37)

* added color support for the stroking and non-stroking color spaces

* extended LTCurve, LTLine and LTRect to store painting information (stroke, fill, evenodd, stroking color and non-stroking color)

* modified PDFLayoutAnalyzer to populate the shapes with painting information
pull/41/head
Humberto Pereira 2016-11-08 16:01:58 -03:00 committed by Goulu
parent 0fdebc6739
commit e6ad15af79
3 changed files with 39 additions and 15 deletions

View File

@ -85,7 +85,8 @@ class PDFLayoutAnalyzer(PDFTextDevice):
(x0, y0) = apply_matrix_pt(self.ctm, (x0, y0)) (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
(x1, y1) = apply_matrix_pt(self.ctm, (x1, y1)) (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
if x0 == x1 or y0 == y1: if x0 == x1 or y0 == y1:
self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1))) self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1),
stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
return return
if shape == 'mlllh': if shape == 'mlllh':
# rectangle # rectangle
@ -99,14 +100,16 @@ class PDFLayoutAnalyzer(PDFTextDevice):
(x3, y3) = apply_matrix_pt(self.ctm, (x3, y3)) (x3, y3) = apply_matrix_pt(self.ctm, (x3, y3))
if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or
(y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)): (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)):
self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2))) self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2),
stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
return return
# other shapes # other shapes
pts = [] pts = []
for p in path: for p in path:
for i in range(1, len(p), 2): for i in range(1, len(p), 2):
pts.append(apply_matrix_pt(self.ctm, (p[i], p[i+1]))) pts.append(apply_matrix_pt(self.ctm, (p[i], p[i+1])))
self.cur_item.add(LTCurve(gstate.linewidth, pts)) self.cur_item.add(LTCurve(gstate.linewidth, pts, stroke, fill,
evenodd, gstate.scolor, gstate.ncolor))
return return
def render_char(self, matrix, font, fontsize, scaling, rise, cid): def render_char(self, matrix, font, fontsize, scaling, rise, cid):

View File

@ -154,10 +154,15 @@ class LTComponent(LTItem):
## ##
class LTCurve(LTComponent): class LTCurve(LTComponent):
def __init__(self, linewidth, pts): def __init__(self, linewidth, pts, stroke = False, fill = False, evenodd = False, stroking_color = None, non_stroking_color = None):
LTComponent.__init__(self, get_bound(pts)) LTComponent.__init__(self, get_bound(pts))
self.pts = pts self.pts = pts
self.linewidth = linewidth self.linewidth = linewidth
self.stroke = stroke
self.fill = fill
self.evenodd = evenodd
self.stroking_color = stroking_color
self.non_stroking_color = non_stroking_color
return return
def get_pts(self): def get_pts(self):
@ -168,8 +173,8 @@ class LTCurve(LTComponent):
## ##
class LTLine(LTCurve): class LTLine(LTCurve):
def __init__(self, linewidth, p0, p1): def __init__(self, linewidth, p0, p1, stroke = False, fill = False, evenodd = False, stroking_color = None, non_stroking_color = None):
LTCurve.__init__(self, linewidth, [p0, p1]) LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd, stroking_color, non_stroking_color)
return return
@ -177,9 +182,9 @@ class LTLine(LTCurve):
## ##
class LTRect(LTCurve): class LTRect(LTCurve):
def __init__(self, linewidth, bbox): def __init__(self, linewidth, bbox, stroke = False, fill = False, evenodd = False, stroking_color = None, non_stroking_color = None):
(x0, y0, x1, y1) = bbox (x0, y0, x1, y1) = bbox
LTCurve.__init__(self, linewidth, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]) LTCurve.__init__(self, linewidth, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], stroke, fill, evenodd, stroking_color, non_stroking_color)
return return
@ -515,7 +520,7 @@ class LTLayoutContainer(LTContainer):
obj0.voverlap(obj1)) and obj0.voverlap(obj1)) and
(obj0.hdistance(obj1) < (obj0.hdistance(obj1) <
max(obj0.width, obj1.width) * laparams.char_margin)) max(obj0.width, obj1.width) * laparams.char_margin))
# valign: obj0 and obj1 is vertically aligned. # valign: obj0 and obj1 is vertically aligned.
# #
# +------+ # +------+
@ -537,7 +542,7 @@ class LTLayoutContainer(LTContainer):
obj0.hoverlap(obj1)) and obj0.hoverlap(obj1)) and
(obj0.vdistance(obj1) < (obj0.vdistance(obj1) <
max(obj0.height, obj1.height) * laparams.char_margin)) max(obj0.height, obj1.height) * laparams.char_margin))
if ((halign and isinstance(line, LTTextLineHorizontal)) or if ((halign and isinstance(line, LTTextLineHorizontal)) or
(valign and isinstance(line, LTTextLineVertical))): (valign and isinstance(line, LTTextLineVertical))):
line.add(obj1) line.add(obj1)
@ -631,7 +636,7 @@ class LTLayoutContainer(LTContainer):
def key_obj(t): def key_obj(t):
(c,d,_,_) = t (c,d,_,_) = t
return (c,d) return (c,d)
# XXX this still takes O(n^2) :( # XXX this still takes O(n^2) :(
dists = [] dists = []
for i in range(len(boxes)): for i in range(len(boxes)):

View File

@ -109,6 +109,12 @@ class PDFGraphicState(object):
self.dash = None self.dash = None
self.intent = None self.intent = None
self.flatness = None self.flatness = None
# stroking color
self.scolor = None
# non stroking color
self.ncolor = None
return return
def copy(self): def copy(self):
@ -120,13 +126,17 @@ class PDFGraphicState(object):
obj.dash = self.dash obj.dash = self.dash
obj.intent = self.intent obj.intent = self.intent
obj.flatness = self.flatness obj.flatness = self.flatness
obj.scolor = self.scolor
obj.ncolor = self.ncolor
return obj return obj
def __repr__(self): def __repr__(self):
return ('<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, ' return ('<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, '
' miterlimit=%r, dash=%r, intent=%r, flatness=%r>' % ' miterlimit=%r, dash=%r, intent=%r, flatness=%r, '
' stroking color=%r, non stroking color=%r>' %
(self.linewidth, self.linecap, self.linejoin, (self.linewidth, self.linecap, self.linejoin,
self.miterlimit, self.dash, self.intent, self.flatness)) self.miterlimit, self.dash, self.intent, self.flatness,
self.scolor, self.ncolor))
## Resource Manager ## Resource Manager
@ -576,31 +586,37 @@ class PDFPageInterpreter(object):
# setgray-stroking # setgray-stroking
def do_G(self, gray): def do_G(self, gray):
self.graphicstate.color = gray
#self.do_CS(LITERAL_DEVICE_GRAY) #self.do_CS(LITERAL_DEVICE_GRAY)
return return
# setgray-non-stroking # setgray-non-stroking
def do_g(self, gray): def do_g(self, gray):
self.graphicstate.color = gray
#self.do_cs(LITERAL_DEVICE_GRAY) #self.do_cs(LITERAL_DEVICE_GRAY)
return return
# setrgb-stroking # setrgb-stroking
def do_RG(self, r, g, b): def do_RG(self, r, g, b):
self.graphicstate.color = (r, g, b)
#self.do_CS(LITERAL_DEVICE_RGB) #self.do_CS(LITERAL_DEVICE_RGB)
return return
# setrgb-non-stroking # setrgb-non-stroking
def do_rg(self, r, g, b): def do_rg(self, r, g, b):
self.graphicstate.color = (r, g, b)
#self.do_cs(LITERAL_DEVICE_RGB) #self.do_cs(LITERAL_DEVICE_RGB)
return return
# setcmyk-stroking # setcmyk-stroking
def do_K(self, c, m, y, k): def do_K(self, c, m, y, k):
self.graphicstate.color = (c, m, y, k)
#self.do_CS(LITERAL_DEVICE_CMYK) #self.do_CS(LITERAL_DEVICE_CMYK)
return return
# setcmyk-non-stroking # setcmyk-non-stroking
def do_k(self, c, m, y, k): def do_k(self, c, m, y, k):
self.graphicstate.color = (c, m, y, k)
#self.do_cs(LITERAL_DEVICE_CMYK) #self.do_cs(LITERAL_DEVICE_CMYK)
return return
@ -612,7 +628,7 @@ class PDFPageInterpreter(object):
if settings.STRICT: if settings.STRICT:
raise PDFInterpreterError('No colorspace specified!') raise PDFInterpreterError('No colorspace specified!')
n = 1 n = 1
self.pop(n) self.graphicstate.scolor = self.pop(n)
return return
def do_scn(self): def do_scn(self):
@ -622,7 +638,7 @@ class PDFPageInterpreter(object):
if settings.STRICT: if settings.STRICT:
raise PDFInterpreterError('No colorspace specified!') raise PDFInterpreterError('No colorspace specified!')
n = 1 n = 1
self.pop(n) self.graphicstate.ncolor = self.pop(n)
return return
def do_SC(self): def do_SC(self):