Added painting information (#37)
* Added color support for the stroking and non-stroking color spaces.
* Extended LTCurve, LTLine and LTRect to save painting information.
* Modified PDFLayoutAnalyzer to populate the shapes with painting information.
pull/41/head
parent
0fdebc6739
commit
e6ad15af79
|
@ -85,7 +85,8 @@ class PDFLayoutAnalyzer(PDFTextDevice):
|
||||||
(x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
|
(x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
|
||||||
(x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
|
(x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
|
||||||
if x0 == x1 or y0 == y1:
|
if x0 == x1 or y0 == y1:
|
||||||
self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1)))
|
self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1),
|
||||||
|
stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
|
||||||
return
|
return
|
||||||
if shape == 'mlllh':
|
if shape == 'mlllh':
|
||||||
# rectangle
|
# rectangle
|
||||||
|
@ -99,14 +100,16 @@ class PDFLayoutAnalyzer(PDFTextDevice):
|
||||||
(x3, y3) = apply_matrix_pt(self.ctm, (x3, y3))
|
(x3, y3) = apply_matrix_pt(self.ctm, (x3, y3))
|
||||||
if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or
|
if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or
|
||||||
(y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)):
|
(y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)):
|
||||||
self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2)))
|
self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2),
|
||||||
|
stroke, fill, evenodd, gstate.scolor, gstate.ncolor))
|
||||||
return
|
return
|
||||||
# other shapes
|
# other shapes
|
||||||
pts = []
|
pts = []
|
||||||
for p in path:
|
for p in path:
|
||||||
for i in range(1, len(p), 2):
|
for i in range(1, len(p), 2):
|
||||||
pts.append(apply_matrix_pt(self.ctm, (p[i], p[i+1])))
|
pts.append(apply_matrix_pt(self.ctm, (p[i], p[i+1])))
|
||||||
self.cur_item.add(LTCurve(gstate.linewidth, pts))
|
self.cur_item.add(LTCurve(gstate.linewidth, pts, stroke, fill,
|
||||||
|
evenodd, gstate.scolor, gstate.ncolor))
|
||||||
return
|
return
|
||||||
|
|
||||||
def render_char(self, matrix, font, fontsize, scaling, rise, cid):
|
def render_char(self, matrix, font, fontsize, scaling, rise, cid):
|
||||||
|
|
|
@ -154,10 +154,15 @@ class LTComponent(LTItem):
|
||||||
##
|
##
|
||||||
class LTCurve(LTComponent):
|
class LTCurve(LTComponent):
|
||||||
|
|
||||||
def __init__(self, linewidth, pts, stroke=False, fill=False,
             evenodd=False, stroking_color=None,
             non_stroking_color=None):
    """Create a curve item from a list of points plus painting information.

    The component's bounding box is whatever ``get_bound(pts)`` returns.

    :param linewidth: line width, stored unchanged on the item.
    :param pts: the points making up the curve, stored unchanged.
    :param stroke: painting flag supplied by the caller (default False).
    :param fill: painting flag supplied by the caller (default False).
    :param evenodd: painting flag supplied by the caller (default False).
    :param stroking_color: stroking color supplied by the caller, or None.
    :param non_stroking_color: non-stroking color supplied by the caller,
        or None.
    """
    LTComponent.__init__(self, get_bound(pts))
    # Geometry.
    self.linewidth = linewidth
    self.pts = pts
    # Painting information, kept exactly as handed in.
    (self.stroke, self.fill, self.evenodd) = (stroke, fill, evenodd)
    self.stroking_color = stroking_color
    self.non_stroking_color = non_stroking_color
    return
|
||||||
|
|
||||||
def get_pts(self):
|
def get_pts(self):
|
||||||
|
@ -168,8 +173,8 @@ class LTCurve(LTComponent):
|
||||||
##
|
##
|
||||||
class LTLine(LTCurve):
|
class LTLine(LTCurve):
|
||||||
|
|
||||||
def __init__(self, linewidth, p0, p1, stroke=False, fill=False,
             evenodd=False, stroking_color=None,
             non_stroking_color=None):
    """Create a line as a two-point curve running from ``p0`` to ``p1``.

    All painting arguments are forwarded unchanged to ``LTCurve.__init__``.

    :param linewidth: line width, forwarded to LTCurve.
    :param p0: first end point.
    :param p1: second end point.
    :param stroke: painting flag forwarded to LTCurve.
    :param fill: painting flag forwarded to LTCurve.
    :param evenodd: painting flag forwarded to LTCurve.
    :param stroking_color: stroking color forwarded to LTCurve.
    :param non_stroking_color: non-stroking color forwarded to LTCurve.
    """
    endpoints = [p0, p1]
    LTCurve.__init__(self, linewidth, endpoints, stroke, fill, evenodd,
                     stroking_color, non_stroking_color)
    return
|
||||||
|
|
||||||
|
|
||||||
|
@ -177,9 +182,9 @@ class LTLine(LTCurve):
|
||||||
##
|
##
|
||||||
class LTRect(LTCurve):
|
class LTRect(LTCurve):
|
||||||
|
|
||||||
def __init__(self, linewidth, bbox, stroke=False, fill=False,
             evenodd=False, stroking_color=None,
             non_stroking_color=None):
    """Create a rectangle as a four-point curve derived from ``bbox``.

    ``bbox`` is unpacked as ``(x0, y0, x1, y1)`` and the corners are listed
    in the order (x0, y0), (x1, y0), (x1, y1), (x0, y1). All painting
    arguments are forwarded unchanged to ``LTCurve.__init__``.

    :param linewidth: line width, forwarded to LTCurve.
    :param bbox: 4-tuple ``(x0, y0, x1, y1)``.
    :param stroke: painting flag forwarded to LTCurve.
    :param fill: painting flag forwarded to LTCurve.
    :param evenodd: painting flag forwarded to LTCurve.
    :param stroking_color: stroking color forwarded to LTCurve.
    :param non_stroking_color: non-stroking color forwarded to LTCurve.
    """
    (xa, ya, xb, yb) = bbox
    corners = [(xa, ya), (xb, ya), (xb, yb), (xa, yb)]
    LTCurve.__init__(self, linewidth, corners, stroke, fill, evenodd,
                     stroking_color, non_stroking_color)
    return
|
||||||
|
|
||||||
|
|
||||||
|
@ -515,7 +520,7 @@ class LTLayoutContainer(LTContainer):
|
||||||
obj0.voverlap(obj1)) and
|
obj0.voverlap(obj1)) and
|
||||||
(obj0.hdistance(obj1) <
|
(obj0.hdistance(obj1) <
|
||||||
max(obj0.width, obj1.width) * laparams.char_margin))
|
max(obj0.width, obj1.width) * laparams.char_margin))
|
||||||
|
|
||||||
# valign: obj0 and obj1 is vertically aligned.
|
# valign: obj0 and obj1 is vertically aligned.
|
||||||
#
|
#
|
||||||
# +------+
|
# +------+
|
||||||
|
@ -537,7 +542,7 @@ class LTLayoutContainer(LTContainer):
|
||||||
obj0.hoverlap(obj1)) and
|
obj0.hoverlap(obj1)) and
|
||||||
(obj0.vdistance(obj1) <
|
(obj0.vdistance(obj1) <
|
||||||
max(obj0.height, obj1.height) * laparams.char_margin))
|
max(obj0.height, obj1.height) * laparams.char_margin))
|
||||||
|
|
||||||
if ((halign and isinstance(line, LTTextLineHorizontal)) or
|
if ((halign and isinstance(line, LTTextLineHorizontal)) or
|
||||||
(valign and isinstance(line, LTTextLineVertical))):
|
(valign and isinstance(line, LTTextLineVertical))):
|
||||||
line.add(obj1)
|
line.add(obj1)
|
||||||
|
@ -631,7 +636,7 @@ class LTLayoutContainer(LTContainer):
|
||||||
def key_obj(t):
|
def key_obj(t):
|
||||||
(c,d,_,_) = t
|
(c,d,_,_) = t
|
||||||
return (c,d)
|
return (c,d)
|
||||||
|
|
||||||
# XXX this still takes O(n^2) :(
|
# XXX this still takes O(n^2) :(
|
||||||
dists = []
|
dists = []
|
||||||
for i in range(len(boxes)):
|
for i in range(len(boxes)):
|
||||||
|
|
|
@ -109,6 +109,12 @@ class PDFGraphicState(object):
|
||||||
self.dash = None
|
self.dash = None
|
||||||
self.intent = None
|
self.intent = None
|
||||||
self.flatness = None
|
self.flatness = None
|
||||||
|
|
||||||
|
# stroking color
|
||||||
|
self.scolor = None
|
||||||
|
|
||||||
|
# non stroking color
|
||||||
|
self.ncolor = None
|
||||||
return
|
return
|
||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
|
@ -120,13 +126,17 @@ class PDFGraphicState(object):
|
||||||
obj.dash = self.dash
|
obj.dash = self.dash
|
||||||
obj.intent = self.intent
|
obj.intent = self.intent
|
||||||
obj.flatness = self.flatness
|
obj.flatness = self.flatness
|
||||||
|
obj.scolor = self.scolor
|
||||||
|
obj.ncolor = self.ncolor
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
def __repr__(self):
    """Return a debug string showing the graphic state.

    The string lists linewidth, linecap, linejoin, miterlimit, dash,
    intent, flatness and the stroking / non-stroking colors, each
    formatted with ``%r``.
    """
    template = ('<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, '
                ' miterlimit=%r, dash=%r, intent=%r, flatness=%r, '
                ' stroking color=%r, non stroking color=%r>')
    values = (
        self.linewidth, self.linecap, self.linejoin,
        self.miterlimit, self.dash, self.intent, self.flatness,
        self.scolor, self.ncolor,
    )
    return template % values
|
||||||
|
|
||||||
|
|
||||||
## Resource Manager
|
## Resource Manager
|
||||||
|
@ -576,31 +586,37 @@ class PDFPageInterpreter(object):
|
||||||
|
|
||||||
# setgray-stroking
|
# setgray-stroking
|
||||||
def do_G(self, gray):
    """Handle the ``G`` operator: set the stroking color to a gray level.

    :param gray: gray-level operand taken from the content stream.
    """
    # Store in ``scolor``: that is the stroking-color field the graphic
    # state declares and copies, and the one shapes are populated from.
    # A generic ``color`` attribute is never read by anything.
    self.graphicstate.scolor = gray
    #self.do_CS(LITERAL_DEVICE_GRAY)
    return
|
||||||
|
|
||||||
# setgray-non-stroking
|
# setgray-non-stroking
|
||||||
def do_g(self, gray):
    """Handle the ``g`` operator: set the non-stroking color to a gray level.

    :param gray: gray-level operand taken from the content stream.
    """
    # Store in ``ncolor``: that is the non-stroking-color field the graphic
    # state declares and copies, and the one shapes are populated from.
    # A generic ``color`` attribute is never read by anything.
    self.graphicstate.ncolor = gray
    #self.do_cs(LITERAL_DEVICE_GRAY)
    return
|
||||||
|
|
||||||
# setrgb-stroking
|
# setrgb-stroking
|
||||||
def do_RG(self, r, g, b):
    """Handle the ``RG`` operator: set the stroking color to an RGB triple.

    :param r: red component operand.
    :param g: green component operand.
    :param b: blue component operand.
    """
    # Store in ``scolor``: that is the stroking-color field the graphic
    # state declares and copies, and the one shapes are populated from.
    # A generic ``color`` attribute is never read by anything.
    self.graphicstate.scolor = (r, g, b)
    #self.do_CS(LITERAL_DEVICE_RGB)
    return
|
||||||
|
|
||||||
# setrgb-non-stroking
|
# setrgb-non-stroking
|
||||||
def do_rg(self, r, g, b):
    """Handle the ``rg`` operator: set the non-stroking color to an RGB triple.

    :param r: red component operand.
    :param g: green component operand.
    :param b: blue component operand.
    """
    # Store in ``ncolor``: that is the non-stroking-color field the graphic
    # state declares and copies, and the one shapes are populated from.
    # A generic ``color`` attribute is never read by anything.
    self.graphicstate.ncolor = (r, g, b)
    #self.do_cs(LITERAL_DEVICE_RGB)
    return
|
||||||
|
|
||||||
# setcmyk-stroking
|
# setcmyk-stroking
|
||||||
def do_K(self, c, m, y, k):
    """Handle the ``K`` operator: set the stroking color to a CMYK 4-tuple.

    :param c: cyan component operand.
    :param m: magenta component operand.
    :param y: yellow component operand.
    :param k: black component operand.
    """
    # Store in ``scolor``: that is the stroking-color field the graphic
    # state declares and copies, and the one shapes are populated from.
    # A generic ``color`` attribute is never read by anything.
    self.graphicstate.scolor = (c, m, y, k)
    #self.do_CS(LITERAL_DEVICE_CMYK)
    return
|
||||||
|
|
||||||
# setcmyk-non-stroking
|
# setcmyk-non-stroking
|
||||||
def do_k(self, c, m, y, k):
    """Handle the ``k`` operator: set the non-stroking color to a CMYK 4-tuple.

    :param c: cyan component operand.
    :param m: magenta component operand.
    :param y: yellow component operand.
    :param k: black component operand.
    """
    # Store in ``ncolor``: that is the non-stroking-color field the graphic
    # state declares and copies, and the one shapes are populated from.
    # A generic ``color`` attribute is never read by anything.
    self.graphicstate.ncolor = (c, m, y, k)
    #self.do_cs(LITERAL_DEVICE_CMYK)
    return
|
||||||
|
|
||||||
|
@ -612,7 +628,7 @@ class PDFPageInterpreter(object):
|
||||||
if settings.STRICT:
|
if settings.STRICT:
|
||||||
raise PDFInterpreterError('No colorspace specified!')
|
raise PDFInterpreterError('No colorspace specified!')
|
||||||
n = 1
|
n = 1
|
||||||
self.pop(n)
|
self.graphicstate.scolor = self.pop(n)
|
||||||
return
|
return
|
||||||
|
|
||||||
def do_scn(self):
|
def do_scn(self):
|
||||||
|
@ -622,7 +638,7 @@ class PDFPageInterpreter(object):
|
||||||
if settings.STRICT:
|
if settings.STRICT:
|
||||||
raise PDFInterpreterError('No colorspace specified!')
|
raise PDFInterpreterError('No colorspace specified!')
|
||||||
n = 1
|
n = 1
|
||||||
self.pop(n)
|
self.graphicstate.ncolor = self.pop(n)
|
||||||
return
|
return
|
||||||
|
|
||||||
def do_SC(self):
|
def do_SC(self):
|
||||||
|
|
Loading…
Reference in New Issue