CAUTION! changed the way of internal layout handling.
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@184 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
2555b38836
commit
23be96c49e
|
@ -19,7 +19,7 @@ Python PDF parser and analyzer
|
||||||
|
|
||||||
<div align=right class=lastmod>
|
<div align=right class=lastmod>
|
||||||
<!-- hhmts start -->
|
<!-- hhmts start -->
|
||||||
Last Modified: Mon Feb 15 14:41:49 UTC 2010
|
Last Modified: Sat Feb 27 03:58:45 UTC 2010
|
||||||
<!-- hhmts end -->
|
<!-- hhmts end -->
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@ -348,7 +348,8 @@ no stream header is displayed for the ease of saving it to a file.
|
||||||
<hr noshade>
|
<hr noshade>
|
||||||
<h2>Changes</h2>
|
<h2>Changes</h2>
|
||||||
<ul>
|
<ul>
|
||||||
<li> 2010/02/15: Bugfixes. Thanks to Sean.
|
<li> 2010/02/27: Changed the way of internal layout handling. (LTTextItem -> LTChar)
|
||||||
|
<li> 2010/02/15: Several bugfixes. Thanks to Sean.
|
||||||
<li> 2010/02/13: Bugfix and enhancement. Thanks to André Auzi.
|
<li> 2010/02/13: Bugfix and enhancement. Thanks to André Auzi.
|
||||||
<li> 2010/02/07: Several bugfixes. Thanks to Hiroshi Manabe.
|
<li> 2010/02/07: Several bugfixes. Thanks to Hiroshi Manabe.
|
||||||
<li> 2010/01/31: JPEG image extraction supported. Page rotation bug fixed.
|
<li> 2010/01/31: JPEG image extraction supported. Page rotation bug fixed.
|
||||||
|
|
|
@ -5,9 +5,9 @@ from pdffont import PDFUnicodeNotDefined
|
||||||
from pdftypes import LITERALS_DCT_DECODE
|
from pdftypes import LITERALS_DCT_DECODE
|
||||||
from layout import LayoutContainer
|
from layout import LayoutContainer
|
||||||
from layout import LTPage, LTText, LTLine, LTRect, LTPolygon
|
from layout import LTPage, LTText, LTLine, LTRect, LTPolygon
|
||||||
from layout import LTFigure, LTImage, LTTextItem, LTTextBox, LTTextLine
|
from layout import LTFigure, LTImage, LTChar, LTTextBox, LTTextLine
|
||||||
from utils import apply_matrix_pt, mult_matrix
|
from utils import apply_matrix_pt, mult_matrix
|
||||||
from utils import enc, strbbox
|
from utils import enc, bbox2str
|
||||||
|
|
||||||
|
|
||||||
## PDFPageAggregator
|
## PDFPageAggregator
|
||||||
|
@ -97,9 +97,8 @@ class PDFPageAggregator(PDFTextDevice):
|
||||||
self.cur_item.add(LTPolygon(gstate.linewidth, pts))
|
self.cur_item.add(LTPolygon(gstate.linewidth, pts))
|
||||||
return
|
return
|
||||||
|
|
||||||
def render_chars(self, matrix, font, fontsize, charspace, scaling, chars):
|
def render_char(self, matrix, font, fontsize, scaling, cid):
|
||||||
if not chars: return (0, 0)
|
item = LTChar(matrix, font, fontsize, scaling, cid)
|
||||||
item = LTTextItem(matrix, font, fontsize, charspace, scaling, chars)
|
|
||||||
self.cur_item.add(item)
|
self.cur_item.add(item)
|
||||||
return item.adv
|
return item.adv
|
||||||
|
|
||||||
|
@ -202,15 +201,10 @@ class HTMLConverter(PDFConverter):
|
||||||
self.outfp.write('<a name="%s">Page %s</a></div>\n' % (page.id, page.id))
|
self.outfp.write('<a name="%s">Page %s</a></div>\n' % (page.id, page.id))
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
elif isinstance(item, LTTextItem):
|
elif isinstance(item, LTChar):
|
||||||
if item.vertical:
|
self.outfp.write('<span style="position:absolute; left:%dpx; top:%dpx; font-size:%dpx;">' %
|
||||||
wmode = 'tb-rl'
|
(item.x0*self.scale, (self.yoffset-item.y1)*self.scale,
|
||||||
else:
|
item.get_size()*self.scale))
|
||||||
wmode = 'lr-tb'
|
|
||||||
self.outfp.write('<span style="position:absolute; writing-mode:%s;'
|
|
||||||
' left:%dpx; top:%dpx; font-size:%dpx;">' %
|
|
||||||
(wmode, item.x0*self.scale, (self.yoffset-item.y1)*self.scale,
|
|
||||||
item.fontsize*self.scale))
|
|
||||||
self.write(item.text)
|
self.write(item.text)
|
||||||
self.outfp.write('</span>\n')
|
self.outfp.write('</span>\n')
|
||||||
if self.debug:
|
if self.debug:
|
||||||
|
@ -271,35 +265,40 @@ class XMLConverter(PDFConverter):
|
||||||
def render(item):
|
def render(item):
|
||||||
if isinstance(item, LTPage):
|
if isinstance(item, LTPage):
|
||||||
self.outfp.write('<page id="%s" bbox="%s" rotate="%d">\n' %
|
self.outfp.write('<page id="%s" bbox="%s" rotate="%d">\n' %
|
||||||
(item.id, strbbox(item.bbox), item.rotate))
|
(item.id, bbox2str(item.bbox), item.rotate))
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
self.outfp.write('</page>\n')
|
self.outfp.write('</page>\n')
|
||||||
elif isinstance(item, LTLine) and item.direction:
|
elif isinstance(item, LTLine) and item.direction:
|
||||||
self.outfp.write('<line linewidth="%d" direction="%s" bbox="%s" />\n' % (item.linewidth, item.direction, strbbox(item.bbox)))
|
self.outfp.write('<line linewidth="%d" direction="%s" bbox="%s" />\n' %
|
||||||
|
(item.linewidth, item.direction, bbox2str(item.bbox)))
|
||||||
elif isinstance(item, LTRect):
|
elif isinstance(item, LTRect):
|
||||||
self.outfp.write('<rect linewidth="%d" bbox="%s" />\n' % (item.linewidth, strbbox(item.bbox)))
|
self.outfp.write('<rect linewidth="%d" bbox="%s" />\n' %
|
||||||
|
(item.linewidth, bbox2str(item.bbox)))
|
||||||
elif isinstance(item, LTPolygon):
|
elif isinstance(item, LTPolygon):
|
||||||
self.outfp.write('<polygon linewidth="%d" bbox="%s" pts="%s"/>\n' % (item.linewidth, strbbox(item.bbox), item.get_pts()))
|
self.outfp.write('<polygon linewidth="%d" bbox="%s" pts="%s"/>\n' %
|
||||||
|
(item.linewidth, bbox2str(item.bbox), item.get_pts()))
|
||||||
elif isinstance(item, LTFigure):
|
elif isinstance(item, LTFigure):
|
||||||
self.outfp.write('<figure id="%s" bbox="%s">\n' % (item.id, strbbox(item.bbox)))
|
self.outfp.write('<figure id="%s" bbox="%s">\n' %
|
||||||
|
(item.id, bbox2str(item.bbox)))
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
self.outfp.write('</figure>\n')
|
self.outfp.write('</figure>\n')
|
||||||
elif isinstance(item, LTTextLine):
|
elif isinstance(item, LTTextLine):
|
||||||
self.outfp.write('<textline bbox="%s">\n' % strbbox(item.bbox))
|
self.outfp.write('<textline bbox="%s">\n' % bbox2str(item.bbox))
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
self.outfp.write('</textline>\n')
|
self.outfp.write('</textline>\n')
|
||||||
elif isinstance(item, LTTextBox):
|
elif isinstance(item, LTTextBox):
|
||||||
self.outfp.write('<textbox id="%s" bbox="%s">\n' % (item.id, strbbox(item.bbox)))
|
self.outfp.write('<textbox id="%s" bbox="%s">\n' %
|
||||||
|
(item.id, bbox2str(item.bbox)))
|
||||||
for child in item:
|
for child in item:
|
||||||
render(child)
|
render(child)
|
||||||
self.outfp.write('</textbox>\n')
|
self.outfp.write('</textbox>\n')
|
||||||
elif isinstance(item, LTTextItem):
|
elif isinstance(item, LTChar):
|
||||||
self.outfp.write('<text font="%s" vertical="%s" bbox="%s" fontsize="%.3f">' %
|
self.outfp.write('<text font="%s" vertical="%s" bbox="%s" size="%.3f">' %
|
||||||
(enc(item.font.fontname), item.is_vertical(),
|
(enc(item.font.fontname), item.is_vertical(),
|
||||||
strbbox(item.bbox), item.fontsize))
|
bbox2str(item.bbox), item.get_size()))
|
||||||
self.write(item.text)
|
self.write(item.text)
|
||||||
self.outfp.write('</text>\n')
|
self.outfp.write('</text>\n')
|
||||||
elif isinstance(item, LTText):
|
elif isinstance(item, LTText):
|
||||||
|
@ -310,7 +309,8 @@ class XMLConverter(PDFConverter):
|
||||||
name = self.write_image(item)
|
name = self.write_image(item)
|
||||||
if name:
|
if name:
|
||||||
x = 'name="%s" ' % enc(name)
|
x = 'name="%s" ' % enc(name)
|
||||||
self.outfp.write('<image %stype="%s" width="%d" height="%d" />\n' % (x, item.type, item.width, item.height))
|
self.outfp.write('<image %stype="%s" width="%d" height="%d" />\n' %
|
||||||
|
(x, item.type, item.width, item.height))
|
||||||
else:
|
else:
|
||||||
assert 0, item
|
assert 0, item
|
||||||
return
|
return
|
||||||
|
@ -352,7 +352,7 @@ class TagExtractor(PDFDevice):
|
||||||
|
|
||||||
def begin_page(self, page, ctm):
|
def begin_page(self, page, ctm):
|
||||||
self.outfp.write('<page id="%s" bbox="%s" rotate="%d">' %
|
self.outfp.write('<page id="%s" bbox="%s" rotate="%d">' %
|
||||||
(self.pageno, strbbox(page.mediabox), page.rotate))
|
(self.pageno, bbox2str(page.mediabox), page.rotate))
|
||||||
return
|
return
|
||||||
|
|
||||||
def end_page(self, page):
|
def end_page(self, page):
|
||||||
|
|
|
@ -2,7 +2,8 @@
|
||||||
import sys
|
import sys
|
||||||
from sys import maxint as INF
|
from sys import maxint as INF
|
||||||
from utils import apply_matrix_norm, apply_matrix_pt
|
from utils import apply_matrix_norm, apply_matrix_pt
|
||||||
from utils import bsearch, strbbox
|
from utils import bsearch, bbox2str, matrix2str
|
||||||
|
from pdffont import PDFUnicodeNotDefined
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -136,7 +137,7 @@ class LayoutItem(object):
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return ('<item bbox=%s>' % strbbox(self.bbox))
|
return ('<item bbox=%s>' % bbox2str(self.bbox))
|
||||||
|
|
||||||
def set_bbox(self, (x0,y0,x1,y1)):
|
def set_bbox(self, (x0,y0,x1,y1)):
|
||||||
if x1 < x0: (x0,x1) = (x1,x0)
|
if x1 < x0: (x0,x1) = (x1,x0)
|
||||||
|
@ -203,7 +204,7 @@ class LayoutContainer(LayoutItem):
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return ('<group %s>' % strbbox(self.bbox))
|
return ('<group %s>' % bbox2str(self.bbox))
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
return iter(self.objs)
|
return iter(self.objs)
|
||||||
|
@ -326,55 +327,59 @@ class LTAnon(LTText):
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
## LTTextItem
|
## LTChar
|
||||||
##
|
##
|
||||||
class LTTextItem(LayoutItem, LTText):
|
class LTChar(LayoutItem, LTText):
|
||||||
|
|
||||||
debug = 1
|
debug = 1
|
||||||
|
|
||||||
def __init__(self, matrix, font, fontsize, charspace, scaling, chars):
|
def __init__(self, matrix, font, fontsize, scaling, cid):
|
||||||
assert chars
|
|
||||||
self.matrix = matrix
|
self.matrix = matrix
|
||||||
self.font = font
|
self.font = font
|
||||||
|
self.fontsize = fontsize
|
||||||
self.vertical = font.is_vertical()
|
self.vertical = font.is_vertical()
|
||||||
self.text = ''.join( char for (char,_) in chars )
|
self.adv = font.char_width(cid) * fontsize * scaling
|
||||||
adv = sum( font.char_width(cid) for (_,cid) in chars )
|
try:
|
||||||
adv = (adv * fontsize + (len(chars)-1)*charspace) * scaling
|
text = font.to_unichr(cid)
|
||||||
#size = (font.get_ascent() - font.get_descent()) * fontsize
|
except PDFUnicodeNotDefined:
|
||||||
size = font.get_size() * fontsize
|
text = '?'
|
||||||
(_,_,_,_,tx,ty) = self.matrix
|
LTText.__init__(self, text)
|
||||||
if not self.vertical:
|
# compute the boundary rectangle.
|
||||||
# horizontal text
|
if self.vertical:
|
||||||
self.adv = (adv, 0)
|
# vertical
|
||||||
(dx,dy) = apply_matrix_norm(self.matrix, (adv,size))
|
size = font.get_size() * fontsize
|
||||||
(_,descent) = apply_matrix_norm(self.matrix, (0,font.get_descent() * fontsize))
|
displacement = (1000 - font.char_disp(cid)) * fontsize * .001
|
||||||
|
(_,displacement) = apply_matrix_norm(self.matrix, (0, displacement))
|
||||||
|
(dx,dy) = apply_matrix_norm(self.matrix, (size, self.adv))
|
||||||
|
(_,_,_,_,tx,ty) = self.matrix
|
||||||
|
tx -= dx/2
|
||||||
|
ty += displacement
|
||||||
|
bbox = (tx, ty+dy, tx+dx, ty)
|
||||||
|
else:
|
||||||
|
# horizontal
|
||||||
|
size = font.get_size() * fontsize
|
||||||
|
descent = font.get_descent() * fontsize
|
||||||
|
(_,descent) = apply_matrix_norm(self.matrix, (0, descent))
|
||||||
|
(dx,dy) = apply_matrix_norm(self.matrix, (self.adv, size))
|
||||||
|
(_,_,_,_,tx,ty) = self.matrix
|
||||||
ty += descent
|
ty += descent
|
||||||
bbox = (tx, ty, tx+dx, ty+dy)
|
bbox = (tx, ty, tx+dx, ty+dy)
|
||||||
else:
|
|
||||||
# vertical text
|
|
||||||
self.adv = (0, adv)
|
|
||||||
(_,cid) = chars[0]
|
|
||||||
(_,disp) = apply_matrix_norm(self.matrix, (0, (1000-font.char_disp(cid))*fontsize*.001))
|
|
||||||
(dx,dy) = apply_matrix_norm(self.matrix, (size,adv))
|
|
||||||
tx -= dx/2
|
|
||||||
ty += disp
|
|
||||||
bbox = (tx, ty+dy, tx+dx, ty)
|
|
||||||
self.fontsize = max(apply_matrix_norm(self.matrix, (size,size)))
|
|
||||||
LayoutItem.__init__(self, bbox)
|
LayoutItem.__init__(self, bbox)
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
if self.debug:
|
if self.debug:
|
||||||
return ('<text matrix=%s font=%r fontsize=%.1f bbox=%s adv=%s text=%r>' %
|
return ('<char matrix=%s font=%r fontsize=%.1f bbox=%s adv=%s text=%r>' %
|
||||||
('[%.1f, %.1f, %.1f, %.1f, (%.1f, %.1f)]' % self.matrix,
|
(matrix2str(self.matrix), self.font, self.fontsize,
|
||||||
self.font, self.fontsize, strbbox(self.bbox),
|
bbox2str(self.bbox), self.adv, self.text))
|
||||||
'(%.1f, %.1f)' % self.adv,
|
|
||||||
self.text))
|
|
||||||
else:
|
else:
|
||||||
return '<text %r>' % self.text
|
return '<char %r>' % self.text
|
||||||
|
|
||||||
def get_margin(self):
|
def get_margin(self):
|
||||||
return abs(self.fontsize)
|
return min(self.width, self.height)
|
||||||
|
|
||||||
|
def get_size(self):
|
||||||
|
return max(self.width, self.height)
|
||||||
|
|
||||||
def is_vertical(self):
|
def is_vertical(self):
|
||||||
return self.vertical
|
return self.vertical
|
||||||
|
@ -383,7 +388,7 @@ class LTTextItem(LayoutItem, LTText):
|
||||||
(a,b,c,d,e,f) = self.matrix
|
(a,b,c,d,e,f) = self.matrix
|
||||||
return 0 < a*d and b*c <= 0
|
return 0 < a*d and b*c <= 0
|
||||||
|
|
||||||
|
|
||||||
## LTFigure
|
## LTFigure
|
||||||
##
|
##
|
||||||
class LTFigure(LayoutContainer):
|
class LTFigure(LayoutContainer):
|
||||||
|
@ -397,7 +402,8 @@ class LTFigure(LayoutContainer):
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return ('<figure id=%r bbox=%s matrix=%r>' % (self.id, strbbox(self.bbox), self.matrix))
|
return ('<figure id=%r bbox=%s matrix=%s>' %
|
||||||
|
(self.id, bbox2str(self.bbox), matrix2str(self.matrix)))
|
||||||
|
|
||||||
|
|
||||||
## LTTextLine
|
## LTTextLine
|
||||||
|
@ -411,7 +417,7 @@ class LTTextLine(LayoutContainer):
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return ('<textline %s(%s)>' % (strbbox(self.bbox), self.direction))
|
return ('<textline %s(%s)>' % (bbox2str(self.bbox), self.direction))
|
||||||
|
|
||||||
def get_margin(self):
|
def get_margin(self):
|
||||||
return min(self.width, self.height)
|
return min(self.width, self.height)
|
||||||
|
@ -428,7 +434,7 @@ class LTTextLine(LayoutContainer):
|
||||||
if self.direction == 'V':
|
if self.direction == 'V':
|
||||||
y0 = -INF
|
y0 = -INF
|
||||||
for obj in sorted(self.objs, key=lambda obj: -obj.y1):
|
for obj in sorted(self.objs, key=lambda obj: -obj.y1):
|
||||||
if isinstance(obj, LTTextItem) and self.word_margin:
|
if isinstance(obj, LTChar) and self.word_margin:
|
||||||
margin = self.word_margin * obj.get_margin()
|
margin = self.word_margin * obj.get_margin()
|
||||||
if obj.y1+margin < y0:
|
if obj.y1+margin < y0:
|
||||||
objs.append(LTAnon(' '))
|
objs.append(LTAnon(' '))
|
||||||
|
@ -437,7 +443,7 @@ class LTTextLine(LayoutContainer):
|
||||||
else:
|
else:
|
||||||
x1 = INF
|
x1 = INF
|
||||||
for obj in sorted(self.objs, key=lambda obj: obj.x0):
|
for obj in sorted(self.objs, key=lambda obj: obj.x0):
|
||||||
if isinstance(obj, LTTextItem) and self.word_margin:
|
if isinstance(obj, LTChar) and self.word_margin:
|
||||||
margin = self.word_margin * obj.get_margin()
|
margin = self.word_margin * obj.get_margin()
|
||||||
if x1 < obj.x0-margin:
|
if x1 < obj.x0-margin:
|
||||||
objs.append(LTAnon(' '))
|
objs.append(LTAnon(' '))
|
||||||
|
@ -461,7 +467,7 @@ class LTTextBox(LayoutContainer):
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return ('<textbox %s(%s) %r...>' % (strbbox(self.bbox), self.direction, self.get_text()[:20]))
|
return ('<textbox %s(%s) %r...>' % (bbox2str(self.bbox), self.direction, self.get_text()[:20]))
|
||||||
|
|
||||||
def get_text(self):
|
def get_text(self):
|
||||||
return ''.join( obj.get_text() for obj in self.objs if isinstance(obj, LTTextLine) )
|
return ''.join( obj.get_text() for obj in self.objs if isinstance(obj, LTTextLine) )
|
||||||
|
@ -517,7 +523,7 @@ class LTPage(LayoutContainer):
|
||||||
return
|
return
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return ('<page id=%r bbox=%s rotate=%r>' % (self.id, strbbox(self.bbox), self.rotate))
|
return ('<page id=%r bbox=%s rotate=%r>' % (self.id, bbox2str(self.bbox), self.rotate))
|
||||||
|
|
||||||
def analyze_layout(self, laparams):
|
def analyze_layout(self, laparams):
|
||||||
textobjs = []
|
textobjs = []
|
||||||
|
|
|
@ -59,9 +59,6 @@ class PDFTextDevice(PDFDevice):
|
||||||
print >>sys.stderr, 'undefined: %r, %r' % (cidcoding, cid)
|
print >>sys.stderr, 'undefined: %r, %r' % (cidcoding, cid)
|
||||||
return '?'
|
return '?'
|
||||||
|
|
||||||
def render_chars(self, matrix, font, fontsize, charspace, scaling, chars):
|
|
||||||
return (0, 0)
|
|
||||||
|
|
||||||
def render_string(self, textstate, seq):
|
def render_string(self, textstate, seq):
|
||||||
matrix = mult_matrix(textstate.matrix, self.ctm)
|
matrix = mult_matrix(textstate.matrix, self.ctm)
|
||||||
font = textstate.font
|
font = textstate.font
|
||||||
|
@ -82,76 +79,39 @@ class PDFTextDevice(PDFDevice):
|
||||||
|
|
||||||
def render_string_horizontal(self, seq, matrix, (x,y),
|
def render_string_horizontal(self, seq, matrix, (x,y),
|
||||||
font, fontsize, scaling, charspace, wordspace, dxscale):
|
font, fontsize, scaling, charspace, wordspace, dxscale):
|
||||||
chars = []
|
needcharspace = False
|
||||||
needspace = False
|
|
||||||
for obj in seq:
|
for obj in seq:
|
||||||
if isinstance(obj, int) or isinstance(obj, float):
|
if isinstance(obj, int) or isinstance(obj, float):
|
||||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
x -= obj*dxscale
|
||||||
fontsize, charspace, scaling, chars)
|
needcharspace = False
|
||||||
x += dx - obj*dxscale
|
|
||||||
y += dy
|
|
||||||
chars = []
|
|
||||||
needspace = False
|
|
||||||
else:
|
else:
|
||||||
for cid in font.decode(obj):
|
for cid in font.decode(obj):
|
||||||
try:
|
if needcharspace:
|
||||||
char = font.to_unichr(cid)
|
x += charspace
|
||||||
except PDFUnicodeNotDefined, e:
|
x += self.render_char(translate_matrix(matrix, (x,y)),
|
||||||
(cidcoding, cid) = e.args
|
font, fontsize, scaling, cid)
|
||||||
char = self.handle_undefined_char(cidcoding, cid)
|
needcharspace = True
|
||||||
chars.append((char, cid))
|
|
||||||
if cid == 32 and wordspace:
|
if cid == 32 and wordspace:
|
||||||
if needspace:
|
x += wordspace
|
||||||
x += charspace
|
|
||||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
|
||||||
fontsize, charspace, scaling, chars)
|
|
||||||
needspace = True
|
|
||||||
x += dx + wordspace
|
|
||||||
y += dy
|
|
||||||
chars = []
|
|
||||||
if chars:
|
|
||||||
if needspace:
|
|
||||||
x += charspace
|
|
||||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
|
||||||
fontsize, charspace, scaling, chars)
|
|
||||||
x += dx
|
|
||||||
y += dy
|
|
||||||
return (x, y)
|
return (x, y)
|
||||||
|
|
||||||
def render_string_vertical(self, seq, matrix, (x,y),
|
def render_string_vertical(self, seq, matrix, (x,y),
|
||||||
font, fontsize, scaling, charspace, wordspace, dxscale):
|
font, fontsize, scaling, charspace, wordspace, dxscale):
|
||||||
chars = []
|
needcharspace = False
|
||||||
needspace = False
|
|
||||||
for obj in seq:
|
for obj in seq:
|
||||||
if isinstance(obj, int) or isinstance(obj, float):
|
if isinstance(obj, int) or isinstance(obj, float):
|
||||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
y -= obj*dxscale
|
||||||
fontsize, charspace, scaling, chars)
|
needcharspace = False
|
||||||
x += dx
|
|
||||||
y += dy - obj*dxscale
|
|
||||||
chars = []
|
|
||||||
needspace = False
|
|
||||||
else:
|
else:
|
||||||
for cid in font.decode(obj):
|
for cid in font.decode(obj):
|
||||||
try:
|
if needcharspace:
|
||||||
char = font.to_unichr(cid)
|
y += charspace
|
||||||
except PDFUnicodeNotDefined, e:
|
y += self.render_char(translate_matrix(matrix, (x,y)),
|
||||||
(cidcoding, cid) = e.args
|
font, fontsize, scaling, cid)
|
||||||
char = self.handle_undefined_char(cidcoding, cid)
|
needcharspace = True
|
||||||
chars.append((char, cid))
|
|
||||||
if cid == 32 and wordspace:
|
if cid == 32 and wordspace:
|
||||||
if needspace:
|
y += wordspace
|
||||||
y += charspace
|
|
||||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
|
||||||
fontsize, charspace, scaling, chars)
|
|
||||||
needspace = True
|
|
||||||
x += dx
|
|
||||||
y += dy + wordspace
|
|
||||||
chars = []
|
|
||||||
if chars:
|
|
||||||
if needspace:
|
|
||||||
y += charspace
|
|
||||||
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
|
|
||||||
fontsize, charspace, scaling, chars)
|
|
||||||
x += dx
|
|
||||||
y += dy
|
|
||||||
return (x, y)
|
return (x, y)
|
||||||
|
|
||||||
|
def render_char(self, matrix, font, fontsize, scaling, cid):
|
||||||
|
return 0
|
||||||
|
|
|
@ -136,9 +136,12 @@ def enc(x, codec='ascii'):
|
||||||
x = x.replace('&','&amp;').replace('>','&gt;').replace('<','&lt;').replace('"','&quot;')
|
x = x.replace('&','&amp;').replace('>','&gt;').replace('<','&lt;').replace('"','&quot;')
|
||||||
return x.encode(codec, 'xmlcharrefreplace')
|
return x.encode(codec, 'xmlcharrefreplace')
|
||||||
|
|
||||||
def strbbox((x0,y0,x1,y1)):
|
def bbox2str((x0,y0,x1,y1)):
|
||||||
return '%.3f,%.3f,%.3f,%.3f' % (x0, y0, x1, y1)
|
return '%.3f,%.3f,%.3f,%.3f' % (x0, y0, x1, y1)
|
||||||
|
|
||||||
|
def matrix2str((a,b,c,d,e,f)):
|
||||||
|
return '[%.2f,%.2f,%.2f,%.2f, (%.2f,%.2f)]' % (a,b,c,d,e,f)
|
||||||
|
|
||||||
|
|
||||||
## ObjIdRange
|
## ObjIdRange
|
||||||
##
|
##
|
||||||
|
|
Loading…
Reference in New Issue