added LTText.get_text() and .text property is no longer accessible.

pull/1/head
Yusuke Shinyama 2011-05-14 15:45:08 +09:00
parent 5004e4b28d
commit 038ce4cd0c
2 changed files with 83 additions and 69 deletions

View File

@ -183,7 +183,7 @@ class TextConverter(PDFConverter):
for child in item:
render(child)
elif isinstance(item, LTText):
self.write_text(item.text)
self.write_text(item.get_text())
if isinstance(item, LTTextBox):
self.write_text('\n')
if self.showpageno:
@ -368,7 +368,7 @@ class HTMLConverter(PDFConverter):
render(child)
elif isinstance(item, LTChar):
self.place_border('char', 1, item)
self.place_text('char', item.text, item.x0, item.y1, item.size)
self.place_text('char', item.get_text(), item.x0, item.y1, item.size)
else:
if isinstance(item, LTTextLine):
for child in item:
@ -382,9 +382,9 @@ class HTMLConverter(PDFConverter):
render(child)
self.end_textbox('textbox')
elif isinstance(item, LTChar):
self.put_text(item.text, item.fontname, item.size)
self.put_text(item.get_text(), item.fontname, item.size)
elif isinstance(item, LTText):
self.write_text(item.text)
self.write_text(item.get_text())
return
render(ltpage)
self._yoffset += self.pagemargin
@ -472,10 +472,10 @@ class XMLConverter(PDFConverter):
elif isinstance(item, LTChar):
self.outfp.write('<text font="%s" bbox="%s" size="%.3f">' %
(enc(item.fontname), bbox2str(item.bbox), item.size))
self.write_text(item.text)
self.write_text(item.get_text())
self.outfp.write('</text>\n')
elif isinstance(item, LTText):
self.outfp.write('<text>%s</text>\n' % item.text)
self.outfp.write('<text>%s</text>\n' % item.get_text())
elif isinstance(item, LTImage):
if self.outdir:
name = self.write_image(item)

View File

@ -34,7 +34,29 @@ class LAParams(object):
##
class LTItem(object):
def analyze(self, laparams):
"""Perform the layout analysis."""
return
## LTText
##
class LTText(object):
def __repr__(self):
return ('<%s %r>' %
(self.__class__.__name__, self.get_text()))
def get_text(self):
raise NotImplementedError
## LTComponent
##
class LTComponent(LTItem):
def __init__(self, bbox):
LTItem.__init__(self)
self.set_bbox(bbox)
return
@ -56,54 +78,50 @@ class LTItem(object):
return self.width <= 0 or self.height <= 0
def is_hoverlap(self, obj):
assert isinstance(obj, LTItem)
assert isinstance(obj, LTComponent)
return obj.x0 <= self.x1 and self.x0 <= obj.x1
def hdistance(self, obj):
assert isinstance(obj, LTItem)
assert isinstance(obj, LTComponent)
if self.is_hoverlap(obj):
return 0
else:
return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
def hoverlap(self, obj):
assert isinstance(obj, LTItem)
assert isinstance(obj, LTComponent)
if self.is_hoverlap(obj):
return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
else:
return 0
def is_voverlap(self, obj):
assert isinstance(obj, LTItem)
assert isinstance(obj, LTComponent)
return obj.y0 <= self.y1 and self.y0 <= obj.y1
def vdistance(self, obj):
assert isinstance(obj, LTItem)
assert isinstance(obj, LTComponent)
if self.is_voverlap(obj):
return 0
else:
return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
def voverlap(self, obj):
assert isinstance(obj, LTItem)
assert isinstance(obj, LTComponent)
if self.is_voverlap(obj):
return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
else:
return 0
def analyze(self, laparams):
"""Perform the layout analysis."""
return
## LTCurve
##
class LTCurve(LTItem):
class LTCurve(LTComponent):
def __init__(self, linewidth, pts):
LTComponent.__init__(self, get_bound(pts))
self.pts = pts
self.linewidth = linewidth
LTItem.__init__(self, get_bound(pts))
return
def get_pts(self):
@ -130,10 +148,10 @@ class LTRect(LTCurve):
## LTImage
##
class LTImage(LTItem):
class LTImage(LTComponent):
def __init__(self, name, stream, bbox):
LTItem.__init__(self, bbox)
LTComponent.__init__(self, bbox)
self.name = name
self.stream = stream
self.srcsize = (stream.get_any(('W', 'Width')),
@ -146,41 +164,30 @@ class LTImage(LTItem):
return
def __repr__(self):
(w,h) = self.srcsize
return ('<%s(%s) %s %dx%d>' %
return ('<%s(%s) %s %r>' %
(self.__class__.__name__, self.name,
bbox2str(self.bbox), w, h))
## LTText
##
class LTText(object):
def __init__(self, text):
self.text = text
return
def __repr__(self):
return ('<%s %r>' %
(self.__class__.__name__, self.text))
bbox2str(self.bbox), self.srcsize))
## LTAnon
##
class LTAnon(LTText):
class LTAnon(LTItem, LTText):
def analyze(self, laparams):
def __init__(self, text):
self._text = text
return
def get_text(self):
return self._text
## LTChar
##
class LTChar(LTItem, LTText):
debug = 0
class LTChar(LTComponent, LTText):
def __init__(self, matrix, font, fontsize, scaling, rise, text, textwidth, textdisp):
LTText.__init__(self, text)
LTText.__init__(self)
self._text = text
self.matrix = matrix
self.fontname = font.fontname
self.adv = textwidth * fontsize * scaling
@ -213,7 +220,7 @@ class LTChar(LTItem, LTText):
(x0,x1) = (x1,x0)
if y1 < y0:
(y0,y1) = (y1,y0)
LTItem.__init__(self, (x0,y0,x1,y1))
LTComponent.__init__(self, (x0,y0,x1,y1))
if font.is_vertical():
self.size = self.width
else:
@ -221,13 +228,13 @@ class LTChar(LTItem, LTText):
return
def __repr__(self):
if self.debug:
return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
(self.__class__.__name__, bbox2str(self.bbox),
matrix2str(self.matrix), self.fontname,
self.adv, self.text))
else:
return '<char %r>' % self.text
return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
(self.__class__.__name__, bbox2str(self.bbox),
matrix2str(self.matrix), self.fontname, self.adv,
self.get_text()))
def get_text(self):
return self._text
def is_compatible(self, obj):
"""Returns True if two characters can coexist in the same line."""
@ -236,10 +243,10 @@ class LTChar(LTItem, LTText):
## LTContainer
##
class LTContainer(LTItem):
class LTContainer(LTComponent):
def __init__(self, bbox):
LTItem.__init__(self, bbox)
LTComponent.__init__(self, bbox)
self._objs = []
return
@ -279,23 +286,36 @@ class LTExpandableContainer(LTContainer):
return
## LTTextContainer
##
class LTTextContainer(LTExpandableContainer, LTText):
def __init__(self):
LTText.__init__(self)
LTExpandableContainer.__init__(self)
return
def get_text(self):
return ''.join( obj.get_text() for obj in self if isinstance(obj, LTText) )
## LTTextLine
##
class LTTextLine(LTExpandableContainer, LTText):
class LTTextLine(LTTextContainer):
def __init__(self, word_margin):
LTExpandableContainer.__init__(self)
LTTextContainer.__init__(self)
self.word_margin = word_margin
return
def __repr__(self):
return ('<%s %s %r>' %
(self.__class__.__name__, bbox2str(self.bbox), self.text))
(self.__class__.__name__, bbox2str(self.bbox),
self.get_text()))
def analyze(self, laparams):
LTExpandableContainer.analyze(self, laparams)
LTTextContainer.analyze(self, laparams)
LTContainer.add(self, LTAnon('\n'))
self.text = ''.join( obj.text for obj in self if isinstance(obj, LTText) )
return
def find_neighbors(self, plane, ratio):
@ -349,23 +369,17 @@ class LTTextLineVertical(LTTextLine):
## A set of text objects that are grouped within
## a certain rectangular area.
##
class LTTextBox(LTExpandableContainer):
class LTTextBox(LTTextContainer):
def __init__(self):
LTExpandableContainer.__init__(self)
LTTextContainer.__init__(self)
self.index = None
self.text = None
return
def __repr__(self):
return ('<%s(%s) %s %r>' %
(self.__class__.__name__, self.index,
bbox2str(self.bbox), self.text))
def analyze(self, laparams):
LTExpandableContainer.analyze(self, laparams)
self.text = ''.join( obj.text for obj in self if isinstance(obj, LTTextLine) )
return
(self.__class__.__name__,
self.index, bbox2str(self.bbox), self.get_text()))
class LTTextBoxHorizontal(LTTextBox):
@ -390,10 +404,10 @@ class LTTextBoxVertical(LTTextBox):
## LTTextGroup
##
class LTTextGroup(LTExpandableContainer):
class LTTextGroup(LTTextContainer):
def __init__(self, objs):
LTExpandableContainer.__init__(self)
LTTextContainer.__init__(self)
self.extend(objs)
return