diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index bac6f19..b963443 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -183,7 +183,7 @@ class TextConverter(PDFConverter):
for child in item:
render(child)
elif isinstance(item, LTText):
- self.write_text(item.text)
+ self.write_text(item.get_text())
if isinstance(item, LTTextBox):
self.write_text('\n')
if self.showpageno:
@@ -368,7 +368,7 @@ class HTMLConverter(PDFConverter):
render(child)
elif isinstance(item, LTChar):
self.place_border('char', 1, item)
- self.place_text('char', item.text, item.x0, item.y1, item.size)
+ self.place_text('char', item.get_text(), item.x0, item.y1, item.size)
else:
if isinstance(item, LTTextLine):
for child in item:
@@ -382,9 +382,9 @@ class HTMLConverter(PDFConverter):
render(child)
self.end_textbox('textbox')
elif isinstance(item, LTChar):
- self.put_text(item.text, item.fontname, item.size)
+ self.put_text(item.get_text(), item.fontname, item.size)
elif isinstance(item, LTText):
- self.write_text(item.text)
+ self.write_text(item.get_text())
return
render(ltpage)
self._yoffset += self.pagemargin
@@ -472,10 +472,10 @@ class XMLConverter(PDFConverter):
elif isinstance(item, LTChar):
self.outfp.write('' %
(enc(item.fontname), bbox2str(item.bbox), item.size))
- self.write_text(item.text)
+ self.write_text(item.get_text())
self.outfp.write('\n')
elif isinstance(item, LTText):
- self.outfp.write('%s\n' % item.text)
+ self.outfp.write('%s\n' % item.get_text())
elif isinstance(item, LTImage):
if self.outdir:
name = self.write_image(item)
diff --git a/pdfminer/layout.py b/pdfminer/layout.py
index 560bb30..db4a33c 100644
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@@ -34,7 +34,29 @@ class LAParams(object):
##
class LTItem(object):
+ def analyze(self, laparams):
+ """Perform the layout analysis."""
+ return
+
+
+## LTText
+##
+class LTText(object):
+
+ def __repr__(self):
+ return ('<%s %r>' %
+ (self.__class__.__name__, self.get_text()))
+
+ def get_text(self):
+ raise NotImplementedError
+
+
+## LTComponent
+##
+class LTComponent(LTItem):
+
def __init__(self, bbox):
+ LTItem.__init__(self)
self.set_bbox(bbox)
return
@@ -56,54 +78,50 @@ class LTItem(object):
return self.width <= 0 or self.height <= 0
def is_hoverlap(self, obj):
- assert isinstance(obj, LTItem)
+ assert isinstance(obj, LTComponent)
return obj.x0 <= self.x1 and self.x0 <= obj.x1
def hdistance(self, obj):
- assert isinstance(obj, LTItem)
+ assert isinstance(obj, LTComponent)
if self.is_hoverlap(obj):
return 0
else:
return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
def hoverlap(self, obj):
- assert isinstance(obj, LTItem)
+ assert isinstance(obj, LTComponent)
if self.is_hoverlap(obj):
return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
else:
return 0
def is_voverlap(self, obj):
- assert isinstance(obj, LTItem)
+ assert isinstance(obj, LTComponent)
return obj.y0 <= self.y1 and self.y0 <= obj.y1
def vdistance(self, obj):
- assert isinstance(obj, LTItem)
+ assert isinstance(obj, LTComponent)
if self.is_voverlap(obj):
return 0
else:
return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
def voverlap(self, obj):
- assert isinstance(obj, LTItem)
+ assert isinstance(obj, LTComponent)
if self.is_voverlap(obj):
return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
else:
return 0
- def analyze(self, laparams):
- """Perform the layout analysis."""
- return
-
## LTCurve
##
-class LTCurve(LTItem):
+class LTCurve(LTComponent):
def __init__(self, linewidth, pts):
+ LTComponent.__init__(self, get_bound(pts))
self.pts = pts
self.linewidth = linewidth
- LTItem.__init__(self, get_bound(pts))
return
def get_pts(self):
@@ -130,10 +148,10 @@ class LTRect(LTCurve):
## LTImage
##
-class LTImage(LTItem):
+class LTImage(LTComponent):
def __init__(self, name, stream, bbox):
- LTItem.__init__(self, bbox)
+ LTComponent.__init__(self, bbox)
self.name = name
self.stream = stream
self.srcsize = (stream.get_any(('W', 'Width')),
@@ -146,41 +164,30 @@ class LTImage(LTItem):
return
def __repr__(self):
- (w,h) = self.srcsize
- return ('<%s(%s) %s %dx%d>' %
+ return ('<%s(%s) %s %r>' %
(self.__class__.__name__, self.name,
- bbox2str(self.bbox), w, h))
-
-
-## LTText
-##
-class LTText(object):
-
- def __init__(self, text):
- self.text = text
- return
-
- def __repr__(self):
- return ('<%s %r>' %
- (self.__class__.__name__, self.text))
+ bbox2str(self.bbox), self.srcsize))
## LTAnon
##
-class LTAnon(LTText):
+class LTAnon(LTItem, LTText):
- def analyze(self, laparams):
+ def __init__(self, text):
+ self._text = text
return
+ def get_text(self):
+ return self._text
+
## LTChar
##
-class LTChar(LTItem, LTText):
-
- debug = 0
+class LTChar(LTComponent, LTText):
def __init__(self, matrix, font, fontsize, scaling, rise, text, textwidth, textdisp):
- LTText.__init__(self, text)
+ LTText.__init__(self)
+ self._text = text
self.matrix = matrix
self.fontname = font.fontname
self.adv = textwidth * fontsize * scaling
@@ -213,7 +220,7 @@ class LTChar(LTItem, LTText):
(x0,x1) = (x1,x0)
if y1 < y0:
(y0,y1) = (y1,y0)
- LTItem.__init__(self, (x0,y0,x1,y1))
+ LTComponent.__init__(self, (x0,y0,x1,y1))
if font.is_vertical():
self.size = self.width
else:
@@ -221,13 +228,13 @@ class LTChar(LTItem, LTText):
return
def __repr__(self):
- if self.debug:
- return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
- (self.__class__.__name__, bbox2str(self.bbox),
- matrix2str(self.matrix), self.fontname,
- self.adv, self.text))
- else:
- return '' % self.text
+ return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
+ (self.__class__.__name__, bbox2str(self.bbox),
+ matrix2str(self.matrix), self.fontname, self.adv,
+ self.get_text()))
+
+ def get_text(self):
+ return self._text
def is_compatible(self, obj):
"""Returns True if two characters can coexist in the same line."""
@@ -236,10 +243,10 @@ class LTChar(LTItem, LTText):
## LTContainer
##
-class LTContainer(LTItem):
+class LTContainer(LTComponent):
def __init__(self, bbox):
- LTItem.__init__(self, bbox)
+ LTComponent.__init__(self, bbox)
self._objs = []
return
@@ -279,23 +286,36 @@ class LTExpandableContainer(LTContainer):
return
+## LTTextContainer
+##
+class LTTextContainer(LTExpandableContainer, LTText):
+
+ def __init__(self):
+ LTText.__init__(self)
+ LTExpandableContainer.__init__(self)
+ return
+
+ def get_text(self):
+ return ''.join( obj.get_text() for obj in self if isinstance(obj, LTText) )
+
+
## LTTextLine
##
-class LTTextLine(LTExpandableContainer, LTText):
+class LTTextLine(LTTextContainer):
def __init__(self, word_margin):
- LTExpandableContainer.__init__(self)
+ LTTextContainer.__init__(self)
self.word_margin = word_margin
return
def __repr__(self):
return ('<%s %s %r>' %
- (self.__class__.__name__, bbox2str(self.bbox), self.text))
+ (self.__class__.__name__, bbox2str(self.bbox),
+ self.get_text()))
def analyze(self, laparams):
- LTExpandableContainer.analyze(self, laparams)
+ LTTextContainer.analyze(self, laparams)
LTContainer.add(self, LTAnon('\n'))
- self.text = ''.join( obj.text for obj in self if isinstance(obj, LTText) )
return
def find_neighbors(self, plane, ratio):
@@ -349,23 +369,17 @@ class LTTextLineVertical(LTTextLine):
## A set of text objects that are grouped within
## a certain rectangular area.
##
-class LTTextBox(LTExpandableContainer):
+class LTTextBox(LTTextContainer):
def __init__(self):
- LTExpandableContainer.__init__(self)
+ LTTextContainer.__init__(self)
self.index = None
- self.text = None
return
def __repr__(self):
return ('<%s(%s) %s %r>' %
- (self.__class__.__name__, self.index,
- bbox2str(self.bbox), self.text))
-
- def analyze(self, laparams):
- LTExpandableContainer.analyze(self, laparams)
- self.text = ''.join( obj.text for obj in self if isinstance(obj, LTTextLine) )
- return
+ (self.__class__.__name__,
+ self.index, bbox2str(self.bbox), self.get_text()))
class LTTextBoxHorizontal(LTTextBox):
@@ -390,10 +404,10 @@ class LTTextBoxVertical(LTTextBox):
## LTTextGroup
##
-class LTTextGroup(LTExpandableContainer):
+class LTTextGroup(LTTextContainer):
def __init__(self, objs):
- LTExpandableContainer.__init__(self)
+ LTTextContainer.__init__(self)
self.extend(objs)
return