diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index ab58f2d..8334ab0 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -5,7 +5,7 @@ from pdffont import PDFUnicodeNotDefined
from pdftypes import LITERALS_DCT_DECODE
from layout import LayoutContainer
from layout import LTPage, LTText, LTLine, LTRect, LTPolygon
-from layout import LTFigure, LTImage, LTChar, LTTextBox, LTTextLine
+from layout import LTFigure, LTImage, LTChar, LTTextBox, LTTextFlow, LTTextLine
from utils import apply_matrix_pt, mult_matrix
from utils import enc, bbox2str
@@ -218,6 +218,11 @@ class HTMLConverter(PDFConverter):
self.write_rect('blue', 1, item.x0, self.yoffset-item.y1, item.width, item.height)
for child in item:
render(child)
+ elif isinstance(item, LTTextFlow):
+ for child in item:
+ render(child)
+ if self.debug:
+ self.write_rect('red', 1, item.x0, self.yoffset-item.y1, item.width, item.height)
elif isinstance(item, LTFigure):
self.write_rect('green', 1, item.x0, self.yoffset-item.y1, item.width, item.height)
for child in item:
@@ -294,6 +299,11 @@ class XMLConverter(PDFConverter):
for child in item:
render(child)
self.outfp.write('\n')
+ elif isinstance(item, LTTextFlow):
+ self.outfp.write('\n' % bbox2str(item.bbox))
+ for child in item:
+ render(child)
+ self.outfp.write('\n')
elif isinstance(item, LTChar):
self.outfp.write('' %
(enc(item.font.fontname), item.is_vertical(),
diff --git a/pdfminer/layout.py b/pdfminer/layout.py
index b182bde..8e9c8b2 100644
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@@ -50,6 +50,375 @@ class LAParams(object):
(self.direction, self.char_margin, self.line_margin, self.word_margin))
+## LayoutItem
+##
class LayoutItem(object):
    """Rectangular layout element described by a normalized bounding box.

    After construction (or any call to set_bbox) the instance exposes
    x0, y0, x1, y1, width, height and bbox, where bbox is the tuple
    (x0, y0, x1, y1) with x0 <= x1 and y0 <= y1.
    """

    def __init__(self, bbox):
        """Create the item from a 4-tuple (x0, y0, x1, y1)."""
        self.set_bbox(bbox)

    def __repr__(self):
        # The format string must contain a placeholder: formatting an
        # argument into a string without one raises TypeError.
        return '<item bbox=%s>' % bbox2str(self.bbox)

    def set_bbox(self, bbox):
        """Store bbox, swapping coordinates so that x0 <= x1 and y0 <= y1.

        bbox is a 4-tuple (x0, y0, x1, y1).  It is unpacked in the body
        rather than in the signature, because tuple parameters are only
        valid in Python 2.
        """
        (x0, y0, x1, y1) = bbox
        if x1 < x0:
            (x0, x1) = (x1, x0)
        if y1 < y0:
            (y0, y1) = (y1, y0)
        self.x0 = x0
        self.y0 = y0
        self.x1 = x1
        self.y1 = y1
        self.width = x1 - x0
        self.height = y1 - y0
        self.bbox = (x0, y0, x1, y1)

    def is_hoverlap(self, obj):
        """Return True if the x-ranges of self and obj touch or overlap."""
        assert isinstance(obj, LayoutItem)
        return obj.x0 <= self.x1 and self.x0 <= obj.x1

    def hdistance(self, obj):
        """Return the horizontal gap to obj, or 0 when the x-ranges overlap."""
        assert isinstance(obj, LayoutItem)
        if self.is_hoverlap(obj):
            return 0
        return min(abs(self.x0 - obj.x1), abs(self.x1 - obj.x0))

    def hoverlap(self, obj):
        """Return a horizontal overlap measure, or 0 when there is no overlap.

        NOTE(review): the value is the smaller of the two opposite-edge
        distances.  When one x-range fully contains the other this is
        larger than the width of the intersection; confirm that callers
        expect this before changing it.
        """
        assert isinstance(obj, LayoutItem)
        if self.is_hoverlap(obj):
            return min(abs(self.x0 - obj.x1), abs(self.x1 - obj.x0))
        return 0

    def is_voverlap(self, obj):
        """Return True if the y-ranges of self and obj touch or overlap."""
        assert isinstance(obj, LayoutItem)
        return obj.y0 <= self.y1 and self.y0 <= obj.y1

    def vdistance(self, obj):
        """Return the vertical gap to obj, or 0 when the y-ranges overlap."""
        assert isinstance(obj, LayoutItem)
        if self.is_voverlap(obj):
            return 0
        return min(abs(self.y0 - obj.y1), abs(self.y1 - obj.y0))

    def voverlap(self, obj):
        """Return a vertical overlap measure, or 0 when there is no overlap.

        NOTE(review): computed from opposite-edge distances exactly like
        hoverlap(), with the same caveat for fully contained ranges.
        """
        assert isinstance(obj, LayoutItem)
        if self.is_voverlap(obj):
            return min(abs(self.y0 - obj.y1), abs(self.y1 - obj.y0))
        return 0
+
+
+## LayoutContainer
+##
class LayoutContainer(LayoutItem):
    """A LayoutItem that also holds an ordered list of child objects."""

    def __init__(self, bbox, objs=None):
        """Create the container.

        bbox is a 4-tuple (x0, y0, x1, y1).  objs, when given and
        non-empty, is copied into a new list so the caller's sequence is
        never shared or mutated.
        """
        LayoutItem.__init__(self, bbox)
        # list() rather than objs[:] so that the copy is always a list:
        # slicing a tuple yields a tuple, on which add() would fail.
        self.objs = list(objs) if objs else []

    def __repr__(self):
        # The format string must contain a placeholder: formatting an
        # argument into a string without one raises TypeError.
        return '<container bbox=%s>' % bbox2str(self.bbox)

    def __iter__(self):
        return iter(self.objs)

    def __len__(self):
        return len(self.objs)

    def add(self, obj):
        """Append obj to the children."""
        self.objs.append(obj)

    def merge(self, container):
        """Append all children of another container to this one."""
        self.objs.extend(container.objs)

    # fixate(): determines the boundary of the container from its children.
    def fixate(self):
        """Set the bbox to the union of the children's boxes.

        Nothing happens unless the current width is zero and there is at
        least one child.
        """
        if not self.width and self.objs:
            # self.objs is non-empty here, so min()/max() are well defined.
            bx0 = min(obj.x0 for obj in self.objs)
            by0 = min(obj.y0 for obj in self.objs)
            bx1 = max(obj.x1 for obj in self.objs)
            by1 = max(obj.y1 for obj in self.objs)
            self.set_bbox((bx0, by0, bx1, by1))
+
+
+## LTPolygon
+##
class LTPolygon(LayoutItem):
    """Polygon item: a point list plus the line width used to draw it."""

    def __init__(self, linewidth, pts):
        # The bounding box is derived from the points by get_bounds().
        bounds = get_bounds(pts)
        LayoutItem.__init__(self, bounds)
        self.linewidth = linewidth
        self.pts = pts

    def get_pts(self):
        """Return the points as one 'x,y,x,y,...' string, 3 decimals each."""
        formatted = ['%.3f,%.3f' % pt for pt in self.pts]
        return ','.join(formatted)
+
+
+## LTLine
+##
class LTLine(LTPolygon):
    """Two-point polygon running from p0 to p1."""

    def __init__(self, linewidth, p0, p1):
        endpoints = [p0, p1]
        LTPolygon.__init__(self, linewidth, endpoints)
+
+
+## LTRect
+##
class LTRect(LTPolygon):
    """Rectangle expressed as a four-corner polygon."""

    def __init__(self, linewidth, bbox):
        """Build the rectangle from linewidth and a 4-tuple (x0, y0, x1, y1).

        The tuple is unpacked in the body rather than in the signature,
        because tuple parameters are only valid in Python 2.  Positional
        callers are unaffected.
        """
        (x0, y0, x1, y1) = bbox
        corners = [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]
        LTPolygon.__init__(self, linewidth, corners)
+
+
+## LTImage
+##
class LTImage(LayoutItem):
    """Image item: stores name, type, source size and raw data with a bbox."""

    def __init__(self, name, type, srcsize, bbox, data):
        """Store the image attributes.

        srcsize is unpacked as a (width, height) pair by __repr__.  The
        parameter name `type` shadows the builtin but is kept because it
        is part of the existing interface.
        """
        LayoutItem.__init__(self, bbox)
        self.name = name
        self.type = type
        self.srcsize = srcsize
        self.data = data

    def __repr__(self):
        (w, h) = self.srcsize
        # The format string must contain placeholders: formatting
        # arguments into a string without them raises TypeError.
        return '<image %s %s %sx%s>' % (self.name, self.type, w, h)
+
+
+## LTText
+##
class LTText(object):
    """Base class for objects that carry a piece of text in `self.text`."""

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        # The format string must contain a placeholder: formatting an
        # argument into a string without one raises TypeError.  The
        # one-element tuple keeps formatting correct even if text is
        # itself a tuple.
        return '<text %r>' % (self.text,)

    def is_upright(self):
        """Plain text objects are always reported as upright."""
        return True
+
+
+## LTAnon
+##
class LTAnon(LTText):
    """Text object that adds nothing to LTText.

    It derives from LTText only, so it carries text but no bounding box.
    """
+
+
+## LTChar
+##
class LTChar(LayoutItem, LTText):
    """A single character, with its text and a computed bounding box."""

    # When true, __repr__ includes matrix, font, size, bbox and advance.
    debug = 1

    def __init__(self, matrix, font, fontsize, scaling, cid):
        """Build the character and compute its bounding box.

        matrix is unpacked as a 6-tuple whose last two entries are used
        as the origin (tx, ty).  font must provide is_vertical(),
        char_width(), to_unichr(), get_size(), and either char_disp()
        (vertical fonts) or get_descent() (horizontal fonts).
        """
        self.matrix = matrix
        self.font = font
        self.fontsize = fontsize
        self.vertical = font.is_vertical()
        self.adv = font.char_width(cid) * fontsize * scaling
        try:
            text = font.to_unichr(cid)
        except PDFUnicodeNotDefined:
            # No Unicode mapping for this cid: use a placeholder.
            text = '?'
        LTText.__init__(self, text)
        # compute the boundary rectangle.
        size = font.get_size() * fontsize
        if self.vertical:
            # vertical
            displacement = (1000 - font.char_disp(cid)) * fontsize * .001
            (_, displacement) = apply_matrix_norm(self.matrix, (0, displacement))
            (dx, dy) = apply_matrix_norm(self.matrix, (size, self.adv))
            (_, _, _, _, tx, ty) = self.matrix
            tx -= dx/2
            ty += displacement
            bbox = (tx, ty+dy, tx+dx, ty)
        else:
            # horizontal
            descent = font.get_descent() * fontsize
            (_, descent) = apply_matrix_norm(self.matrix, (0, descent))
            (dx, dy) = apply_matrix_norm(self.matrix, (self.adv, size))
            (_, _, _, _, tx, ty) = self.matrix
            ty += descent
            bbox = (tx, ty, tx+dx, ty+dy)
        # LayoutItem normalizes the box, so corner order does not matter.
        LayoutItem.__init__(self, bbox)

    def __repr__(self):
        # Both format strings must contain placeholders: formatting
        # arguments into a string without them raises TypeError.
        if self.debug:
            return ('<char matrix=%s font=%r fontsize=%.1f bbox=%s adv=%s text=%r>' %
                    (matrix2str(self.matrix), self.font, self.fontsize,
                     bbox2str(self.bbox), self.adv, self.text))
        return '<char %r>' % (self.text,)

    def get_size(self):
        """Return the larger of the box width and height."""
        return max(self.width, self.height)

    def is_vertical(self):
        """Return the vertical flag captured from the font at construction."""
        return self.vertical

    def is_upright(self):
        """Return True when the matrix satisfies 0 < a*d and b*c <= 0."""
        (a, b, c, d, e, f) = self.matrix
        return 0 < a*d and b*c <= 0
+
+
+## LTFigure
+##
+class LTFigure(LayoutContainer):
+
+ def __init__(self, id, bbox, matrix):
+ (x,y,w,h) = bbox
+ bbox = get_bounds( apply_matrix_pt(matrix, (p,q))
+ for (p,q) in ((x,y), (x+w,y), (x,y+h), (x+w,y+h)) )
+ self.id = id
+ self.matrix = matrix
+ LayoutContainer.__init__(self, bbox)
+ return
+
+ def __repr__(self):
+ return ('