rename: LayoutItem -> LTItem, LayoutContainer -> LTContainer

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@203 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-04-10 11:29:30 +00:00
parent c81142aa44
commit 609c6e1f5f
2 changed files with 87 additions and 86 deletions

View File

@ -4,8 +4,7 @@ from pdfdevice import PDFDevice, PDFTextDevice
from pdffont import PDFUnicodeNotDefined from pdffont import PDFUnicodeNotDefined
from pdftypes import LITERALS_DCT_DECODE from pdftypes import LITERALS_DCT_DECODE
from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB
from layout import LayoutContainer from layout import LTContainer, LTPage, LTText, LTLine, LTRect, LTPolygon
from layout import LTPage, LTText, LTLine, LTRect, LTPolygon
from layout import LTFigure, LTImage, LTChar, LTTextLine, LTTextBox, LTTextGroup from layout import LTFigure, LTImage, LTChar, LTTextLine, LTTextBox, LTTextGroup
from utils import apply_matrix_pt, mult_matrix from utils import apply_matrix_pt, mult_matrix
from utils import enc, bbox2str, create_bmp from utils import enc, bbox2str, create_bmp
@ -150,7 +149,7 @@ class TextConverter(PDFConverter):
def render(item): def render(item):
if isinstance(item, LTText): if isinstance(item, LTText):
self.write(item.text) self.write(item.text)
elif isinstance(item, LayoutContainer): elif isinstance(item, LTContainer):
for child in item: for child in item:
render(child) render(child)
if isinstance(item, LTTextBox): if isinstance(item, LTTextBox):

View File

@ -47,22 +47,24 @@ class LAParams(object):
line_overlap=0.5, line_overlap=0.5,
char_margin=3.0, char_margin=3.0,
line_margin=0.5, line_margin=0.5,
word_margin=0.1): word_margin=0.1,
all_texts=False):
self.writing_mode = writing_mode self.writing_mode = writing_mode
self.line_overlap = line_overlap self.line_overlap = line_overlap
self.char_margin = char_margin self.char_margin = char_margin
self.line_margin = line_margin self.line_margin = line_margin
self.word_margin = word_margin self.word_margin = word_margin
self.all_texts = all_texts
return return
def __repr__(self): def __repr__(self):
return ('<LAParams: writing_mode=%r, char_margin=%.1f, line_margin=%.1f, word_margin=%.1f>' % return ('<LAParams: writing_mode=%r, char_margin=%.1f, line_margin=%.1f, word_margin=%.1f all_texts=%r>' %
(self.writing_mode, self.char_margin, self.line_margin, self.word_margin)) (self.writing_mode, self.char_margin, self.line_margin, self.word_margin, self.all_texts))
## LayoutItem ## LTItem
## ##
class LayoutItem(object): class LTItem(object):
def __init__(self, bbox): def __init__(self, bbox):
self.set_bbox(bbox) self.set_bbox(bbox)
@ -84,90 +86,48 @@ class LayoutItem(object):
return return
def is_hoverlap(self, obj): def is_hoverlap(self, obj):
assert isinstance(obj, LayoutItem) assert isinstance(obj, LTItem)
return obj.x0 <= self.x1 and self.x0 <= obj.x1 return obj.x0 <= self.x1 and self.x0 <= obj.x1
def hdistance(self, obj): def hdistance(self, obj):
assert isinstance(obj, LayoutItem) assert isinstance(obj, LTItem)
if self.is_hoverlap(obj): if self.is_hoverlap(obj):
return 0 return 0
else: else:
return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0)) return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
def hoverlap(self, obj): def hoverlap(self, obj):
assert isinstance(obj, LayoutItem) assert isinstance(obj, LTItem)
if self.is_hoverlap(obj): if self.is_hoverlap(obj):
return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0)) return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
else: else:
return 0 return 0
def is_voverlap(self, obj): def is_voverlap(self, obj):
assert isinstance(obj, LayoutItem) assert isinstance(obj, LTItem)
return obj.y0 <= self.y1 and self.y0 <= obj.y1 return obj.y0 <= self.y1 and self.y0 <= obj.y1
def vdistance(self, obj): def vdistance(self, obj):
assert isinstance(obj, LayoutItem) assert isinstance(obj, LTItem)
if self.is_voverlap(obj): if self.is_voverlap(obj):
return 0 return 0
else: else:
return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0)) return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
def voverlap(self, obj): def voverlap(self, obj):
assert isinstance(obj, LayoutItem) assert isinstance(obj, LTItem)
if self.is_voverlap(obj): if self.is_voverlap(obj):
return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0)) return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
else: else:
return 0 return 0
## LayoutContainer
##
class LayoutContainer(LayoutItem):
def __init__(self, bbox, objs=None):
LayoutItem.__init__(self, bbox)
if objs:
self.objs = objs[:]
else:
self.objs = []
return
def __repr__(self):
return ('<container %s>' % bbox2str(self.bbox))
def __iter__(self):
return iter(self.objs)
def __len__(self):
return len(self.objs)
def add(self, obj):
self.objs.append(obj)
return
def merge(self, container):
self.objs.extend(container.objs)
return
# fixate(): determines its boundary.
def fixate(self):
if not self.width and self.objs:
(bx0, by0, bx1, by1) = (INF, INF, -INF, -INF)
for obj in self.objs:
bx0 = min(bx0, obj.x0)
by0 = min(by0, obj.y0)
bx1 = max(bx1, obj.x1)
by1 = max(by1, obj.y1)
self.set_bbox((bx0, by0, bx1, by1))
return
## LTPolygon ## LTPolygon
## ##
class LTPolygon(LayoutItem): class LTPolygon(LTItem):
def __init__(self, linewidth, pts): def __init__(self, linewidth, pts):
LayoutItem.__init__(self, get_bounds(pts)) LTItem.__init__(self, get_bounds(pts))
self.pts = pts self.pts = pts
self.linewidth = linewidth self.linewidth = linewidth
return return
@ -196,10 +156,10 @@ class LTRect(LTPolygon):
## LTImage ## LTImage
## ##
class LTImage(LayoutItem): class LTImage(LTItem):
def __init__(self, name, stream, bbox): def __init__(self, name, stream, bbox):
LayoutItem.__init__(self, bbox) LTItem.__init__(self, bbox)
self.name = name self.name = name
self.stream = stream self.stream = stream
self.srcsize = (stream.get_any(('W', 'Width')), self.srcsize = (stream.get_any(('W', 'Width')),
@ -240,7 +200,7 @@ class LTAnon(LTText):
## LTChar ## LTChar
## ##
class LTChar(LayoutItem, LTText): class LTChar(LTItem, LTText):
debug = 0 debug = 0
@ -275,7 +235,7 @@ class LTChar(LayoutItem, LTText):
(_,_,_,_,tx,ty) = self.matrix (_,_,_,_,tx,ty) = self.matrix
ty += descent ty += descent
bbox = (tx, ty, tx+dx, ty+dy) bbox = (tx, ty, tx+dx, ty+dy)
LayoutItem.__init__(self, bbox) LTItem.__init__(self, bbox)
return return
def __repr__(self): def __repr__(self):
@ -297,30 +257,54 @@ class LTChar(LayoutItem, LTText):
return 0 < a*d and b*c <= 0 return 0 < a*d and b*c <= 0
## LTFigure ## LTContainer
## ##
class LTFigure(LayoutContainer): class LTContainer(LTItem):
def __init__(self, name, bbox, matrix): def __init__(self, bbox, objs=None):
(x,y,w,h) = bbox LTItem.__init__(self, bbox)
bbox = get_bounds( apply_matrix_pt(matrix, (p,q)) if objs:
for (p,q) in ((x,y), (x+w,y), (x,y+h), (x+w,y+h)) ) self.objs = objs[:]
self.name = name else:
self.matrix = matrix self.objs = []
LayoutContainer.__init__(self, bbox)
return return
def __repr__(self): def __repr__(self):
return ('<figure %r bbox=%s matrix=%s>' % return ('<container %s>' % bbox2str(self.bbox))
(self.name, bbox2str(self.bbox), matrix2str(self.matrix)))
def __iter__(self):
return iter(self.objs)
def __len__(self):
return len(self.objs)
def add(self, obj):
self.objs.append(obj)
return
def merge(self, container):
self.objs.extend(container.objs)
return
# fixate(): determines its boundary.
def fixate(self):
if not self.width and self.objs:
(bx0, by0, bx1, by1) = (INF, INF, -INF, -INF)
for obj in self.objs:
bx0 = min(bx0, obj.x0)
by0 = min(by0, obj.y0)
bx1 = max(bx1, obj.x1)
by1 = max(by1, obj.y1)
self.set_bbox((bx0, by0, bx1, by1))
return
## LTTextLine ## LTTextLine
## ##
class LTTextLine(LayoutContainer): class LTTextLine(LTContainer):
def __init__(self, objs): def __init__(self, objs):
LayoutContainer.__init__(self, (0,0,0,0), objs) LTContainer.__init__(self, (0,0,0,0), objs)
return return
def __repr__(self): def __repr__(self):
@ -336,7 +320,7 @@ class LTTextLineHorizontal(LTTextLine):
def __init__(self, objs, word_margin): def __init__(self, objs, word_margin):
LTTextLine.__init__(self, objs) LTTextLine.__init__(self, objs)
LayoutContainer.fixate(self) LTContainer.fixate(self)
objs = [] objs = []
x1 = INF x1 = INF
for obj in csort(self.objs, key=lambda obj: obj.x0): for obj in csort(self.objs, key=lambda obj: obj.x0):
@ -357,7 +341,7 @@ class LTTextLineVertical(LTTextLine):
def __init__(self, objs, word_margin): def __init__(self, objs, word_margin):
LTTextLine.__init__(self, objs) LTTextLine.__init__(self, objs)
LayoutContainer.fixate(self) LTContainer.fixate(self)
objs = [] objs = []
y0 = -INF y0 = -INF
for obj in csort(self.objs, key=lambda obj: -obj.y1): for obj in csort(self.objs, key=lambda obj: -obj.y1):
@ -380,10 +364,10 @@ class LTTextLineVertical(LTTextLine):
## A set of text objects that are grouped within ## A set of text objects that are grouped within
## a certain rectangular area. ## a certain rectangular area.
## ##
class LTTextBox(LayoutContainer): class LTTextBox(LTContainer):
def __init__(self, objs): def __init__(self, objs):
LayoutContainer.__init__(self, (0,0,0,0), objs) LTContainer.__init__(self, (0,0,0,0), objs)
self.index = None self.index = None
return return
@ -410,12 +394,12 @@ class LTTextBoxVertical(LTTextBox):
## LTTextGroup ## LTTextGroup
## ##
class LTTextGroup(LayoutContainer): class LTTextGroup(LTContainer):
def __init__(self, objs): def __init__(self, objs):
assert objs assert objs
LayoutContainer.__init__(self, (0,0,0,0), objs) LTContainer.__init__(self, (0,0,0,0), objs)
LayoutContainer.fixate(self) LTContainer.fixate(self)
return return
class LTTextGroupLRTB(LTTextGroup): class LTTextGroupLRTB(LTTextGroup):
@ -456,7 +440,7 @@ class Plane(object):
# place(obj): place an object in a certain area. # place(obj): place an object in a certain area.
def place(self, obj): def place(self, obj):
assert isinstance(obj, LayoutItem) assert isinstance(obj, LTItem)
self.xobjs.append((obj.x0, obj)) self.xobjs.append((obj.x0, obj))
self.xobjs.append((obj.x1, obj)) self.xobjs.append((obj.x1, obj))
self.yobjs.append((obj.y0, obj)) self.yobjs.append((obj.y0, obj))
@ -566,12 +550,30 @@ def group_boxes(groupfunc, objs, distfunc, debug=0):
return objs.pop() return objs.pop()
## LTFigure
##
class LTFigure(LTContainer):
def __init__(self, name, bbox, matrix):
(x,y,w,h) = bbox
bbox = get_bounds( apply_matrix_pt(matrix, (p,q))
for (p,q) in ((x,y), (x+w,y), (x,y+h), (x+w,y+h)) )
self.name = name
self.matrix = matrix
LTContainer.__init__(self, bbox)
return
def __repr__(self):
return ('<figure %r bbox=%s matrix=%s>' %
(self.name, bbox2str(self.bbox), matrix2str(self.matrix)))
## LTPage ## LTPage
## ##
class LTPage(LayoutContainer): class LTPage(LTContainer):
def __init__(self, pageid, bbox, rotate=0): def __init__(self, pageid, bbox, rotate=0):
LayoutContainer.__init__(self, bbox) LTContainer.__init__(self, bbox)
self.pageid = pageid self.pageid = pageid
self.rotate = rotate self.rotate = rotate
self.layout = None self.layout = None
@ -582,7 +584,7 @@ class LTPage(LayoutContainer):
def fixate(self, laparams): def fixate(self, laparams):
"""Perform the layout analysis.""" """Perform the layout analysis."""
LayoutContainer.fixate(self) LTContainer.fixate(self)
(textobjs, otherobjs) = self.get_textobjs() (textobjs, otherobjs) = self.get_textobjs()
if not laparams or not textobjs: return if not laparams or not textobjs: return
if laparams.writing_mode not in ('lr-tb', 'tb-rl'): if laparams.writing_mode not in ('lr-tb', 'tb-rl'):