diff --git a/docs/index.html b/docs/index.html
index b119389..bf81c2e 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -19,7 +19,7 @@ Python PDF parser and analyzer
-Last Modified: Mon Feb 15 14:41:49 UTC 2010
+Last Modified: Sat Feb 27 03:58:45 UTC 2010
@@ -348,7 +348,8 @@ no stream header is displayed for the ease of saving it to a file.
Changes
-- 2010/02/15: Bugfixes. Thanks to Sean.
+
- 2010/02/27: Changed how the internal layout is handled (LTTextItem -> LTChar).
+
- 2010/02/15: Several bugfixes. Thanks to Sean.
- 2010/02/13: Bugfix and enhancement. Thanks to André Auzi.
- 2010/02/07: Several bugfixes. Thanks to Hiroshi Manabe.
- 2010/01/31: JPEG image extraction supported. Page rotation bug fixed.
diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index 973d3c6..170387d 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -5,9 +5,9 @@ from pdffont import PDFUnicodeNotDefined
from pdftypes import LITERALS_DCT_DECODE
from layout import LayoutContainer
from layout import LTPage, LTText, LTLine, LTRect, LTPolygon
-from layout import LTFigure, LTImage, LTTextItem, LTTextBox, LTTextLine
+from layout import LTFigure, LTImage, LTChar, LTTextBox, LTTextLine
from utils import apply_matrix_pt, mult_matrix
-from utils import enc, strbbox
+from utils import enc, bbox2str
## PDFPageAggregator
@@ -97,9 +97,8 @@ class PDFPageAggregator(PDFTextDevice):
self.cur_item.add(LTPolygon(gstate.linewidth, pts))
return
- def render_chars(self, matrix, font, fontsize, charspace, scaling, chars):
- if not chars: return (0, 0)
- item = LTTextItem(matrix, font, fontsize, charspace, scaling, chars)
+ def render_char(self, matrix, font, fontsize, scaling, cid):
+ item = LTChar(matrix, font, fontsize, scaling, cid)
self.cur_item.add(item)
return item.adv
@@ -202,15 +201,10 @@ class HTMLConverter(PDFConverter):
self.outfp.write('Page %s\n' % (page.id, page.id))
for child in item:
render(child)
- elif isinstance(item, LTTextItem):
- if item.vertical:
- wmode = 'tb-rl'
- else:
- wmode = 'lr-tb'
- self.outfp.write('' %
- (wmode, item.x0*self.scale, (self.yoffset-item.y1)*self.scale,
- item.fontsize*self.scale))
+ elif isinstance(item, LTChar):
+ self.outfp.write('' %
+ (item.x0*self.scale, (self.yoffset-item.y1)*self.scale,
+ item.get_size()*self.scale))
self.write(item.text)
self.outfp.write('\n')
if self.debug:
@@ -271,35 +265,40 @@ class XMLConverter(PDFConverter):
def render(item):
if isinstance(item, LTPage):
self.outfp.write('\n' %
- (item.id, strbbox(item.bbox), item.rotate))
+ (item.id, bbox2str(item.bbox), item.rotate))
for child in item:
render(child)
self.outfp.write('\n')
elif isinstance(item, LTLine) and item.direction:
- self.outfp.write('\n' % (item.linewidth, item.direction, strbbox(item.bbox)))
+ self.outfp.write('\n' %
+ (item.linewidth, item.direction, bbox2str(item.bbox)))
elif isinstance(item, LTRect):
- self.outfp.write('\n' % (item.linewidth, strbbox(item.bbox)))
+ self.outfp.write('\n' %
+ (item.linewidth, bbox2str(item.bbox)))
elif isinstance(item, LTPolygon):
- self.outfp.write('\n' % (item.linewidth, strbbox(item.bbox), item.get_pts()))
+ self.outfp.write('\n' %
+ (item.linewidth, bbox2str(item.bbox), item.get_pts()))
elif isinstance(item, LTFigure):
- self.outfp.write('