From 4eb6083c0966f3fd753639ffc9941c39af806080 Mon Sep 17 00:00:00 2001 From: Yusuke Shinyama Date: Mon, 3 Jan 2011 18:11:22 +0900 Subject: [PATCH] code cleanup --- pdfminer/converter.py | 62 ++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 0de43cb..d59b34f 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -257,6 +257,10 @@ class HTMLConverter(PDFConverter): w*self.scale, h*self.scale)) return + def place_border(self, color, borderwidth, item): + self.place_rect(color, borderwidth, item.x0, item.y1, item.width, item.height) + return + def place_image(self, item, borderwidth, x, y, w, h): if self.outdir is not None: name = self.write_image(item) @@ -309,20 +313,28 @@ class HTMLConverter(PDFConverter): return def receive_layout(self, ltpage): + def show_layout(item): + if isinstance(item, LTTextGroup): + self.place_border('textgroup', 1, item) + for child in item: + show_layout(child) + return def render(item): if isinstance(item, LTPage): self.yoffset += item.y1 - self.place_rect('page', 1, item.x0, item.y1, item.width, item.height) + self.place_border('page', 1, item) if self.showpageno: self.write('
' % ((self.yoffset-item.y1)*self.scale)) self.write('Page %s
\n' % (item.pageid, item.pageid)) for child in item: render(child) + if item.layout: + show_layout(item.layout) elif isinstance(item, LTPolygon): - self.place_rect('polygon', 1, item.x0, item.y1, item.width, item.height) + self.place_border('polygon', 1, item) elif isinstance(item, LTFigure): - self.place_rect('figure', 1, item.x0, item.y1, item.width, item.height) + self.place_border('figure', 1, item) for child in item: render(child) elif isinstance(item, LTImage): @@ -330,16 +342,16 @@ class HTMLConverter(PDFConverter): else: if self.layoutmode == 'exact': if isinstance(item, LTTextLine): - self.place_rect('textline', 1, item.x0, item.y1, item.width, item.height) + self.place_border('textline', 1, item) for child in item: render(child) elif isinstance(item, LTTextBox): - self.place_rect('textbox', 1, item.x0, item.y1, item.width, item.height) + self.place_border('textbox', 1, item) self.place_text('textbox', str(item.index+1), item.x0, item.y1, 20) for child in item: render(child) elif isinstance(item, LTChar): - self.place_rect('char', 1, item.x0, item.y1, item.width, item.height) + self.place_border('char', 1, item) self.place_text('char', item.text, item.x0, item.y1, item.size) else: if isinstance(item, LTTextLine): @@ -359,14 +371,6 @@ class HTMLConverter(PDFConverter): self.write_text(item.text) return render(ltpage) - if ltpage.layout: - def show_layout(item): - if isinstance(item, LTTextGroup): - self.place_rect('textgroup', 1, item.x0, item.y1, item.width, item.height) - for child in item: - show_layout(child) - return - show_layout(ltpage.layout) self.yoffset += self.pagemargin return @@ -399,12 +403,26 @@ class XMLConverter(PDFConverter): return def receive_layout(self, ltpage): + def show_layout(item): + if isinstance(item, LTTextBox): + self.outfp.write('\n' % + (item.index, bbox2str(item.bbox))) + elif isinstance(item, LTTextGroup): + self.outfp.write('\n' % bbox2str(item.bbox)) + for child in item: + show_layout(child) + self.outfp.write('\n') + return def render(item): if isinstance(item, LTPage): self.outfp.write('\n' % (item.pageid, bbox2str(item.bbox), item.rotate)) for child in item: render(child) + if item.layout: + self.outfp.write('\n') + show_layout(item.layout) + self.outfp.write('\n') self.outfp.write('\n') elif isinstance(item, LTLine): self.outfp.write('\n' % @@ -430,7 +448,8 @@ class XMLConverter(PDFConverter): wmode = '' if isinstance(item, LTTextBoxVertical): wmode = ' wmode="vertical"' - self.outfp.write('\n' % (item.index, bbox2str(item.bbox), wmode)) + self.outfp.write('\n' % + (item.index, bbox2str(item.bbox), wmode)) for child in item: render(child) self.outfp.write('\n') @@ -452,20 +471,7 @@ class XMLConverter(PDFConverter): else: assert 0, item return - def show_layout(item): - if isinstance(item, LTTextBox): - self.outfp.write('\n' % (item.index, bbox2str(item.bbox))) - elif isinstance(item, LTTextGroup): - self.outfp.write('\n' % bbox2str(item.bbox)) - for child in item: - show_layout(child) - self.outfp.write('\n') - return render(ltpage) - if ltpage.layout: - self.outfp.write('\n') - show_layout(ltpage.layout) - self.outfp.write('\n') return def close(self):