diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 4a64dbe..6264750 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -10,9 +10,9 @@ from utils import apply_matrix_pt, mult_matrix from utils import enc, bbox2str, create_bmp -## PDFPageAggregator +## PDFLayoutAnalyzer ## -class PDFPageAggregator(PDFTextDevice): +class PDFLayoutAnalyzer(PDFTextDevice): def __init__(self, rsrcmgr, pageno=1, laparams=None): PDFTextDevice.__init__(self, rsrcmgr) @@ -29,13 +29,14 @@ class PDFPageAggregator(PDFTextDevice): self.cur_item = LTPage(self.pageno, mediabox) return - def end_page(self, _): + def end_page(self, page): assert not self.stack assert isinstance(self.cur_item, LTPage) self.cur_item.fixate() self.cur_item.analyze(self.laparams) self.pageno += 1 - return self.cur_item + self.receive_layout(self.cur_item) + return def begin_figure(self, name, bbox, matrix): self.stack.append(self.cur_item) @@ -95,13 +96,33 @@ class PDFPageAggregator(PDFTextDevice): self.cur_item.add(item) return item.adv + def receive_layout(self, ltpage): + return + + +## PDFPageAggregator +## +class PDFPageAggregator(PDFLayoutAnalyzer): + + def __init__(self, rsrcmgr, pageno=1, laparams=None): + PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams) + self.result = None + return + + def receive_layout(self, ltpage): + self.result = ltpage + return + + def get_result(self): + return self.result + ## PDFConverter ## -class PDFConverter(PDFPageAggregator): +class PDFConverter(PDFLayoutAnalyzer): def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None): - PDFPageAggregator.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams) + PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams) self.outfp = outfp self.codec = codec return @@ -148,7 +169,7 @@ class TextConverter(PDFConverter): self.outfp.write(text.encode(self.codec, 'ignore')) return - def end_page(self, page): + def receive_layout(self, ltpage): def render(item): if isinstance(item, LTText): self.write(item.text) @@ -157,10 +178,9 @@ class TextConverter(PDFConverter): render(child) if isinstance(item, LTTextBox): self.write('\n') - page = PDFConverter.end_page(self, page) if self.showpageno: - self.write('Page %s\n' % page.pageid) - render(page) + self.write('Page %s\n' % ltpage.pageid) + render(ltpage) self.write('\f') return @@ -198,7 +218,7 @@ class HTMLConverter(PDFConverter): self.outfp.write('\n') return - def end_page(self, page): + def receive_layout(self, ltpage): def render(item): if isinstance(item, LTPage): self.yoffset += item.y1 @@ -206,7 +226,7 @@ class HTMLConverter(PDFConverter): if self.showpageno: self.outfp.write('