From b2d13db29ad9504d6bdcce35807b8d4bf3a2228a Mon Sep 17 00:00:00 2001 From: Yusuke Shinyama Date: Mon, 14 Feb 2011 22:51:20 +0900 Subject: [PATCH] code cleanup --- pdfminer/converter.py | 16 +++++++++------- pdfminer/pdfdevice.py | 13 +++++++------ pdfminer/utils.py | 6 +++++- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/pdfminer/converter.py b/pdfminer/converter.py index d59b34f..aa46ad9 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -17,9 +17,9 @@ class PDFLayoutAnalyzer(PDFTextDevice): def __init__(self, rsrcmgr, pageno=1, laparams=None): PDFTextDevice.__init__(self, rsrcmgr) - self.laparams = laparams self.pageno = pageno - self.stack = [] + self.laparams = laparams + self._stack = [] return def begin_page(self, page, ctm): @@ -31,7 +31,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): return def end_page(self, page): - assert not self.stack + assert not self._stack assert isinstance(self.cur_item, LTPage) if self.laparams is not None: self.cur_item.analyze(self.laparams) @@ -40,7 +40,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): return def begin_figure(self, name, bbox, matrix): - self.stack.append(self.cur_item) + self._stack.append(self.cur_item) self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm)) return @@ -49,7 +49,7 @@ class PDFLayoutAnalyzer(PDFTextDevice): assert isinstance(self.cur_item, LTFigure) if self.laparams is not None: self.cur_item.analyze(self.laparams) - self.cur_item = self.stack.pop() + self.cur_item = self._stack.pop() self.cur_item.add(fig) return @@ -222,6 +222,8 @@ class HTMLConverter(PDFConverter): self.pagemargin = pagemargin self.outdir = outdir self.yoffset = self.pagemargin + self.rect_colors = self.RECT_COLORS + self.text_colors = self.TEXT_COLORS self._font = None self._fontstack = [] self.write_header() @@ -248,7 +250,7 @@ class HTMLConverter(PDFConverter): return def place_rect(self, color, borderwidth, x, y, w, h): - color = self.RECT_COLORS.get(color) + color = self.rect_colors.get(color) if color is not None: self.write('\n' % @@ -272,7 +274,7 @@ class HTMLConverter(PDFConverter): return def place_text(self, color, text, x, y, size): - color = self.TEXT_COLORS.get(color) + color = self.text_colors.get(color) if color is not None: self.write('' % (color, x*self.scale, (self.yoffset-y)*self.scale, size*self.scale*self.fontscale)) diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index e724ff3..5980abd 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -119,12 +119,13 @@ class PDFTextDevice(PDFDevice): ## class TagExtractor(PDFDevice): - def __init__(self, rsrcmgr, outfp, codec='utf-8'): + def __init__(self, rsrcmgr, outfp, codec='utf-8', debug=0): PDFDevice.__init__(self, rsrcmgr) self.outfp = outfp self.codec = codec + self.debug = debug self.pageno = 0 - self.stack = [] + self._stack = [] return def render_string(self, textstate, seq): @@ -158,16 +159,16 @@ class TagExtractor(PDFDevice): s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v) in sorted(props.iteritems()) ) self.outfp.write('<%s%s>' % (enc(tag.name), s)) - self.stack.append(tag) + self._stack.append(tag) return def end_tag(self): - assert self.stack - tag = self.stack.pop(-1) + assert self._stack + tag = self._stack.pop(-1) self.outfp.write('' % enc(tag.name)) return def do_tag(self, tag, props=None): self.begin_tag(tag, props) - self.stack.pop(-1) + self._stack.pop(-1) return diff --git a/pdfminer/utils.py b/pdfminer/utils.py index 627e701..0eee82d 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -191,10 +191,14 @@ class ObjIdRange(object): ## class Plane(object): - def __init__(self, objs): + def __init__(self, objs=None): self._idxs = {} self._xobjs = [] self._yobjs = [] + if objs is not None: + for obj in objs: + self.add(obj) + self.finish() return def __repr__(self):