From 3f831c81043c888dafeefc820309beb21982cfb5 Mon Sep 17 00:00:00 2001 From: "yusuke.shinyama.dummy" Date: Sun, 13 Jun 2010 04:02:30 +0000 Subject: [PATCH] bugfixes. thanks to Jakub Wilk git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@226 1aa58f4a-7d42-0410-adbc-911cccaed67c --- pdfminer/layout.py | 5 +++-- pdfminer/pdfdevice.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pdfminer/layout.py b/pdfminer/layout.py index 79e8f9d..1b756bb 100644 --- a/pdfminer/layout.py +++ b/pdfminer/layout.py @@ -528,8 +528,9 @@ def group_lines(groupfunc, objs, findfunc, debug=0): ## group_boxes ## -def group_boxes(groupfunc, objs, distfunc, debug=0): - assert objs +def group_boxes(groupfunc, objs0, distfunc, debug=0): + assert objs0 + objs = objs0[:] while 2 <= len(objs): mindist = INF minpair = None diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index 2e3c347..e3af5fb 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import sys -from utils import mult_matrix -from utils import translate_matrix +from utils import mult_matrix, translate_matrix +from utils import enc, bbox2str from pdffont import PDFUnicodeNotDefined @@ -129,7 +129,7 @@ class TagExtractor(PDFDevice): self.outfp = outfp self.codec = codec self.pageno = 0 - self.tag = None + self.stack = [] return def render_string(self, textstate, seq): @@ -163,16 +163,16 @@ class TagExtractor(PDFDevice): s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v) in sorted(props.iteritems()) ) self.outfp.write('<%s%s>' % (enc(tag.name), s)) - self.tag = tag + self.stack.append(tag) return def end_tag(self): - assert self.tag - self.outfp.write('</%s>' % enc(self.tag.name)) - self.tag = None + assert self.stack + tag = self.stack.pop(-1) + self.outfp.write('</%s>' % enc(tag.name)) return def do_tag(self, tag, props=None): self.begin_tag(tag, props) - self.tag = None + self.stack.pop(-1) return