bugfixes. thanks to Jakub Wilk

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@226 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-06-13 04:02:30 +00:00
parent 5181f265ce
commit 3f831c8104
2 changed files with 11 additions and 10 deletions

View File

@ -528,8 +528,9 @@ def group_lines(groupfunc, objs, findfunc, debug=0):
## group_boxes
##
def group_boxes(groupfunc, objs, distfunc, debug=0):
assert objs
def group_boxes(groupfunc, objs0, distfunc, debug=0):
assert objs0
objs = objs0[:]
while 2 <= len(objs):
mindist = INF
minpair = None

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
import sys
from utils import mult_matrix
from utils import translate_matrix
from utils import mult_matrix, translate_matrix
from utils import enc, bbox2str
from pdffont import PDFUnicodeNotDefined
@ -129,7 +129,7 @@ class TagExtractor(PDFDevice):
self.outfp = outfp
self.codec = codec
self.pageno = 0
self.tag = None
self.stack = []
return
def render_string(self, textstate, seq):
@ -163,16 +163,16 @@ class TagExtractor(PDFDevice):
s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
in sorted(props.iteritems()) )
self.outfp.write('<%s%s>' % (enc(tag.name), s))
self.tag = tag
self.stack.append(tag)
return
def end_tag(self):
assert self.tag
self.outfp.write('</%s>' % enc(self.tag.name))
self.tag = None
assert self.stack
tag = self.stack.pop(-1)
self.outfp.write('</%s>' % enc(tag.name))
return
def do_tag(self, tag, props=None):
self.begin_tag(tag, props)
self.tag = None
self.stack.pop(-1)
return