bugfixes. thanks to Jakub Wilk
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@226 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
5181f265ce
commit
3f831c8104
|
@ -528,8 +528,9 @@ def group_lines(groupfunc, objs, findfunc, debug=0):
|
||||||
|
|
||||||
## group_boxes
|
## group_boxes
|
||||||
##
|
##
|
||||||
def group_boxes(groupfunc, objs, distfunc, debug=0):
|
def group_boxes(groupfunc, objs0, distfunc, debug=0):
|
||||||
assert objs
|
assert objs0
|
||||||
|
objs = objs0[:]
|
||||||
while 2 <= len(objs):
|
while 2 <= len(objs):
|
||||||
mindist = INF
|
mindist = INF
|
||||||
minpair = None
|
minpair = None
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import sys
|
import sys
|
||||||
from utils import mult_matrix
|
from utils import mult_matrix, translate_matrix
|
||||||
from utils import translate_matrix
|
from utils import enc, bbox2str
|
||||||
from pdffont import PDFUnicodeNotDefined
|
from pdffont import PDFUnicodeNotDefined
|
||||||
|
|
||||||
|
|
||||||
|
@ -129,7 +129,7 @@ class TagExtractor(PDFDevice):
|
||||||
self.outfp = outfp
|
self.outfp = outfp
|
||||||
self.codec = codec
|
self.codec = codec
|
||||||
self.pageno = 0
|
self.pageno = 0
|
||||||
self.tag = None
|
self.stack = []
|
||||||
return
|
return
|
||||||
|
|
||||||
def render_string(self, textstate, seq):
|
def render_string(self, textstate, seq):
|
||||||
|
@ -163,16 +163,16 @@ class TagExtractor(PDFDevice):
|
||||||
s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
|
s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
|
||||||
in sorted(props.iteritems()) )
|
in sorted(props.iteritems()) )
|
||||||
self.outfp.write('<%s%s>' % (enc(tag.name), s))
|
self.outfp.write('<%s%s>' % (enc(tag.name), s))
|
||||||
self.tag = tag
|
self.stack.append(tag)
|
||||||
return
|
return
|
||||||
|
|
||||||
def end_tag(self):
|
def end_tag(self):
|
||||||
assert self.tag
|
assert self.stack
|
||||||
self.outfp.write('</%s>' % enc(self.tag.name))
|
tag = self.stack.pop(-1)
|
||||||
self.tag = None
|
self.outfp.write('</%s>' % enc(tag.name))
|
||||||
return
|
return
|
||||||
|
|
||||||
def do_tag(self, tag, props=None):
|
def do_tag(self, tag, props=None):
|
||||||
self.begin_tag(tag, props)
|
self.begin_tag(tag, props)
|
||||||
self.tag = None
|
self.stack.pop(-1)
|
||||||
return
|
return
|
||||||
|
|
Loading…
Reference in New Issue