git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@126 1aa58f4a-7d42-0410-adbc-911cccaed67c

pull/1/head
yusuke.shinyama.dummy 2009-08-24 06:54:28 +00:00
parent 5306109a0a
commit c813854ca2
3 changed files with 15 additions and 7 deletions

View File

@ -18,7 +18,7 @@ Python PDF parser and analyzer
<div align=right class=lastmod> <div align=right class=lastmod>
<!-- hhmts start --> <!-- hhmts start -->
Last Modified: Tue Jul 21 23:22:42 JST 2009 Last Modified: Mon Aug 24 15:53:58 JST 2009
<!-- hhmts end --> <!-- hhmts end -->
</div> </div>
@ -319,6 +319,7 @@ no stream header is displayed for the ease of saving it to a file.
<hr noshade> <hr noshade>
<h2>Changes</h2> <h2>Changes</h2>
<ul> <ul>
<li> 2009/08/24: Fixed a bug in character placing.
<li> 2009/07/21: Improvement in layout analysis. <li> 2009/07/21: Improvement in layout analysis.
<li> 2009/07/11: Improvement in layout analysis. Thanks to Lubos Pintes. <li> 2009/07/11: Improvement in layout analysis. Thanks to Lubos Pintes.
<li> 2009/05/17: Bugfixes, massive code restructuring, and simple graphic element support added. setup.py is supported. <li> 2009/05/17: Bugfixes, massive code restructuring, and simple graphic element support added. setup.py is supported.

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
__version__ = '20090721' __version__ = '20090824'
if __name__ == '__main__': print __version__ if __name__ == '__main__': print __version__

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
from pdfminer.utils import mult_matrix, translate_matrix from pdfminer.utils import mult_matrix, translate_matrix, apply_matrix_norm
from pdfminer.pdffont import PDFUnicodeNotDefined
## PDFDevice ## PDFDevice
@ -64,17 +65,20 @@ class PDFTextDevice(PDFDevice):
matrix = mult_matrix(textstate.matrix, self.ctm) matrix = mult_matrix(textstate.matrix, self.ctm)
font = textstate.font font = textstate.font
fontsize = textstate.fontsize fontsize = textstate.fontsize
charspace = textstate.charspace
scaling = textstate.scaling * .01 scaling = textstate.scaling * .01
charspace = textstate.charspace * scaling
wordspace = textstate.wordspace * scaling wordspace = textstate.wordspace * scaling
dxscale = scaling / (font.hscale*1000) * .01 dxscale = .001 * fontsize * scaling
chars = [] chars = []
(x,y) = textstate.linematrix (x,y) = textstate.linematrix
for obj in seq: for obj in seq:
if isinstance(obj, int) or isinstance(obj, float): if isinstance(obj, int) or isinstance(obj, float):
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font, (dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
fontsize, charspace, scaling, chars) fontsize, charspace, scaling, chars)
x += dx-obj*dxscale x += dx
y += dy
(dx,dy) = apply_matrix_norm(matrix, (-obj*dxscale,0))
x += dx
y += dy y += dy
chars = [] chars = []
else: else:
@ -88,7 +92,10 @@ class PDFTextDevice(PDFDevice):
if cid == 32 and textstate.wordspace and not font.is_multibyte(): if cid == 32 and textstate.wordspace and not font.is_multibyte():
(dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font, (dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
fontsize, charspace, scaling, chars) fontsize, charspace, scaling, chars)
x += dx + wordspace x += dx
y += dy
(dx,dy) = apply_matrix_norm(matrix, (wordspace,0))
x += dx
y += dy y += dy
chars = [] chars = []
if chars: if chars: