minor bugfix

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@47 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2008-07-29 15:02:20 +00:00
parent f95e0c15d2
commit 79f425b164
2 changed files with 11 additions and 8 deletions

View File

@@ -9,9 +9,12 @@ blockquote { background: #eeeeee; }
</head><body> </head><body>
<h1>PDFMiner</h1> <h1>PDFMiner</h1>
<p>
Python PDF parser and analyzer
<div align=right class=lastmod> <div align=right class=lastmod>
<!-- hhmts start --> <!-- hhmts start -->
Last Modified: Sun Jul 27 13:29:39 JST 2008 Last Modified: Tue Jul 29 21:34:29 JST 2008
<!-- hhmts end --> <!-- hhmts end -->
</div> </div>
@@ -32,9 +35,9 @@ which could be useful for analyzing the document.
<strong>Features:</strong> <strong>Features:</strong>
<ul> <ul>
<li> Written entirely in Python. (for version 2.5 or newer) <li> Written entirely in Python. (for version 2.5 or newer)
<li> PDF-1.7 specification. <li> PDF-1.7 specification support.
<li> Non-ASCII languages and vertical writing scripts. <li> Non-ASCII languages and vertical writing scripts support.
<li> Various font types (Type1, TrueType, Type3, and CID). <li> Various font types (Type1, TrueType, Type3, and CID) support.
<li> Basic encryption (RC4). <li> Basic encryption (RC4).
<li> PDF to HTML conversion. <li> PDF to HTML conversion.
<li> Outline (TOC) extraction. <li> Outline (TOC) extraction.
@@ -129,7 +132,7 @@ it cannot extract texts embedded within images
You can provide a password for protected PDF documents You can provide a password for protected PDF documents
whose access is limited. whose access is limited.
<p> <p>
For non-ASCII languages, you can speficy the output encoding For non-ASCII languages, you can specify the output encoding
(such as UTF-8). (such as UTF-8).
Note that not all characters in a PDF can be converted safely Note that not all characters in a PDF can be converted safely
to Unicode, as some of them are not included in the current to Unicode, as some of them are not included in the current

View File

@@ -981,13 +981,13 @@ class PDFPageInterpreter(object):
subtype = xobj.dic.get('Subtype') subtype = xobj.dic.get('Subtype')
if subtype == LITERAL_FORM and 'BBox' in xobj.dic: if subtype == LITERAL_FORM and 'BBox' in xobj.dic:
interpreter = self.dup() interpreter = self.dup()
(x0,y0,x1,y1) = xobj.dic['BBox'] (x0,y0,x1,y1) = list_value(xobj.dic['BBox'])
ctm = mult_matrix(xobj.dic.get('Matrix', MATRIX_IDENTITY), self.ctm) ctm = mult_matrix(list_value(xobj.dic.get('Matrix', MATRIX_IDENTITY)), self.ctm)
(x0,y0) = apply_matrix(ctm, (x0,y0)) (x0,y0) = apply_matrix(ctm, (x0,y0))
(x1,y1) = apply_matrix(ctm, (x1,y1)) (x1,y1) = apply_matrix(ctm, (x1,y1))
bbox = (x0,y0,x1,y1) bbox = (x0,y0,x1,y1)
self.device.begin_figure(xobjid, bbox) self.device.begin_figure(xobjid, bbox)
interpreter.render_contents(xobj.dic.get('Resources'), [xobj], ctm=ctm) interpreter.render_contents(dict_value(xobj.dic.get('Resources')), [xobj], ctm=ctm)
self.device.end_figure(xobjid) self.device.end_figure(xobjid)
elif subtype == LITERAL_IMAGE and 'Width' in xobj.dic and 'Height' in xobj.dic: elif subtype == LITERAL_IMAGE and 'Width' in xobj.dic and 'Height' in xobj.dic:
(x0,y0) = apply_matrix(self.ctm, (0,0)) (x0,y0) = apply_matrix(self.ctm, (0,0))