minor bugfix
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@47 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
f95e0c15d2
commit
79f425b164
13
README.html
13
README.html
|
@ -9,9 +9,12 @@ blockquote { background: #eeeeee; }
|
||||||
</head><body>
|
</head><body>
|
||||||
|
|
||||||
<h1>PDFMiner</h1>
|
<h1>PDFMiner</h1>
|
||||||
|
<p>
|
||||||
|
Python PDF parser and analyzer
|
||||||
|
|
||||||
<div align=right class=lastmod>
|
<div align=right class=lastmod>
|
||||||
<!-- hhmts start -->
|
<!-- hhmts start -->
|
||||||
Last Modified: Sun Jul 27 13:29:39 JST 2008
|
Last Modified: Tue Jul 29 21:34:29 JST 2008
|
||||||
<!-- hhmts end -->
|
<!-- hhmts end -->
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@ -32,9 +35,9 @@ which could be useful for analyzing the document.
|
||||||
<strong>Features:</strong>
|
<strong>Features:</strong>
|
||||||
<ul>
|
<ul>
|
||||||
<li> Written entirely in Python. (for version 2.5 or newer)
|
<li> Written entirely in Python. (for version 2.5 or newer)
|
||||||
<li> PDF-1.7 specification.
|
<li> PDF-1.7 specification support.
|
||||||
<li> Non-ASCII languages and vertical writing scripts.
|
<li> Non-ASCII languages and vertical writing scripts support.
|
||||||
<li> Various font types (Type1, TrueType, Type3, and CID).
|
<li> Various font types (Type1, TrueType, Type3, and CID) support.
|
||||||
<li> Basic encryption (RC4).
|
<li> Basic encryption (RC4).
|
||||||
<li> PDF to HTML conversion.
|
<li> PDF to HTML conversion.
|
||||||
<li> Outline (TOC) extraction.
|
<li> Outline (TOC) extraction.
|
||||||
|
@ -129,7 +132,7 @@ it cannot extract texts embedded within images
|
||||||
You can provide a password for protected PDF documents
|
You can provide a password for protected PDF documents
|
||||||
whose access is limited.
|
whose access is limited.
|
||||||
<p>
|
<p>
|
||||||
For non-ASCII languages, you can speficy the output encoding
|
For non-ASCII languages, you can specify the output encoding
|
||||||
(such as UTF-8).
|
(such as UTF-8).
|
||||||
Note that not all characters in a PDF can be converted safely
|
Note that not all characters in a PDF can be converted safely
|
||||||
to Unicode, as some of them are not included in the current
|
to Unicode, as some of them are not included in the current
|
||||||
|
|
|
@ -981,13 +981,13 @@ class PDFPageInterpreter(object):
|
||||||
subtype = xobj.dic.get('Subtype')
|
subtype = xobj.dic.get('Subtype')
|
||||||
if subtype == LITERAL_FORM and 'BBox' in xobj.dic:
|
if subtype == LITERAL_FORM and 'BBox' in xobj.dic:
|
||||||
interpreter = self.dup()
|
interpreter = self.dup()
|
||||||
(x0,y0,x1,y1) = xobj.dic['BBox']
|
(x0,y0,x1,y1) = list_value(xobj.dic['BBox'])
|
||||||
ctm = mult_matrix(xobj.dic.get('Matrix', MATRIX_IDENTITY), self.ctm)
|
ctm = mult_matrix(list_value(xobj.dic.get('Matrix', MATRIX_IDENTITY)), self.ctm)
|
||||||
(x0,y0) = apply_matrix(ctm, (x0,y0))
|
(x0,y0) = apply_matrix(ctm, (x0,y0))
|
||||||
(x1,y1) = apply_matrix(ctm, (x1,y1))
|
(x1,y1) = apply_matrix(ctm, (x1,y1))
|
||||||
bbox = (x0,y0,x1,y1)
|
bbox = (x0,y0,x1,y1)
|
||||||
self.device.begin_figure(xobjid, bbox)
|
self.device.begin_figure(xobjid, bbox)
|
||||||
interpreter.render_contents(xobj.dic.get('Resources'), [xobj], ctm=ctm)
|
interpreter.render_contents(dict_value(xobj.dic.get('Resources')), [xobj], ctm=ctm)
|
||||||
self.device.end_figure(xobjid)
|
self.device.end_figure(xobjid)
|
||||||
elif subtype == LITERAL_IMAGE and 'Width' in xobj.dic and 'Height' in xobj.dic:
|
elif subtype == LITERAL_IMAGE and 'Width' in xobj.dic and 'Height' in xobj.dic:
|
||||||
(x0,y0) = apply_matrix(self.ctm, (0,0))
|
(x0,y0) = apply_matrix(self.ctm, (0,0))
|
||||||
|
|
Loading…
Reference in New Issue