more bugfixes.

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@194 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-03-23 10:29:52 +00:00
parent ee34d8d549
commit e536b3ef11
3 changed files with 15 additions and 5 deletions

View File

@ -19,7 +19,7 @@ Python PDF parser and analyzer
<div align=right class=lastmod>
<!-- hhmts start -->
Last Modified: Mon Mar 22 06:16:28 UTC 2010
Last Modified: Tue Mar 23 10:29:24 UTC 2010
<!-- hhmts end -->
</div>
@ -335,17 +335,18 @@ no stream header is displayed for the ease of saving it to a file.
<ul>
<li> <A href="http://www.python.org/dev/peps/pep-0008/">PEP-8</a> and
<a href="http://www.python.org/dev/peps/pep-0257/">PEP-257</a> conformance.
<li> Better documentation.
<li> Better text extraction / layout analysis.
<li> Better API Documentation.
<li> Robust error handling.
<li> Crypt stream filter support. (More sample documents are needed!)
<li> CCITTFax stream filter support.
<li> Robust error handling.
</ul>
<a name="changes"></a>
<hr noshade>
<h2>Changes</h2>
<ul>
<li> 2010/03/xx: Bugfixes. Thanks to Brian Berry and Lubos Pintes.
<li> 2010/03/22: Improved layout analysis. Added regression tests.
<li> 2010/03/12: A couple of bugfixes. Thanks to Sean Manefield.
<li> 2010/02/27: Changed how layout is handled internally. (LTTextItem -&gt; LTChar)

View File

@ -361,7 +361,7 @@ class PDFDocument(object):
self._initialized = True
return
(docid, param) = self.encryption
if literal_name(param['Filter']) != 'Standard':
if literal_name(param.get('Filter')) != 'Standard':
raise PDFEncryptionError('Unknown filter: param=%r' % param)
V = int_value(param.get('V', 0))
if not (V == 1 or V == 2):
@ -439,6 +439,7 @@ class PDFDocument(object):
else:
if STRICT:
raise PDFSyntaxError('Cannot locate objid=%r' % objid)
# in non-strict mode, a reference to a nonexistent object resolves to null (None).
return None
if strmid:
stream = stream_value(self.getobj(strmid))
@ -588,6 +589,7 @@ class PDFParser(PSStackParser):
return
KEYWORD_R = KWD('R')
KEYWORD_NULL = KWD('null')
KEYWORD_ENDOBJ = KWD('endobj')
KEYWORD_STREAM = KWD('stream')
KEYWORD_XREF = KWD('xref')
@ -596,10 +598,16 @@ class PDFParser(PSStackParser):
if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
self.add_results(*self.pop(1))
return
if token is self.KEYWORD_ENDOBJ:
self.add_results(*self.pop(4))
return
if token is self.KEYWORD_NULL:
# null object
self.push((pos, None))
return
if token is self.KEYWORD_R:
# reference to indirect object
try:

View File

@ -537,7 +537,8 @@ class PSStackParser(PSBaseParser):
(pos, objs) = self.end_type('d')
if len(objs) % 2 != 0:
raise PSSyntaxError('Invalid dictionary construct: %r' % objs)
d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs))
# construct a Python dictionary, omitting entries whose value is null (None).
d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) if v is not None )
self.push((pos, d))
except PSTypeError:
if STRICT: raise