more bugfixes.

git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@194 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-03-23 10:29:52 +00:00
parent ee34d8d549
commit e536b3ef11
3 changed files with 15 additions and 5 deletions

View File

@ -19,7 +19,7 @@ Python PDF parser and analyzer
<div align=right class=lastmod> <div align=right class=lastmod>
<!-- hhmts start --> <!-- hhmts start -->
Last Modified: Mon Mar 22 06:16:28 UTC 2010 Last Modified: Tue Mar 23 10:29:24 UTC 2010
<!-- hhmts end --> <!-- hhmts end -->
</div> </div>
@ -335,17 +335,18 @@ no stream header is displayed for the ease of saving it to a file.
<ul> <ul>
<li> <A href="http://www.python.org/dev/peps/pep-0008/">PEP-8</a> and <li> <A href="http://www.python.org/dev/peps/pep-0008/">PEP-8</a> and
<a href="http://www.python.org/dev/peps/pep-0257/">PEP-257</a> conformance. <a href="http://www.python.org/dev/peps/pep-0257/">PEP-257</a> conformance.
<li> Better documentation.
<li> Better text extraction / layout analysis. <li> Better text extraction / layout analysis.
<li> Better API Documentation. <li> Robust error handling.
<li> Crypt stream filter support. (More sample documents are needed!) <li> Crypt stream filter support. (More sample documents are needed!)
<li> CCITTFax stream filter support. <li> CCITTFax stream filter support.
<li> Robust error handling.
</ul> </ul>
<a name="changes"></a> <a name="changes"></a>
<hr noshade> <hr noshade>
<h2>Changes</h2> <h2>Changes</h2>
<ul> <ul>
<li> 2010/03/xx: Bugfixes. Thanks to Brian Berry and Lubos Pintes.
<li> 2010/03/22: Improved layout analysis. Added regression tests. <li> 2010/03/22: Improved layout analysis. Added regression tests.
<li> 2010/03/12: A couple of bugfixes. Thanks to Sean Manefield. <li> 2010/03/12: A couple of bugfixes. Thanks to Sean Manefield.
<li> 2010/02/27: Changed the way of internal layout handling. (LTTextItem -&gt; LTChar) <li> 2010/02/27: Changed the way of internal layout handling. (LTTextItem -&gt; LTChar)

View File

@ -361,7 +361,7 @@ class PDFDocument(object):
self._initialized = True self._initialized = True
return return
(docid, param) = self.encryption (docid, param) = self.encryption
if literal_name(param['Filter']) != 'Standard': if literal_name(param.get('Filter')) != 'Standard':
raise PDFEncryptionError('Unknown filter: param=%r' % param) raise PDFEncryptionError('Unknown filter: param=%r' % param)
V = int_value(param.get('V', 0)) V = int_value(param.get('V', 0))
if not (V == 1 or V == 2): if not (V == 1 or V == 2):
@ -439,6 +439,7 @@ class PDFDocument(object):
else: else:
if STRICT: if STRICT:
raise PDFSyntaxError('Cannot locate objid=%r' % objid) raise PDFSyntaxError('Cannot locate objid=%r' % objid)
# return null for a nonexistent reference.
return None return None
if strmid: if strmid:
stream = stream_value(self.getobj(strmid)) stream = stream_value(self.getobj(strmid))
@ -588,6 +589,7 @@ class PDFParser(PSStackParser):
return return
KEYWORD_R = KWD('R') KEYWORD_R = KWD('R')
KEYWORD_NULL = KWD('null')
KEYWORD_ENDOBJ = KWD('endobj') KEYWORD_ENDOBJ = KWD('endobj')
KEYWORD_STREAM = KWD('stream') KEYWORD_STREAM = KWD('stream')
KEYWORD_XREF = KWD('xref') KEYWORD_XREF = KWD('xref')
@ -596,10 +598,16 @@ class PDFParser(PSStackParser):
if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
self.add_results(*self.pop(1)) self.add_results(*self.pop(1))
return return
if token is self.KEYWORD_ENDOBJ: if token is self.KEYWORD_ENDOBJ:
self.add_results(*self.pop(4)) self.add_results(*self.pop(4))
return return
if token is self.KEYWORD_NULL:
# null object
self.push((pos, None))
return
if token is self.KEYWORD_R: if token is self.KEYWORD_R:
# reference to indirect object # reference to indirect object
try: try:

View File

@ -537,7 +537,8 @@ class PSStackParser(PSBaseParser):
(pos, objs) = self.end_type('d') (pos, objs) = self.end_type('d')
if len(objs) % 2 != 0: if len(objs) % 2 != 0:
raise PSSyntaxError('Invalid dictionary construct: %r' % objs) raise PSSyntaxError('Invalid dictionary construct: %r' % objs)
d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs)) # construct a Python dictionary.
d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) if v is not None )
self.push((pos, d)) self.push((pos, d))
except PSTypeError: except PSTypeError:
if STRICT: raise if STRICT: raise