more bugfixes.
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@194 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
ee34d8d549
commit
e536b3ef11
|
@ -19,7 +19,7 @@ Python PDF parser and analyzer
|
|||
|
||||
<div align=right class=lastmod>
|
||||
<!-- hhmts start -->
|
||||
Last Modified: Mon Mar 22 06:16:28 UTC 2010
|
||||
Last Modified: Tue Mar 23 10:29:24 UTC 2010
|
||||
<!-- hhmts end -->
|
||||
</div>
|
||||
|
||||
|
@ -335,17 +335,18 @@ no stream header is displayed for the ease of saving it to a file.
|
|||
<ul>
|
||||
<li> <A href="http://www.python.org/dev/peps/pep-0008/">PEP-8</a> and
|
||||
<a href="http://www.python.org/dev/peps/pep-0257/">PEP-257</a> conformance.
|
||||
<li> Better documentation.
|
||||
<li> Better text extraction / layout analysis.
|
||||
<li> Better API Documentation.
|
||||
<li> Robust error handling.
|
||||
<li> Crypt stream filter support. (More sample documents are needed!)
|
||||
<li> CCITTFax stream filter support.
|
||||
<li> Robust error handling.
|
||||
</ul>
|
||||
|
||||
<a name="changes"></a>
|
||||
<hr noshade>
|
||||
<h2>Changes</h2>
|
||||
<ul>
|
||||
<li> 2010/03/xx: Bugfixes. Thanks to Brian Berry and Lubos Pintes.
|
||||
<li> 2010/03/22: Improved layout analysis. Added regression tests.
|
||||
<li> 2010/03/12: A couple of bugfixes. Thanks to Sean Manefield.
|
||||
<li> 2010/02/27: Changed the way of internal layout handling. (LTTextItem -> LTChar)
|
||||
|
|
|
@ -361,7 +361,7 @@ class PDFDocument(object):
|
|||
self._initialized = True
|
||||
return
|
||||
(docid, param) = self.encryption
|
||||
if literal_name(param['Filter']) != 'Standard':
|
||||
if literal_name(param.get('Filter')) != 'Standard':
|
||||
raise PDFEncryptionError('Unknown filter: param=%r' % param)
|
||||
V = int_value(param.get('V', 0))
|
||||
if not (V == 1 or V == 2):
|
||||
|
@ -439,6 +439,7 @@ class PDFDocument(object):
|
|||
else:
|
||||
if STRICT:
|
||||
raise PDFSyntaxError('Cannot locate objid=%r' % objid)
|
||||
# return null for a nonexistent reference.
|
||||
return None
|
||||
if strmid:
|
||||
stream = stream_value(self.getobj(strmid))
|
||||
|
@ -588,6 +589,7 @@ class PDFParser(PSStackParser):
|
|||
return
|
||||
|
||||
KEYWORD_R = KWD('R')
|
||||
KEYWORD_NULL = KWD('null')
|
||||
KEYWORD_ENDOBJ = KWD('endobj')
|
||||
KEYWORD_STREAM = KWD('stream')
|
||||
KEYWORD_XREF = KWD('xref')
|
||||
|
@ -596,10 +598,16 @@ class PDFParser(PSStackParser):
|
|||
if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
|
||||
self.add_results(*self.pop(1))
|
||||
return
|
||||
|
||||
if token is self.KEYWORD_ENDOBJ:
|
||||
self.add_results(*self.pop(4))
|
||||
return
|
||||
|
||||
if token is self.KEYWORD_NULL:
|
||||
# null object
|
||||
self.push((pos, None))
|
||||
return
|
||||
|
||||
if token is self.KEYWORD_R:
|
||||
# reference to indirect object
|
||||
try:
|
||||
|
|
|
@ -537,7 +537,8 @@ class PSStackParser(PSBaseParser):
|
|||
(pos, objs) = self.end_type('d')
|
||||
if len(objs) % 2 != 0:
|
||||
raise PSSyntaxError('Invalid dictionary construct: %r' % objs)
|
||||
d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs))
|
||||
# construct a Python dictionary.
|
||||
d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) if v is not None )
|
||||
self.push((pos, d))
|
||||
except PSTypeError:
|
||||
if STRICT: raise
|
||||
|
|
Loading…
Reference in New Issue