diff --git a/docs/index.html b/docs/index.html
index ca0ea7e..215b5d2 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -19,7 +19,7 @@ Python PDF parser and analyzer
-Last Modified: Mon Mar 22 06:16:28 UTC 2010
+Last Modified: Tue Mar 23 10:29:24 UTC 2010
@@ -335,17 +335,18 @@ no stream header is displayed for the ease of saving it to a file.
- PEP-8 and
PEP-257 conformance.
+
- Better documentation.
- Better text extraction / layout analysis.
-
- Better API Documentation.
+
- Robust error handling.
- Crypt stream filter support. (More sample documents are needed!)
- CCITTFax stream filter support.
-
- Robust error handling.
Changes
+- 2010/03/xx: Bugfixes. Thanks to Brian Berry and Lubos Pintes.
- 2010/03/22: Improved layout analysis. Added regression tests.
- 2010/03/12: A couple of bugfixes. Thanks to Sean Manefield.
- 2010/02/27: Changed the way of internal layout handling. (LTTextItem -> LTChar)
diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py
index 2581888..44e515c 100644
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@@ -361,7 +361,7 @@ class PDFDocument(object):
self._initialized = True
return
(docid, param) = self.encryption
- if literal_name(param['Filter']) != 'Standard':
+ if literal_name(param.get('Filter')) != 'Standard':
raise PDFEncryptionError('Unknown filter: param=%r' % param)
V = int_value(param.get('V', 0))
if not (V == 1 or V == 2):
@@ -439,6 +439,7 @@ class PDFDocument(object):
else:
if STRICT:
raise PDFSyntaxError('Cannot locate objid=%r' % objid)
+ # a reference to a nonexistent object is treated as the null object, so return None.
return None
if strmid:
stream = stream_value(self.getobj(strmid))
@@ -588,6 +589,7 @@ class PDFParser(PSStackParser):
return
KEYWORD_R = KWD('R')
+ KEYWORD_NULL = KWD('null')
KEYWORD_ENDOBJ = KWD('endobj')
KEYWORD_STREAM = KWD('stream')
KEYWORD_XREF = KWD('xref')
@@ -596,10 +598,16 @@ class PDFParser(PSStackParser):
if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
self.add_results(*self.pop(1))
return
+
if token is self.KEYWORD_ENDOBJ:
self.add_results(*self.pop(4))
return
+ if token is self.KEYWORD_NULL:
+ # the 'null' keyword denotes the null object; push it onto the stack as None.
+ self.push((pos, None))
+ return
+
if token is self.KEYWORD_R:
# reference to indirect object
try:
diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py
index 64cb169..676793f 100644
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@@ -537,7 +537,8 @@ class PSStackParser(PSBaseParser):
(pos, objs) = self.end_type('d')
if len(objs) % 2 != 0:
raise PSSyntaxError('Invalid dictionary construct: %r' % objs)
- d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs))
+ # construct a Python dictionary, dropping entries whose value is null (None).
+ d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) if v is not None )
self.push((pos, d))
except PSTypeError:
if STRICT: raise