diff --git a/README.html b/README.html
index 1e85876..0e6418b 100644
--- a/README.html
+++ b/README.html
@@ -164,19 +164,19 @@ $ <strong>python -m pdflib.pdf2txt -P mypassword secret.pdf</strong>
 Options:
 <dl>
 <dt> <code>-o <em>filename</em></code> 
-<dd> Speficies the output file name.
+<dd> Specifies the output file name.
 By default, it prints the extracted contents to stdout.
 <p>
 <dt> <code>-p <em>pageno[,pageno,...]</em></code> 
-<dd> Speficies the comma-separated list of the page numbers to be extracted. 
+<dd> Specifies the comma-separated list of the page numbers to be extracted. 
 Page numbers are starting from one.
 By default, it extracts texts from all the pages.
 <p>
 <dt> <code>-c <em>codec</em></code> 
-<dd> Speficies the output codec for non-ASCII texts.
+<dd> Specifies the output codec for non-ASCII texts.
 <p>
 <dt> <code>-t <em>type</em></code> 
-<dd> Speficies the output format. The following formats are currently supported.
+<dd> Specifies the output format. The following formats are currently supported.
 <ul>
 <li> <code>html</code> : HTML format. (Default)
 <li> <code>sgml</code> : SGML format.
@@ -221,14 +221,14 @@ Options:
 By default, it only prints the document trailer (like a header).
 <p>
 <dt> <code>-p <em>pageno</em></code> 
-<dd> Speficies the page number to be extracted. 
+<dd> Specifies the page number to be extracted.
 Multiple <code>-p</code> options are allowed.
 Note that page numbers start from one.
 <p>
 <dt> <code>-r</code> (raw)
 <dt> <code>-b</code> (binary)
 <dt> <code>-t</code> (text)
-<dd> Speficies the output format of stream contents.
+<dd> Specifies the output format of stream contents.
 Because the contents of stream objects can be very large,
 they are omitted when none of the options above is specified.
 <p>
diff --git a/pdflib/Makefile b/pdflib/Makefile
index efee7fc..19814b4 100644
--- a/pdflib/Makefile
+++ b/pdflib/Makefile
@@ -1,6 +1,36 @@
 # Makefile for pdfminer
 
-all:
+DESTDIR=/usr/local/src/pdflib
+
+PDFLIB = ${DESTDIR}/__init__.py \
+	${DESTDIR}/arcfour.py \
+	${DESTDIR}/ascii85.py \
+	${DESTDIR}/cmap.py \
+	${DESTDIR}/fontmetrics.py \
+	${DESTDIR}/glyphlist.py \
+	${DESTDIR}/latin_enc.py \
+	${DESTDIR}/lzw.py \
+	${DESTDIR}/pdf2txt.py \
+	${DESTDIR}/pdfcolor.py \
+	${DESTDIR}/pdfdevice.py \
+	${DESTDIR}/pdffont.py \
+	${DESTDIR}/pdfinterp.py \
+	${DESTDIR}/pdfparser.py \
+	${DESTDIR}/pdftypes.py \
+	${DESTDIR}/psparser.py \
+	${DESTDIR}/pycdb.py \
+	${DESTDIR}/rijndael.py \
+	${DESTDIR}/utils.py
+
+.PHONY: all clean
+
+# Copy one module into DESTDIR, creating the directory if it is missing.
+${DESTDIR}/%: %
+	mkdir -p ${DESTDIR}
+	cp $< $@
+	chmod 755 $@
+
+all: ${PDFLIB}
 
 clean:
 	-rm *.pyc *.pyo
diff --git a/pdflib/pdfparser.py b/pdflib/pdfparser.py
index a4156d2..df3554b 100755
--- a/pdflib/pdfparser.py
+++ b/pdflib/pdfparser.py
@@ -59,9 +59,13 @@ class PDFBaseXRef(object):
     return
 
   def objids(self):
-    for objid_range in self.objid_ranges:
-      for objid in xrange(objid_range.get_start_id(), objid_range.get_end_id() + 1):
-        yield objid
+    if self.objid_ranges:
+      for objid_range in self.objid_ranges:
+        for objid in xrange(objid_range.get_start_id(), objid_range.get_end_id() + 1):
+          yield objid
+    else:
+      for objid in getattr(self, 'offsets', None) or ():  # offsets may be unset/None
+        yield objid
     return
 
 ##  PDFXRef
@@ -70,6 +74,7 @@ class PDFXRef(PDFBaseXRef):
   def __init__(self):
     PDFBaseXRef.__init__(self)
     self.offsets = None
+    self.trailer = {}
     return
 
   def __repr__(self):
@@ -81,6 +86,8 @@ class PDFXRef(PDFBaseXRef):
     while 1:
       try:
         (pos, line) = parser.nextline()
+        if not line.strip():
+            continue
       except PSEOF:
         raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
       if not line:
@@ -112,7 +119,7 @@ class PDFXRef(PDFBaseXRef):
       print >>stderr, 'xref objects:', self.offsets
     self.load_trailer(parser)
     return
-  
+
   KEYWORD_TRAILER = PSKeywordTable.intern('trailer')
   def load_trailer(self, parser):
     try:
@@ -124,7 +131,7 @@ class PDFXRef(PDFBaseXRef):
       if not x:
         raise PDFNoValidXRef('Unexpected EOF - file corrupted')
       (_,dic) = x[0]
-    self.trailer = dict_value(dic)
+    self.trailer.update( dict_value(dic))
     return
 
   def getpos(self, objid):
@@ -199,7 +206,7 @@ class PDFXRefStream(PDFBaseXRef):
 ##  PDFPage
 ##
 class PDFPage(object):
-  
+
   def __init__(self, doc, pageid, attrs):
     self.doc = doc
     self.pageid = pageid
@@ -237,7 +244,7 @@ class PDFPage(object):
 class PDFDocument(object):
 
   debug = 0
-  
+
   def __init__(self):
     self.xrefs = []
     self.objs = {}
@@ -257,7 +264,7 @@ class PDFDocument(object):
     self.parser = parser
     # The document is set to be temporarily ready during collecting
     # all the basic information about the document, e.g.
-    # the header, the encryption information, and the access rights 
+    # the header, the encryption information, and the access rights
     # for the document.
     self.ready = True
     # Retrieve the information of each header that was appended
@@ -292,7 +299,7 @@ class PDFDocument(object):
       if STRICT:
         raise PDFSyntaxError('Catalog not found!')
     return
-  
+
   # initialize(password='')
   #   Perform the initialization with a given password.
   #   This step is mandatory even if there's no password associated
@@ -316,7 +323,7 @@ class PDFDocument(object):
       raise PDFEncryptionError('Unknown revision: %r' % R)
     U = str_value(param['U'])
     P = int_value(param['P'])
-    self.is_printable = bool(P & 4)        
+    self.is_printable = bool(P & 4)
     self.is_modifiable = bool(P & 8)
     self.is_extractable = bool(P & 16)
     # Algorithm 3.2
@@ -418,8 +425,18 @@ class PDFDocument(object):
         self.parser.seek(index)
         (_,objid1) = self.parser.nexttoken() # objid
         (_,genno) = self.parser.nexttoken() # genno
-        #assert objid1 == objid, (objid, objid1)
         (_,kwd) = self.parser.nexttoken()
+# #### hack around malformed pdf files
+#        assert objid1 == objid, (objid, objid1)
+        if objid1 != objid:
+            # Bad offset: scan on to the next 'obj', keeping every token so
+            # the two right before it are the real objid and genno.
+            x = [objid1, genno, kwd]
+            while kwd is not self.KEYWORD_OBJ:
+                (_,kwd) = self.parser.nexttoken()
+                x.append(kwd)
+            (objid1, genno) = x[-3:-1]
+# #### end hack around malformed pdf files
         if kwd is not self.KEYWORD_OBJ:
           raise PDFSyntaxError('Invalid object spec: offset=%r' % index)
         (_,obj) = self.parser.nextobject()
@@ -431,7 +448,7 @@ class PDFDocument(object):
     if self.decipher:
       obj = decipher_all(self.decipher, objid, genno, obj)
     return obj
-  
+
   INHERITABLE_ATTRS = set(['Resources', 'MediaBox', 'CropBox', 'Rotate'])
   def get_pages(self):
     if not self.ready:
@@ -526,7 +543,7 @@ class PDFParser(PSStackParser):
     if token is self.KEYWORD_ENDOBJ:
       self.add_results(*self.pop(4))
       return
-    
+
     if token is self.KEYWORD_R:
       # reference to indirect object
       try:
@@ -537,7 +554,7 @@ class PDFParser(PSStackParser):
       except PSSyntaxError:
         pass
       return
-      
+
     if token is self.KEYWORD_STREAM:
       # stream object
       ((_,dic),) = self.pop(1)
@@ -580,7 +597,7 @@ class PDFParser(PSStackParser):
       obj = PDFStream(dic, data, self.doc.decipher)
       self.push((pos, obj))
       return
-    
+
     # others
     self.push((pos, token))
     return
@@ -611,17 +628,15 @@ class PDFParser(PSStackParser):
       raise PDFNoValidXRef('Unexpected EOF')
     if 2 <= self.debug:
       print >>stderr, 'read_xref_from: start=%d, token=%r' % (start, token)
-    if isinstance(token, int):
+    try:
       # XRefStream: PDF-1.5
       self.seek(pos)
       self.reset()
       xref = PDFXRefStream()
       xref.load(self, debug=self.debug)
-    else:
-      if token is not self.KEYWORD_XREF:
-        raise PDFNoValidXRef('xref not found: pos=%d, token=%r' % 
-                             (pos, token))
-      self.nextline()
+    except Exception:
+      self.seek(pos); self.reset()  # failed attempt may have consumed input: rewind
+      if token is self.KEYWORD_XREF: self.nextline()
       xref = PDFXRef()
       xref.load(self, debug=self.debug)
     xrefs.append(xref)
@@ -636,7 +651,7 @@ class PDFParser(PSStackParser):
       pos = int_value(trailer['Prev'])
       self.read_xref_from(pos, xrefs)
     return
-    
+
   # read xref tables and trailers
   def read_xref(self):
     xrefs = []
@@ -656,17 +671,17 @@ class PDFParser(PSStackParser):
           (pos, line) = self.nextline()
         except PSEOF:
           break
-        if line.startswith('trailer'): break
+        if line.startswith('trailer'):
+          xref.offsets = offsets
+          self.seek(pos)
+          xref.load_trailer(self)
+          if 1 <= self.debug:
+            print >>stderr, 'trailer: %r' % xref.trailer
+          continue
         m = pat.match(line)
         if not m: continue
         (objid, genno) = m.groups()
         offsets[int(objid)] = (0, pos)
-      if not offsets: raise
-      xref.offsets = offsets
-      self.seek(pos)
-      xref.load_trailer(self)
-      if 1 <= self.debug:
-        print >>stderr, 'trailer: %r' % xref.trailer
       xrefs.append(xref)
     return xrefs
 
@@ -674,7 +689,7 @@ class PDFParser(PSStackParser):
 ##  PDFObjStrmParser
 ##
 class PDFObjStrmParser(PDFParser):
-  
+
   def __init__(self, doc, data):
     try:
       from cStringIO import StringIO
@@ -682,7 +697,7 @@ class PDFObjStrmParser(PDFParser):
       from StringIO import StringIO
     PDFParser.__init__(self, doc, StringIO(data))
     return
-  
+
   def flush(self):
     self.add_results(*self.popall())
     return
diff --git a/pdflib/pdftypes.py b/pdflib/pdftypes.py
index 571caf4..2b85f7a 100644
--- a/pdflib/pdftypes.py
+++ b/pdflib/pdftypes.py
@@ -159,6 +159,20 @@ class PDFStream(PDFObject):
   def __repr__(self):
     return '<PDFStream(%r): raw=%d, %r>' % (self.objid, len(self.rawdata), self.dic)
 
+  def decomp(self,data):
+    import zlib
+    buf = data
+    # some FlateDecode streams have garbage (newlines, etc) appended to the
+    # end.  remove chars from the end to try and decompress the buffer
+    # (one full retry per trimmed byte, so this is slow on large bad streams).
+    while len(buf) > 10:
+      try:
+        # will get errors if the document is encrypted.
+        return zlib.decompressobj().decompress(buf)
+      except zlib.error:
+        buf = buf[:-1]
+    raise zlib.error("zlib.error while decompressing data")
+
   def decode(self):
     assert self.data == None and self.rawdata != None
     data = self.rawdata
@@ -175,7 +189,7 @@ class PDFStream(PDFObject):
     for f in filters:
       if f in LITERALS_FLATE_DECODE:
         # will get errors if the document is encrypted.
-        data = zlib.decompress(data)
+        data = self.decomp(data)
       elif f in LITERALS_LZW_DECODE:
         try:
           from cStringIO import StringIO
diff --git a/tools/dumppdf.py b/tools/dumppdf.py
index 4bad243..8eae1fe 100755
--- a/tools/dumppdf.py
+++ b/tools/dumppdf.py
@@ -29,7 +29,7 @@ def dumpxml(out, obj, codec=None):
       out.write('</value>\n')
     out.write('</dict>')
     return
-  
+
   if isinstance(obj, list):
     out.write('<list size="%d">\n' % len(obj))
     for v in obj:
@@ -37,11 +37,11 @@ def dumpxml(out, obj, codec=None):
       out.write('\n')
     out.write('</list>')
     return
-  
+
   if isinstance(obj, str):
     out.write('<string size="%d">%s</string>' % (len(obj), esc(obj)))
     return
-  
+
   if isinstance(obj, PDFStream):
     out.write('<stream>\n<props>\n')
     dumpxml(out, obj.dic)
@@ -51,11 +51,11 @@ def dumpxml(out, obj, codec=None):
       out.write('<data size="%d">%s</data>\n' % (len(data), esc(data)))
     out.write('</stream>')
     return
-  
+
   if isinstance(obj, PDFObjRef):
     out.write('<ref id="%d"/>' % obj.objid)
     return
-  
+
   if isinstance(obj, PSKeyword):
     out.write('<keyword>%s</keyword>' % obj.name)
     return
@@ -63,7 +63,7 @@ def dumpxml(out, obj, codec=None):
   if isinstance(obj, PSLiteral):
     out.write('<literal>%s</literal>' % obj.name)
     return
-  
+
   if isinstance(obj, int) or isinstance(obj, float):
     out.write('<number>%s</number>' % obj)
     return