several bugfixes.

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@179 1aa58f4a-7d42-0410-adbc-911cccaed67c
2010-02-07 03:14:00 +00:00 · 2010-02-07 03:14:00 +00:00 · 538a605ac0
parent 63033599ce
commit 538a605ac0
6 changed files with 36 additions and 5 deletions
--- a/1
+++ b/1
@ -2,6 +2,7 @@ TODO
 Makefile
 README.txt
 setup.py
 docs/index.html
 pdfminer/Makefile
 pdfminer/__init__.py
 pdfminer/arcfour.py
--- a/docs/index.html
+++ b/docs/index.html
@ -19,7 +19,7 @@ Python PDF parser and analyzer
 <div align=right class=lastmod>
 <!-- hhmts start -->
-Last Modified: Sun Jan 31 11:11:26 JST 2010
+Last Modified: Sun Feb  7 12:13:27 JST 2010
 <!-- hhmts end -->
 </div>
@ -347,6 +347,7 @@ no stream header is displayed for the ease of saving it to a file.
 <hr noshade>
 <h2>Changes</h2>
 <ul>
 <li> 2010/02/07: Several bugfixes. Thanks to Hiroshi Manabe.
 <li> 2010/01/31: JPEG image extraction supported. Page rotation bug fixed. 
 <li> 2010/01/04: Python 2.6 warning removal. More doctest conversion.
 <li> 2010/01/01: CMap bug fix. Thanks to Winfried Plappert.
@ -399,7 +400,7 @@ no stream header is displayed for the ease of saving it to a file.
 (This is so-called MIT/X License)
 <p>
 <small>
-Copyright (c) 2004-2009  Yusuke Shinyama &lt;yusuke at cs dot nyu dot edu&gt;
+Copyright (c) 2004-2010  Yusuke Shinyama &lt;yusuke at cs dot nyu dot edu&gt;
 <p>
 Permission is hereby granted, free of charge, to any person
 obtaining a copy of this software and associated documentation
--- a/pdfminer/cmapdb.py
+++ b/pdfminer/cmapdb.py
@ -81,7 +81,12 @@ class IdentityCMap(object):
        return self.vertical
    def decode(self, code):
-        return unpack('>%dH' % (len(code)/2), code)
+        n = len(code)/2
        if n:
            return unpack('>%dH' % n, code)
        else:
            return ()
 ##  UnicodeMap
@ -363,3 +368,15 @@ class CMapParser(PSStackParser):
        self.push((pos, token))
        return
 # test
 def main(argv):
    """Test driver: run CMapParser over every file named in argv.

    argv is an argument list in sys.argv form; argv[0] is skipped.
    Each remaining argument is opened in binary mode and parsed by
    CMapParser into a fresh FileUnicodeMap.  Returns None.
    """
    for fname in argv[1:]:
        fp = open(fname, 'rb')
        try:
            CMapParser(FileUnicodeMap(), fp).run()
        finally:
            # Close the file even when parsing raises, so a bad input
            # does not leak the handle.
            fp.close()
    return
 if __name__ == '__main__': sys.exit(main(sys.argv))
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@ -52,7 +52,7 @@ class PDFPageAggregator(PDFTextDevice):
    def render_image(self, name, stream):
        assert isinstance(self.cur_item, LTFigure)
-        item = LTImage(name, stream['Filter'],
+        item = LTImage(name, stream.get('Filter'),
                       (stream['Width'], stream['Height']),
                       (self.cur_item.x0, self.cur_item.y0,
                        self.cur_item.x1, self.cur_item.y1),
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@ -47,6 +47,9 @@ class PDFBaseXRef(object):
    def get_trailer(self):
        """Return the trailer of this xref; not implemented in the base class."""
        raise NotImplementedError
    def get_objids(self):
        """Return the object IDs held by this xref; the base class has none."""
        return []
    def get_pos(self, objid):
        raise KeyError(objid)
@ -132,6 +135,9 @@ class PDFXRef(PDFBaseXRef):
    def get_trailer(self):
        """Return the trailer stored on this xref (self.trailer)."""
        return self.trailer
    def get_objids(self):
        """Return an iterator over the keys of self.offsets (the object IDs)."""
        return self.offsets.iterkeys()
    def get_pos(self, objid):
        try:
            (genno, pos) = self.offsets[objid]
@ -180,6 +186,12 @@ class PDFXRefStream(PDFBaseXRef):
    def get_trailer(self):
        """Return the trailer stored on this xref stream (self.trailer)."""
        return self.trailer
    def get_objids(self):
        """Yield every object ID covered by this cross-reference stream.

        Walks self.objid_ranges in order and, for each range, yields the
        integers from get_start_id() up to and including get_end_id().
        """
        for objid_range in self.objid_ranges:
            # The +1 is kept from the original expression, which presumably
            # treats get_end_id() as inclusive -- confirm against the range
            # class.  The stop value must be that integer itself, not a
            # comparison against an (undefined) objid.
            for objid in xrange(objid_range.get_start_id(),
                                objid_range.get_end_id()+1):
                yield objid
        return
    def get_pos(self, objid):
        offset = 0
        found = False
--- a/tools/dumppdf.py
+++ b/tools/dumppdf.py
@ -86,7 +86,7 @@ def dumptrailers(out, doc):
 def dumpallobjs(out, doc, codec=None):
    out.write('<pdf>')
    for xref in doc.xrefs:
-        for objid in xref.objids():
+        for objid in xref.get_objids():
            try:
                obj = doc.getobj(objid)
                if obj is None: continue