several bugfixes.

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@179 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
yusuke.shinyama.dummy 2010-02-07 03:14:00 +00:00
parent 63033599ce
commit 538a605ac0
6 changed files with 36 additions and 5 deletions

View File

@ -2,6 +2,7 @@ TODO
Makefile
README.txt
setup.py
docs/index.html
pdfminer/Makefile
pdfminer/__init__.py
pdfminer/arcfour.py

View File

@ -19,7 +19,7 @@ Python PDF parser and analyzer
<div align=right class=lastmod>
<!-- hhmts start -->
Last Modified: Sun Jan 31 11:11:26 JST 2010
Last Modified: Sun Feb 7 12:13:27 JST 2010
<!-- hhmts end -->
</div>
@ -347,6 +347,7 @@ no stream header is displayed for the ease of saving it to a file.
<hr noshade>
<h2>Changes</h2>
<ul>
<li> 2010/02/07: Several bugfixes. Thanks to Hiroshi Manabe.
<li> 2010/01/31: JPEG image extraction supported. Page rotation bug fixed.
<li> 2010/01/04: Python 2.6 warning removal. More doctest conversion.
<li> 2010/01/01: CMap bug fix. Thanks to Winfried Plappert.
@ -399,7 +400,7 @@ no stream header is displayed for the ease of saving it to a file.
(This is so-called MIT/X License)
<p>
<small>
Copyright (c) 2004-2009 Yusuke Shinyama &lt;yusuke at cs dot nyu dot edu&gt;
Copyright (c) 2004-2010 Yusuke Shinyama &lt;yusuke at cs dot nyu dot edu&gt;
<p>
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation

View File

@ -81,7 +81,12 @@ class IdentityCMap(object):
return self.vertical
def decode(self, code):
    """Decode a byte string into a tuple of 16-bit big-endian integers.

    Every two bytes of `code` are unpacked as one unsigned short ('>H').
    Input shorter than two bytes yields an empty tuple instead of being
    handed to unpack().
    """
    # The previous unconditional `return unpack(...)` ran before this
    # guard and made it unreachable; it has been dropped.
    # Floor division keeps the count an integer under any division rules.
    n = len(code) // 2
    if n:
        return unpack('>%dH' % n, code)
    else:
        return ()
## UnicodeMap
@ -363,3 +368,15 @@ class CMapParser(PSStackParser):
self.push((pos, token))
return
# test
def main(argv):
    """Test driver: run CMapParser over every file named in argv[1:].

    argv -- full argument vector; argv[0] is skipped. Each remaining
            entry is opened in binary mode and parsed into a fresh
            FileUnicodeMap.

    Returns None.
    """
    for fname in argv[1:]:
        fp = open(fname, 'rb')
        try:
            cmap = FileUnicodeMap()
            CMapParser(cmap, fp).run()
        finally:
            # Close the file even when parsing raises.
            fp.close()
    return

if __name__ == '__main__': sys.exit(main(sys.argv))

View File

@ -52,7 +52,7 @@ class PDFPageAggregator(PDFTextDevice):
def render_image(self, name, stream):
assert isinstance(self.cur_item, LTFigure)
item = LTImage(name, stream['Filter'],
item = LTImage(name, stream.get('Filter'),
(stream['Width'], stream['Height']),
(self.cur_item.x0, self.cur_item.y0,
self.cur_item.x1, self.cur_item.y1),

View File

@ -47,6 +47,9 @@ class PDFBaseXRef(object):
def get_trailer(self):
    """Base-class placeholder: always raises NotImplementedError."""
    raise NotImplementedError()
def get_objids(self):
    """Return the object ids of this xref; the base version has none."""
    no_ids = []
    return no_ids
def get_pos(self, objid):
raise KeyError(objid)
@ -132,6 +135,9 @@ class PDFXRef(PDFBaseXRef):
def get_trailer(self):
    """Return the trailer stored on this xref (self.trailer)."""
    trailer = self.trailer
    return trailer
def get_objids(self):
    """Return an iterator over the keys of self.offsets (the object ids)."""
    offsets = self.offsets
    return offsets.iterkeys()
def get_pos(self, objid):
try:
(genno, pos) = self.offsets[objid]
@ -180,6 +186,12 @@ class PDFXRefStream(PDFBaseXRef):
def get_trailer(self):
    """Hand back this xref stream's trailer, read from self.trailer."""
    result = self.trailer
    return result
def get_objids(self):
    """Generate every object id covered by this xref stream.

    Walks self.objid_ranges in order and yields each integer from the
    range's get_start_id() through its get_end_id(), inclusive.
    """
    for objid_range in self.objid_ranges:
        # The old xrange() call used the undefined name `objid` and
        # passed a comparison result as its stop value.
        objid = objid_range.get_start_id()
        last_id = objid_range.get_end_id()
        while objid <= last_id:
            yield objid
            objid += 1
    return
def get_pos(self, objid):
offset = 0
found = False

View File

@ -86,7 +86,7 @@ def dumptrailers(out, doc):
def dumpallobjs(out, doc, codec=None):
out.write('<pdf>')
for xref in doc.xrefs:
for objid in xref.objids():
for objid in xref.get_objids():
try:
obj = doc.getobj(objid)
if obj is None: continue