several bugfixes.
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@179 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
63033599ce
commit
538a605ac0
1
MANIFEST
1
MANIFEST
|
@ -2,6 +2,7 @@ TODO
|
|||
Makefile
|
||||
README.txt
|
||||
setup.py
|
||||
docs/index.html
|
||||
pdfminer/Makefile
|
||||
pdfminer/__init__.py
|
||||
pdfminer/arcfour.py
|
||||
|
|
|
@ -19,7 +19,7 @@ Python PDF parser and analyzer
|
|||
|
||||
<div align=right class=lastmod>
|
||||
<!-- hhmts start -->
|
||||
Last Modified: Sun Jan 31 11:11:26 JST 2010
|
||||
Last Modified: Sun Feb 7 12:13:27 JST 2010
|
||||
<!-- hhmts end -->
|
||||
</div>
|
||||
|
||||
|
@ -347,6 +347,7 @@ no stream header is displayed for the ease of saving it to a file.
|
|||
<hr noshade>
|
||||
<h2>Changes</h2>
|
||||
<ul>
|
||||
<li> 2010/02/07: Several bugfixes. Thanks to Hiroshi Manabe.
|
||||
<li> 2010/01/31: JPEG image extraction supported. Page rotation bug fixed.
|
||||
<li> 2010/01/04: Python 2.6 warning removal. More doctest conversion.
|
||||
<li> 2010/01/01: CMap bug fix. Thanks to Winfried Plappert.
|
||||
|
@ -399,7 +400,7 @@ no stream header is displayed for the ease of saving it to a file.
|
|||
(This is so-called MIT/X License)
|
||||
<p>
|
||||
<small>
|
||||
Copyright (c) 2004-2009 Yusuke Shinyama <yusuke at cs dot nyu dot edu>
|
||||
Copyright (c) 2004-2010 Yusuke Shinyama <yusuke at cs dot nyu dot edu>
|
||||
<p>
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
||||
|
|
|
@ -81,7 +81,12 @@ class IdentityCMap(object):
|
|||
return self.vertical
|
||||
|
||||
def decode(self, code):
|
||||
return unpack('>%dH' % (len(code)/2), code)
|
||||
n = len(code)/2
|
||||
if n:
|
||||
return unpack('>%dH' % n, code)
|
||||
else:
|
||||
return ()
|
||||
|
||||
|
||||
|
||||
## UnicodeMap
|
||||
|
@ -363,3 +368,15 @@ class CMapParser(PSStackParser):
|
|||
|
||||
self.push((pos, token))
|
||||
return
|
||||
|
||||
# test
|
||||
def main(argv):
    """Run CMapParser over each file named on the command line (test driver).

    argv -- full argument vector; argv[0] is skipped, and every remaining
            entry is opened in binary mode and parsed into a fresh
            FileUnicodeMap.

    Returns None.  Any error from opening or parsing a file propagates
    to the caller.
    """
    for fname in argv[1:]:
        # open() rather than file(): file() exists only in Python 2.
        fp = open(fname, 'rb')
        try:
            cmap = FileUnicodeMap()
            CMapParser(cmap, fp).run()
        finally:
            # Release the handle even when parsing raises.
            fp.close()
    return
|
||||
|
||||
if __name__ == '__main__': sys.exit(main(sys.argv))
|
||||
|
|
|
@ -52,7 +52,7 @@ class PDFPageAggregator(PDFTextDevice):
|
|||
|
||||
def render_image(self, name, stream):
|
||||
assert isinstance(self.cur_item, LTFigure)
|
||||
item = LTImage(name, stream['Filter'],
|
||||
item = LTImage(name, stream.get('Filter'),
|
||||
(stream['Width'], stream['Height']),
|
||||
(self.cur_item.x0, self.cur_item.y0,
|
||||
self.cur_item.x1, self.cur_item.y1),
|
||||
|
|
|
@ -47,6 +47,9 @@ class PDFBaseXRef(object):
|
|||
def get_trailer(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def get_objids(self):
|
||||
return []
|
||||
|
||||
def get_pos(self, objid):
|
||||
raise KeyError(objid)
|
||||
|
||||
|
@ -132,6 +135,9 @@ class PDFXRef(PDFBaseXRef):
|
|||
def get_trailer(self):
|
||||
return self.trailer
|
||||
|
||||
def get_objids(self):
|
||||
return self.offsets.iterkeys()
|
||||
|
||||
def get_pos(self, objid):
|
||||
try:
|
||||
(genno, pos) = self.offsets[objid]
|
||||
|
@ -180,6 +186,12 @@ class PDFXRefStream(PDFBaseXRef):
|
|||
def get_trailer(self):
|
||||
return self.trailer
|
||||
|
||||
def get_objids(self):
    """Yield every object id covered by self.objid_ranges, range by range.

    Each range contributes get_start_id() through get_end_id() inclusive;
    the +1 turns the inclusive end id into xrange's exclusive stop value.
    """
    for objid_range in self.objid_ranges:
        # The stop argument used to be `objid <= ...get_end_id()+1`, which
        # referenced an undefined name `objid` and produced a boolean
        # instead of the end of the range.
        first = objid_range.get_start_id()
        stop = objid_range.get_end_id() + 1
        for x in xrange(first, stop):
            yield x
    return
|
||||
|
||||
def get_pos(self, objid):
|
||||
offset = 0
|
||||
found = False
|
||||
|
|
|
@ -86,7 +86,7 @@ def dumptrailers(out, doc):
|
|||
def dumpallobjs(out, doc, codec=None):
|
||||
out.write('<pdf>')
|
||||
for xref in doc.xrefs:
|
||||
for objid in xref.objids():
|
||||
for objid in xref.get_objids():
|
||||
try:
|
||||
obj = doc.getobj(objid)
|
||||
if obj is None: continue
|
||||
|
|
Loading…
Reference in New Issue