several bugfixes.
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@179 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
63033599ce
commit
538a605ac0
1
MANIFEST
1
MANIFEST
|
@ -2,6 +2,7 @@ TODO
|
||||||
Makefile
|
Makefile
|
||||||
README.txt
|
README.txt
|
||||||
setup.py
|
setup.py
|
||||||
|
docs/index.html
|
||||||
pdfminer/Makefile
|
pdfminer/Makefile
|
||||||
pdfminer/__init__.py
|
pdfminer/__init__.py
|
||||||
pdfminer/arcfour.py
|
pdfminer/arcfour.py
|
||||||
|
|
|
@ -19,7 +19,7 @@ Python PDF parser and analyzer
|
||||||
|
|
||||||
<div align=right class=lastmod>
|
<div align=right class=lastmod>
|
||||||
<!-- hhmts start -->
|
<!-- hhmts start -->
|
||||||
Last Modified: Sun Jan 31 11:11:26 JST 2010
|
Last Modified: Sun Feb 7 12:13:27 JST 2010
|
||||||
<!-- hhmts end -->
|
<!-- hhmts end -->
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@ -347,6 +347,7 @@ no stream header is displayed for the ease of saving it to a file.
|
||||||
<hr noshade>
|
<hr noshade>
|
||||||
<h2>Changes</h2>
|
<h2>Changes</h2>
|
||||||
<ul>
|
<ul>
|
||||||
|
<li> 2010/02/07: Several bugfixes. Thanks to Hiroshi Manabe.
|
||||||
<li> 2010/01/31: JPEG image extraction supported. Page rotation bug fixed.
|
<li> 2010/01/31: JPEG image extraction supported. Page rotation bug fixed.
|
||||||
<li> 2010/01/04: Python 2.6 warning removal. More doctest conversion.
|
<li> 2010/01/04: Python 2.6 warning removal. More doctest conversion.
|
||||||
<li> 2010/01/01: CMap bug fix. Thanks to Winfried Plappert.
|
<li> 2010/01/01: CMap bug fix. Thanks to Winfried Plappert.
|
||||||
|
@ -399,7 +400,7 @@ no stream header is displayed for the ease of saving it to a file.
|
||||||
(This is so-called MIT/X License)
|
(This is so-called MIT/X License)
|
||||||
<p>
|
<p>
|
||||||
<small>
|
<small>
|
||||||
Copyright (c) 2004-2009 Yusuke Shinyama <yusuke at cs dot nyu dot edu>
|
Copyright (c) 2004-2010 Yusuke Shinyama <yusuke at cs dot nyu dot edu>
|
||||||
<p>
|
<p>
|
||||||
Permission is hereby granted, free of charge, to any person
|
Permission is hereby granted, free of charge, to any person
|
||||||
obtaining a copy of this software and associated documentation
|
obtaining a copy of this software and associated documentation
|
||||||
|
|
|
@ -81,7 +81,12 @@ class IdentityCMap(object):
|
||||||
return self.vertical
|
return self.vertical
|
||||||
|
|
||||||
def decode(self, code):
|
def decode(self, code):
|
||||||
return unpack('>%dH' % (len(code)/2), code)
|
n = len(code)/2
|
||||||
|
if n:
|
||||||
|
return unpack('>%dH' % n, code)
|
||||||
|
else:
|
||||||
|
return ()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## UnicodeMap
|
## UnicodeMap
|
||||||
|
@ -363,3 +368,15 @@ class CMapParser(PSStackParser):
|
||||||
|
|
||||||
self.push((pos, token))
|
self.push((pos, token))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# test
|
||||||
|
def main(argv):
    """Run CMapParser over each file named on the command line.

    argv is the full argument vector; argv[0] (the program name) is
    skipped.  Every remaining argument is opened in binary mode and parsed
    by CMapParser into a fresh FileUnicodeMap.  The parsed map is not used
    further.  Returns None, so sys.exit(main(...)) exits with status 0.
    """
    for fname in argv[1:]:
        fp = open(fname, 'rb')
        try:
            cmap = FileUnicodeMap()
            CMapParser(cmap, fp).run()
        finally:
            # Close the file even when parsing raises, so the handle is
            # not leaked while the exception propagates.
            fp.close()
    return
|
||||||
|
|
||||||
|
if __name__ == '__main__': sys.exit(main(sys.argv))
|
||||||
|
|
|
@ -52,7 +52,7 @@ class PDFPageAggregator(PDFTextDevice):
|
||||||
|
|
||||||
def render_image(self, name, stream):
|
def render_image(self, name, stream):
|
||||||
assert isinstance(self.cur_item, LTFigure)
|
assert isinstance(self.cur_item, LTFigure)
|
||||||
item = LTImage(name, stream['Filter'],
|
item = LTImage(name, stream.get('Filter'),
|
||||||
(stream['Width'], stream['Height']),
|
(stream['Width'], stream['Height']),
|
||||||
(self.cur_item.x0, self.cur_item.y0,
|
(self.cur_item.x0, self.cur_item.y0,
|
||||||
self.cur_item.x1, self.cur_item.y1),
|
self.cur_item.x1, self.cur_item.y1),
|
||||||
|
|
|
@ -47,6 +47,9 @@ class PDFBaseXRef(object):
|
||||||
def get_trailer(self):
|
def get_trailer(self):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def get_objids(self):
|
||||||
|
return []
|
||||||
|
|
||||||
def get_pos(self, objid):
|
def get_pos(self, objid):
|
||||||
raise KeyError(objid)
|
raise KeyError(objid)
|
||||||
|
|
||||||
|
@ -132,6 +135,9 @@ class PDFXRef(PDFBaseXRef):
|
||||||
def get_trailer(self):
|
def get_trailer(self):
|
||||||
return self.trailer
|
return self.trailer
|
||||||
|
|
||||||
|
def get_objids(self):
|
||||||
|
return self.offsets.iterkeys()
|
||||||
|
|
||||||
def get_pos(self, objid):
|
def get_pos(self, objid):
|
||||||
try:
|
try:
|
||||||
(genno, pos) = self.offsets[objid]
|
(genno, pos) = self.offsets[objid]
|
||||||
|
@ -180,6 +186,12 @@ class PDFXRefStream(PDFBaseXRef):
|
||||||
def get_trailer(self):
|
def get_trailer(self):
|
||||||
return self.trailer
|
return self.trailer
|
||||||
|
|
||||||
|
def get_objids(self):
    """Yield every object id covered by this xref stream's id ranges.

    Walks self.objid_ranges in order and, for each range, yields the
    integers from get_start_id() up to and including get_end_id().  The +1
    is kept from the original expression, which treats the end id as
    inclusive.
    """
    for objid_range in self.objid_ranges:
        # The stop bound used to be `objid <= end+1`.  `objid` is not
        # defined in this scope, so the first iteration raised NameError
        # (and a comparison would have produced a bool, not a bound).
        for objid in xrange(objid_range.get_start_id(),
                            objid_range.get_end_id() + 1):
            yield objid
|
||||||
|
|
||||||
def get_pos(self, objid):
|
def get_pos(self, objid):
|
||||||
offset = 0
|
offset = 0
|
||||||
found = False
|
found = False
|
||||||
|
|
|
@ -86,7 +86,7 @@ def dumptrailers(out, doc):
|
||||||
def dumpallobjs(out, doc, codec=None):
|
def dumpallobjs(out, doc, codec=None):
|
||||||
out.write('<pdf>')
|
out.write('<pdf>')
|
||||||
for xref in doc.xrefs:
|
for xref in doc.xrefs:
|
||||||
for objid in xref.objids():
|
for objid in xref.get_objids():
|
||||||
try:
|
try:
|
||||||
obj = doc.getobj(objid)
|
obj = doc.getobj(objid)
|
||||||
if obj is None: continue
|
if obj is None: continue
|
||||||
|
|
Loading…
Reference in New Issue