diff --git a/MANIFEST b/MANIFEST index b299fb5..7c5a90a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -2,6 +2,7 @@ TODO Makefile README.txt setup.py +docs/index.html pdfminer/Makefile pdfminer/__init__.py pdfminer/arcfour.py diff --git a/docs/index.html b/docs/index.html index 6d3f663..b60df7f 100644 --- a/docs/index.html +++ b/docs/index.html @@ -19,7 +19,7 @@ Python PDF parser and analyzer
-Copyright (c) 2004-2009 Yusuke Shinyama <yusuke at cs dot nyu dot edu> +Copyright (c) 2004-2010 Yusuke Shinyama <yusuke at cs dot nyu dot edu>
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py
index 3c136ae..0ef4750 100644
--- a/pdfminer/cmapdb.py
+++ b/pdfminer/cmapdb.py
@@ -81,7 +81,12 @@ class IdentityCMap(object):
return self.vertical
def decode(self, code):
- return unpack('>%dH' % (len(code)/2), code)
+ n = len(code)/2  # number of 2-byte units ('H' = unsigned short); assumes Python 2 integer division - confirm no `from __future__ import division`
+ if n:
+ return unpack('>%dH' % n, code)
+ else:
+ return ()  # fewer than two bytes of input: nothing to decode
+
## UnicodeMap
@@ -363,3 +368,15 @@ class CMapParser(PSStackParser):
self.push((pos, token))
return
+
+# test: run CMapParser over each file named on the command line (argv[1:])
+def main(argv):
+    for fname in argv[1:]:
+        fp = file(fname, 'rb')
+        try:
+            CMapParser(FileUnicodeMap(), fp).run()
+        finally:
+            fp.close()  # release the handle even when parsing raises
+    return
+
+if __name__ == '__main__': sys.exit(main(sys.argv))
diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index 0460b43..ad721fb 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -52,7 +52,7 @@ class PDFPageAggregator(PDFTextDevice):
def render_image(self, name, stream):
assert isinstance(self.cur_item, LTFigure)
- item = LTImage(name, stream['Filter'],
+ item = LTImage(name, stream.get('Filter'),
(stream['Width'], stream['Height']),
(self.cur_item.x0, self.cur_item.y0,
self.cur_item.x1, self.cur_item.y1),
diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py
index 7012df7..c8bb0f3 100644
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@@ -47,6 +47,9 @@ class PDFBaseXRef(object):
def get_trailer(self):
raise NotImplementedError
+ def get_objids(self):  # base implementation: reports no object ids
+ return []
+
def get_pos(self, objid):
raise KeyError(objid)
@@ -132,6 +135,9 @@ class PDFXRef(PDFBaseXRef):
def get_trailer(self):
return self.trailer
+ def get_objids(self):  # the object ids are the keys of self.offsets
+ return self.offsets.iterkeys()  # Python 2 API; presumably self.offsets is a dict, so this is an iterator rather than a list - confirm
+
def get_pos(self, objid):
try:
(genno, pos) = self.offsets[objid]
@@ -180,6 +186,12 @@ class PDFXRefStream(PDFBaseXRef):
def get_trailer(self):
return self.trailer
+    def get_objids(self):
+        """Yield each object id in every range of self.objid_ranges; get_end_id() is taken as inclusive, hence the +1."""
+        for objid_range in self.objid_ranges:
+            for x in xrange(objid_range.get_start_id(), objid_range.get_end_id()+1):
+                yield x
+
def get_pos(self, objid):
offset = 0
found = False
diff --git a/tools/dumppdf.py b/tools/dumppdf.py
index 702687c..785f070 100755
--- a/tools/dumppdf.py
+++ b/tools/dumppdf.py
@@ -86,7 +86,7 @@ def dumptrailers(out, doc):
def dumpallobjs(out, doc, codec=None):
out.write('