diff --git a/docs/index.html b/docs/index.html
index b60df7f..b119389 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -19,7 +19,7 @@ Python PDF parser and analyzer
-Last Modified: Sun Feb 7 12:13:27 JST 2010
+Last Modified: Mon Feb 15 14:41:49 UTC 2010
@@ -27,7 +27,7 @@ Last Modified: Sun Feb 7 12:13:27 JST 2010
What's It?
Download
Install
- (for East Asian languages)
+ (for CJK languages)
How to Use
(pdf2txt.py, dumppdf.py)
TODOs
@@ -54,7 +54,7 @@ PDF parser that can be used for other purposes instead of text analysis.
- Written entirely in Python. (for version 2.4 or newer)
- PDF-1.7 specification support. (well, almost)
-
- East Asian languages and vertical writing scripts support.
+
- CJK languages and vertical writing scripts support.
- Various font types (Type1, TrueType, Type3, and CID) support.
- Basic encryption (RC4) support.
- PDF to HTML conversion (with a sample converter web app).
@@ -94,6 +94,7 @@ http://pdf2html.tabesugi.net:8080/
- Install Python 2.4 or newer.
+(Python 3 is not supported.)
- Download the PDFMiner source.
- Unpack it.
- Run
setup.py
to install:
@@ -125,8 +126,8 @@ W o r l d
-
For East Asian languages
-In order to handle East Asian languages (Chinese or Japanese, etc.),
+For CJK languages
+In order to handle CJK languages,
an additional data called CMap
is required.
CMap files are not installed by default.
@@ -347,6 +348,8 @@ no stream header is displayed for the ease of saving it to a file.
Changes
+- 2010/02/15: Bugfixes. Thanks to Sean.
+
- 2010/02/13: Bugfix and enhancement. Thanks to André Auzi.
- 2010/02/07: Several bugfixes. Thanks to Hiroshi Manabe.
- 2010/01/31: JPEG image extraction supported. Page rotation bug fixed.
- 2010/01/04: Python 2.6 warning removal. More doctest conversion.
diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index 41e08b6..973d3c6 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -53,7 +53,7 @@ class PDFPageAggregator(PDFTextDevice):
def render_image(self, name, stream):
assert isinstance(self.cur_item, LTFigure)
ismask = stream.get_any(('IM', 'ImageMask'))
- bits = stream.get_any(('BPC', 'BitsPerCompoment'), 1)
+ bits = stream.get_any(('BPC', 'BitsPerComponent'), 1)
csp = stream.get_any(('CS', 'ColorSpace'))
if not isinstance(csp, list):
csp = [csp]
diff --git a/pdfminer/layout.py b/pdfminer/layout.py
index 91a4a71..c889b16 100644
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@@ -294,7 +294,7 @@ class LTImage(LayoutItem):
def __repr__(self):
(w,h) = self.srcsize
- return '' % (self.id, self.type, w, h)
+ return '' % (self.name, self.type, w, h)
def get_weight(self):
return 0
diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py
index c8bb0f3..2581888 100644
--- a/pdfminer/pdfparser.py
+++ b/pdfminer/pdfparser.py
@@ -188,7 +188,7 @@ class PDFXRefStream(PDFBaseXRef):
def get_objids(self):
for objid_range in self.objid_ranges:
- for x in xrange(objid_range.get_start_id(), objid <= objid_range.get_end_id()+1):
+ for x in xrange(objid_range.get_start_id(), objid_range.get_end_id()+1):
yield x
return
diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py
index a4f7da1..74721a4 100644
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@@ -165,7 +165,12 @@ class PDFStream(PDFObject):
return
def __repr__(self):
- return '' % (self.objid, len(self.rawdata), self.attrs)
+ if self.data is None:
+ assert self.rawdata is not None
+ return '' % (self.objid, len(self.rawdata), self.attrs)
+ else:
+ assert self.data is not None
+ return '' % (self.objid, len(self.data), self.attrs)
def __contains__(self, name):
return name in self.attrs
@@ -203,7 +208,8 @@ class PDFStream(PDFObject):
data = self.decipher(self.objid, self.genno, data)
filters = self.get_any(('F', 'Filter'))
if not filters:
- self.rawdata = self.data = data
+ self.data = data
+ self.rawdata = None
return
if not isinstance(filters, list):
filters = [ filters ]
diff --git a/tools/dumppdf.py b/tools/dumppdf.py
index 785f070..cd8bb28 100755
--- a/tools/dumppdf.py
+++ b/tools/dumppdf.py
@@ -116,6 +116,13 @@ def dumpoutline(outfp, fname, objids, pagenos, password='',
if isinstance(dest, dict):
dest = dest['D']
pageno = pages[dest[0].objid]
+ elif a:
+ action = a.resolve()
+ if isinstance(action, dict):
+ subtype = action.get('S')
+ if subtype and repr(subtype) == '/GoTo' and action.get('D'):
+ dest = action['D']
+ pageno = pages[dest[0].objid]
outfp.write(repr((level,title,dest,pageno))+'\n')
parser.close()
fp.close()