diff --git a/docs/index.html b/docs/index.html
index 2e0a7a6..45ff9af 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -18,7 +18,7 @@ Python PDF parser and analyzer
-Last Modified: Mon Aug 24 15:56:08 JST 2009
+Last Modified: Thu Aug 27 00:20:29 JST 2009
@@ -101,6 +101,7 @@ World
+
For non-ASCII languages
In order to handle non-ASCII languages (e.g. Japanese),
you need to install an additional data called CMap
,
@@ -115,17 +116,16 @@ http://www.unixuser.org/~euske/pub/CMap.tar.bz2
Expand the archive and put the CMap
directory under the directory
where pdfminer
is installed.
-(Normally this should be something like /usr/lib/python2.5/site-packages
.)
+(Normally this should be something like /usr/lib/python2.5/site-packages/pdfminer
.)
For example:
-$ cd /usr/lib/python2.5/site-packages
+$ cd /usr/lib/python2.5/site-packages/pdfminer
$ tar jxf CMap.tar.bz2
- Do the following. (this is optional, but highly recommended)
+ Do the following. (This is optional and may take several minutes, but it is highly recommended!)
$ python -m pdfminer.cmap
-This may take several minutes.
@@ -319,6 +319,8 @@ no stream header is displayed for the ease of saving it to a file.
Changes
+- 2009/08/27: Fixed page rotation handling.
+
- 2009/08/26: Fixed zlib decoding bug. Thanks to Shon Urbas.
- 2009/08/24: Fixed a bug in character placing. Thanks to Pawan Jain.
- 2009/07/21: Improvement in layout analysis.
- 2009/07/11: Improvement in layout analysis. Thanks to Lubos Pintes.
diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index 24e37b1..4bc08c8 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -33,7 +33,7 @@ class TagExtractor(PDFDevice):
self.outfp.write(enc(text, self.codec))
return
- def begin_page(self, page):
+ def begin_page(self, page, ctm):
(x0, y0, x1, y1) = page.mediabox
bbox = '%.3f,%.3f,%.3f,%.3f' % (x0, y0, x1, y1)
self.outfp.write('' %
@@ -77,8 +77,12 @@ class PDFPageAggregator(PDFTextDevice):
self.stack = []
return
- def begin_page(self, page):
- self.cur_item = LTPage(self.pageno, page.mediabox, page.rotate)
+ def begin_page(self, page, ctm):  # ctm: page-level transformation matrix supplied by the caller
+ (x0,y0,x1,y1) = page.mediabox
+ (x0,y0) = apply_matrix_pt(ctm, (x0,y0))  # transform one mediabox corner through ctm
+ (x1,y1) = apply_matrix_pt(ctm, (x1,y1))  # transform the opposite corner through ctm
+ mediabox = (0, 0, abs(x0-x1), abs(y0-y1))  # origin-based box; abs() makes the size independent of corner order after the transform
+ self.cur_item = LTPage(self.pageno, mediabox)  # page.rotate is no longer passed to LTPage
return
def end_page(self, _):
@@ -225,10 +229,11 @@ class HTMLConverter(PDFConverter):
def end_page(self, page):
def render(item):
if isinstance(item, LTPage):
+ self.yoffset += item.y1
self.write_rect('gray', 1, item.x0, self.yoffset-item.y1, item.width, item.height)
if self.showpageno:
self.outfp.write('
' %
- ((self.yoffset-page.y1)*self.scale))
+ ((self.yoffset-item.y1)*self.scale))
self.outfp.write('
Page %s \n' % (page.id, page.id))
for child in item:
render(child)
@@ -256,7 +261,6 @@ class HTMLConverter(PDFConverter):
render(child)
return
page = PDFConverter.end_page(self, page)
- self.yoffset += page.y1
render(page)
self.yoffset += self.pagepad
return
diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py
index 3713fcf..896ca8e 100644
--- a/pdfminer/pdfdevice.py
+++ b/pdfminer/pdfdevice.py
@@ -32,7 +32,7 @@ class PDFDevice(object):
def do_tag(self, tag, props=None):
return
- def begin_page(self, page):
+ def begin_page(self, page, ctm):  # no-op hook in the base device; receives the page and a transformation matrix (ctm)
return
def end_page(self, page):
return
diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index 967a62d..6c24a20 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -690,8 +690,17 @@ class PDFPageInterpreter(object):
def process_page(self, page):
if 1 <= self.debug:
print >>stderr, 'Processing page: %r' % page
- self.device.begin_page(page)
- self.render_contents(page.resources, page.contents)
+ (x0,y0,x1,y1) = page.mediabox  # build the initial ctm from page.rotate; mappings below assume (a,b,c,d,e,f) sends (x,y) to (a*x+c*y+e, b*x+d*y+f)
+ if page.rotate == 90:
+ ctm = (0,-1,1,0, -y0,x1)  # (x,y) -> (y-y0, x1-x)
+ elif page.rotate == 180:
+ ctm = (-1,0,0,-1, x1,y1)  # (x,y) -> (x1-x, y1-y)
+ elif page.rotate == 270:
+ ctm = (0,1,-1,0, y1,-x0)  # (x,y) -> (y1-y, x-x0); translation must be (y1,-x0) so the rotated mediabox starts at (0,0)
+ else:
+ ctm = (1,0,0,1, -x0,-y0)  # (x,y) -> (x-x0, y-y0)
+ self.device.begin_page(page, ctm)
+ self.render_contents(page.resources, page.contents, ctm=ctm)
self.device.end_page(page)
return