From f22b0564549a1b29a9d2704ee3b070554a7a6877 Mon Sep 17 00:00:00 2001 From: Yusuke Shinyama Date: Sun, 27 Feb 2011 19:53:12 +0900 Subject: [PATCH] release-20110227 --- docs/index.html | 8 ++++++-- pdfminer/__init__.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/index.html b/docs/index.html index d6df1ea..9017298 100644 --- a/docs/index.html +++ b/docs/index.html @@ -9,7 +9,7 @@
-Last Modified: Mon Feb 14 13:31:54 UTC 2011 +Last Modified: Sun Feb 27 10:51:18 UTC 2011
@@ -184,7 +184,7 @@ Not all characters in a PDF can be safely converted to Unicode. $ pdf2txt.py -o output.html samples/naacl06-shinyama.pdf (extract text as an HTML file whose filename is output.html) -$ pdf2txt.py -c euc-jp -o output.html samples/jo.pdf +$ pdf2txt.py -V -c euc-jp -o output.html samples/jo.pdf (extract a Japanese HTML file in vertical writing, CMap is required) $ pdf2txt.py -P mypassword -o output.txt secret.pdf @@ -270,6 +270,9 @@ are M = 1.0, L = 0.3, and W = 0.2, respectively.
Forces layout analysis to be performed on all text strings, including text contained in figures.

+

-V +
Allows vertical writing detection. +

-Y layout_mode
Specifies how the page layout should be preserved. (Currently only applies to HTML format.)