From 8a5bec5065c2e0b3195ca00365bdbb81e843ccb1 Mon Sep 17 00:00:00 2001 From: "yusuke.shinyama.dummy" Date: Tue, 21 Jul 2009 07:55:19 +0000 Subject: [PATCH] layout analysis improved. git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@120 1aa58f4a-7d42-0410-adbc-911cccaed67c --- README.html | 4 +- pdfminer/__init__.py | 2 +- pdfminer/cmap.py | 4 +- pdfminer/converter.py | 79 +++++----- pdfminer/layout.py | 325 ++++++++++++++++++++++++------------------ pdfminer/pdfinterp.py | 4 +- pdfminer/utils.py | 4 + samples/Makefile | 2 +- setup.py | 2 +- tools/__init__.py | 0 tools/pdf2html.cgi | 19 ++- tools/pdf2txt.py | 29 ++-- tools/sgml.py | 152 -------------------- tools/viewpdf.py | 162 --------------------- 14 files changed, 263 insertions(+), 525 deletions(-) delete mode 100644 tools/__init__.py delete mode 100755 tools/sgml.py delete mode 100755 tools/viewpdf.py diff --git a/README.html b/README.html index 9255959..08fb7f6 100644 --- a/README.html +++ b/README.html @@ -18,7 +18,7 @@ Python PDF parser and analyzer
-Last Modified: Sun Jul 12 00:36:44 JST 2009 +Last Modified: Tue Jul 21 16:24:26 JST 2009
@@ -191,6 +191,7 @@ HTML-like tags. pdf2txt tries to extract its content streams rather than inferri Tags used here are defined in the PDF specification (See §10.7 "Tagged PDF").

+

-D direction
-M char_margin
-L line_margin
-W word_margin @@ -318,6 +319,7 @@ no stream header is displayed for the ease of saving it to a file.

Changes