From 8e92ddca30bedc83a4a3b120b301f5b77c86e08d Mon Sep 17 00:00:00 2001
From: "yusuke.shinyama.dummy"
 <yusuke.shinyama.dummy@1aa58f4a-7d42-0410-adbc-911cccaed67c>
Date: Wed, 5 May 2010 05:51:11 +0000
Subject: [PATCH] latin2ascii.py was moved as a utility

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@215 1aa58f4a-7d42-0410-adbc-911cccaed67c
---
 setup.py                           |  5 ++--
 {pdfminer => tools}/latin2ascii.py | 46 ++++++++++++++++++++++++++++--
 2 files changed, 46 insertions(+), 5 deletions(-)
 rename {pdfminer => tools}/latin2ascii.py (60%)
 mode change 100644 => 100755

diff --git a/setup.py b/setup.py
index 3fd5e6a..baca2fe 100644
--- a/setup.py
+++ b/setup.py
@@ -20,11 +20,12 @@ PDF parser that can be used for other purposes instead of text analysis.''',
     url='http://www.unixuser.org/~euske/python/pdfminer/index.html',
     packages=[
     'pdfminer',
-    'pdfminer.cmap'
+    'pdfminer.cmap',
     ],
     scripts=[
     'tools/pdf2txt.py',
-    'tools/dumppdf.py'
+    'tools/dumppdf.py',
+    'tools/latin2ascii.py',
     ],
     keywords=['pdf parser', 'pdf converter', 'text mining'],
     classifiers=[
diff --git a/pdfminer/latin2ascii.py b/tools/latin2ascii.py
old mode 100644
new mode 100755
similarity index 60%
rename from pdfminer/latin2ascii.py
rename to tools/latin2ascii.py
index 1bc196a..36588a6
--- a/pdfminer/latin2ascii.py
+++ b/tools/latin2ascii.py
@@ -1,15 +1,20 @@
 #!/usr/bin/env python
+#
+#  latin2ascii.py - converts latin1 characters into ascii.
+#
 
 """ Mappings from Latin-1 characters to ASCII.
 
 This is an in-house mapping table for some Latin-1 characters
 (acutes, umlauts, etc.) to ASCII strings.
-
-This file is *not* used currently.
-
 """
 
+import sys
+
 LATIN2ASCII = {
+  #0x00a0: '',
+  #0x00a7: '',
+  
   # iso-8859-1
   0x00c0: 'A`',
   0x00c1: "A'",
@@ -87,4 +92,39 @@ LATIN2ASCII = {
   0xfb05: 'ft',
   0xfb06: 'st',
 
+  # Symbols
+  #0x2013: '',
+  0x2014: '--',
+  0x2015: '||',
+  0x2018: '`',
+  0x2019: "'",
+  0x201c: '``',
+  0x201d: "''",
+  #0x2022: '',
+  #0x2212: '',
+
 }
+
+def latin2ascii(s):
+    """Return s with characters found in LATIN2ASCII replaced by ASCII."""
+    return ''.join([LATIN2ASCII.get(ord(ch), ch) for ch in s])
+
+def main(argv):
+    """Write the ASCII conversion of the named files to stdout.
+
+    Input is decoded with the -c codec (default utf-8); returns 100 on bad usage."""
+    import getopt, fileinput
+    def usage():
+        print 'usage: %s [-c codec] file ...' % argv[0]
+        return 100
+    try:
+        (opts, args) = getopt.getopt(argv[1:], 'c:')  # ':' => -c takes a value
+    except getopt.GetoptError:
+        return usage()
+    if not args: return usage()
+    codec = dict(opts).get('-c', 'utf-8')
+    for line in fileinput.input(args):
+        line = latin2ascii(unicode(line, codec, 'ignore'))
+        sys.stdout.write(line.encode('ascii', 'replace'))
+
+if __name__ == '__main__': sys.exit(main(sys.argv))  # exit status is main()'s return value