From ead8e778a66cb65dd9eb145f7dfc2946cc97052c Mon Sep 17 00:00:00 2001
From: Cathal Garvey <cathalgarvey@cathalgarvey.me>
Date: Sat, 30 May 2015 16:27:58 +0100
Subject: [PATCH] Successfully compartmentalised code, getting closer to moving
 pdf->text into a module function.

---
 tools/pdf2txt.py | 57 ++++++++++++++++++++++++------------------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py
index d74c4c5..15de307 100755
--- a/tools/pdf2txt.py
+++ b/tools/pdf2txt.py
@@ -131,8 +131,8 @@ def extract_text(files=[], outfile='-',
     if output_dir:
         imagewriter = ImageWriter(output_dir)
 
-    if six.PY2 and sys.stdin.encoding:
-        password = password.decode(sys.stdin.encoding)
+#    if six.PY2 and sys.stdin.encoding:
+#        password = password.decode(sys.stdin.encoding)
     
     if output_type == "text" and outfile != "-":
         for override, alttype in (  (".htm", "html"),
@@ -149,38 +149,39 @@ def extract_text(files=[], outfile='-',
     else:
         outfp = open(outfile, "wb")
     
-    rsrcmgr = PDFResourceManager(caching=not disable_caching)
+#    rsrcmgr = PDFResourceManager(caching=not disable_caching)
 
-    if output_type == 'text':
-        device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
-                               imagewriter=imagewriter)
+#    if output_type == 'text':
+#        device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
+#                               imagewriter=imagewriter)
 
-    if six.PY3 and outfp == sys.stdout:
-        outfp = sys.stdout.buffer
+#    if six.PY3 and outfp == sys.stdout:
+#        outfp = sys.stdout.buffer
 
-    if output_type == 'xml':
-        device = XMLConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
-                              imagewriter=imagewriter,
-                              stripcontrol=strip_control)
-    elif output_type == 'html':
-        device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale,
-                               layoutmode=layoutmode, laparams=laparams,
-                               imagewriter=imagewriter)
-    elif output_type == 'tag':
-        device = TagExtractor(rsrcmgr, outfp, codec=codec)
+#    if output_type == 'xml':
+#        device = XMLConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
+#                              imagewriter=imagewriter,
+#                              stripcontrol=strip_control)
+#    elif output_type == 'html':
+#        device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale,
+#                               layoutmode=layoutmode, laparams=laparams,
+#                               imagewriter=imagewriter)
+#    elif output_type == 'tag':
+#        device = TagExtractor(rsrcmgr, outfp, codec=codec)
 
     for fname in files:
         with open(fname, "rb") as fp:
-            interpreter = PDFPageInterpreter(rsrcmgr, device)
-            for page in PDFPage.get_pages(fp,
-                                          page_numbers,
-                                          maxpages=maxpages,
-                                          password=password,
-                                          caching=not disable_caching,
-                                          check_extractable=True):
-                page.rotate = (page.rotate + rotation) % 360
-                interpreter.process_page(page)
-    device.close()
+            extract_text_to_fp(fp, **locals())
+#            interpreter = PDFPageInterpreter(rsrcmgr, device)
+#            for page in PDFPage.get_pages(fp,
+#                                          page_numbers,
+#                                          maxpages=maxpages,
+#                                          password=password,
+#                                          caching=not disable_caching,
+#                                          check_extractable=True):
+#                page.rotate = (page.rotate + rotation) % 360
+#                interpreter.process_page(page)
+#    device.close()
     return outfp
 
 # main