imagewriter now works with text output

pull/1/head
Yusuke Shinyama 2011-11-07 01:15:10 +10:00
parent 91174b5665
commit 82ff98c7b3
3 changed files with 18 additions and 8 deletions

View File

@ -144,9 +144,10 @@ class PDFConverter(PDFLayoutAnalyzer):
class TextConverter(PDFConverter):
def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
             showpageno=False, imagewriter=None):
    """Set up a converter that writes extracted text.

    rsrcmgr, outfp, codec, pageno, laparams: forwarded unchanged to
        PDFConverter.__init__.
    showpageno: when true, a 'Page N' line is written for each page.
    imagewriter: optional object whose export_image() is called for
        image items; None (the default) means images are ignored.
    """
    # Only one signature may be present: the stale continuation line
    # without `imagewriter` was left behind next to the new one.
    PDFConverter.__init__(self, rsrcmgr, outfp, codec=codec, pageno=pageno, laparams=laparams)
    self.showpageno = showpageno
    self.imagewriter = imagewriter
    return
def write_text(self, text):
@ -162,19 +163,25 @@ class TextConverter(PDFConverter):
self.write_text(item.get_text())
if isinstance(item, LTTextBox):
self.write_text('\n')
elif isinstance(item, LTImage):
if self.imagewriter is not None:
self.imagewriter.export_image(item)
if self.showpageno:
self.write_text('Page %s\n' % ltpage.pageid)
render(ltpage)
self.write_text('\f')
return
# Some dummy functions to save memory/CPU when all that is wanted is text.
# This stops all the image and drawing output from being recorded and taking
# up RAM.
# Some dummy functions to save memory/CPU when all that is wanted
# is text. This stops all the image and drawing output from being
# recorded and taking up RAM.
def render_image(self, name, stream):
    """Hand the image to PDFConverter.render_image only if it is wanted.

    When no imagewriter is configured the image is dropped, so text-only
    runs do not record image data.
    """
    if self.imagewriter is not None:
        PDFConverter.render_image(self, name, stream)
    return
def paint_path(self, gstate, stroke, fill, evenodd, path):
    """Ignore path-painting requests; nothing is recorded for them."""
    return None
## HTMLConverter

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python2
import sys
import struct
import os.path
import os, os.path
from pdftypes import LITERALS_DCT_DECODE
from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB
@ -54,6 +54,8 @@ class ImageWriter(object):
def __init__(self, outdir):
self.outdir = outdir
if not os.path.exists(self.outdir):
os.makedirs(self.outdir)
return
def export_image(self, image):

View File

@ -80,7 +80,8 @@ def main(argv):
else:
outfp = sys.stdout
if outtype == 'text':
device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams)
device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
imagewriter=imagewriter)
elif outtype == 'xml':
device = XMLConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
imagewriter=imagewriter)