Set up logging for pdf2txt and fix dumppdf

pull/2/head
cybjit 2014-09-11 23:41:01 +02:00
parent 39942b6642
commit 714423883c
2 changed files with 4 additions and 10 deletions

View File

@@ -247,7 +247,7 @@ def main(argv):
outfp = sys.stdout outfp = sys.stdout
extractdir = None extractdir = None
for (k, v) in opts: for (k, v) in opts:
if k == '-d': logging.getLogger().setlevel(logging.DEBUG) if k == '-d': logging.getLogger().setLevel(logging.DEBUG)
elif k == '-o': outfp = open(v, 'w') elif k == '-o': outfp = open(v, 'w')
elif k == '-i': objids.extend( int(x) for x in v.split(',') ) elif k == '-i': objids.extend( int(x) for x in v.split(',') )
elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') ) elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )

View File

@@ -9,6 +9,7 @@ from pdfminer.converter import XMLConverter, HTMLConverter, TextConverter
from pdfminer.cmapdb import CMapDB from pdfminer.cmapdb import CMapDB
from pdfminer.layout import LAParams from pdfminer.layout import LAParams
from pdfminer.image import ImageWriter from pdfminer.image import ImageWriter
import logging
# main # main
def main(argv): def main(argv):
@@ -25,8 +26,6 @@ def main(argv):
except getopt.GetoptError: except getopt.GetoptError:
return usage() return usage()
if not args: return usage() if not args: return usage()
# debug option
debug = 0
# input option # input option
password = b'' password = b''
pagenos = set() pagenos = set()
@@ -45,7 +44,7 @@ def main(argv):
showpageno = True showpageno = True
laparams = LAParams() laparams = LAParams()
for (k, v) in opts: for (k, v) in opts:
if k == '-d': debug += 1 if k == '-d': logging.getLogger().setLevel(logging.DEBUG)
elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') ) elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
elif k == '-m': maxpages = int(v) elif k == '-m': maxpages = int(v)
elif k == '-P': password = v elif k == '-P': password = v
@@ -66,11 +65,6 @@ def main(argv):
elif k == '-c': codec = v elif k == '-c': codec = v
elif k == '-s': scale = float(v) elif k == '-s': scale = float(v)
# #
PDFDocument.debug = debug
PDFParser.debug = debug
CMapDB.debug = debug
PDFPageInterpreter.debug = debug
#
rsrcmgr = PDFResourceManager(caching=caching) rsrcmgr = PDFResourceManager(caching=caching)
if not outtype: if not outtype:
outtype = 'text' outtype = 'text'
@@ -97,7 +91,7 @@ def main(argv):
elif outtype == 'html': elif outtype == 'html':
device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale, device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale,
layoutmode=layoutmode, laparams=laparams, layoutmode=layoutmode, laparams=laparams,
imagewriter=imagewriter, debug=debug) imagewriter=imagewriter)
elif outtype == 'tag': elif outtype == 'tag':
device = TagExtractor(rsrcmgr, outfp, codec=codec) device = TagExtractor(rsrcmgr, outfp, codec=codec)
else: else: