Set up logging for pdf2txt and fix dumppdf
parent
39942b6642
commit
714423883c
|
@ -247,7 +247,7 @@ def main(argv):
|
||||||
outfp = sys.stdout
|
outfp = sys.stdout
|
||||||
extractdir = None
|
extractdir = None
|
||||||
for (k, v) in opts:
|
for (k, v) in opts:
|
||||||
if k == '-d': logging.getLogger().setlevel(logging.DEBUG)
|
if k == '-d': logging.getLogger().setLevel(logging.DEBUG)
|
||||||
elif k == '-o': outfp = open(v, 'w')
|
elif k == '-o': outfp = open(v, 'w')
|
||||||
elif k == '-i': objids.extend( int(x) for x in v.split(',') )
|
elif k == '-i': objids.extend( int(x) for x in v.split(',') )
|
||||||
elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
|
elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
|
||||||
|
|
|
@ -9,6 +9,7 @@ from pdfminer.converter import XMLConverter, HTMLConverter, TextConverter
|
||||||
from pdfminer.cmapdb import CMapDB
|
from pdfminer.cmapdb import CMapDB
|
||||||
from pdfminer.layout import LAParams
|
from pdfminer.layout import LAParams
|
||||||
from pdfminer.image import ImageWriter
|
from pdfminer.image import ImageWriter
|
||||||
|
import logging
|
||||||
|
|
||||||
# main
|
# main
|
||||||
def main(argv):
|
def main(argv):
|
||||||
|
@ -25,8 +26,6 @@ def main(argv):
|
||||||
except getopt.GetoptError:
|
except getopt.GetoptError:
|
||||||
return usage()
|
return usage()
|
||||||
if not args: return usage()
|
if not args: return usage()
|
||||||
# debug option
|
|
||||||
debug = 0
|
|
||||||
# input option
|
# input option
|
||||||
password = b''
|
password = b''
|
||||||
pagenos = set()
|
pagenos = set()
|
||||||
|
@ -45,7 +44,7 @@ def main(argv):
|
||||||
showpageno = True
|
showpageno = True
|
||||||
laparams = LAParams()
|
laparams = LAParams()
|
||||||
for (k, v) in opts:
|
for (k, v) in opts:
|
||||||
if k == '-d': debug += 1
|
if k == '-d': logging.getLogger().setLevel(logging.DEBUG)
|
||||||
elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
|
elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
|
||||||
elif k == '-m': maxpages = int(v)
|
elif k == '-m': maxpages = int(v)
|
||||||
elif k == '-P': password = v
|
elif k == '-P': password = v
|
||||||
|
@ -66,11 +65,6 @@ def main(argv):
|
||||||
elif k == '-c': codec = v
|
elif k == '-c': codec = v
|
||||||
elif k == '-s': scale = float(v)
|
elif k == '-s': scale = float(v)
|
||||||
#
|
#
|
||||||
PDFDocument.debug = debug
|
|
||||||
PDFParser.debug = debug
|
|
||||||
CMapDB.debug = debug
|
|
||||||
PDFPageInterpreter.debug = debug
|
|
||||||
#
|
|
||||||
rsrcmgr = PDFResourceManager(caching=caching)
|
rsrcmgr = PDFResourceManager(caching=caching)
|
||||||
if not outtype:
|
if not outtype:
|
||||||
outtype = 'text'
|
outtype = 'text'
|
||||||
|
@ -97,7 +91,7 @@ def main(argv):
|
||||||
elif outtype == 'html':
|
elif outtype == 'html':
|
||||||
device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale,
|
device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale,
|
||||||
layoutmode=layoutmode, laparams=laparams,
|
layoutmode=layoutmode, laparams=laparams,
|
||||||
imagewriter=imagewriter, debug=debug)
|
imagewriter=imagewriter)
|
||||||
elif outtype == 'tag':
|
elif outtype == 'tag':
|
||||||
device = TagExtractor(rsrcmgr, outfp, codec=codec)
|
device = TagExtractor(rsrcmgr, outfp, codec=codec)
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue