parent
33b16b3f07
commit
ed1b09c6f2
|
@ -4,6 +4,7 @@ Functions that encapsulate "usual" use-cases for pdfminer, for use making
|
||||||
bundled scripts and for using pdfminer as a module for routine tasks.
|
bundled scripts and for using pdfminer as a module for routine tasks.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
import six
|
import six
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
@ -50,6 +51,9 @@ def extract_text_to_fp(inf, outfp,
|
||||||
'upgrade to Python 3. For more information see '
|
'upgrade to Python 3. For more information see '
|
||||||
'https://github.com/pdfminer/pdfminer .six/issues/194')
|
'https://github.com/pdfminer/pdfminer .six/issues/194')
|
||||||
|
|
||||||
|
if debug:
|
||||||
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
|
|
||||||
if six.PY2 and sys.stdin.encoding:
|
if six.PY2 and sys.stdin.encoding:
|
||||||
password = password.decode(sys.stdin.encoding)
|
password = password.decode(sys.stdin.encoding)
|
||||||
|
|
||||||
|
|
|
@ -15,6 +15,8 @@ from pdfminer.pdftypes import PDFStream, PDFObjRef, resolve1, stream_value
|
||||||
from pdfminer.psparser import PSKeyword, PSLiteral, LIT
|
from pdfminer.psparser import PSKeyword, PSLiteral, LIT
|
||||||
from pdfminer.utils import isnumber
|
from pdfminer.utils import isnumber
|
||||||
|
|
||||||
|
logging.basicConfig()
|
||||||
|
|
||||||
ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
|
ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -7,12 +7,15 @@ import argparse
|
||||||
import logging
|
import logging
|
||||||
import six
|
import six
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import pdfminer.settings
|
import pdfminer.settings
|
||||||
pdfminer.settings.STRICT = False
|
pdfminer.settings.STRICT = False
|
||||||
import pdfminer.high_level
|
import pdfminer.high_level
|
||||||
import pdfminer.layout
|
import pdfminer.layout
|
||||||
from pdfminer.image import ImageWriter
|
from pdfminer.image import ImageWriter
|
||||||
|
|
||||||
|
logging.basicConfig()
|
||||||
|
|
||||||
|
|
||||||
def extract_text(files=[], outfile='-',
|
def extract_text(files=[], outfile='-',
|
||||||
no_laparams=False, all_texts=None, detect_vertical=None, # LAParams
|
no_laparams=False, all_texts=None, detect_vertical=None, # LAParams
|
||||||
|
|
|
@ -11,6 +11,8 @@ pdfminer.settings.STRICT = False
|
||||||
import pdfminer.high_level
|
import pdfminer.high_level
|
||||||
import pdfminer.layout
|
import pdfminer.layout
|
||||||
|
|
||||||
|
logging.basicConfig()
|
||||||
|
|
||||||
|
|
||||||
def compare(file1, file2, **kwargs):
|
def compare(file1, file2, **kwargs):
|
||||||
if '_py2_no_more_posargs' in kwargs is not None:
|
if '_py2_no_more_posargs' in kwargs is not None:
|
||||||
|
@ -90,9 +92,11 @@ def main(args=None):
|
||||||
P.add_argument("-C", "--disable-caching", default=False, action="store_true", help="Disable caching")
|
P.add_argument("-C", "--disable-caching", default=False, action="store_true", help="Disable caching")
|
||||||
P.add_argument("-S", "--strip-control", default=False, action="store_true", help="Strip control in XML mode")
|
P.add_argument("-S", "--strip-control", default=False, action="store_true", help="Strip control in XML mode")
|
||||||
|
|
||||||
|
|
||||||
A = P.parse_args(args=args)
|
A = P.parse_args(args=args)
|
||||||
|
|
||||||
|
if A.debug:
|
||||||
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
|
|
||||||
if A.page_numbers:
|
if A.page_numbers:
|
||||||
A.page_numbers = set([x-1 for x in A.page_numbers])
|
A.page_numbers = set([x-1 for x in A.page_numbers])
|
||||||
if A.pagenos:
|
if A.pagenos:
|
||||||
|
|
Loading…
Reference in New Issue