outline (TOC) extraction supported.
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@42 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
cb02051481
commit
9740f26cec
15
README.html
15
README.html
|
@ -11,7 +11,7 @@ blockquote { background: #eeeeee; }
|
|||
<h1>PDFMiner</h1>
|
||||
<div align=right class=lastmod>
|
||||
<!-- hhmts start -->
|
||||
Last Modified: Tue Jul 1 00:02:48 JST 2008
|
||||
Last Modified: Thu Jul 10 00:14:07 JST 2008
|
||||
<!-- hhmts end -->
|
||||
</div>
|
||||
|
||||
|
@ -135,13 +135,13 @@ Unicode Standard.
|
|||
<p>
|
||||
Examples:
|
||||
<blockquote><pre>
|
||||
$ <strong>./pdf2txt.py -H -o output.html samples/naacl06-shinyama.pdf</strong>
|
||||
$ <strong>python -m tools.pdf2txt -H -o output.html samples/naacl06-shinyama.pdf</strong>
|
||||
(extract text as an HTML file whose filename is output.html)
|
||||
|
||||
$ <strong>./pdf2txt.py -c euc-jp samples/jo.pdf</strong>
|
||||
$ <strong>python -m tools.pdf2txt -c euc-jp samples/jo.pdf</strong>
|
||||
(extract Japanese texts in vertical writing, CMap is required)
|
||||
|
||||
$ <strong>./pdf2txt.py -P mypassword secret.pdf</strong>
|
||||
$ <strong>python -m tools.pdf2txt -P mypassword secret.pdf</strong>
|
||||
(extract texts from an encrypted PDF file with a password)
|
||||
</pre></blockquote>
|
||||
|
||||
|
@ -181,10 +181,13 @@ but it's also possible to extract some meaningful contents
|
|||
<p>
|
||||
Examples:
|
||||
<blockquote><pre>
|
||||
$ <strong>./dumppdf.py -a foo.pdf</strong>
|
||||
$ <strong>python -m tools.dumppdf -a foo.pdf</strong>
|
||||
(dump all the headers and contents, except stream objects)
|
||||
|
||||
$ <strong>./dumppdf.py -r -i6 foo.pdf > pic.jpeg</strong>
|
||||
$ <strong>python -m tools.dumppdf -T foo.pdf</strong>
|
||||
(dump the table of contents)
|
||||
|
||||
$ <strong>python -m tools.dumppdf -r -i6 foo.pdf > pic.jpeg</strong>
|
||||
(extract a JPEG image)
|
||||
</pre></blockquote>
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
# * public domain *
|
||||
#
|
||||
|
||||
class Arcfour:
|
||||
class Arcfour(object):
|
||||
|
||||
def __init__(self, key):
|
||||
s = range(256)
|
||||
|
|
|
@ -17,7 +17,7 @@ class CMapError(Exception): pass
|
|||
|
||||
## CMap
|
||||
##
|
||||
class CMap:
|
||||
class CMap(object):
|
||||
|
||||
def __init__(self, debug=0):
|
||||
self.debug = debug
|
||||
|
@ -163,7 +163,7 @@ class CDBCMap(CMap):
|
|||
|
||||
## CMapDB
|
||||
##
|
||||
class CMapDB:
|
||||
class CMapDB(object):
|
||||
|
||||
class CMapNotFound(CMapError): pass
|
||||
|
||||
|
@ -340,7 +340,7 @@ class CMapParser(PSStackParser):
|
|||
|
||||
## FontMetricsDB
|
||||
##
|
||||
class FontMetricsDB:
|
||||
class FontMetricsDB(object):
|
||||
from fontmetrics import FONT_METRICS
|
||||
|
||||
@classmethod
|
||||
|
@ -350,7 +350,7 @@ class FontMetricsDB:
|
|||
|
||||
## EncodingDB
|
||||
##
|
||||
class EncodingDB:
|
||||
class EncodingDB(object):
|
||||
|
||||
from glyphlist import charname2unicode
|
||||
from latin_enc import ENCODING
|
||||
|
|
|
@ -4,7 +4,7 @@ stderr = sys.stderr
|
|||
|
||||
## LZWDecoder
|
||||
##
|
||||
class LZWDecoder:
|
||||
class LZWDecoder(object):
|
||||
|
||||
def __init__(self, fp, debug=0):
|
||||
self.fp = fp
|
||||
|
|
|
@ -9,7 +9,7 @@ except ImportError:
|
|||
from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF, \
|
||||
PSStackParser, PSLiteral, PSKeyword, STRICT, \
|
||||
PSLiteralTable, PSKeywordTable, literal_name, keyword_name
|
||||
from pdfparser import PDFException, PDFStream, PDFObjRef, resolve1, \
|
||||
from pdfparser import PDFException, PDFObject, PDFStream, PDFObjRef, resolve1, \
|
||||
int_value, float_value, num_value, \
|
||||
str_value, list_value, dict_value, stream_value
|
||||
from cmap import CMap, CMapDB, CMapParser, FontMetricsDB, EncodingDB
|
||||
|
@ -26,7 +26,7 @@ class PDFUnicodeNotDefined(PDFFontError): pass
|
|||
|
||||
## ColorSpace
|
||||
##
|
||||
class ColorSpace:
|
||||
class ColorSpace(object):
|
||||
def __init__(self, name, ncomponents):
|
||||
self.name = name
|
||||
self.ncomponents = ncomponents
|
||||
|
@ -82,7 +82,7 @@ def apply_matrix((a,b,c,d,e,f), (x,y)):
|
|||
##
|
||||
|
||||
# PDFFont
|
||||
class PDFFont:
|
||||
class PDFFont(object):
|
||||
|
||||
def __init__(self, descriptor, widths, default_width=None):
|
||||
self.descriptor = descriptor
|
||||
|
@ -208,7 +208,7 @@ class PDFType3Font(PDFSimpleFont):
|
|||
|
||||
## TrueTypeFont
|
||||
##
|
||||
class TrueTypeFont:
|
||||
class TrueTypeFont(object):
|
||||
|
||||
class CMapNotFound(Exception): pass
|
||||
|
||||
|
@ -391,7 +391,7 @@ class PDFCIDFont(PDFFont):
|
|||
|
||||
## Resource Manager
|
||||
##
|
||||
class PDFResourceManager:
|
||||
class PDFResourceManager(object):
|
||||
|
||||
'''
|
||||
ResourceManager facilitates reuse of shared resources
|
||||
|
@ -464,7 +464,7 @@ class PDFResourceManager:
|
|||
|
||||
## PDFDevice
|
||||
##
|
||||
class PDFDevice:
|
||||
class PDFDevice(object):
|
||||
|
||||
def __init__(self, rsrc, debug=0):
|
||||
self.rsrc = rsrc
|
||||
|
@ -587,9 +587,9 @@ class PDFContentParser(PSStackParser):
|
|||
|
||||
## Interpreter
|
||||
##
|
||||
class PDFPageInterpreter:
|
||||
class PDFPageInterpreter(object):
|
||||
|
||||
class TextState:
|
||||
class TextState(object):
|
||||
def __init__(self):
|
||||
self.font = None
|
||||
self.fontsize = 0
|
||||
|
|
|
@ -11,7 +11,7 @@ from utils import choplist, nunpack
|
|||
from arcfour import Arcfour
|
||||
from lzw import LZWDecoder
|
||||
from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF, \
|
||||
PSLiteral, PSKeyword, PSLiteralTable, PSKeywordTable, \
|
||||
PSObject, PSLiteral, PSKeyword, PSLiteralTable, PSKeywordTable, \
|
||||
literal_name, keyword_name, \
|
||||
PSStackParser, STRICT
|
||||
|
||||
|
@ -46,10 +46,12 @@ KEYWORD_TRAILER = PSKeywordTable.intern('trailer')
|
|||
KEYWORD_STARTXREF = PSKeywordTable.intern('startxref')
|
||||
PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
|
||||
|
||||
class PDFObject(PSObject): pass
|
||||
|
||||
|
||||
## PDFObjRef
|
||||
##
|
||||
class PDFObjRef:
|
||||
class PDFObjRef(PDFObject):
|
||||
|
||||
def __init__(self, doc, objid, _):
|
||||
if objid == 0:
|
||||
|
@ -165,7 +167,7 @@ def stream_value(x):
|
|||
|
||||
## PDFStream type
|
||||
##
|
||||
class PDFStream:
|
||||
class PDFStream(PDFObject):
|
||||
|
||||
def __init__(self, dic, rawdata, decipher=None):
|
||||
self.dic = dic
|
||||
|
@ -247,11 +249,11 @@ class PDFStream:
|
|||
|
||||
## PDFPage
|
||||
##
|
||||
class PDFPage:
|
||||
class PDFPage(object):
|
||||
|
||||
def __init__(self, doc, pageidx, attrs):
|
||||
def __init__(self, doc, pageid, attrs):
|
||||
self.doc = doc
|
||||
self.pageid = pageidx
|
||||
self.pageid = pageid
|
||||
self.attrs = dict_value(attrs)
|
||||
self.lastmod = self.attrs.get('LastModified')
|
||||
self.resources = resolve1(self.attrs['Resources'])
|
||||
|
@ -397,7 +399,7 @@ class PDFXRefStream(object):
|
|||
## at once. Rather it is parsed dynamically as processing goes.
|
||||
## A PDF parser is associated with the document.
|
||||
##
|
||||
class PDFDocument:
|
||||
class PDFDocument(object):
|
||||
|
||||
def __init__(self, debug=0):
|
||||
self.debug = debug
|
||||
|
@ -453,7 +455,6 @@ class PDFDocument:
|
|||
if self.catalog.get('Type') != LITERAL_CATALOG:
|
||||
if STRICT:
|
||||
raise PDFValueError('Catalog not found!')
|
||||
self.outline = self.catalog.get('Outline')
|
||||
return
|
||||
|
||||
# initialize(password='')
|
||||
|
@ -608,11 +609,54 @@ class PDFDocument:
|
|||
elif tree.get('Type') == LITERAL_PAGE:
|
||||
if 1 <= debug:
|
||||
print >>stderr, 'Page: %r' % tree
|
||||
yield tree
|
||||
yield (obj.objid, tree)
|
||||
if 'Pages' not in self.catalog: return
|
||||
for (i,tree) in enumerate(search(self.catalog['Pages'], self.catalog)):
|
||||
yield PDFPage(self, i, tree)
|
||||
return
|
||||
for (pageid,tree) in search(self.catalog['Pages'], self.catalog):
|
||||
yield PDFPage(self, pageid, tree)
|
||||
return
|
||||
|
||||
def get_outlines(self):
|
||||
if 'Outlines' not in self.catalog:
|
||||
raise PDFException('no /Outlines defined!')
|
||||
def search(entry, level):
|
||||
entry = dict_value(entry)
|
||||
if 'Title' in entry:
|
||||
if 'A' in entry or 'Dest' in entry:
|
||||
title = unicode(str_value(entry['Title']), 'utf-8', 'ignore')
|
||||
dest = entry.get('Dest')
|
||||
action = entry.get('A')
|
||||
se = entry.get('SE')
|
||||
yield (level, title, dest, action, se)
|
||||
if 'First' in entry and 'Last' in entry:
|
||||
for x in search(entry['First'], level+1):
|
||||
yield x
|
||||
if 'Next' in entry:
|
||||
for x in search(entry['Next'], level):
|
||||
yield x
|
||||
return
|
||||
return search(self.catalog['Outlines'], 0)
|
||||
|
||||
def lookup_name(self, cat, key):
|
||||
try:
|
||||
names = dict_value(self.catalog['Names'])
|
||||
except (PDFTypeError, KeyError):
|
||||
raise KeyError((cat,key))
|
||||
# may raise KeyError
|
||||
d0 = dict_value(names[cat])
|
||||
def lookup(d):
|
||||
if 'Limits' in d:
|
||||
(k1,k2) = list_value(d['Limits'])
|
||||
if key < k1 or k2 < key: return None
|
||||
if 'Names' in d:
|
||||
objs = list_value(d['Names'])
|
||||
names = dict(choplist(2, objs))
|
||||
return names[key]
|
||||
if 'Kids' in d:
|
||||
for c in list_value(d['Kids']):
|
||||
v = lookup(dict_value(c))
|
||||
if v: return v
|
||||
raise KeyError((cat,key))
|
||||
return lookup(d0)
|
||||
|
||||
|
||||
## PDFParser
|
||||
|
|
|
@ -19,7 +19,9 @@ class PSValueError(PSException): pass
|
|||
##
|
||||
|
||||
# PSLiteral
|
||||
class PSLiteral:
|
||||
class PSObject(object): pass
|
||||
|
||||
class PSLiteral(PSObject):
|
||||
|
||||
'''
|
||||
PS literals (e.g. "/Name").
|
||||
|
@ -35,7 +37,7 @@ class PSLiteral:
|
|||
return '/%s' % self.name
|
||||
|
||||
# PSKeyword
|
||||
class PSKeyword:
|
||||
class PSKeyword(PSObject):
|
||||
|
||||
'''
|
||||
PS keywords (e.g. "showpage").
|
||||
|
@ -51,7 +53,7 @@ class PSKeyword:
|
|||
return self.name
|
||||
|
||||
# PSSymbolTable
|
||||
class PSSymbolTable:
|
||||
class PSSymbolTable(object):
|
||||
|
||||
'''
|
||||
Symbol table that stores PSLiteral or PSKeyword.
|
||||
|
@ -113,7 +115,7 @@ END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')
|
|||
END_STRING = re.compile(r'[()\134]')
|
||||
OCT_STRING = re.compile(r'[0-7]')
|
||||
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
|
||||
class PSBaseParser:
|
||||
class PSBaseParser(object):
|
||||
|
||||
'''
|
||||
Most basic PostScript parser that performs only basic tokenization.
|
||||
|
@ -129,6 +131,13 @@ class PSBaseParser:
|
|||
def __repr__(self):
|
||||
return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)
|
||||
|
||||
def flush(self):
|
||||
return
|
||||
|
||||
def close(self):
|
||||
self.flush()
|
||||
return
|
||||
|
||||
def tell(self):
|
||||
return self.fp.tell()
|
||||
|
||||
|
@ -463,8 +472,6 @@ class PSStackParser(PSBaseParser):
|
|||
|
||||
def do_keyword(self, pos, token):
|
||||
return
|
||||
def flush(self):
|
||||
return
|
||||
|
||||
def nextobject(self):
|
||||
'''
|
||||
|
|
|
@ -51,7 +51,7 @@ def cdbiter(fp, eod):
|
|||
|
||||
|
||||
# CDBReader
|
||||
class CDBReader:
|
||||
class CDBReader(object):
|
||||
|
||||
def __init__(self, cdbname, docache=1):
|
||||
self.name = cdbname
|
||||
|
@ -59,7 +59,7 @@ class CDBReader:
|
|||
hash0 = decode(self._fp.read(2048))
|
||||
self._hash0 = [ (hash0[i], hash0[i+1]) for i in xrange(0, 512, 2) ]
|
||||
self._hash1 = [ None ] * 256
|
||||
self._eod = self._hash0[0]
|
||||
self._eod = hash0[0]
|
||||
self._docache = docache
|
||||
self._cache = {}
|
||||
self._keyiter = None
|
||||
|
@ -149,7 +149,7 @@ class CDBReader:
|
|||
|
||||
|
||||
# CDBMaker
|
||||
class CDBMaker:
|
||||
class CDBMaker(object):
|
||||
|
||||
def __init__(self, cdbname, tmpname):
|
||||
self.fn = cdbname
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#
|
||||
import sys, re
|
||||
from pdflib.pdfparser import PDFDocument, PDFParser, PDFStream, \
|
||||
PDFObjRef, PSKeyword, PSLiteral
|
||||
PDFObjRef, PSKeyword, PSLiteral, resolve1
|
||||
stdout = sys.stdout
|
||||
stderr = sys.stderr
|
||||
|
||||
|
@ -94,8 +94,28 @@ def dumpallobjs(out, doc, codec=None):
|
|||
out.write('</pdf>')
|
||||
return
|
||||
|
||||
# dumpoutline
|
||||
def dumpoutline(outfp, fname, objids, pagenos, password='',
|
||||
dumpall=False, codec=None, debug=0):
|
||||
doc = PDFDocument(debug=debug)
|
||||
fp = file(fname, 'rb')
|
||||
parser = PDFParser(doc, fp, debug=debug)
|
||||
doc.initialize(password)
|
||||
pages = dict( (page.pageid, pageno) for (pageno,page) in enumerate(doc.get_pages()) )
|
||||
for (level,title,dest,a,se) in doc.get_outlines():
|
||||
pageno = None
|
||||
if dest:
|
||||
dest = resolve1( doc.lookup_name('Dests', dest) )
|
||||
if isinstance(dest, dict):
|
||||
dest = dest['D']
|
||||
pageno = pages[dest[0].objid]
|
||||
outfp.write(repr((level,title,dest,pageno))+'\n')
|
||||
parser.close()
|
||||
fp.close()
|
||||
return
|
||||
|
||||
# dumppdf
|
||||
def dumppdf(outfp, fname, objids, pageids, password='',
|
||||
def dumppdf(outfp, fname, objids, pagenos, password='',
|
||||
dumpall=False, codec=None, debug=0):
|
||||
doc = PDFDocument(debug=debug)
|
||||
fp = file(fname, 'rb')
|
||||
|
@ -110,13 +130,13 @@ def dumppdf(outfp, fname, objids, pageids, password='',
|
|||
outfp.write(obj.get_data())
|
||||
else:
|
||||
dumpxml(outfp, obj, codec=codec)
|
||||
if pageids:
|
||||
for page in doc.get_pages():
|
||||
if page.pageid in pageids:
|
||||
if pagenos:
|
||||
for (pageno,page) in enumerate(doc.get_pages()):
|
||||
if pageno in pagenos:
|
||||
dumpxml(outfp, page.attrs)
|
||||
if dumpall:
|
||||
dumpallobjs(outfp, doc, codec=codec)
|
||||
if (not objids) and (not pageids) and (not dumpall):
|
||||
if (not objids) and (not pagenos) and (not dumpall):
|
||||
dumptrailers(outfp, doc)
|
||||
fp.close()
|
||||
outfp.write('\n')
|
||||
|
@ -127,34 +147,36 @@ def dumppdf(outfp, fname, objids, pageids, password='',
|
|||
def main(argv):
|
||||
import getopt
|
||||
def usage():
|
||||
print 'usage: %s [-d] [-a] [-p pageid] [-P password] [-r|-b|-t] [-i objid] file ...' % argv[0]
|
||||
print 'usage: %s [-d] [-a] [-p pageid] [-P password] [-r|-b|-t] [-T] [-i objid] file ...' % argv[0]
|
||||
return 100
|
||||
try:
|
||||
(opts, args) = getopt.getopt(argv[1:], 'dap:P:rbti:')
|
||||
(opts, args) = getopt.getopt(argv[1:], 'dap:P:rbtTi:')
|
||||
except getopt.GetoptError:
|
||||
return usage()
|
||||
if not args: return usage()
|
||||
debug = 0
|
||||
objids = []
|
||||
pageids = set()
|
||||
pagenos = set()
|
||||
codec = None
|
||||
password = ''
|
||||
dumpall = False
|
||||
proc = dumppdf
|
||||
outfp = stdout
|
||||
for (k, v) in opts:
|
||||
if k == '-d': debug += 1
|
||||
elif k == '-i': objids.extend( int(x) for x in v.split(',') )
|
||||
elif k == '-p': pageids.update( int(x)-1 for x in v.split(',') )
|
||||
elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
|
||||
elif k == '-P': password = v
|
||||
elif k == '-a': dumpall = True
|
||||
elif k == '-r': codec = 'raw'
|
||||
elif k == '-b': codec = 'binary'
|
||||
elif k == '-t': codec = 'text'
|
||||
elif k == '-T': proc = dumpoutline
|
||||
elif k == '-o': outfp = file(v, 'wb')
|
||||
#
|
||||
for fname in args:
|
||||
dumppdf(outfp, fname, objids, pageids, password=password,
|
||||
dumpall=dumpall, codec=codec, debug=debug)
|
||||
proc(outfp, fname, objids, pagenos, password=password,
|
||||
dumpall=dumpall, codec=codec, debug=debug)
|
||||
return
|
||||
|
||||
if __name__ == '__main__': sys.exit(main(sys.argv))
|
||||
|
|
|
@ -93,7 +93,7 @@ class TextConverter(PDFDevice):
|
|||
return
|
||||
|
||||
def begin_page(self, page):
|
||||
self.context = PageItem(str(page.pageid+1), page.mediabox, page.rotate)
|
||||
self.context = PageItem(len(self.pages), page.mediabox, page.rotate)
|
||||
return
|
||||
def end_page(self, _):
|
||||
assert not self.stack
|
||||
|
@ -205,7 +205,7 @@ class TextConverter(PDFDevice):
|
|||
# pdf2txt
|
||||
class TextExtractionNotAllowed(RuntimeError): pass
|
||||
|
||||
def pdf2txt(outfp, rsrc, fname, pages, codec, maxpages=0, html=False, password='', debug=0):
|
||||
def pdf2txt(outfp, rsrc, fname, pagenos, codec, maxpages=0, html=False, password='', debug=0):
|
||||
device = TextConverter(rsrc, debug=debug)
|
||||
doc = PDFDocument(debug=debug)
|
||||
fp = file(fname, 'rb')
|
||||
|
@ -218,10 +218,10 @@ def pdf2txt(outfp, rsrc, fname, pages, codec, maxpages=0, html=False, password='
|
|||
raise TextExtractionNotAllowed('text extraction is not allowed: %r' % fname)
|
||||
interpreter = PDFPageInterpreter(rsrc, device, debug=debug)
|
||||
device.reset()
|
||||
for (i,page) in enumerate(doc.get_pages(debug=debug)):
|
||||
if pages and (i not in pages): continue
|
||||
for (pageno,page) in enumerate(doc.get_pages(debug=debug)):
|
||||
if pagenos and (pageno not in pagenos): continue
|
||||
interpreter.process_page(page)
|
||||
if maxpages and maxpages <= i+1: break
|
||||
if maxpages and maxpages <= pageno+1: break
|
||||
if html:
|
||||
device.dump_html(outfp, codec)
|
||||
else:
|
||||
|
@ -235,7 +235,7 @@ def pdf2txt(outfp, rsrc, fname, pages, codec, maxpages=0, html=False, password='
|
|||
def main(argv):
|
||||
import getopt
|
||||
def usage():
|
||||
print 'usage: %s [-d] [-p pages] [-P password] [-c codec] [-H] [-o output] file ...' % argv[0]
|
||||
print 'usage: %s [-d] [-p pagenos] [-P password] [-c codec] [-H] [-o output] file ...' % argv[0]
|
||||
return 100
|
||||
try:
|
||||
(opts, args) = getopt.getopt(argv[1:], 'dp:P:c:Ho:C:D:m:')
|
||||
|
@ -246,14 +246,14 @@ def main(argv):
|
|||
cmapdir = 'CMap'
|
||||
cdbcmapdir = 'CDBCMap'
|
||||
codec = 'ascii'
|
||||
pages = set()
|
||||
pagenos = set()
|
||||
maxpages = 0
|
||||
html = False
|
||||
password = ''
|
||||
outfp = stdout
|
||||
for (k, v) in opts:
|
||||
if k == '-d': debug += 1
|
||||
elif k == '-p': pages.update( int(x)-1 for x in v.split(',') )
|
||||
elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
|
||||
elif k == '-P': password = v
|
||||
elif k == '-c': codec = v
|
||||
elif k == '-m': maxpages = int(v)
|
||||
|
@ -265,7 +265,7 @@ def main(argv):
|
|||
CMapDB.initialize(cmapdir, cdbcmapdir, debug=debug)
|
||||
rsrc = PDFResourceManager(debug=debug)
|
||||
for fname in args:
|
||||
pdf2txt(outfp, rsrc, fname, pages, codec,
|
||||
pdf2txt(outfp, rsrc, fname, pagenos, codec,
|
||||
maxpages=maxpages, html=html, password=password, debug=debug)
|
||||
return
|
||||
|
||||
|
|
Loading…
Reference in New Issue