add some restriction
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@22 1aa58f4a-7d42-0410-adbc-911cccaed67c
pull/1/head
parent
6183469f11
commit
ecaf68efed
1
lzw.py
1
lzw.py
|
@ -17,7 +17,6 @@ class LZWDecoder:
|
||||||
return
|
return
|
||||||
|
|
||||||
def readbits(self, bits):
|
def readbits(self, bits):
|
||||||
bits0 = bits
|
|
||||||
v = 0
|
v = 0
|
||||||
while 1:
|
while 1:
|
||||||
# the number of remaining bits we can get from the current buffer.
|
# the number of remaining bits we can get from the current buffer.
|
||||||
|
|
|
@ -163,11 +163,14 @@ class TextConverter(PDFDevice):
|
||||||
|
|
||||||
|
|
||||||
# pdf2txt
|
# pdf2txt
|
||||||
|
class TextExtractionNotAllowed(RuntimeError):
    """Signals that text extraction is not allowed for a document."""
|
||||||
def pdf2txt(outfp, rsrc, fname, pages, codec, debug=0):
|
def pdf2txt(outfp, rsrc, fname, pages, codec, debug=0):
|
||||||
device = TextConverter(rsrc, debug=debug)
|
device = TextConverter(rsrc, debug=debug)
|
||||||
doc = PDFDocument(debug=debug)
|
doc = PDFDocument(debug=debug)
|
||||||
fp = file(fname)
|
fp = file(fname)
|
||||||
parser = PDFParser(doc, fp, debug=debug)
|
parser = PDFParser(doc, fp, debug=debug)
|
||||||
|
if not doc.is_extractable:
|
||||||
|
raise TextExtractionNotAllowed('text extraction is not allowed: %r' % fname)
|
||||||
interpreter = PDFPageInterpreter(rsrc, device, debug=debug)
|
interpreter = PDFPageInterpreter(rsrc, device, debug=debug)
|
||||||
outfp.write('<document>\n')
|
outfp.write('<document>\n')
|
||||||
for (i,page) in enumerate(doc.get_pages(debug=debug)):
|
for (i,page) in enumerate(doc.get_pages(debug=debug)):
|
||||||
|
|
Loading…
Reference in New Issue