pdfminer.six/tests/test_tools_pdf2txt.py

63 lines
1.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python
2014-09-03 13:26:08 +00:00
# -*- coding: utf-8 -*-
2014-09-04 07:36:19 +00:00
import nose, logging, os
2014-09-03 13:26:08 +00:00
import tools.pdf2txt as pdf2txt
# Absolute directory of this test module, kept with a trailing '/' because
# run() appends relative sample paths to it by plain string concatenation.
path = '%s/' % os.path.dirname(os.path.abspath(__file__))
def run(datapath, filename, options=None):
    """Run pdf2txt on one sample PDF, writing the text beside this module.

    datapath: location of the sample directory relative to this module's
        directory; it is concatenated onto ``path`` (not joined), so it
        must end with '/'.
    filename: base name of the sample, without the '.pdf' extension. The
        output goes to '<filename>.txt' in this module's directory.
    options: optional string of extra pdf2txt switches separated by
        whitespace, e.g. '-A -t xml'. Falsy values add no switches.
    """
    infile = path + datapath + filename + '.pdf'
    outfile = path + filename + '.txt'

    # Build the argument list directly. Formatting a single command string
    # and splitting it on ' ' tore apart any input or output path that
    # contained a space (e.g. a checkout under "My Documents").
    args = ['-o' + outfile]
    if options:
        # Only the switches are whitespace-split; the paths stay intact.
        args.extend(options.split())
    args.append(infile)

    pdf2txt.main(args)
2014-09-03 13:26:08 +00:00
2014-09-04 07:36:19 +00:00
class TestDumpPDF():
2014-09-03 13:26:08 +00:00
def test_1(self):
run('../samples/','jo')
run('../samples/','simple1')
run('../samples/','simple2')
run('../samples/','simple3')
2019-08-10 04:49:20 +00:00
run('../samples/','sampleOneByteIdentityEncode')
2014-09-03 13:26:08 +00:00
def test_2(self):
run('../samples/nonfree/','dmca')
2014-09-03 13:26:08 +00:00
def test_3(self):
run('../samples/nonfree/','f1040nr')
def test_4(self):
run('../samples/nonfree/','i1040nr')
2014-09-03 13:26:08 +00:00
def test_5(self):
run('../samples/nonfree/','kampo')
2014-09-03 13:26:08 +00:00
def test_6(self):
run('../samples/nonfree/','naacl06-shinyama')
# this test works on Windows but on Linux & Travis-CI it says
# PDFSyntaxError: No /Root object! - Is this really a PDF?
# TODO: Find why
"""
def test_7(self):
run('../samples/contrib/','stamp-no')
"""
2017-10-16 10:05:39 +00:00
def test_8(self):
run('../samples/contrib/','2b','-A -t xml')
def test_9(self):
run('../samples/nonfree/','175') # https://github.com/pdfminer/pdfminer.six/issues/65
2017-10-16 10:05:39 +00:00
def test_10(self):
run('../samples/scancode/','patchelf') # https://github.com/euske/pdfminer/issues/96
2014-09-03 13:26:08 +00:00
if __name__ == '__main__':
    # When this file is executed directly, hand the module to nose so it
    # runs the tests defined above.
    nose.runmodule()