2014-09-03 13:26:08 +00:00
|
|
|
#!/usr/bin/python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2014-09-04 07:36:19 +00:00
|
|
|
import nose, logging, os
|
2014-09-03 13:26:08 +00:00
|
|
|
|
|
|
|
import tools.pdf2txt as pdf2txt
|
|
|
|
|
|
|
|
# Directory containing this test module, with a trailing '/', so that sample
# and output locations can be resolved independently of the current
# working directory.
path = '%s/' % os.path.dirname(os.path.abspath(__file__))
|
|
|
|
|
|
|
|
def run(datapath, filename, options=None):
    """Convert one sample PDF to text by calling ``pdf2txt.main``.

    The input file is ``<datapath>/<filename>.pdf`` and the output file is
    ``<filename>.txt``; both are resolved relative to the directory of this
    module (the module-level ``path``).

    :param datapath: directory holding the PDF, relative to ``path``.
    :param filename: base name of the PDF, without the ``.pdf`` extension.
    :param options: optional string of extra command-line options,
        separated by whitespace, placed before the input file.
    """
    infile = os.path.join(path, datapath, filename + '.pdf')
    outfile = os.path.join(path, filename + '.txt')

    # Build the argument list directly instead of formatting a command line
    # and splitting it on spaces: splitting would break any path that
    # contains a space and would yield empty arguments for doubled spaces.
    args = ['-o' + outfile]
    if options:
        args.extend(options.split())
    args.append(infile)

    pdf2txt.main(args)
|
2014-09-03 13:26:08 +00:00
|
|
|
|
2014-09-04 07:36:19 +00:00
|
|
|
class TestDumpPDF():
    """Smoke tests that run ``pdf2txt`` over the sample PDF files.

    Each test only checks that the conversion finishes without raising;
    the generated text is not compared against a reference.
    """

    def test_1(self):
        """Convert the samples that live directly in ``../samples/``."""
        run('../samples/', 'jo')
        run('../samples/', 'simple1')
        run('../samples/', 'simple2')
        run('../samples/', 'simple3')

    def test_2(self):
        """Convert the ``dmca`` sample."""
        run('../samples/nonfree/', 'dmca')

    def test_3(self):
        """Convert the ``f1040nr`` sample (skipped: takes too much time)."""
        # The exception has to be raised, not merely instantiated, otherwise
        # the runner reports the test as passed instead of skipped.
        raise nose.SkipTest('takes too much time')
        # Kept so the test can be re-enabled by deleting the raise above.
        run('../samples/nonfree/', 'f1040nr')

    def test_4(self):
        """Convert the ``i1040nr`` sample (skipped: takes too much time)."""
        # The exception has to be raised, not merely instantiated, otherwise
        # the runner reports the test as passed instead of skipped.
        raise nose.SkipTest('takes too much time')
        # Kept so the test can be re-enabled by deleting the raise above.
        run('../samples/nonfree/', 'i1040nr')

    def test_5(self):
        """Convert the ``kampo`` sample."""
        run('../samples/nonfree/', 'kampo')

    def test_6(self):
        """Convert the ``naacl06-shinyama`` sample."""
        run('../samples/nonfree/', 'naacl06-shinyama')
|
|
|
|
|
|
|
|
# When executed as a script, hand this module to nose's runner.
if __name__ == '__main__':
    nose.runmodule()
|