--- bench/bench_crfpp.py.orig 2011-08-11 12:02:41.000000000 +0900 +++ bench/bench_crfpp.py 2013-10-27 00:09:07.000000000 +0900 @@ -5,9 +5,10 @@ import string from bench import * -CRFPP_LEARN='/home/okazaki/local/bin/crf_learn' -CRFPP_TEST='/home/okazaki/local/bin/crf_test' +CRFPP_LEARN='@PREFIX@/bin/crf_learn' +CRFPP_TEST='@PREFIX@/bin/crf_test' OUTDIR='crfpp/' +LOGDIR='logs/' training_patterns = ( ('num_features', r'^Number of features:[ ]*(\d+)', 1, int, last), @@ -31,6 +32,10 @@ if __name__ == '__main__': fe = sys.stderr + if not os.path.exists(OUTDIR): + os.makedirs(OUTDIR) + if not os.path.exists(LOGDIR): + os.makedirs(LOGDIR) R = {} for name, param in params.iteritems(): @@ -40,7 +45,7 @@ tglog = OUTDIR + name + '.tg.log' s = string.Template( - '$crfpp_learn $param template.crfpp train.txt $model > $trlog' + '$crfpp_learn $param @TEMPLATE_CRFPP@ train.txt $model > $trlog' ) cmd = s.substitute( crfpp_learn=CRFPP_LEARN, @@ -51,14 +56,14 @@ fe.write(cmd) fe.write('\n') - #os.system(cmd) + os.system(cmd) fo = open(trtxt, 'w') fo.write('$ %s\n' % cmd) fo.write(open(trlog, 'r').read()) s = string.Template( - '$crfpp_test -m $model test.txt | ./accuracy.py > $tglog' + '$crfpp_test -m $model test.txt | @BENCH@/accuracy.py > $tglog' ) cmd = s.substitute( crfpp_test=CRFPP_TEST, @@ -68,7 +73,7 @@ fe.write(cmd) fe.write('\n') - #os.system(cmd) + os.system(cmd) D = analyze_log(open(trlog), training_patterns) D.update(analyze_log(open(tglog), tagging_patterns)) --- bench/bench_crfsgd.py.orig 2011-08-11 12:02:41.000000000 +0900 +++ bench/bench_crfsgd.py 2013-10-27 00:09:29.000000000 +0900 @@ -5,8 +5,9 @@ import string from bench import * -CRFSGD='/home/okazaki/install/sgd-1.3/crf/crfsgd' +CRFSGD='@PREFIX@/bin/crfsgd' OUTDIR='crfsgd/' +LOGDIR='logs/' training_patterns = ( ('num_features', r'features: (\d+)', 1, int, last), @@ -26,6 +27,10 @@ if __name__ == '__main__': fe = sys.stderr + if not os.path.exists(OUTDIR): + os.makedirs(OUTDIR) + if not os.path.exists(LOGDIR): + os.makedirs(LOGDIR) R = {} for name, param in params.iteritems(): @@ -35,7 +40,7 @@ tglog = OUTDIR + name + '.tg.log' s = string.Template( - '$crfsgd $param $model template.crfpp train.txt > $trlog' + '$crfsgd $param $model @TEMPLATE_CRFPP@ train.txt > $trlog' ) cmd = s.substitute( crfsgd=CRFSGD, @@ -46,14 +51,14 @@ fe.write(cmd) fe.write('\n') - #os.system(cmd) + os.system(cmd) fo = open(trtxt, 'w') fo.write('$ %s\n' % cmd) fo.write(open(trlog, 'r').read()) s = string.Template( - '$crfsgd -t $model test.txt | ./accuracy.py > $tglog' + '$crfsgd -t $model test.txt | @BENCH@/accuracy.py > $tglog' ) cmd = s.substitute( crfsgd=CRFSGD, @@ -63,7 +68,7 @@ fe.write(cmd) fe.write('\n') - #os.system(cmd) + os.system(cmd) D = analyze_log(open(trlog), training_patterns) D.update(analyze_log(open(tglog), tagging_patterns)) --- bench/bench_crfsuite.py.orig 2011-08-11 12:02:41.000000000 +0900 +++ bench/bench_crfsuite.py 2013-10-27 00:42:02.000000000 +0900 @@ -5,8 +5,10 @@ import string from bench import * -CRFSUITE='/home/okazaki/projects/crfsuite/frontend/crfsuite' +CRFSUITE='@PREFIX@/bin/crfsuite' +CHUNKING='@PREFIX@/share/examples/crfsuite/chunking.py' OUTDIR='crfsuite/' +LOGDIR='logs/' training_patterns = ( ('num_features', r'^Number of features: (\d+)', 1, int, last), @@ -34,6 +36,23 @@ if __name__ == '__main__': fe = sys.stderr + if not os.path.exists(OUTDIR): + os.makedirs(OUTDIR) + if not os.path.exists(LOGDIR): + os.makedirs(LOGDIR) + + for t in ('train', 'test'): + s = string.Template( + '$chunking < $t.txt > $t.crfsuite' + ) + cmd = s.substitute( + chunking=CHUNKING, + t=t + ) + + fe.write(cmd) + fe.write('\n') + os.system(cmd) R = {} for name, param in params.iteritems(): @@ -54,7 +73,7 @@ fe.write(cmd) fe.write('\n') - #os.system(cmd) + os.system(cmd) fo = open(trtxt, 'w') fo.write('$ %s\n' % cmd) @@ -71,7 +90,7 @@ fe.write(cmd) fe.write('\n') - #os.system(cmd) + os.system(cmd) D = analyze_log(open(trlog), training_patterns) D.update(analyze_log(open(tglog), tagging_patterns)) --- bench/bench_mallet.py.orig 2011-08-11 12:02:41.000000000 +0900 +++ bench/bench_mallet.py 2013-10-27 00:42:42.000000000 +0900 @@ -5,8 +5,9 @@ import string from bench import * -MALLET='java -cp "/home/okazaki/install/mallet-2.0.6/class:/home/okazaki/install/mallet-2.0.6/lib/mallet-deps.jar" cc.mallet.fst.SimpleTagger' +MALLET='@PREFIX@/bin/mallet' OUTDIR='mallet/' +LOGDIR='logs/' training_patterns = ( ('num_features', r'^Number of weights = (\d+)', 1, int, last), @@ -29,6 +30,23 @@ if __name__ == '__main__': fe = sys.stderr + if not os.path.exists(OUTDIR): + os.makedirs(OUTDIR) + if not os.path.exists(LOGDIR): + os.makedirs(LOGDIR) + + for t in ('train', 'test'): + s = string.Template( + '$mallet import-file --input $t.txt --output $t.mallet' + ) + cmd = s.substitute( + mallet=MALLET, + t=t + ) + + fe.write(cmd) + fe.write('\n') + os.system(cmd) R = {} for name, param in params.iteritems(): @@ -38,7 +56,7 @@ tglog = OUTDIR + name + '.tg.log' s = string.Template( - 'time $mallet --train true $param --model-file $model train.mallet > $trlog 2>&1' + 'time $mallet tag --train true $param --model-file $model train.mallet > $trlog 2>&1' ) cmd = s.substitute( mallet=MALLET, @@ -49,14 +67,14 @@ fe.write(cmd) fe.write('\n') - #os.system(cmd) + os.system(cmd) fo = open(trtxt, 'w') fo.write('$ %s\n' % cmd) fo.write(open(trlog, 'r').read()) s = string.Template( - '$mallet --model-file $model --test lab test.mallet > $tglog 2>&1' + '$mallet tag --model-file $model --test lab test.mallet > $tglog 2>&1' ) cmd = s.substitute( mallet=MALLET, @@ -66,7 +84,7 @@ fe.write(cmd) fe.write('\n') - #os.system(cmd) + os.system(cmd) D = analyze_log(open(trlog), training_patterns) D['update'] = 0. --- bench/bench_wapiti.py.orig 2011-08-11 12:02:41.000000000 +0900 +++ bench/bench_wapiti.py 2013-10-27 00:10:09.000000000 +0900 @@ -5,8 +5,9 @@ import string from bench import * -WAPITI='/home/okazaki/install/wapiti-1.1.3/wapiti' +WAPITI='@PREFIX@/bin/wapiti' OUTDIR='wapiti/' +LOGDIR='logs/' training_patterns = ( ('num_features', r'nb features: (\d+)', 1, int, last), @@ -30,6 +31,10 @@ if __name__ == '__main__': fe = sys.stderr + if not os.path.exists(OUTDIR): + os.makedirs(OUTDIR) + if not os.path.exists(LOGDIR): + os.makedirs(LOGDIR) R = {} for name, param in params.iteritems(): @@ -39,7 +44,7 @@ tglog = OUTDIR + name + '.tg.log' s = string.Template( - 'time $wapiti train $param -p template.wapiti train.txt $model > $trlog 2>&1' + 'time $wapiti train $param -p @TEMPLATE_WAPITI@ train.txt $model > $trlog 2>&1' ) cmd = s.substitute( wapiti=WAPITI, @@ -50,14 +55,14 @@ fe.write(cmd) fe.write('\n') - #os.system(cmd) + os.system(cmd) fo = open(trtxt, 'w') fo.write('$ %s\n' % cmd) fo.write(open(trlog, 'r').read()) s = string.Template( - '$wapiti label -m $model test.txt | ./accuracy.py > $tglog' + '$wapiti label -m $model test.txt | @BENCH@/accuracy.py > $tglog' ) cmd = s.substitute( wapiti=WAPITI, @@ -67,7 +72,7 @@ fe.write(cmd) fe.write('\n') - #os.system(cmd) + os.system(cmd) D = analyze_log(open(trlog), training_patterns) D.update(analyze_log(open(tglog), tagging_patterns)) --- bench/collect.py.orig 2011-08-11 12:02:41.000000000 +0900 +++ bench/collect.py 2013-10-26 22:37:17.000000000 +0900 @@ -4,12 +4,12 @@ import os scripts = ( - ('CRFsuite 0.12', './bench_crfsuite.py'), - ('CRFsuite 0.11', './bench_crfsuite-0.11.py'), - ('Wapiti v1.1.3', './bench_wapiti.py'), - ('sgd 1.3', './bench_crfsgd.py'), - ('CRF++ 0.54', './bench_crfpp.py'), - ('MALLET 2.0.6', './bench_mallet.py'), + ('CRFsuite 0.12', '@BENCH@/bench_crfsuite.py'), +# ('CRFsuite 0.11', './bench_crfsuite-0.11.py'), + ('Wapiti v1.4.0', '@BENCH@/bench_wapiti.py'), + ('sgd 2.1', '@BENCH@/bench_crfsgd.py'), + ('CRF++ 0.58', '@BENCH@/bench_crfpp.py'), + ('MALLET 2.0.7', '@BENCH@/bench_mallet.py'), ) fields = (