Source

Blacklist Classifier / standalone / Makefile

Full commit
#
#    Classifier for language discrimination based on blacklists v0.1
#    Copyright 2012 Joerg Tiedemann
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Lesser General Public License as published
#    by the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#    GNU Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#----------------------------------------------------------------------------
# This Makefile is not for installing the software but for testing it only!
#----------------------------------------------------------------------------
#  make train .............. train blacklists from given training data
#  make test ............... test the classifier with the given test data
#  make learning_curve ..... train and test with incremental training data
#----------------------------------------------------------------------------



# Gzipped training files, one per language; used as prerequisites of the
# learning-curve experiment and handed to the classifier via -t.
TRAINDATA = $(addprefix data/train/,\
		bhs_sr.txt.gz \
		bhs_hr.txt.gz \
		bhs_bs.txt.gz)

# Evaluation files; used as prerequisites of the learning-curve experiment
# and handed to the classifier via -e.
TESTDATA = $(addprefix data/eval/,\
		politika.rs.200.check \
		vecernji.hr.200.check \
		dnevniavaz.ba.200.check)

# Language codes; the recipes pass these same three codes to the classifier.
LANGS = sr hr bs

# Classifier script run by every recipe in this Makefile.
CLASSIFIER = ./blacklist_classifier.pl


# Run the classifier on every file in TESTDATA and print, per file, how often
# each distinct output line occurred (sort | uniq -c).
# The classifier output for data/eval/X is written to ./X.guess.
# Declared phony so that a stray file called "test" cannot disable the target.
.PHONY: test
test:
	for f in ${TESTDATA}; do \
		guess=`basename "$$f"`.guess; \
		${CLASSIFIER} -i ${LANGS} < "$$f" > "$$guess" || exit 1; \
		sort "$$guess" | uniq -c || exit 1; \
	done

# Train the blacklists from TRAINDATA for the languages listed in LANGS.
# The training files are passed as ONE quoted, space-separated argument to -t.
# Declared phony: this is a command, not a file to be built.
.PHONY: train
train:
	${CLASSIFIER} -t "${TRAINDATA}" ${LANGS}

# Convenience alias for the learning-curve experiment output.
# Declared phony so a file named "learning_curve" cannot shadow it; the real
# up-to-date check happens on experiments/run.out.
.PHONY: learning_curve
learning_curve: experiments/run.out

# Learning-curve experiment: train on TRAINDATA and evaluate on TESTDATA in a
# single classifier run, then split the logs into per-metric files next to the
# target:
#   $@            classifier stdout
#   $@.err        classifier stderr
#   $@.accuracy   stdout lines matching "acc"
#   $@.size       stdout lines matching "total"
#   $@.traintime  stderr lines matching "training took:"
#   $@.testtime   stderr lines matching "classification took:"
# NOTE(review): -F/-T/-L presumably set the incremental training-size steps;
# confirm against the classifier's usage text.
#
# stdout is first collected in $@.tmp and only renamed to $@ once every step
# has succeeded, so a failed run (or a grep without matches) never leaves a
# partial run.out behind that make would consider up to date.
experiments/run.out: ${TRAINDATA} ${TESTDATA}
	mkdir -p $(@D)
	${CLASSIFIER} -t "${TRAINDATA}" -e "${TESTDATA}" \
			-F 1000 -T 3000000 -L 2 ${LANGS} \
			> $@.tmp 2> $@.err
	grep acc $@.tmp > $@.accuracy
	grep total $@.tmp > $@.size
	grep 'training took:' $@.err > $@.traintime
	grep 'classification took:' $@.err > $@.testtime
	mv -f $@.tmp $@