# Source: Blacklist Classifier / test / Makefile

#
#    Classifier for language discrimination based on blacklists v0.1
#    Copyright 2012 Joerg Tiedemann
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Lesser General Public License as published
#    by the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#    GNU Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#----------------------------------------------------------------------------
# This Makefile is not for installing the software but for testing it only!
#----------------------------------------------------------------------------
#  make train .............. train blacklists from given training data
#  make test ............... test the classifier with the given test data
#  make learning_curve ..... train and test with incremental training data
#----------------------------------------------------------------------------



# Training corpora. The rules below hand this whole list to the classifier
# as a single quoted argument of -t, and experiments/run.out also lists the
# files as prerequisites.
TRAINDATA := $(addprefix ../data/train/,\
		bhs_sr.txt.gz \
		bhs_hr.txt.gz \
		bhs_bs.txt.gz)

# Evaluation files: classified by 'make test' and passed via -e when the
# learning curve is computed.
TESTDATA := $(addprefix ../data/eval/,\
		politika.rs.200.check \
		vecernji.hr.200.check \
		dnevniavaz.ba.200.check)

# Language labels the classifier is run with.
# NOTE(review): the order presumably has to match the order of the files in
# TRAINDATA / TESTDATA -- confirm against the classifier's documentation.
LANGS := sr hr bs

# Classifier executable, given relative to the directory make is run from.
CLASSIFIER := Lingua-Identify-Blacklists/bin/blacklist_classifier


# classify_and_count: recipe template for one evaluation file.
#   $(1)  path of the file to classify
# Line 1 runs the classifier on $(1) for the languages in LANGS and stores the
# output in <basename of $(1)>.guess in the current directory.
# Line 2 prints how often each distinct output line occurs.
# The empty line before 'endef' is required: it terminates the second command
# with a newline so that consecutive expansions stay on separate recipe lines.
define classify_and_count
${CLASSIFIER} -i ${LANGS} < $(1) > $(notdir $(1)).guess
sort $(notdir $(1)).guess | uniq -c

endef

# test: classify every file listed in TESTDATA and print the per-file counts.
# Declared phony because it names an action, not a file. The file list and the
# language set come from TESTDATA and LANGS, so overriding either variable
# (e.g. 'make test TESTDATA=...') takes effect here as well.
.PHONY: test
test:
	$(foreach f,${TESTDATA},$(call classify_and_count,$(f)))

# train: train blacklists from the corpora in TRAINDATA for the languages in
# LANGS. The whole TRAINDATA list is deliberately passed to -t as ONE quoted,
# space-separated argument; the language labels follow as separate arguments.
# Declared phony because it names an action, not a file.
.PHONY: train
train:
	${CLASSIFIER} -t "${TRAINDATA}" ${LANGS}

# learning_curve: convenience alias for building experiments/run.out.
# Declared phony so that a stray file named 'learning_curve' cannot make the
# alias look up to date; the real output file is still only rebuilt when it is
# older than its own prerequisites.
.PHONY: learning_curve
learning_curve: experiments/run.out

# experiments/run.out: train and evaluate in one classifier run (-t training
# data, -e evaluation data), rebuilt whenever a training or evaluation file
# is newer than the target.
#   $@            classifier stdout
#   $@.err        classifier stderr
#   $@.accuracy   lines of $@ containing 'acc'
#   $@.size       lines of $@ containing 'total'
#   $@.traintime  lines of $@.err containing 'training took:'
#   $@.testtime   lines of $@.err containing 'classification took:'
# NOTE(review): -F 1000 -T 3000000 -L 2 presumably control the incremental
# training schedule -- their exact meaning is not visible in this file; see the
# classifier's documentation.
# grep exits non-zero when nothing matches, which aborts the recipe at that
# line and leaves the remaining side files unwritten.
experiments/run.out: ${TRAINDATA} ${TESTDATA}
	mkdir -p $(@D)
	${CLASSIFIER} -t "${TRAINDATA}" -e "${TESTDATA}" \
			-F 1000 -T 3000000 -L 2 ${LANGS} \
			> $@ 2>$@.err
	grep acc $@ > $@.accuracy
	grep total $@ > $@.size
	grep 'training took:' $@.err > $@.traintime
	grep 'classification took:' $@.err > $@.testtime
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.