Commits

Alexandre Patry  committed 3190dea

Unit tests have been moved from the src/ directory to the tests/ directory.

  • Participants
  • Parent commits 4a4f72c

Comments (0)

Files changed (178)

     Properties
     -->
   <property name="src" location="src"/>
+  <property name="testsrc" location="tests"/>
   <property name="class" location="class"/>
   <property name="lib" location="lib"/>
   <property name="dist" location="dist"/>
       <include name="cc/**/*.java"/>
 	  <!-- compilerarg value="-Xlint:unchecked"/ --> 
     </javac>
+    </target>
+
+  <!--
+    Target: compile-test
+    Compiles the unit-test sources found under ${testsrc} once the main
+    "compile" target has run. Only files matching cc/**/*.java are compiled,
+    and the output is written to ${class}, the same directory that receives
+    the production classes.
+    NOTE(review): any jar built from ${class} after this target has run
+    would also pick up the compiled test classes; confirm this is intended.
+    -->
+  <target
+    name="compile-test"
+    depends="compile"
+    description="Compile all the test files">
+    <javac
+      source="${java_version}"
+      destdir="${class}"
+      classpathref="project.classpath"
+      debug="true"
+      deprecation="off"
+      listfiles="no"
+      >
+      <src path="${testsrc}"/>
+      <include name="cc/**/*.java"/>
+	  <!-- compilerarg value="-Xlint:unchecked"/ --> 
+    </javac>
   </target>
  
   <!--
       <jar destfile="${dist}/mallet.jar" basedir="${class}"/>
   </target>
 
-  <target name="test" depends="compile">
+  <target name="test" depends="compile-test">
 
     <junit printsummary="yes" >
       <classpath>
       <!-- test name="cc.mallet.types.tests.TestInstanceListWeights" todir="${test}" /-->
 
       <batchtest fork="yes" todir="${test}">
-	<fileset dir="${src}">
+	<fileset dir="${testsrc}">
 	  <include name="**/*Test*.java"/>
 	</fileset>
       </batchtest>

File src/cc/mallet/classify/MCMaxEntTrainer.java

 import cc.mallet.optimize.LimitedMemoryBFGS;
 import cc.mallet.optimize.Optimizable;
 import cc.mallet.optimize.Optimizer;
-import cc.mallet.optimize.tests.*;
 import cc.mallet.pipe.Pipe;
 import cc.mallet.types.Alphabet;
 import cc.mallet.types.ExpGain;

File src/cc/mallet/classify/MaxEntTrainer.java

 import cc.mallet.optimize.OptimizationException;
 import cc.mallet.optimize.Optimizer;
 import cc.mallet.optimize.OrthantWiseLimitedMemoryBFGS;
-import cc.mallet.optimize.tests.*;
 import cc.mallet.pipe.Pipe;
 import cc.mallet.types.Alphabet;
 import cc.mallet.types.ExpGain;

File src/cc/mallet/classify/tests/NaiveBayesData/learn/a/m1

-Hello everybody

File src/cc/mallet/classify/tests/NaiveBayesData/learn/a/ma

-The puppy goes woof

File src/cc/mallet/classify/tests/NaiveBayesData/learn/b/m2

-Goodbye now

File src/cc/mallet/classify/tests/NaiveBayesData/update/b/m3

-Hello everybody

File src/cc/mallet/classify/tests/NaiveBayesData/update/b/m4

-Hello everybody

File src/cc/mallet/classify/tests/NaiveBayesData/update/b/m5

-Hello everybody

File src/cc/mallet/classify/tests/TestClassifiers.java

-/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
-   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
-   http://www.cs.umass.edu/~mccallum/mallet
-   This software is provided under the terms of the Common Public License,
-   version 1.0, as published by http://www.opensource.org.  For further
-   information, see the file `LICENSE' included with this distribution. */
-
-
-
-
-/** 
-   @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
- */
-
-package cc.mallet.classify.tests;
-
-//import edu.umass.cs.mallet.base.pipe.SerialPipe;
-import junit.framework.*;
-import java.net.URI;
-import java.util.Iterator;
-
-import cc.mallet.classify.*;
-import cc.mallet.pipe.*;
-import cc.mallet.pipe.iterator.ArrayIterator;
-import cc.mallet.pipe.iterator.PipeInputIterator;
-import cc.mallet.pipe.iterator.RandomTokenSequenceIterator;
-import cc.mallet.types.*;
-import cc.mallet.util.*;
-
-/**
- * JUnit 3 tests that train classifiers on randomly generated instance lists
- * and print the resulting accuracies.
- * Note: none of the tests asserts on the accuracy; they fail only if an
- * exception is thrown while training or evaluating.
- */
-public class TestClassifiers extends TestCase
-{
-	public TestClassifiers (String name)
-	{
-		super (name);
-	}
-
-	/** Returns a new Alphabet populated with the entries "feature0" .. "feature(size-1)". */
-	private static Alphabet dictOfSize (int size)
-	{
-		Alphabet ret = new Alphabet ();
-		for (int i = 0; i < size; i++)
-			ret.lookupIndex ("feature"+i);
-		return ret;
-	}
-	
-	/**
-	 * Builds a random instance list over 3 features and 3 classes, splits it
-	 * with proportions {.5, .5}, trains each trainer on the first part and
-	 * prints the accuracy on both parts.
-	 */
-	public void testRandomTrained ()
-	{
-		// Only one trainer is active; the commented-out entries are alternatives.
-		ClassifierTrainer[] trainers = new ClassifierTrainer[1];
-		//trainers[0] = new NaiveBayesTrainer();
-		trainers[0] = new MaxEntTrainer();
-		//trainers[2] = new DecisionTreeTrainer();
-
-		Alphabet fd = dictOfSize (3);
-		String[] classNames = new String[] {"class0", "class1", "class2"};
-
-		// Fixed seeds (1 and 2) keep the generated data and the split repeatable.
-		InstanceList ilist = new InstanceList (new Randoms(1), fd, classNames, 200);
-
-		InstanceList lists[] = ilist.split (new java.util.Random(2), new double[] {.5, .5});
-		//System.out.println ("Training set size = "+lists[0].size());
-		//System.out.println ("Testing set size = "+lists[1].size());
-		
-		Classifier[] classifiers = new Classifier[trainers.length];
-		for (int i = 0; i < trainers.length; i++)
-			classifiers[i] = trainers[i].train (lists[0]);
-
-		System.out.println ("Accuracy on training set:");
-		for (int i = 0; i < trainers.length; i++)
-			System.out.println (classifiers[i].getClass().getName()
-													+ ": " + new Trial (classifiers[i], lists[0]).getAccuracy());
-
-		System.out.println ("Accuracy on testing set:");
-		for (int i = 0; i < trainers.length; i++)
-			System.out.println (classifiers[i].getClass().getName()
-													+ ": " + new Trial (classifiers[i], lists[1]).getAccuracy());
-	}
-	
-	/**
-	 * Trains on data generated over a 3-entry alphabet after that alphabet has
-	 * been expanded to 25 entries, then evaluates on test instances drawn over
-	 * the expanded alphabet and prints the accuracies.
-	 */
-	public void testNewFeatures ()
-	  {
-	    ClassifierTrainer[] trainers = new ClassifierTrainer[1];
-	    trainers[0] = new MaxEntTrainer();
-
-	    Alphabet fd = dictOfSize (3);
-	    String[] classNames = new String[] {"class0", "class1", "class2"};
-
-	    // The same Randoms instance is reused below for the test data, so the
-	    // order of the calls that consume it matters for reproducibility.
-	    Randoms r = new Randoms(1);
-	    InstanceList training = new InstanceList (r, fd, classNames, 50);
-	    // Grow the alphabet only after the training instances were generated.
-	    expandDict (fd, 25);
-
-	    Classifier[] classifiers = new Classifier[trainers.length];
-	    for (int i = 0; i < trainers.length; i++)
-	      classifiers[i] = trainers[i].train (training);
-
-	    System.out.println ("Accuracy on training set:");
-	    for (int i = 0; i < trainers.length; i++)
-	      System.out.println (classifiers[i].getClass().getName()
-	                          + ": " + new Trial (classifiers[i], training).getAccuracy());
-
-	    // Test instances go through the training pipe so both lists share it.
-	    InstanceList testing = new InstanceList (training.getPipe ());
-	    Iterator<Instance> iter = new RandomTokenSequenceIterator (
-	      r,  new Dirichlet (fd, 2.0),
-	      30, 0,
-	      10, 50,
-	      classNames);
-	    testing.addThruPipe (iter);
-
-	    for (int i = 0; i < testing.size (); i++) {
-	      Instance inst = testing.get (i);
-	      System.out.println ("DATA:"+inst.getData());
-	    }
-
-	    System.out.println ("Accuracy on testing set:");
-	    for (int i = 0; i < trainers.length; i++)
-	      System.out.println (classifiers[i].getClass().getName()
-	                          + ": " + new Trial (classifiers[i], testing).getAccuracy());
-	  }
-
- 
-  /**
-   * Calls startGrowth() on the alphabet and looks up "feature0" ..
-   * "feature(size-1)" with the second argument set to true
-   * (presumably "add if absent" -- confirm against Alphabet.lookupIndex).
-   */
-  private void expandDict (Alphabet fd, int size)
-  {
-    fd.startGrowth ();
-    for (int i = 0; i < size; i++)
-      fd.lookupIndex ("feature"+i, true);
-  }
-
-  /** Returns a suite built from all test methods of this class. */
-  public static Test suite ()
-	{
-		return new TestSuite (TestClassifiers.class);
-	}
-
-	protected void setUp ()
-	{
-	}
-
-	/** Runs the suite with the JUnit text runner. */
-	public static void main (String[] args)
-	{
-		junit.textui.TestRunner.run (suite());
-	}
-	
-}

File src/cc/mallet/classify/tests/TestMaxEntTrainer.java

-/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
-   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
-   http://www.cs.umass.edu/~mccallum/mallet
-   This software is provided under the terms of the Common Public License,
-   version 1.0, as published by http://www.opensource.org.  For further
-   information, see the file `LICENSE' included with this distribution. */
-
-
-
-
-/** 
-   @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
- */
-
-package cc.mallet.classify.tests;
-
-import junit.framework.*;
-import java.net.URI;
-
-import cc.mallet.classify.*;
-import cc.mallet.optimize.Optimizable;
-import cc.mallet.optimize.tests.TestOptimizable;
-import cc.mallet.pipe.*;
-import cc.mallet.pipe.iterator.ArrayIterator;
-import cc.mallet.types.*;
-import cc.mallet.util.*;
-
-/**
- * JUnit 3 tests that hand the optimizable objective produced by
- * MaxEntTrainer to the generic checks in TestOptimizable.
- */
-public class TestMaxEntTrainer extends TestCase
-{
-	public TestMaxEntTrainer (String name)
-	{
-		super (name);
-	}
-
-	/** Returns a new Alphabet populated with the entries "feature0" .. "feature(size-1)". */
-	private static Alphabet dictOfSize (int size)
-	{
-		Alphabet ret = new Alphabet ();
-		for (int i = 0; i < size; i++)
-			ret.lookupIndex ("feature"+i);
-		return ret;
-	}
-
-	/**
-	 * Runs TestOptimizable.testGetSetParameters on the optimizable built for
-	 * a random instance list over 6 features and 3 classes.
-	 */
-	public void testSetGetParameters ()
-	{
- 		MaxEntTrainer trainer = new MaxEntTrainer();
-		Alphabet fd = dictOfSize (6);
-		String[] classNames = new String[] {"class0", "class1", "class2"};
-		// Seed 1 keeps the generated data repeatable between runs.
-		InstanceList ilist = new InstanceList (new Randoms(1), fd, classNames, 20);
-		Optimizable.ByGradientValue maxable = trainer.getOptimizable (ilist);
-		TestOptimizable.testGetSetParameters (maxable);
-	}
-
-	/**
-	 * Runs TestOptimizable.testValueAndGradient on the optimizable built for
-	 * a random instance list over 6 features and 2 classes.
-	 */
-	public void testRandomMaximizable ()
-	{
-		MaxEntTrainer trainer = new MaxEntTrainer();
-		Alphabet fd = dictOfSize (6);
-		String[] classNames = new String[] {"class0", "class1"};
-		InstanceList ilist = new InstanceList (new Randoms(1), fd, classNames, 20);
-		Optimizable.ByGradientValue maxable = trainer.getOptimizable (ilist);
-		TestOptimizable.testValueAndGradient (maxable);
-	}
-	
-	// TODO This doesn't pass, but it didn't in the old MALLET either.  Why?? -akm 1/08
-	/**
-	 * Trains a MaxEnt classifier first, then runs
-	 * TestOptimizable.testValueAndGradientCurrentParameters on the optimizable
-	 * obtained for that trained classifier.
-	 */
-	public void testTrainedMaximizable ()
-	{
-		MaxEntTrainer trainer = new MaxEntTrainer();
-		Alphabet fd = dictOfSize (6);
-		String[] classNames = new String[] {"class0", "class1"};
-		InstanceList ilist = new InstanceList (new Randoms(1), fd, classNames, 20);
-		MaxEnt me = (MaxEnt)trainer.train(ilist);
-		Optimizable.ByGradientValue maxable = trainer.getOptimizable (ilist, me);
-		TestOptimizable.testValueAndGradientCurrentParameters (maxable);
-	}
-
-	/** Returns a suite built from all test methods of this class. */
-	public static Test suite ()
-	{
-		return new TestSuite (TestMaxEntTrainer.class);
-	}
-
-	protected void setUp ()
-	{
-	}
-
-	/** Runs the suite with the JUnit text runner. */
-	public static void main (String[] args)
-	{
-		junit.textui.TestRunner.run (suite());
-	}
-	
-}
-		

File src/cc/mallet/classify/tests/TestNaiveBayes.java

-/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
-   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
-   http://www.cs.umass.edu/~mccallum/mallet
-   This software is provided under the terms of the Common Public License,
-   version 1.0, as published by http://www.opensource.org.  For further
-   information, see the file `LICENSE' included with this distribution. */
-
-
-
-
-/** 
-   @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
- */
-
-package cc.mallet.classify.tests;
-
-import junit.framework.*;
-import java.net.URI;
-import java.io.File;
-
-import cc.mallet.classify.*;
-import cc.mallet.pipe.*;
-import cc.mallet.pipe.iterator.ArrayIterator;
-import cc.mallet.pipe.iterator.FileIterator;
-import cc.mallet.types.*;
-import cc.mallet.util.*;
-
-public class TestNaiveBayes extends TestCase
-{
-	public TestNaiveBayes (String name)
-	{
-		super (name);
-	}
-
-	public void testNonTrained ()
-	{
-		Alphabet fdict = new Alphabet ();
-		System.out.println ("fdict.size="+fdict.size());
-		LabelAlphabet ldict = new LabelAlphabet ();
-		Multinomial.Estimator me1 = new Multinomial.LaplaceEstimator (fdict);
-		Multinomial.Estimator me2 = new Multinomial.LaplaceEstimator (fdict);
-
-		// Prior
-		ldict.lookupIndex ("sports");
-		ldict.lookupIndex ("politics");
-		ldict.stopGrowth ();
-		System.out.println ("ldict.size="+ldict.size());
-		Multinomial prior = new Multinomial (new double[] {.5, .5}, ldict);
-
-		// Sports
-		me1.increment ("win", 5);
-		me1.increment ("puck", 5);
-		me1.increment ("team", 5);
-		System.out.println ("fdict.size="+fdict.size());
-
-		// Politics
-		me2.increment ("win", 5);
-		me2.increment ("speech", 5);
-		me2.increment ("vote", 5);
-
-		Multinomial sports = me1.estimate();
-		Multinomial politics = me2.estimate();
-
-		// We must estimate from me1 and me2 after all data is incremented,
-		// so that the "sports" multinomial knows the full dictionary size!
-
-		Classifier c = new NaiveBayes (new Noop (fdict, ldict),
-				prior,
-				new Multinomial[] {sports, politics});
-
-		Instance inst = c.getInstancePipe().instanceFrom(
-				new Instance (new FeatureVector (fdict,
-						new Object[] {"speech", "win"},
-						new double[] {1, 1}),
-						ldict.lookupLabel ("politics"),
-						null, null));
-		System.out.println ("inst.data = "+inst.getData ());
-
-		Classification cf = c.classify (inst);
-		LabelVector l = (LabelVector) cf.getLabeling();
-		//System.out.println ("l.size="+l.size());
-		System.out.println ("l.getBestIndex="+l.getBestIndex());
-		assertTrue (cf.getLabeling().getBestLabel()
-				== ldict.lookupLabel("politics"));
-		assertTrue (cf.getLabeling().getBestValue()	> 0.6);
-	}
-
-	public void testStringTrained ()
-	{
-		String[] africaTraining = new String[] {
-				"on the plains of africa the lions roar",
-				"in swahili ngoma means to dance",
-				"nelson mandela became president of south africa",
-		"the saraha dessert is expanding"};
-		String[] asiaTraining = new String[] {
-				"panda bears eat bamboo",
-				"china's one child policy has resulted in a surplus of boys",
-		"tigers live in the jungle"};
-
-		InstanceList instances =
-			new InstanceList (
-					new SerialPipes (new Pipe[] {
-							new Target2Label (),
-							new CharSequence2TokenSequence (),
-							new TokenSequence2FeatureSequence (),
-							new FeatureSequence2FeatureVector ()}));
-
-		instances.addThruPipe (new ArrayIterator (africaTraining, "africa"));
-		instances.addThruPipe (new ArrayIterator (asiaTraining, "asia"));
-		Classifier c = new NaiveBayesTrainer ().train (instances);
-
-		Classification cf = c.classify ("nelson mandela never eats lions");
-		assertTrue (cf.getLabeling().getBestLabel()
-				== ((LabelAlphabet)instances.getTargetAlphabet()).lookupLabel("africa"));
-	}
-
-	public void testRandomTrained ()
-	{
-		InstanceList ilist = new InstanceList (new Randoms(1), 10, 2);
-		Classifier c = new NaiveBayesTrainer ().train (ilist);
-		// test on the training data
-		int numCorrect = 0;
-		for (int i = 0; i < ilist.size(); i++) {
-			Instance inst = ilist.get(i);
-			Classification cf = c.classify (inst);
-			cf.print ();
-			if (cf.getLabeling().getBestLabel() == inst.getLabeling().getBestLabel())
-				numCorrect++;
-		}
-		System.out.println ("Accuracy on training set = " + ((double)numCorrect)/ilist.size());
-	}
-
-	public void testIncrementallyTrainedGrowingAlphabets()
-	{
-		System.out.println("testIncrementallyTrainedGrowingAlphabets");
-		String[]    args = new String[] {
-				"src/cc/mallet/classify/tests/NaiveBayesData/learn/a",
-				"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
-		};
-
-		File[] directories = new File[args.length];
-		for (int i = 0; i < args.length; i++)
-			directories[i] = new File (args[i]);
-
-		SerialPipes instPipe =
-			// MALLET pipeline for converting instances to feature vectors
-			new SerialPipes(new Pipe[] {
-					new Target2Label(),
-					new Input2CharSequence(),
-					//SKIP_HEADER only works for Unix
-					//new CharSubsequence(CharSubsequence.SKIP_HEADER),
-					new CharSequence2TokenSequence(),
-					new TokenSequenceLowercase(),
-					new TokenSequenceRemoveStopwords(),
-					new TokenSequence2FeatureSequence(),
-					new FeatureSequence2FeatureVector() });
-
-		InstanceList instList = new InstanceList(instPipe);
-		instList.addThruPipe(new
-				FileIterator(directories, FileIterator.STARTING_DIRECTORIES));
-
-		System.out.println("Training 1");
-		NaiveBayesTrainer trainer = new NaiveBayesTrainer();
-		NaiveBayes classifier = trainer.trainIncremental(instList);
-
-		//instList.getDataAlphabet().stopGrowth();
-
-		// incrementally train...
-		String[] t2directories = {
-				"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
-		};
-
-		System.out.println("data alphabet size " + instList.getDataAlphabet().size());
-		System.out.println("target alphabet size " + instList.getTargetAlphabet().size());
-		InstanceList instList2 = new InstanceList(instPipe);
-		instList2.addThruPipe(new
-				FileIterator(t2directories, FileIterator.STARTING_DIRECTORIES));
-
-		System.out.println("Training 2");
-
-		System.out.println("data alphabet size " + instList2.getDataAlphabet().size());
-		System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
-
-		NaiveBayes classifier2 = (NaiveBayes) trainer.trainIncremental(instList2);
-	}
-
-	public void testIncrementallyTrained()
-	{
-		System.out.println("testIncrementallyTrained");
-		String[]    args = new String[] {
-				"src/cc/mallet/classify/tests/NaiveBayesData/learn/a",
-				"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
-		};
-
-		File[] directories = new File[args.length];
-		for (int i = 0; i < args.length; i++)
-			directories[i] = new File (args[i]);
-
-		SerialPipes instPipe =
-			// MALLET pipeline for converting instances to feature vectors
-			new SerialPipes(new Pipe[] {
-					new Target2Label(),
-					new Input2CharSequence(),
-					//SKIP_HEADER only works for Unix
-					//new CharSubsequence(CharSubsequence.SKIP_HEADER),
-					new CharSequence2TokenSequence(),
-					new TokenSequenceLowercase(),
-					new TokenSequenceRemoveStopwords(),
-					new TokenSequence2FeatureSequence(),
-					new FeatureSequence2FeatureVector() });
-
-		InstanceList instList = new InstanceList(instPipe);
-		instList.addThruPipe(new
-				FileIterator(directories, FileIterator.STARTING_DIRECTORIES));
-
-		System.out.println("Training 1");
-		NaiveBayesTrainer trainer = new NaiveBayesTrainer();
-		NaiveBayes classifier = (NaiveBayes) trainer.trainIncremental(instList);
-
-		Classification initialClassification = classifier.classify("Hello Everybody");
-		Classification initial2Classification = classifier.classify("Goodbye now");
-		System.out.println("Initial Classification = ");
-		initialClassification.print();
-		initial2Classification.print();
-		System.out.println("data alphabet " + classifier.getAlphabet());
-		System.out.println("label alphabet " + classifier.getLabelAlphabet());
-
-
-		// incrementally train...
-		String[] t2directories = {
-				"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
-		};
-
-		System.out.println("data alphabet size " + instList.getDataAlphabet().size());
-		System.out.println("target alphabet size " + instList.getTargetAlphabet().size());
-		InstanceList instList2 = new InstanceList(instPipe);
-		instList2.addThruPipe(new
-				FileIterator(t2directories, FileIterator.STARTING_DIRECTORIES));
-
-		System.out.println("Training 2");
-
-		System.out.println("data alphabet size " + instList2.getDataAlphabet().size());
-		System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
-
-		NaiveBayes classifier2 = (NaiveBayes) trainer.trainIncremental(instList2);
-
-
-	}
-
-	public void testEmptyStringBug()
-	{
-		System.out.println("testEmptyStringBug");
-		String[]    args = new String[] {
-				"src/cc/mallet/classify/tests/NaiveBayesData/learn/a",
-				"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
-		};
-
-		File[] directories = new File[args.length];
-		for (int i = 0; i < args.length; i++)
-			directories[i] = new File (args[i]);
-
-		SerialPipes instPipe =
-			// MALLET pipeline for converting instances to feature vectors
-			new SerialPipes(new Pipe[] {
-					new Target2Label(),
-					new Input2CharSequence(),
-					//SKIP_HEADER only works for Unix
-					//new CharSubsequence(CharSubsequence.SKIP_HEADER),
-					new CharSequence2TokenSequence(),
-					new TokenSequenceLowercase(),
-					new TokenSequenceRemoveStopwords(),
-					new TokenSequence2FeatureSequence(),
-					new FeatureSequence2FeatureVector() });
-
-		InstanceList instList = new InstanceList(instPipe);
-		instList.addThruPipe(new
-				FileIterator(directories, FileIterator.STARTING_DIRECTORIES));
-
-		System.out.println("Training 1");
-		NaiveBayesTrainer trainer = new NaiveBayesTrainer();
-		NaiveBayes classifier = (NaiveBayes) trainer.trainIncremental(instList);
-
-		Classification initialClassification = classifier.classify("Hello Everybody");
-		Classification initial2Classification = classifier.classify("Goodbye now");
-		System.out.println("Initial Classification = ");
-		initialClassification.print();
-		initial2Classification.print();
-		System.out.println("data alphabet " + classifier.getAlphabet());
-		System.out.println("label alphabet " + classifier.getLabelAlphabet());
-
-
-		// test
-		String[] t2directories = {
-				"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
-		};
-
-		System.out.println("data alphabet size " + instList.getDataAlphabet().size());
-		System.out.println("target alphabet size " + instList.getTargetAlphabet().size());
-		InstanceList instList2 = new InstanceList(instPipe);
-		instList2.addThruPipe(new
-				FileIterator(t2directories, FileIterator.STARTING_DIRECTORIES, true));
-
-		System.out.println("Training 2");
-
-		System.out.println("data alphabet size " + instList2.getDataAlphabet().size());
-		System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
-
-		NaiveBayes classifier2 = (NaiveBayes) trainer.trainIncremental(instList2);
-		Classification secondClassification = classifier.classify("Goodbye now");
-		secondClassification.print();
-
-	}
-
-
-
-
-	static Test suite ()
-	{
-		return new TestSuite (TestNaiveBayes.class);
-		//TestSuite suite= new TestSuite();
-		//   //suite.addTest(new TestNaiveBayes("testIncrementallyTrained"));
-		// suite.addTest(new TestNaiveBayes("testEmptyStringBug"));
-
-		// return suite;
-	}
-
-	protected void setUp ()
-	{
-	}
-
-	public static void main (String[] args)
-	{
-		junit.textui.TestRunner.run (suite());
-	}
-
-}

File src/cc/mallet/classify/tests/TestStaticParameters.java

-package cc.mallet.classify.tests;
-
-import junit.framework.Test;
-import junit.framework.TestCase;
-import junit.framework.TestSuite;
-
-/**
- * JUnit 3 test checking that a value assigned to the static
- * {@code Factory.gamma} field from the instance initializer of an anonymous
- * Factory subclass is the value copied into objects created by
- * {@link Factory#newTSP()}.
- */
-public class TestStaticParameters extends TestCase 
-{
-	/** Per-instance copy of the parameter; stays 1 unless a Factory overwrites it. */
-	int gamma = 1;
-	public TestStaticParameters () { }
-	
-	public static class Factory {
-		// Static: an assignment made through any Factory (or subclass) instance
-		// changes the single shared value and persists after that instance is gone.
-		protected static int gamma = 2;
-
-		/** Creates a TestStaticParameters whose gamma is the current value of Factory.gamma. */
-		public TestStaticParameters newTSP () {
-			// Access the static field through the class name rather than "this".
-			System.out.println ("Factory gamma="+Factory.gamma);
-			TestStaticParameters t = new TestStaticParameters();
-			t.gamma = Factory.gamma;
-			return t;
-		}
-	}
-	
-	/**
-	 * The anonymous subclass's initializer block sets the shared static
-	 * Factory.gamma to 3; the object built by the factory must carry that value.
-	 */
-	public void testParameterSetting () {
-		Factory f = new Factory () {{gamma=3;}}; 
-		TestStaticParameters g = f.newTSP();
-		System.out.println ("g.gamma="+g.gamma);
-		assertTrue("gamma="+g.gamma, g.gamma == 3);
-	}
-
-	/** Returns a suite built from all test methods of this class. */
-	public static Test suite ()
-	{
-		// Fixed: the suite used to be built from TestClassifiers.class, so
-		// main() ran another class's tests instead of testParameterSetting.
-		return new TestSuite (TestStaticParameters.class);
-	}
-
-	protected void setUp ()
-	{
-	}
-
-	/** Runs the suite with the JUnit text runner. */
-	public static void main (String[] args)
-	{
-		junit.textui.TestRunner.run (suite());
-	}
-
-}

File src/cc/mallet/classify/tests/package.html

-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-  <body>
-
-JUnit tests for classifiers
-
-    <hr>
-    <address><a href="mailto:mccallum@cs.umass.edu"></a></address>
-  </body>
-</html>

File src/cc/mallet/cluster/evaluate/tests/TestClusteringEvaluators.java

-/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
-   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
-   http://www.cs.umass.edu/~mccallum/mallet
-   This software is provided under the terms of the Common Public License,
-   version 1.0, as published by http://www.opensource.org.  For further
-   information, see the file `LICENSE' included with this distribution. */
-
-package cc.mallet.cluster.evaluate.tests;
-
-import cc.mallet.cluster.Clustering;
-import cc.mallet.cluster.evaluate.*;
-import cc.mallet.types.InstanceList;
-import cc.mallet.util.Randoms;
-
-import junit.framework.*;
-
-/**
- * JUnit 3 test for the pairwise-F1, B-Cubed and MUC clustering evaluators.
- * Examples drawn from Luo, "On Coreference Resolution Performance
- * Metrics", HLT 2005.
- *
- * @author "Aron Culotta" <culotta@degas.cs.umass.edu>
- * @version 1.0
- * @since 1.0
- * @see TestCase
- */
-public class TestClusteringEvaluators extends TestCase
-{
-	public TestClusteringEvaluators (String name)
-	{
-		super (name);
-	}
-
-	/** Builds the reference clustering: 12 instances labelled into 3 clusters of sizes 5, 2 and 5. */
-	private Clustering generateTruth (InstanceList instances) {
-		int[] labels = new int[]{0,0,0,0,0,1,1,2,2,2,2,2};
-		return new Clustering(instances, 3, labels);
-	}
-	
-	/**
-	 * Builds four predicted clusterings of the same 12 instances: two
-	 * different 2-cluster groupings, a single cluster holding everything,
-	 * and 12 singleton clusters.
-	 */
-	private Clustering[] generatePredicted (InstanceList instances) {
-		Clustering[] clusterings = new Clustering[4];
-		clusterings[0] = new Clustering(instances, 2, new int[]{0,0,0,0,0,1,1,1,1,1,1,1});
-		clusterings[1] = new Clustering(instances, 2, new int[]{0,0,0,0,0,1,1,0,0,0,0,0});
-		clusterings[2] = new Clustering(instances, 1, new int[]{0,0,0,0,0,0,0,0,0,0,0,0});
-		clusterings[3] = new Clustering(instances, 12, new int[]{0,1,2,3,4,5,6,7,8,9,10,11});
-		return clusterings;
-	}
-	
-	/**
-	 * Evaluates every predicted clustering against the truth with all three
-	 * evaluators, prints the individual results, and asserts on the F1 value
-	 * reported by each evaluator's evaluateTotals().
-	 */
-	public void testEvaluators ()
-	{
-		// Only the first 12 of the generated instances are used, matching the label arrays above.
-		InstanceList instances = new InstanceList(new Randoms(1), 100, 2).subList(0,12);
-		System.err.println(instances.size() + " instances");
-		Clustering truth = generateTruth(instances);
-		System.err.println("truth=" + truth);
-
-		Clustering[] predicted = generatePredicted(instances);
-		ClusteringEvaluator pweval = new PairF1Evaluator();
-		ClusteringEvaluator bceval = new BCubedEvaluator();
-		ClusteringEvaluator muceval = new MUCEvaluator();
-
-		for (int i = 0; i < predicted.length; i++) {
-			System.err.println("\npred" + i + "=" + predicted[i]);
-			System.err.println("pairs: " + pweval.evaluate(truth, predicted[i]));
-			System.err.println("bcube: " + bceval.evaluate(truth, predicted[i]));
-			System.err.println("  muc: " + muceval.evaluate(truth, predicted[i]));
-		}
-
-		System.err.println("totals:");
-		System.err.println("pairs: " + pweval.evaluateTotals());
-		System.err.println("bcube: " + bceval.evaluateTotals());
-		System.err.println("  muc: " + muceval.evaluateTotals());
-
-		// The totals are checked by matching the "f1=..." text in the returned string.
-		assertTrue(pweval.evaluateTotals().matches(".*f1=0\\.5550.*"));
-		assertTrue(bceval.evaluateTotals().matches(".*f1=0\\.7404.*"));
-		assertTrue(muceval.evaluateTotals().matches(".*f1=0\\.8059.*"));
-	}
-
-	/** Returns a suite built from all test methods of this class. */
-	public static Test suite ()
-	{
-		return new TestSuite (TestClusteringEvaluators.class);
-	}
-
-	protected void setUp ()
-	{
-	}
-
-	/** Runs the suite with the JUnit text runner. */
-	public static void main (String[] args)
-	{
-		junit.textui.TestRunner.run (suite());
-	}
-	
-}
-		

File src/cc/mallet/cluster/iterator/tests/TestIterators.java

-/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
-   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
-   http://www.cs.umass.edu/~mccallum/mallet
-   This software is provided under the terms of the Common Public License,
-   version 1.0, as published by http://www.opensource.org.  For further
-   information, see the file `LICENSE' included with this distribution. */
-
-package cc.mallet.cluster.iterator.tests;
-
-import cc.mallet.cluster.Clustering;
-import cc.mallet.cluster.iterator.*;
-import cc.mallet.types.Instance;
-import cc.mallet.types.InstanceList;
-import cc.mallet.util.Randoms;
-
-import junit.framework.*;
-
-/**
- * JUnit 3 smoke test that walks the cluster neighbor iterators
- * (ClusterSampleIterator, PairSampleIterator, AllPairsIterator) over a small
- * clustering and prints what they produce. It makes no assertions.
- *
- * @author "Aron Culotta" <culotta@degas.cs.umass.edu>
- * @version 1.0
- * @since 1.0
- * @see TestCase
- */
-public class TestIterators extends TestCase
-{
-	public TestIterators (String name)
-	{
-		super (name);
-	}
-
-	/** Builds a clustering of 10 instances labelled into 3 clusters of sizes 3, 3 and 4. */
-	private Clustering generateClustering (InstanceList instances) {
-		int[] labels = new int[]{0,0,0,1,1,1,2,2,2,2};
-		return new Clustering(instances, 3, labels);
-	}
-		
-	/**
-	 * Exhausts each of the three iterators in turn and prints the data of
-	 * every instance returned.
-	 * NOTE(review): despite its name this method exercises iterators, not
-	 * evaluators; the name looks carried over from TestClusteringEvaluators.
-	 */
-	public void testEvaluators ()
-	{
-		// One seeded Randoms is shared by the instance generation and both sampling iterators.
-		Randoms random = new Randoms(1);
-		InstanceList instances = new InstanceList(random, 100, 2).subList(0,10);
-		System.err.println(instances.size() + " instances");
-		Clustering clustering = generateClustering(instances);
-		System.err.println("clustering=" + clustering);
-
-		System.err.println("ClusterSampleIterator");
-		NeighborIterator iter = new ClusterSampleIterator(clustering,
-																											random,
-																											0.5,
-																											10);
-		while (iter.hasNext()) {
-			Instance instance = (Instance)iter.next();
-			System.err.println(instance.getData() + "\n");
-		}
-		
-		System.err.println("\n\nPairSampleIterator");
-		iter = new PairSampleIterator(clustering,
-																	random,
-																	0.5,
-																	10);
-		while (iter.hasNext()) {
-			Instance instance = (Instance)iter.next();
-			System.err.println(instance.getData() + "\n");
-		}
-
-		System.err.println("\n\nAllPairsIterator");
-		iter = new AllPairsIterator(clustering);																
-		while (iter.hasNext()) {
-			Instance instance = (Instance)iter.next();
-			System.err.println(instance.getData() + "\n");
-		}
-}
-
-	/** Returns a suite built from all test methods of this class. */
-	public static Test suite ()
-	{
-		return new TestSuite (TestIterators.class);
-	}
-
-	protected void setUp ()
-	{
-	}
-
-	/** Runs the suite with the JUnit text runner. */
-	public static void main (String[] args)
-	{
-		junit.textui.TestRunner.run (suite());
-	}
-	
-}
-		

File src/cc/mallet/extract/test/TestDocumentExtraction.java

-/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept.
-   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
-   http://www.cs.umass.edu/~mccallum/mallet
-   This software is provided under the terms of the Common Public License,
-   version 1.0, as published by http://www.opensource.org.  For further
-   information, see the file `LICENSE' included with this distribution. */
-package cc.mallet.extract.test;
-
-import junit.framework.*;
-
-import java.util.regex.Pattern;
-
-import cc.mallet.extract.*;
-import cc.mallet.types.Label;
-import cc.mallet.types.LabelAlphabet;
-import cc.mallet.types.LabelSequence;
-import cc.mallet.util.CharSequenceLexer;
-
-/**
- * Created: Oct 12, 2004
- *
- * @author <A HREF="mailto:casutton@cs.umass.edu>casutton@cs.umass.edu</A>
- * @version $Id: TestDocumentExtraction.java,v 1.1 2007/10/22 21:38:02 mccallum Exp $
- */
-public class TestDocumentExtraction extends TestCase {
-
-  public TestDocumentExtraction (String name)
-  {
-    super (name);
-  }
-
-
-  public static Test suite ()
-  {
-    return new TestSuite (TestDocumentExtraction.class);
-  }
-
-
-  /**
-   * Tags the nine words of the sample sentence with O / ANIMAL / VERB labels
-   * and checks that toXmlString() wraps each run of equal non-"O" labels in
-   * one element, leaving "O" tokens as plain text.
-   */
-  public void testToXml () {
-    LabelAlphabet dict = new LabelAlphabet ();
-    String document = "the quick brown fox leapt over the lazy dog";
-    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());
-
-    Label O = dict.lookupLabel ("O");
-    Label ANML = dict.lookupLabel ("ANIMAL");
-    Label VB = dict.lookupLabel ("VERB");
-    // One label per word: the, quick, brown, fox, leapt, over, the, lazy, dog.
-    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML, VB, O, O, ANML, ANML });
-
-    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, "O");
-    String actualXml = extr.toXmlString();
-    // The expected output uses \r\n line endings after the XML declaration and after </doc>.
-    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
-            "<doc>the <ANIMAL>quick brown fox </ANIMAL><VERB>leapt </VERB>over the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
-    assertEquals (expectedXml, actualXml);
-  }
-
-   /**
-    * Same sentence as testToXml, but tagged with B-/I- style labels and
-    * converted with a BIOTokenizationFilter. The expected XML shows a second
-    * "B-ANIMAL" starting a new ANIMAL element and "I-VERB" extending the
-    * VERB element opened by "B-VERB".
-    */
-   public void testToXmlBIO () {
-    LabelAlphabet dict = new LabelAlphabet ();
-    String document = "the quick brown fox leapt over the lazy dog";
-    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());
-
-    Label O = dict.lookupLabel ("O");
-    Label BANML = dict.lookupLabel ("B-ANIMAL");
-    // Note: the continuation label for animals is the bare "ANIMAL", not "I-ANIMAL".
-    Label ANML = dict.lookupLabel ("ANIMAL");
-    Label BVB = dict.lookupLabel ("B-VERB");
-    Label VB = dict.lookupLabel ("I-VERB");
-    // One label per word: the, quick, brown, fox, leapt, over, the, lazy, dog.
-    LabelSequence tags = new LabelSequence (new Label[] { O, BANML, ANML, BANML, BVB, VB, O, ANML, ANML });
-
-    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, null, "O", new BIOTokenizationFilter());
-    String actualXml = extr.toXmlString();
-    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
-            "<doc>the <ANIMAL>quick brown </ANIMAL><ANIMAL>fox </ANIMAL><VERB>leapt over </VERB>the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
-    assertEquals (expectedXml, actualXml);
-  }
-
-  /**
-   * Builds the labeled spans with a DefaultTokenizationFilter, then adds two
-   * extra spans (MAMMAL and ADJ) that lie inside existing ANIMAL spans, and
-   * checks that toXmlString() emits them as nested elements.
-   */
-  public void testNestedToXML ()
-  {
-    LabelAlphabet dict = new LabelAlphabet ();
-    String document = "the quick brown fox leapt over the lazy dog";
-    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());
-
-    Label O = dict.lookupLabel ("O");
-    Label ANML = dict.lookupLabel ("ANIMAL");
-    Label VB = dict.lookupLabel ("VERB");
-    Label JJ = dict.lookupLabel ("ADJ");
-    Label MAMMAL = dict.lookupLabel ("MAMMAL");
-
-    // One label per word; here the second "the" is part of the trailing ANIMAL span.
-    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML, VB, O, ANML, ANML, ANML });
-
-    LabeledSpans spans = new DefaultTokenizationFilter ().constructLabeledSpans (dict, document, O, toks, tags);
-
-    // Per the expected XML below, subspan (3, 4) covers "fox " and subspan (7, 8) covers "lazy ".
-    Span foxToken = toks.subspan (3, 4);
-    spans.add (new LabeledSpan (foxToken, MAMMAL, false));
-    Span bigDogToken = toks.subspan (7, 8);
-    spans.add (new LabeledSpan (bigDogToken, JJ, false));
-
-    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, spans, null, "O");
-    String actualXml = extr.toXmlString();
-    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
-            "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy </ADJ>dog</ANIMAL></doc>\r\n";
-    assertEquals (expectedXml, actualXml);
-
-  }
-
-  public void testNestedXMLTokenizationFilter ()
-  {
-    LabelAlphabet dict = new LabelAlphabet ();
-    String document = "the quick brown fox leapt over the lazy dog";
-    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());
-
-    Label O = dict.lookupLabel ("O");
-    Label ANML = dict.lookupLabel ("ANIMAL");
-    Label ANML_MAMM = dict.lookupLabel ("ANIMAL|MAMMAL");
-    Label VB = dict.lookupLabel ("VERB");
-    Label ANML_JJ = dict.lookupLabel ("ANIMAL|ADJ");
-    Label ANML_JJ_MAMM = dict.lookupLabel ("ANIMAL|ADJ|MAMMAL");
-
-    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML_MAMM, VB, O, ANML, ANML_JJ, ANML_JJ_MAMM });
-    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, null, "O", new HierarchicalTokenizationFilter ());
-
-    String actualXml = extr.toXmlString();
-    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
-            "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy <MAMMAL>dog</MAMMAL></ADJ></ANIMAL></doc>\r\n";
-    assertEquals (expectedXml, actualXml);
-
-    // Test the ignore function
-
-    extr = new DocumentExtraction ("Test", dict, toks, tags, null, "O", new HierarchicalTokenizationFilter (Pattern.compile ("AD.*")));
-
-    actualXml = extr.toXmlString();
-    expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
-            "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the lazy <MAMMAL>dog</MAMMAL></ANIMAL></doc>\r\n";
-    assertEquals (expectedXml, actualXml);
-
-
-
-  }
-
-  /**
-   * Command-line entry point. With no arguments the whole suite is run;
-   * otherwise each argument is used as the name of one test to run.
-   */
-  public static void main (String[] args) throws Throwable
-  {
-    if (args.length == 0) {
-      junit.textui.TestRunner.run ((TestSuite) suite ());
-      return;
-    }
-
-    TestSuite selected = new TestSuite ();
-    for (int idx = 0; idx < args.length; idx++) {
-      selected.addTest (new TestDocumentExtraction (args[idx]));
-    }
-    junit.textui.TestRunner.run (selected);
-  }
-
-}

File src/cc/mallet/extract/test/TestDocumentViewer.java

-/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept.
-   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
-   http://www.cs.umass.edu/~mccallum/mallet
-   This software is provided under the terms of the Common Public License,
-   version 1.0, as published by http://www.opensource.org.  For further
-   information, see the file `LICENSE' included with this distribution. */
-package cc.mallet.extract.test;
-
-import junit.framework.*;
-
-import java.io.IOException;
-import java.io.File;
-
-import cc.mallet.extract.CRFExtractor;
-import cc.mallet.extract.DocumentViewer;
-import cc.mallet.extract.Extraction;
-import cc.mallet.fst.CRF;
-import cc.mallet.fst.CRFTrainerByLabelLikelihood;
-import cc.mallet.fst.tests.TestCRF;
-import cc.mallet.fst.tests.TestMEMM;
-import cc.mallet.pipe.Pipe;
-import cc.mallet.pipe.iterator.ArrayIterator;
-import cc.mallet.types.InstanceList;
-
-
-/**
- * Trains a small CRF and hands the resulting extraction to
- * {@link DocumentViewer#writeExtraction}. The test makes no assertions;
- * it only checks that the calls complete.
- *
- * Created: Mar 30, 2005
- *
- * @author <A HREF="mailto:casutton@cs.umass.edu">casutton@cs.umass.edu</A>
- * @version $Id: TestDocumentViewer.java,v 1.1 2007/10/22 21:38:02 mccallum Exp $
- */
-public class TestDocumentViewer extends TestCase {
-
-  /** Directory handed to the viewer; created by the test if it does not exist. */
-  private File outputDir = new File ("extract");
-
-  public TestDocumentViewer (String name)
-  {
-    super (name);
-  }
-
-  public static Test suite ()
-  {
-    return new TestSuite (TestDocumentViewer.class);
-  }
-
-  /** Trains on the first TestCRF sample, extracts from the second, and writes the result. */
-  public void testSpaceViewer () throws IOException
-  {
-    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
-    String[] trainText = { TestCRF.data[0] };
-    String[] testText = { TestCRF.data[1] };
-
-    InstanceList trainSet = new InstanceList (pipe);
-    trainSet.addThruPipe (new ArrayIterator (trainText));
-    // Not read again below, but still pushed through the shared pipe as before.
-    InstanceList testSet = new InstanceList (pipe);
-    testSet.addThruPipe (new ArrayIterator (testText));
-
-    CRF crf = new CRF (pipe, null);
-    crf.addFullyConnectedStatesForLabels ();
-    new CRFTrainerByLabelLikelihood (crf).trainIncremental (trainSet);
-
-    Extraction extraction =
-      TestLatticeViewer.hackCrfExtor (crf).extract (new ArrayIterator (testText));
-
-    if (!outputDir.exists ()) {
-      outputDir.mkdir ();
-    }
-    DocumentViewer.writeExtraction (outputDir, extraction);
-  }
-
-  /**
-   * Command-line entry point. With no arguments the whole suite is run;
-   * otherwise each argument is used as the name of one test to run.
-   */
-  public static void main (String[] args) throws Throwable
-  {
-    if (args.length == 0) {
-      junit.textui.TestRunner.run ((TestSuite) suite ());
-      return;
-    }
-
-    TestSuite selected = new TestSuite ();
-    for (int idx = 0; idx < args.length; idx++) {
-      selected.addTest (new TestDocumentViewer (args[idx]));
-    }
-    junit.textui.TestRunner.run (selected);
-  }
-
-}

File src/cc/mallet/extract/test/TestLatticeViewer.java

-/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept.
-   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
-   http://www.cs.umass.edu/~mccallum/mallet
-   This software is provided under the terms of the Common Public License,
-   version 1.0, as published by http://www.opensource.org.  For further
-   information, see the file `LICENSE' included with this distribution. */
-package cc.mallet.extract.test;
-
-import junit.framework.Test;
-import junit.framework.TestCase;
-import junit.framework.TestSuite;
-
-import java.io.*;
-
-import cc.mallet.extract.CRFExtractor;
-import cc.mallet.extract.Extraction;
-import cc.mallet.extract.LatticeViewer;
-import cc.mallet.fst.CRF;
-import cc.mallet.fst.CRFTrainerByLabelLikelihood;
-import cc.mallet.fst.MEMM;
-import cc.mallet.fst.MEMMTrainer;
-import cc.mallet.fst.TokenAccuracyEvaluator;
-import cc.mallet.fst.TransducerEvaluator;
-import cc.mallet.fst.tests.TestCRF;
-import cc.mallet.fst.tests.TestMEMM;
-import cc.mallet.pipe.Pipe;
-import cc.mallet.pipe.SerialPipes;
-import cc.mallet.pipe.iterator.ArrayIterator;
-import cc.mallet.types.InstanceList;
-
-/**
- * Exercises {@link LatticeViewer} by training models on the pipe returned by
- * {@code TestMEMM.makeSpacePredictionPipe()} and writing the extraction
- * results as HTML. The tests make no assertions; they only check that the
- * calls complete.
- *
- * Created: Oct 31, 2004
- *
- * @author <A HREF="mailto:casutton@cs.umass.edu">casutton@cs.umass.edu</A>
- * @version $Id: TestLatticeViewer.java,v 1.1 2007/10/22 21:38:02 mccallum Exp $
- */
-public class TestLatticeViewer extends TestCase {
-
-  public TestLatticeViewer (String name)
-  {
-    super (name);
-  }
-
-  // Output locations, given as relative paths.
-  private static final File htmlFile = new File ("errors.html");
-  private static final File latticeFile = new File ("lattice.html");
-  private static final File htmlDir = new File ("html/");
-
-  /**
-   * Trains a CRF on the first TestCRF sample, extracts from the second, and
-   * writes the result to {@code errors.html} and {@code lattice.html}.
-   *
-   * @throws FileNotFoundException if either output file cannot be opened
-   */
-  public void testSpaceViewer () throws FileNotFoundException
-  {
-    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
-    String[] data0 = { TestCRF.data[0] };
-    String[] data1 = { TestCRF.data[1] };
-
-    InstanceList training = new InstanceList (pipe);
-    training.addThruPipe (new ArrayIterator (data0));
-    InstanceList testing = new InstanceList (pipe);
-    testing.addThruPipe (new ArrayIterator (data1));
-
-    CRF crf = new CRF (pipe, null);
-    crf.addFullyConnectedStatesForLabels ();
-    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
-    crft.trainIncremental (training);
-
-    CRFExtractor extor = hackCrfExtor (crf);
-    Extraction extraction = extor.extract (new ArrayIterator (data1));
-
-    // Close each stream in a finally block so the file handle is released
-    // even when the viewer throws.
-    PrintStream out = new PrintStream (new FileOutputStream (htmlFile));
-    try {
-      LatticeViewer.extraction2html (extraction, extor, out);
-    } finally {
-      out.close();
-    }
-
-    out = new PrintStream (new FileOutputStream (latticeFile));
-    try {
-      LatticeViewer.extraction2html (extraction, extor, out, true);
-    } finally {
-      out.close();
-    }
-  }
-
-
-  /**
-   * Builds a CRFExtractor whose tokenization pipe is a three-element
-   * SerialPipes taken from the CRF's input pipe.
-   *
-   * NOTE(review): with the removePipe call commented out, every iteration
-   * reads index 0, so the same pipe is stored three times. Presumably the
-   * first three pipes were intended - confirm before changing.
-   *
-   * @param crf model whose input pipe must be a SerialPipes
-   * @return an extractor wrapping {@code crf} and the assembled pipe
-   */
-  static CRFExtractor hackCrfExtor (CRF crf)
-  {
-    Pipe[] newPipes = new Pipe [3];
-
-    SerialPipes pipes = (SerialPipes) crf.getInputPipe ();
-    for (int i = 0; i < 3; i++) {
-      Pipe p0 = pipes.getPipe (0);
-      //pipes.removePipe (0);  TODO Fix me
-      //p0.setParent (null);
-      newPipes[i] = p0;
-    }
-
-    Pipe tokPipe = new SerialPipes (newPipes);
-
-    CRFExtractor extor = new CRFExtractor (crf, (Pipe)tokPipe);
-    return extor;
-  }
-
-
-  /**
-   * Trains a CRF and a MEMM on the first TestCRF sample, extracts from all
-   * samples with each, and passes both results to
-   * {@code LatticeViewer.viewDualResults}, writing into the {@code html/}
-   * directory.
-   */
-  public void testDualSpaceViewer () throws IOException
-  {
-    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
-    String[] data0 = { TestCRF.data[0] };
-    String[] data1 = TestCRF.data;
-
-    InstanceList training = new InstanceList (pipe);
-    training.addThruPipe (new ArrayIterator (data0));
-    InstanceList testing = new InstanceList (pipe);
-    testing.addThruPipe (new ArrayIterator (data1));
-
-    CRF crf = new CRF (pipe, null);
-    crf.addFullyConnectedStatesForLabels ();
-    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
-    TokenAccuracyEvaluator eval = new TokenAccuracyEvaluator (new InstanceList[] {training, testing}, new String[] {"Training", "Testing"});
-    // Five single-iteration training calls, evaluating after each one.
-    for (int i = 0; i < 5; i++) {
-      crft.train (training, 1);
-      eval.evaluate(crft);
-    }
-
-    CRFExtractor extor = hackCrfExtor (crf);
-    Extraction e1 = extor.extract (new ArrayIterator (data1));
-
-    // Second model: a MEMM built on its own, freshly created pipe.
-    Pipe pipe2 = TestMEMM.makeSpacePredictionPipe ();
-    InstanceList training2 = new InstanceList (pipe2);
-    training2.addThruPipe (new ArrayIterator (data0));
-    InstanceList testing2 = new InstanceList (pipe2);
-    testing2.addThruPipe (new ArrayIterator (data1));
-
-    MEMM memm = new MEMM (pipe2, null);
-    memm.addFullyConnectedStatesForLabels ();
-    MEMMTrainer memmt = new MEMMTrainer (memm);
-    TransducerEvaluator memmeval = new TokenAccuracyEvaluator (new InstanceList[] {training2, testing2}, new String[] {"Training2", "Testing2"});
-    memmt.train (training2, 5);
-    memmeval.evaluate(memmt);
-
-    CRFExtractor extor2 = hackCrfExtor (memm);
-    Extraction e2 = extor2.extract (new ArrayIterator (data1));
-
-    if (!htmlDir.exists ()) htmlDir.mkdir ();
-    LatticeViewer.viewDualResults (htmlDir, e1, extor, e2, extor2);
-
-  }
-
-  public static Test suite ()
-  {
-    return new TestSuite (TestLatticeViewer.class);
-  }
-
-
-  /**
-   * Command-line entry point. With no arguments the whole suite is run;
-   * otherwise each argument is used as the name of one test to run.
-   */
-  public static void main (String[] args) throws Throwable
-  {
-    TestSuite theSuite;
-    if (args.length > 0) {
-      theSuite = new TestSuite ();
-      for (int i = 0; i < args.length; i++) {
-        theSuite.addTest (new TestLatticeViewer (args[i]));
-      }
-    } else {
-      theSuite = (TestSuite) suite ();
-    }
-
-    junit.textui.TestRunner.run (theSuite);
-  }
-
-}

File src/cc/mallet/extract/test/TestPerDocumentF1Evaluator.java

-/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept.
-   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
-   http://www.cs.umass.edu/~mccallum/mallet
-   This software is provided under the terms of the Common Public License,
-   version 1.0, as published by http://www.opensource.org.  For further
-   information, see the file `LICENSE' included with this distribution. */
-package cc.mallet.extract.test;
-
-import junit.framework.Test;
-import junit.framework.TestCase;
-import junit.framework.TestSuite;
-
-import java.io.ByteArrayOutputStream;
-import java.io.PrintStream;
-import java.io.PrintWriter;
-import java.io.OutputStreamWriter;
-
-import cc.mallet.extract.*;
-import cc.mallet.pipe.*;
-import cc.mallet.pipe.iterator.ArrayIterator;
-import cc.mallet.types.Instance;
-import cc.mallet.types.InstanceList;
-import cc.mallet.types.LabelAlphabet;
-import cc.mallet.types.Sequence;
-import cc.mallet.util.CharSequenceLexer;
-
-/**
- * Tests for {@code PerDocumentF1Evaluator} and {@code PerFieldF1Evaluator}.
- * Predicted and true annotations are given as SGML-tagged strings and the
- * evaluators' printed reports are compared against fixed expected text.
- *
- * Created: Nov 18, 2004
- *
- * @author <A HREF="mailto:casutton@cs.umass.edu">casutton@cs.umass.edu</A>
- * @version $Id: TestPerDocumentF1Evaluator.java,v 1.1 2007/10/22 21:38:02 mccallum Exp $
- */
-public class TestPerDocumentF1Evaluator extends TestCase {
-
-  public TestPerDocumentF1Evaluator (String name)
-  {
-    super (name);
-  }
-
-
-  public static Test suite ()
-  {
-    return new TestSuite (TestPerDocumentF1Evaluator.class);
-  }
-
-  // Predicted annotations for the per-document test; paired by index with testTrue.
-  private static String[] testPred = {
-    "<eater>the big red fox</eater> did it",
-    "it was done by <meal>the dog</meal>",
-    "<eater>the cat</eater> ate the <meal>canary</meal>",
-    "<meal>the hamburger</meal> was eaten by the kid",
-    "<eater>the dog</eater> was eaten with zest",
-    "four score and seven years <meal>ago</meal>"
-
-  };
-
-  // True annotations for the per-document test.
-  private static String[] testTrue = {
-    "<eater>the big red fox</eater> did it",
-    "it was done by <eater>the dog</eater>",
-    "<eater>the cat</eater> ate <meal>the canary</meal>",
-    "<meal>the hamburger</meal> was eaten by <eater>the kid</eater>",
-    "<meal>the dog</meal> was eaten with zest",
-    "four score and seven years ago"
-  };
-
-
-  /**
-   * Runs both string arrays through one shared SGML-tokenizing pipe and
-   * pairs them index by index into DocumentExtractions named
-   * {@code "TEST" + i}, using "O" as the background tag.
-   *
-   * @param predStrings SGML-tagged predicted annotations
-   * @param trueStrings SGML-tagged true annotations; read at every index of
-   *                    {@code predStrings}
-   * @return an Extraction holding one DocumentExtraction per predicted string
-   */
-  private Extraction createExtractionFrom (String[] predStrings, String[] trueStrings)
-  {
-    Pipe pipe = new SerialPipes (new Pipe[] {
-      new SGML2TokenSequence (new CharSequenceLexer (CharSequenceLexer.LEX_NONWHITESPACE_CLASSES), "O"),
-      new Target2LabelSequence (),
-      new PrintInputAndTarget (),
-    });
-
-    InstanceList pred = new InstanceList (pipe);
-    pred.addThruPipe (new ArrayIterator (predStrings));
-
-    InstanceList targets = new InstanceList (pipe);
-    targets.addThruPipe (new ArrayIterator (trueStrings));
-
-    LabelAlphabet dict = (LabelAlphabet) pipe.getTargetAlphabet ();
-    Extraction extraction = new Extraction (null, dict);
-
-    for (int i = 0; i < pred.size(); i++) {
-      Instance aPred = pred.get (i);
-      Instance aTarget = targets.get (i);
-      // The tokenization comes from the predicted instance; only the label
-      // sequence is taken from the target instance.
-      Tokenization input = (Tokenization) aPred.getData ();
-      Sequence predSeq = (Sequence) aPred.getTarget ();
-      Sequence targetSeq = (Sequence) aTarget.getTarget ();
-      DocumentExtraction docextr = new DocumentExtraction ("TEST"+i, dict, input, predSeq, targetSeq, "O");
-      extraction.addDocumentExtraction (docextr);
-    }
-
-    return extraction;
-  }
-
-  // Expected report for testPerDocEval.
-  private static final String testAExpected = "Testing per-document F1\nName\tP\tR\tF1\n" +
-          "eater\t0.6667\t0.5\t0.5714\n" +
-          "O\t0\t1\t0\n" +
-          "meal\t0.25\t0.3333\t0.2857\n" +
-          "OVERALL (micro-averaged) P=0.4286 R=0.4286 F1=0.4286\n" +
-          "OVERALL (macro-averaged) F1=0.4286\n\n";
-
-  /** Compares the per-document report for testPred/testTrue with testAExpected. */
-  public void testPerDocEval ()
-  {
-    Extraction extraction = createExtractionFrom (testPred, testTrue);
-    PerDocumentF1Evaluator eval = new PerDocumentF1Evaluator ();
-    ByteArrayOutputStream out = new ByteArrayOutputStream ();
-    PrintWriter writer = new PrintWriter (new OutputStreamWriter (out), true);
-    eval.setErrorOutputStream (System.out);
-    eval.evaluate ("Testing", extraction, writer);
-
-    // PrintWriter autoflush only triggers on println/printf/format, so flush
-    // explicitly before reading the buffer to avoid seeing truncated output.
-    writer.flush ();
-
-    String output = out.toString ();
-    assertEquals (testAExpected, output);
-  }
-
-  // Predicted annotations for the per-field tests; paired by index with mpdTrue.
-  private static final String[] mpdPred = {
-    "<title>Wizard of Oz</title> by <author>John Smith</author> and <author>Adam Felber</author>",
-    "<title>Jisp Boo Fuzz by</title> the estimable <title>Rich Q. Doe</title> and <author>Frank Wilson</author>",
-    "<title>Howdy Doody</title> if you think this is Mr. nonsense <author>don't you huh</author>",
-  };
-
-  // True annotations for the per-field tests.
-  private static final String[] mpdTrue = {
-    "<title>Wizard of Oz</title> by <author>John Smith</author> and <author>Adam Felber</author>",
-    "<title>Jisp Boo Fuzz</title> by the estimable <author>Rich Q. Doe</author> and <author>Frank Wilson</author>",
-    "<title>Howdy Doody</title> if <title>you</title> think this is <title>Mr.</title> <author> nonsense don't you huh</author>",
-  };
-
-  // Expected per-field report; also reused by the punctuation and cleaning tests.
-  private static final String mpdExpected = "Testing SEGMENT counts\nName\tCorrect\tPred\tTarget\n" +
-          "title\t2\t4\t5\n" +
-          "O\t0\t0\t0\n" +
-          "author\t3\t4\t5\n" +
-          "\nTesting per-field F1\n" +
-          "Name\tP\tR\tF1\n" +
-          "title\t0.5\t0.4\t0.4444\n" +
-          "O\t0\t1\t0\n" +
-          "author\t0.75\t0.6\t0.6667\n" +
-          "OVERALL (micro-averaged) P=0.625 R=0.5 F1=0.5556\n" +
-          "OVERALL (macro-averaged) F1=0.5556\n\n";
-
-  /** Compares the per-field report for mpdPred/mpdTrue with mpdExpected. */
-  public void testPerFieldEval ()
-  {
-    Extraction extraction = createExtractionFrom (mpdPred, mpdTrue);
-    PerFieldF1Evaluator eval = new PerFieldF1Evaluator ();
-    ByteArrayOutputStream out = new ByteArrayOutputStream ();
-    eval.evaluate ("Testing", extraction, new PrintStream (out));
-    assertEquals (mpdExpected, out.toString());
-  }
-
-  /** Calls the single-argument evaluate; the output has to be checked by eye. */
-  public void testToStdout ()
-  {
-    Extraction extraction = createExtractionFrom (mpdPred, mpdTrue);
-    PerFieldF1Evaluator eval = new PerFieldF1Evaluator ();
-    eval.evaluate (extraction);
-    System.out.println ("*** Please verify that something was output above.");
-  }
-
-  // Variants of mpdPred/mpdTrue with punctuation placed differently around the tags.
-  private static final String[] punctPred = {
-    "<title>Wizard of Oz,</title> by <author>John Smith</author> and <author>Adam Felber</author>",
-    "<title>Jisp Boo Fuzz by</title> the estimable <title>Rich Q. Doe</title> and <author>Frank Wilson</author>",
-    "<title>Howdy Doody</title>!, if you think this is Mr. nonsense <author>don't you huh</author>",
-  };
-
-  private static final String[] punctTrue = {
-    "<title>Wizard of Oz</title>, by <author>John Smith</author> and <author>Adam Felber</author>",
-    "<title>Jisp Boo Fuzz</title> by the estimable <author>Rich Q. Doe</author> and <author>Frank Wilson</author>",
-    "<title>Howdy Doody!</title>, if <title>you</title> think this is <title>Mr.</title> <author> nonsense don't you huh</author>",
-  };
-
-  //xxx  Currently fails because grabbing the field span for Howdy Doody! grabs the </title> as
-  //  well.  I think this is because getting the text subspan goes to the start of the next,
-  //  rather than the end of the last.  It seems like that should be changed, but I'd need to
-  //  think about the implications for Rexa before doing this.
-  public void testPunctuationIgnoringEvaluator ()
-  {
-    Extraction extraction = createExtractionFrom (punctPred, punctTrue);
-    PerFieldF1Evaluator eval = new PerFieldF1Evaluator ();
-    eval.setComparator (new PunctuationIgnoringComparator ());
-    eval.setErrorOutputStream (System.out);
-
-    ByteArrayOutputStream out = new ByteArrayOutputStream ();
-    eval.evaluate ("Testing", extraction, new PrintStream (out));
-    assertEquals (mpdExpected, out.toString());
-  }
-
-  /**
-   * Applies a RegexFieldCleaner (tags, commas and exclamation marks) to the
-   * punctuation variants and expects the same report as mpdExpected.
-   */
-  public void testFieldCleaning ()
-  {
-    Extraction extraction = createExtractionFrom (punctPred, punctTrue);
-    extraction.cleanFields (new RegexFieldCleaner ("<.*?>|,|!"));
-
-    PerFieldF1Evaluator eval = new PerFieldF1Evaluator ();
-    ByteArrayOutputStream out = new ByteArrayOutputStream ();
-    eval.evaluate ("Testing", extraction, new PrintStream (out));
-    assertEquals (mpdExpected, out.toString());
-  }
-
-  /**
-   * Command-line entry point. With no arguments the whole suite is run;
-   * otherwise each argument is used as the name of one test to run.
-   */
-  public static void main (String[] args) throws Throwable
-  {
-    TestSuite theSuite;
-    if (args.length > 0) {
-      theSuite = new TestSuite ();
-      for (int i = 0; i < args.length; i++) {
-        theSuite.addTest (new TestPerDocumentF1Evaluator (args[i]));
-      }
-    } else {
-      theSuite = (TestSuite) suite ();
-    }
-
-    junit.textui.TestRunner.run (theSuite);
-  }
-
-}

File src/cc/mallet/fst/tests/TestCRF.java

-/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
-   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
-   http://www.cs.umass.edu/~mccallum/mallet
-   This software is provided under the terms of the Common Public License,
-   version 1.0, as published by http://www.opensource.org.  For further
-   information, see the file `LICENSE' included with this distribution. */
-
-/**
- @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
- */
-
-package cc.mallet.fst.tests;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.PrintWriter;
-import java.io.Serializable;
-import java.io.StringReader;
-import java.io.StringWriter;
-
-import java.util.Random;
-import java.util.regex.Pattern;
-
-import junit.framework.Test;
-import junit.framework.TestCase;
-import junit.framework.TestSuite;
-
-import cc.mallet.types.Alphabet;
-import cc.mallet.types.FeatureSequence;
-import cc.mallet.types.FeatureVector;
-import cc.mallet.types.FeatureVectorSequence;
-import cc.mallet.types.Instance;
-import cc.mallet.types.InstanceList;
-import cc.mallet.types.MatrixOps;
-import cc.mallet.types.Sequence;
-import cc.mallet.types.SparseVector;
-import cc.mallet.types.Token;
-import cc.mallet.types.TokenSequence;
-
-import cc.mallet.pipe.CharSequence2TokenSequence;
-import cc.mallet.pipe.LineGroupString2TokenSequence;
-import cc.mallet.pipe.Noop;
-import cc.mallet.pipe.Pipe;
-import cc.mallet.pipe.PrintInputAndTarget;
-import cc.mallet.pipe.SerialPipes;
-import cc.mallet.pipe.Target2LabelSequence;
-import cc.mallet.pipe.TokenSequence2FeatureVectorSequence;
-import cc.mallet.pipe.TokenSequenceLowercase;
-import cc.mallet.pipe.TokenSequenceMatchDataAndTarget;
-import cc.mallet.pipe.TokenSequenceParseFeatureString;
-import cc.mallet.pipe.iterator.ArrayIterator;
-import cc.mallet.pipe.iterator.LineGroupIterator;
-import cc.mallet.pipe.tsf.OffsetConjunctions;
-import cc.mallet.pipe.tsf.TokenText;
-
-import cc.mallet.fst.CRF;
-import cc.mallet.fst.CRFTrainerByLabelLikelihood;
-import cc.mallet.fst.CRFTrainerByStochasticGradient;
-import cc.mallet.fst.MaxLattice;
-import cc.mallet.fst.MaxLatticeDefault;
-import cc.mallet.fst.SumLattice;
-import cc.mallet.fst.SumLatticeDefault;
-import cc.mallet.fst.SumLatticeScaling;
-import cc.mallet.fst.TokenAccuracyEvaluator;
-import cc.mallet.fst.Transducer;
-
-import cc.mallet.optimize.Optimizable;
-import cc.mallet.optimize.tests.TestOptimizable;
-
-import cc.mallet.util.FileUtils;
-
-// TODO (gsc (08/25/08)): some tests fail because tests are using CRFTrainerByLabelLikelihood
-// instead of CRFOptimizableByLabelLikelihood and CRFOptimizableByValueGradients
-/** Tests for CRF training. */
-public class TestCRF extends TestCase {
-
-	/**
-	 * Creates the test case.
-	 *
-	 * @param name forwarded unchanged to the TestCase constructor
-	 */
-	public TestCRF(String name) {
-		super(name);
-	}
-
-	/**
-	 * Sample English paragraphs used as raw input text. The array is public
-	 * and other test classes read individual entries from it (for example
-	 * TestCRF.data[0] and TestCRF.data[1]).
-	 * NOTE(review): entries 8 and 14 appear to be the same paragraph -
-	 * confirm whether the duplication is intentional.
-	 */
-	public static final String[] data = new String[] {
-			"Free software is a matter of the users' freedom to run, copy, distribute, study, change and improve the software. More precisely, it refers to four kinds of freedom, for the users of the software.",
-			"The freedom to run the program, for any purpose.",
-			"The freedom to study how the program works, and adapt it to your needs.",
-			"The freedom to redistribute copies so you can help your neighbor.",
-			"The freedom to improve the program, and release your improvements to the public, so that the whole community benefits.",
-			"A program is free software if users have all of these freedoms. Thus, you should be free to redistribute copies, either with or without modifications, either gratis or charging a fee for distribution, to anyone anywhere. Being free to do these things means (among other things) that you do not have to ask or pay for permission.",
-			"You should also have the freedom to make modifications and use them privately in your own work or play, without even mentioning that they exist. If you do publish your changes, you should not be required to notify anyone in particular, or in any particular way.",
-			"In order for the freedoms to make changes, and to publish improved versions, to be meaningful, you must have access to the source code of the program. Therefore, accessibility of source code is a necessary condition for free software.",
-			"Finally, note that criteria such as those stated in this free software definition require careful thought for their interpretation. To decide whether a specific software license qualifies as a free software license, we judge it based on these criteria to determine whether it fits their spirit as well as the precise words. If a license includes unconscionable restrictions, we reject it, even if we did not anticipate the issue in these criteria. Sometimes a license requirement raises an issue that calls for extensive thought, including discussions with a lawyer, before we can decide if the requirement is acceptable. When we reach a conclusion about a new issue, we often update these criteria to make it easier to see why certain licenses do or don't qualify.",
-			"In order for these freedoms to be real, they must be irrevocable as long as you do nothing wrong; if the developer of the software has the power to revoke the license, without your doing anything to give cause, the software is not free.",
-			"However, certain kinds of rules about the manner of distributing free software are acceptable, when they don't conflict with the central freedoms. For example, copyleft (very simply stated) is the rule that when redistributing the program, you cannot add restrictions to deny other people the central freedoms. This rule does not conflict with the central freedoms; rather it protects them.",
-			"Thus, you may have paid money to get copies of free software, or you may have obtained copies at no charge. But regardless of how you got your copies, you always have the freedom to copy and change the software, even to sell copies.",
-			"Rules about how to package a modified version are acceptable, if they don't effectively block your freedom to release modified versions. Rules that ``if you make the program available in this way, you must make it available in that way also'' can be acceptable too, on the same condition. (Note that such a rule still leaves you the choice of whether to publish the program or not.) It is also acceptable for the license to require that, if you have distributed a modified version and a previous developer asks for a copy of it, you must send one.",
-			"Sometimes government export control regulations and trade sanctions can constrain your freedom to distribute copies of programs internationally. Software developers do not have the power to eliminate or override these restrictions, but what they can and must do is refuse to impose them as conditions of use of the program. In this way, the restrictions will not affect activities and people outside the jurisdictions of these governments.",
-			"Finally, note that criteria such as those stated in this free software definition require careful thought for their interpretation. To decide whether a specific software license qualifies as a free software license, we judge it based on these criteria to determine whether it fits their spirit as well as the precise words. If a license includes unconscionable restrictions, we reject it, even if we did not anticipate the issue in these criteria. Sometimes a license requirement raises an issue that calls for extensive thought, including discussions with a lawyer, before we can decide if the requirement is acceptable. When we reach a conclusion about a new issue, we often update these criteria to make it easier to see why certain licenses do or don't qualify.",
-			"The GNU Project was launched in 1984 to develop a complete Unix-like operating system which is free software: the GNU system." };
-
-	/**
-	 * Builds a fully connected five-state CRF over a 100-feature input
-	 * alphabet and hands its optimizable view to
-	 * TestOptimizable.testGetSetParameters.
-	 */
-	public void testGetSetParameters() {
-		final int vocabularySize = 100;
-		final int stateCount = 5;
-
-		// Input alphabet with entries "feature0" .. "feature99".
-		Alphabet inputs = new Alphabet();
-		int featureIdx = 0;
-		while (featureIdx < vocabularySize) {
-			inputs.lookupIndex("feature" + featureIdx);
-			featureIdx++;
-		}
-
-		// The output alphabet starts out empty.
-		Alphabet outputs = new Alphabet();
-		CRF model = new CRF(inputs, outputs);
-
-		String[] names = new String[stateCount];
-		int stateIdx = 0;
-		while (stateIdx < stateCount) {
-			names[stateIdx] = "state" + stateIdx;
-			stateIdx++;
-		}
-		model.addFullyConnectedStates(names);
-
-		CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
-		Optimizable.ByGradientValue optimizable =
-				trainer.getOptimizableCRF(new InstanceList(null));
-		TestOptimizable.testGetSetParameters(optimizable);
-	}
-
-	public void testSumLogProb() {
-		double w1 = Math.log(.2);
-		double w2 = Math.log(.8);
-		double s1 = Math.log(.2 + .8);
-		double s2 = Transducer.sumLogProb(w1, w2);
-		assertEquals(s1, s2, 0.00001);
-		w1 = Math.log(99999);
-		w2 = Math.log(.0001);
-		s1 = Math.log(99999.0001);
-		s2 = Transducer.sumLogProb(w1, w2);
-		assertEquals(s1, s2, 0.00001);
-