Commits

muzny committed 3842fd7

Commented detector experiments.

Comments (0)

Files changed (6)

WiktionaryIdioms/src/detector/experiments/DetectorExperiment.java

+/**
+ * Interface through which detector experiments are run.
+ * To add a new experiment, register it in the EXPERIMENTS map below.
+ */
+
 package detector.experiments;
 
 import java.util.List;
 
 public interface DetectorExperiment {
 	
+	// Maps the name string passed to RunDetectorExperiment[FromFiles] to the corresponding experiment instance.
 	public static final TreeMap<String, DetectorExperiment> EXPERIMENTS = new TreeMap<String, DetectorExperiment>() {/**
 		 * 
 		 */
 
 	{
 	    put("dummy", new DummyExperiment());
-	    put("identificationincorporated", new IdentificationIncorporatedExperiment());
 	    put("goldenlabels", new GoldenLabelsExperiment());
 	}};
 
-	// TODO: see if you should remove the train data set (you should...)
 	public DetectorExperimentResult runExperiment(DetectorMethod d, List<ClassifierData> test);
 }

WiktionaryIdioms/src/detector/experiments/DummyExperiment.java

+/**
+ * This experiment simply uses the detector method that is passed in to get
+ * the best prediction for each example, then adds it to the
+ * DetectorExperimentResult that it returns.
+ */
+
 package detector.experiments;
 
 import java.util.List;
 	 * In this experiment, we will use the example senses associated with the senses in the database
 	 * and see if we can disambiguate them.
 	 */
-	public DetectorExperimentResult runExperiment(DetectorMethod d, List<ClassifierData> test) {
-		// For each result, look up in WKT for the sense associated with it.
-		
+	public DetectorExperimentResult runExperiment(DetectorMethod d, List<ClassifierData> test) {		
 		DetectorExperimentResult result = new DetectorExperimentResult();
 	
 		int count = 0;

WiktionaryIdioms/src/detector/experiments/GoldenLabelsExperiment.java

+/**
+ * An experiment that displays statistics about the examples according to 
+ * the labelings of idioms already in Wiktionary - the "golden labels".
+ */
+
 package detector.experiments;
 
 import java.util.ArrayList;
 public class GoldenLabelsExperiment implements DetectorExperiment {
 
 	@Override
-	/**
-	 * In this experiment, we will use the example senses associated with the senses in the database
-	 * and see if we can disambiguate them.
-	 */
 	public DetectorExperimentResult runExperiment(DetectorMethod d, List<ClassifierData> test) {
 
 		System.out.println("There are "+ test.size() + " test examples.");
 				predictedIdiomaticTitles.add(pair.getFirst().getTitle());
 			}
 		}
-		System.out.println("There are " + predictedIdiomaticTitles.size() + " titles predicted idiomatic.");
+		System.out.println("There are " + predictedIdiomaticTitles.size() + " titles labeled idiomatic in Wiktionary.");
 		
 		// Now gather all examples with those titles
 		List<Example> predictedIdiomatic = new ArrayList<Example>();
 				predictedIdiomatic.add(pair.getSecond());
 			}
 		}
-		System.out.println("There are " + predictedIdiomatic.size() + " examples with a potentially idiomatic title.");
+		System.out.println("There are " + predictedIdiomatic.size() + " examples corresponding to these titles.");
 		
 		int count = 0;
 		for (Example ex : predictedIdiomatic) {

WiktionaryIdioms/src/detector/experiments/IdentificationIncorporatedExperiment.java

-package detector.experiments;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-
-
-import classifier.classifiers.Classifier;
-import classifier.classifiers.Perceptron;
-import classifier.config.GeneralConfigs;
-import classifier.features.dependency.Pair;
-import classifier.model.ClassifierData;
-import classifier.model.ClassifierModel;
-import classifier.model.Result;
-import classifier.model.Sense;
-import classifier.utilities.ClassifierDataUtils;
-
-
-import detector.methods.DetectorMethod;
-import detector.model.Example;
-
-public class IdentificationIncorporatedExperiment implements DetectorExperiment {
-
-	@Override
-	/**
-	 * In this experiment, we will use the example senses associated with the senses in the database
-	 * and see if we can disambiguate them.
-	 */
-	public DetectorExperimentResult runExperiment(DetectorMethod d, List<ClassifierData> test) {
-
-		System.out.println("There are "+ test.size() + " test examples.");
-		DetectorExperimentResult result = new DetectorExperimentResult();
-		
-		// First, go and classify these using the identification model.
-		Classifier classy = new Perceptron(new ClassifierModel(
-				new File(GeneralConfigs.getString(
-						GeneralConfigs.DETECTOR_CONFIGS, "IdentificationIncorporated.classifierModel"))));
-		
-		// Go and get the right senses
-		String database = GeneralConfigs.getString(
-				GeneralConfigs.DETECTOR_CONFIGS, "IdentificationIncorporated.senseDb");
-		String table = GeneralConfigs.getString(
-				GeneralConfigs.DETECTOR_CONFIGS, "IdentificationIncorporated.senseTable");
-		List<ClassifierData> senses = ClassifierDataUtils.getSensesFromDbBasedOnExamples(database, table, test);
-		System.out.println("These examples map to " + senses.size() + " senses.");
-		
-		List<Pair<Sense, Example>> pairs = new ArrayList<Pair<Sense, Example>>();
-		for (ClassifierData sense : senses) {
-			// Find all examples that go with it
-			for (ClassifierData example : test) {
-				if (example.getKey().equals(sense.getKey())) {
-					pairs.add(new Pair<Sense, Example>((Sense) sense, (Example) example));
-				}
-			}
-		}
-		System.out.println("Which makes a total of " + pairs.size() + " pairs.");
-		
-		// For each classifier data, see if the corresponding sense "should" be literal or idiomatic
-		Set<String> predictedIdiomaticTitles = new HashSet<String>();
-		for (Pair<Sense, Example> pair : pairs) {
-			int label = pair.getFirst().getLabel();
-			Result r = classy.predictResult(pair.getFirst());
-			if (r.getDifference() > 0 || label == 1) {
-				predictedIdiomaticTitles.add(pair.getFirst().getTitle());
-			}
-		}
-		System.out.println("There are " + predictedIdiomaticTitles.size() + " titles predicted idiomatic.");
-		
-		// Now gather all examples with those titles
-		List<Example> predictedIdiomatic = new ArrayList<Example>();
-		for (Pair<Sense, Example> pair : pairs) {
-			if (predictedIdiomaticTitles.contains(pair.getSecond().getTitle())) {
-				predictedIdiomatic.add(pair.getSecond());
-			}
-		}
-		System.out.println("There are " + predictedIdiomatic.size() + " examples with a potentially idiomatic title.");
-		
-		int count = 0;
-		for (Example ex : predictedIdiomatic) {
-										
-			String best = d.predict(ex);
-
-			if (best != null) {
-				result.addResult(best, ex);
-			} else {
-				result.addUnclassified(ex);
-			}
-			count++;
-			if (count % 100 == 0) {
-				System.out.println(count + "/" + test.size());
-			}
-		}
-		return result;
-	}
-
-}

WiktionaryIdioms/src/detector/experiments/RunDetectorExperiment.java

+/**
+ * The main class for running a detector experiment that
+ * draws its data from MySQL databases.
+ */
+
 package detector.experiments;
 
 import java.io.File;

WiktionaryIdioms/src/detector/experiments/RunDetectorExperimentFromFiles.java

+/**
+ * The main class for running detector experiments that draw their data
+ * only from text files.
+ * 
+ * Works with the DummyExperiment - pass "dummy" as the detector_experiment
+ * type in the command line args.
+ */
+
 package detector.experiments;
 
 import java.io.File;