Commits

muzny committed 4dcc80b

Commented experiments

Comments (0)

Files changed (10)

WiktionaryIdioms/config/classifierconfig.xml

     </CompareGroups>
     
     <LabelData>
-    		<modelFile>./paperOutputModels/devCorrectedUncleanedAll.model</modelFile>
-    		<precisionBoundary>.529</precisionBoundary>
+    	<modelFile>./paperOutputModels/devCorrectedUncleanedAll.model</modelFile>
+    	<precisionBoundary>.529</precisionBoundary>
     </LabelData>
 </config>

WiktionaryIdioms/src/classifier/experiments/BasicApply.java

+/**
+ * A basic experiment for the classifier. This means that it runs the classifier
+ * once with the specified settings.
+ */
+
 package classifier.experiments;
 
 import java.io.File;
 import classifier.model.ClassifierData;
 import classifier.model.ClassifierModel;
 import classifier.model.Result;
-import classifier.model.Sense;
 
 public class BasicApply implements Experiment {
-	public static final String CONFIG_STR = "BasicApply";
+	public static final String CONFIG_STR = "BasicApply";  // specifies which section of the config file to read
 
 	@Override
 	public ExperimentResult runExperiment(ClassifierConfigs configs, List<ClassifierData> train, List<ClassifierData> test) {
-		// TODO Auto-generated method stub
 		configs.setSection(CONFIG_STR);
 		
 		boolean verbose = configs.getSBool(ClassifierConfigs.VERBOSE);

WiktionaryIdioms/src/classifier/experiments/CompareFeatures.java

+/**
+ * An experiment that compares feature sets. This means that if you give it 
+ * the features 1,2,3,5, it will test each of these features individually. If you
+ * set buildUp to true, it will also test 1,2; 1,2,3; and 1,2,3,5. If you set buildByBest
+ * to true, it will build up to all features by adding the best-performing ones
+ * first.
+ */
+
+
 package classifier.experiments;
 
 import java.util.ArrayList;

WiktionaryIdioms/src/classifier/experiments/CompareGroups.java

+/**
+ * This experiment compares the LEXICAL versus GRAPH features (and any other group
+ * that you define) in classifier.features.numeric.Feature.
+ */
+
 package classifier.experiments;
 
 import java.util.ArrayList;

WiktionaryIdioms/src/classifier/experiments/Experiment.java

+/**
+ * The interface to running an experiment, so that it can be used with
+ * RunExperiment and RunClassifierExperimentFromFiles.
+ */
+
 package classifier.experiments;
 
 import java.util.List;

WiktionaryIdioms/src/classifier/experiments/ExperimentResult.java

+/**
+ * A wrapper for the results of an experiment so that all experiments
+ * can return the same thing and fill in the fields as necessary for that specific
+ * experiment.
+ */
+
 package classifier.experiments;
 
 import java.util.ArrayList;
 	public double[] weights;
 	public List<ClassifierData> unclassified;
 	
+	/**
+	 * An empty ExperimentResult doesn't know anything 
+	 * about what happened in the experiment, or about the classifier.
+	 */
 	public ExperimentResult() {
 		results = new TreeMap<Integer, List<ClassifierData>>();
 		generalInfo = "";
 		unclassified = new ArrayList<ClassifierData>();
 	}
 	
+	/**
+	 * Knows what model this ExperimentResult is associated with.
+	 * @param clm - The ClassifierModel that this ExperimentResult is associated with.
+	 */
 	public ExperimentResult(ClassifierModel clm) {
 		this();
 		model = clm;
 	}
 	
+	/**
+	 * Add one result to this experiment result. This means that you
+	 * are adding one predicted classification associated with one ClassifierData.
+	 * @param label - The label predicted by the classifier.
+	 * @param cd - The ClassifierData that was classified.
+	 */
 	public void addResult(int label, ClassifierData cd) {
 		if (!results.containsKey(label)) {
 			results.put(label, new ArrayList<ClassifierData>());
 		results.get(label).add(cd);
 	}
 	
-	public void addUnclassified(ClassifierData cd) {
-		unclassified.add(cd);
-	}
-	
+	/**
+	 * Add sets of results to this ExperimentResult.
+	 * @param generalResults - The map that corresponds predicted class to ClassifierDatas
+	 * @param resultDeltas - The List of Results that contains information about
+	 * how each ClassifierData was scored for each available class. 
+	 */
 	public void addResultSets(TreeMap<Integer, List<ClassifierData>> generalResults, List<Result> resultDeltas) {
 		results = generalResults;
 		this.resultDeltas = resultDeltas;
 	}
 	
+	/**
+	 * Get the FScore for this ExperimentResult, from the perspective of the idiom or the
+	 * classification label of "1".
+	 * @return - The double FScore (Harmonic mean of precision and recall)
+	 */
 	public double getFScore() {
 		return ClassifierEvaluationUtils.getFMeasure(results, 1);
 	}
 	
+	/**
+	 * Get an array of doubles that contains [precision, recall] associated with the
+	 * results in this ExperimentResult.
+	 * @return - Double array of [precision, recall]
+	 */
 	public double[] getPrecisionRecall() {
 		return ClassifierEvaluationUtils.getPrecisionRecall(results, 1);
 	}
 	
+	/**
+	 * Get the precision of this classifier.
+	 * @return - The double precision.
+	 */
 	public double getPrecision() {
 		return getPrecisionRecall()[0];
 	}
 	
+	/**
+	 * Get the recall of this classifier.
+	 * @return - The double recall.
+	 */
 	public double getRecall() {
 		return getPrecisionRecall()[1];
 	}
 	
+	/**
+	 * Returns true if the classifier has classified every data point
+	 * as the same label.
+	 * @return True if it classified everything as the same thing.
+	 */
 	public boolean isUnaryClassifier() {
 		List<double[]> prList = new ArrayList<double[]>();
 		prList.add(getPrecisionRecall());
 		return ClassifierEvaluationUtils.getAllOneClassifier(prList);
 	}
 	
+	/**
+	 * Set some printable info (about the classifier) so that when an experiment is run,
+	 * the important information is communicated. Does not print in default toString().
+	 * @param info - String that the info of this ExperimentResult should be (does
+	 * NOT append)
+	 */
 	public void setClassifierInfo(String info) {
 		classifierInfo = info;
 	}
 	
+	/**
+	 * Add some info that is easily accessible to be printed later (does not print in default
+	 * toString()).
+	 * @param info - APPENDS info to the generalInfo field.
+	 */
 	public void addPrintInfo(String info) {
 		this.generalInfo += info;
 	}
 	
+	/**
+	 * Add info about precision/recall in a format such that it is easy to put in spreadsheets.
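+	 * @param info - String that the graph-format info should be (REPLACES any
+	 * previously stored value; does NOT append).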
+	 */
 	public void addGraphFormatInfo(String info) {
 		this.graphInfo = info;
 	}
 		return generalInfo;
 	}
 	
+	/**
+	 * The "golden measures" are the measures that you would get if you
+	 * used your classifier but also counted the examples that are marked
+	 * as label 1 as label 1 no matter what your classifier said they were.
+	 * @return - double that is precision - your recall for this will always
+	 * be 1.
+	 */
 	public double getGoldenMeasures() {
 		// count the true positives
 		int tP = 0;
 		return s;
 	}
 	
+	/**
+	 * Shows the difference between each ClassifierData being classified as class 1
+	 * versus class 0.
+	 * @return - The String describing those differences, built from resultDeltas.
+	 */
 	public String getConfidenceString() {
 		String s = "";
 		for (Result r : resultDeltas) {
 		return s;
 	}
 	
+	/**
+	 * Returns a string in easily spreadsheet insertable format that
+	 * contains the percent classified as label 1, the number labelled as 1
+	 * at that percent, the recall, precision, and fscore.
+	 * @param granularity - the step size by which to advance the percent
+	 * until it reaches 100%; expected to be in the range (0,1].
+	 * @return - The String result.
+	 */
 	public String getTestCutoffsString(double granularity) {
 		List<Double> percents = new ArrayList<Double>();
 		List<Double> precisions = new ArrayList<Double>();
 		return results;
 	}
 
+	
 	@Override
 	// return -num if other is > this
 	// return +num if other is < this
 	// return 0 if they are equal
 	public int compareTo(ExperimentResult other) {
-		// TODO Auto-generated method stub
 		return 0;
 	}
 }

WiktionaryIdioms/src/classifier/experiments/GridSearch.java

+/**
+ * An experiment that does a grid search over numbers of
+ * iterations that the classifier should perform. Determines the "best"
+ * settings by choosing the ones that maximize FScore.
+ */
+
 package classifier.experiments;
 
 import java.util.ArrayList;

WiktionaryIdioms/src/classifier/experiments/LabelData.java

+/**
+ * An experiment that labels data according to the precision boundary specified.
+ * This means that we determine what percentage of results to classify as 1 by determining
+ * what percentage of the "most 1" data points can be classified as 1 while maintaining a
+ * precision above the specified level. Determines the percentage on the dev set, and does
+ * the final reporting on the test set. Current configurations require mysql databases.
+ */
+
 package classifier.experiments;
 
 import java.io.File;
 		ClassifierConfigs configs = ClassifierConfigs.getInstance();
 		configs.setSection("LabelData");
 		
-		String dataInfo = "";
 		System.out.print("Loading data...");
 
-
 		List<ClassifierData> data = null;
 		
 		String database = GeneralConfigs.getString(GeneralConfigs.CLASSIFIER_CONFIGS, "MySQL.classify.database");
 		String where = " WHERE " + column + " = \"train\" ";
 		data = ClassifierDataUtils.getSensesFromDb(
 				database, table, where, "label");
-		String labelStr = "label";
 		
 		// Get the dev set
 		where = " WHERE " + column + " = \"dev\"";
 		System.out.println("entries with label 0: " + classifications.get(0).size());
 		System.out.println("entries with label 1: " + classifications.get(1).size());
 
-		// you should probably get rid of this code at some point
-		System.out.println("augmented entries with label 0: " + lits);
-		System.out.println("augmented entries with label 1: " + ids);
-		
-		/*
-		System.out.println("Things with both");
-		for (String title: idKeys) {
-			if (litKeys.contains(title)) {
-				System.out.println(title);
-			}
-		}*/
+		//System.out.println("augmented entries with label 0: " + lits);
+		//System.out.println("augmented entries with label 1: " + ids);
 	}
 
 }

WiktionaryIdioms/src/classifier/experiments/RunClassifierExperimentFromFiles.java

-
+/**
+ * The main class to run experiments directly from files (no databases).
+ * 
+ * Not all experiments have been tested with this experiment running framework.
+ */
 
 package classifier.experiments;
 

WiktionaryIdioms/src/classifier/experiments/RunExperiment.java

+/**
+ * This is the file to run experiments that draw from databases.
+ */
+
 package classifier.experiments;
 
 import java.io.BufferedWriter;
 	public static final String SENSE_TRAIN_WHERE = " WHERE data_set = \"train\" ";
 	public static final String SENSE_DEV_WHERE = 
 			" WHERE data_set=\"dev\" AND corrected_label IS NOT NULL " +
-			"AND label != -1 AND specific_definition = \"\"";
+			"AND corrected_label != -1 AND specific_definition = \"\"";
 	public static final String SENSE_TEST_WHERE = 
 			" WHERE data_set=\"test\" AND corrected_label IS NOT NULL " +
 			"AND corrected_label != -1 AND specific_definition = \"\"";