Commits

muzny  committed f969cd4

work on documentation

  • Participants
  • Parent commits 039a881

Comments (0)

Files changed (5)

 
 * <type> : The type of classifier experiment to run. Choose from "basic", "grid", "compare", or "comparegroups".
 
-* <config_path> : The path to `config/classifierconfig.xml`. Depending on the type of experiment you are running, different fields will be drawn from for the config file.
+* <config_path> : The path to `classifierconfig.xml`. Depending on the type of experiment you are running, different fields will be read from the config file. Descriptions of the fields are located in `config/classifierconfig.xml`.
 
 Example:
 
 
 * <detector_method> : The method to disambiguate with. Choose from "baseline", "baselinefirst", "baselinerandom", "lesk", or "elesk" (needs to have access to WordNet).
 
-* <config_path> : The path to `config/nodbconfig.xml`. Depending on the type of experiment you are running, different fields will be drawn from for the config file.
+* <config_path> : The path to `nodbconfig.xml`. Depending on the type of experiment you are running, different fields will be read from the config file. Descriptions of the fields are located in `config/nodbconfig.xml`.
 
 * <classifier_model_path> : The path to the classifier model that you wish to use. This file should be produced by RunClassifierExperimentFromFiles.
 
+* <number_of_times> : (optional) The number of times to run this experiment (default is 1). Useful if you are using the "baselinerandom" detector method.
+
 Example:
 
 ```
 
 ## Data
 
+The data is located in the downloads section. Statistics on it are described in the paper referenced at the top of this file.
+
+The data download holds the following files:
+
+* allsenses.txt - Holds all sense data gathered via JWKTL from the November 12th, 2012 English Wiktionary dump, along with computed values for the features described in classifier.features.numeric.Feature.
+
+* [test|dev]_unannotated.txt - The same as allsenses.txt, but only for the senses in the test or development data set.
+
+* [test|dev]_annotated.txt - The same as [test|dev]_unannotated.txt, but with annotated labels. All data were annotated in accordance with the `AnnotationGuidlines.pdf` file that is also in the data download zip.
+
+* [test|dev]_[un]annotated_nofeatures.txt - The same as the corresponding files, but with no computed feature values.
+
 
 ### MySQL
 
-This work was originally conducted using a series of MySQL databases.
+This work was originally conducted using a series of MySQL databases. Instructions on the setup of databases are coming soon!
 
 ### Eclipse
 
-To work on the project in Eclipse, simply download and import the project WiktionaryIdioms into it.
+To work on the project in Eclipse, simply download and import the project into your workspace.

File WiktionaryIdioms/config/classifierconfig.xml

 	<default>
 		<trainPath>./data/train.txt</trainPath>
 		<testPath>./data/dev_annotated.txt</testPath>
-	    <equalDist>false</equalDist>
 	    <percents></percents>
-		<verbose>true</verbose>
+		
 		<features>1, 2, 4, 6, 8, 9, 10, 14</features>
 		<errorBound>.000001</errorBound>
 		<specificsPath>./specifics/specifics.txt</specificsPath>
 		<classifierType>perceptron</classifierType>
 		<precisionRecallGranularity>.01</precisionRecallGranularity>
+		
 		<cleanTest>false</cleanTest>
 		<testCorrectedLabel>true</testCorrectedLabel>
+		
+		<verbose>true</verbose>
+		<shouldOutputGeneral>true</shouldOutputGeneral>
+		<shouldOutputModel>true</shouldOutputModel>
+		
 		<output>
 			<file>file_name</file>
 			<generalInfo>true</generalInfo>
     	<learningRate>1</learningRate>
 		<iterations>93</iterations>
 		<modelFile></modelFile>
-		
-		<shouldOutputGeneral>true</shouldOutputGeneral>
-		<shouldOutputModel>true</shouldOutputModel>
     </BasicApply>
     
-     <Bootstrapping>
-     	<iterations>3</iterations>
-		<cutoffMin>.1</cutoffMin>
-		<cutoffMax>.9</cutoffMax>
-		<cutoffIncrement>.1</cutoffIncrement>
-		
-		<modelFile>./outputModels/devCorrectedCleanedGroups.model</modelFile>
-		
-		<shouldOutputGeneral>true</shouldOutputGeneral>
-		<shouldOutputModel>true</shouldOutputModel>
-    </Bootstrapping>
-    
     <GridSearch>
     	<group>groups</group>
     	<iterationMax>100</iterationMax>
     	<learningDelta>1</learningDelta>
-    	
-    	<shouldOutputGeneral>true</shouldOutputGeneral>
-    	<shouldOutputModel>true</shouldOutputModel>
     </GridSearch>
     
     <CompareFeatures>
 		<buildByBest>false</buildByBest>
     	<iterationMax>100</iterationMax>
     	<learningDelta>1</learningDelta>
-    	
-    	<shouldOutputGeneral>true</shouldOutputGeneral>
-    	<shouldOutputModel>false</shouldOutputModel>
     </CompareFeatures>
     
      <CompareGroups>
-
     	<iterationMax>100</iterationMax>
     	<learningDelta>1</learningDelta>
-    	
-    	<shouldOutputGeneral>false</shouldOutputGeneral>
-    	<shouldOutputModel>false</shouldOutputModel>
     </CompareGroups>
     
     <LabelData>
-    	<modelFile>./paperOutputModels/devCorrectedUncleanedAll.model</modelFile>
-    	<precisionBoundary>.529</precisionBoundary>
+    	<modelFile></modelFile>
+    	<precisionBoundary></precisionBoundary>
     </LabelData>
 </config>

File WiktionaryIdioms/config/nodbconfig.xml

 <?xml version="1.0" encoding="UTF-8"?>
 <!-- config.xml -->
 <config>
+
 	<default>
 		<testPath>./data/dev_examples.txt</testPath>
 		<sensesPath>./data/dev_unannotated.txt</sensesPath>
 		<allSensesPath>./data/allsenses.txt</allSensesPath>
-		<classifierModelCorrected>./paperOutputModels/devCorrectedUncleanedAll.model</classifierModelCorrected>
-		<classifierModelCleaned>./paperOutputModels/devCorrectedCleanedAll.model</classifierModelCleaned>
 	</default>
 	
 </config>

File WiktionaryIdioms/src/classifier/experiments/RunClassifierExperimentFromFiles.java

 		System.out.println("Number in train: " + trainData.size());
 		System.out.println("Number in test: " + testData.size());
 		
-		boolean equalDist = configs.getSBool(ClassifierConfigs.EQUAL_DIST);
+		boolean equalDist = false;
 		String[] percents = configs.getSStringArray(ClassifierConfigs.PERCENTS);
 		System.out.println("equalDist: " + equalDist);
 		System.out.println("percents: " + Arrays.toString(percents));

File WiktionaryIdioms/src/classifier/experiments/RunExperiment.java

 		System.out.println("Number in train: " + trainData.size());
 		System.out.println("Number in test: " + testData.size());
 		
-		boolean equalDist = configs.getSBool(ClassifierConfigs.EQUAL_DIST);
+		boolean equalDist = false;
 		String[] percents = configs.getSStringArray(ClassifierConfigs.PERCENTS);
 		System.out.println("equalDist: " + equalDist);
 		System.out.println("percents: " + Arrays.toString(percents));