Commits

muzny committed 1fb1363

update gitcommit for yet another restructuring

Comments (0)

Files changed (271)

 *\.DS_Store
 *\.class
 
-WiktionaryIdiomClassification/annotations/*
-WiktionaryIdiomClassification/data/*
-WiktionaryIdiomClassification/data_sets/*
-WiktionaryIdiomClassification/inverted_index/*
-WiktionaryIdiomClassification/output/*
-WiktionaryIdiomClassification/outputDetect/*
-WiktionaryIdiomClassification/outputModels/*
-WiktionaryIdiomClassification/paperOutput/*
-WiktionaryIdiomClassification/paperOutputModels/*
-WiktionaryIdiomClassification/nytimes/*
-WiktionaryIdiomClassification/specific_full_wiki_concepts/*
-WiktionaryIdiomClassification/test/*
-WiktionaryIdiomClassification/wikipedia_concepts/*
-
-WiktionaryIdiomDetection/data/*
-
-WiktionaryFileIO/data/*
+WiktionaryIdioms/annotations/*
+WiktionaryIdioms/data/*
+WiktionaryIdioms/paperOutputModels/*

WiktionaryFileIO/.classpath

-<?xml version="1.0" encoding="UTF-8"?>
-<classpath>
-	<classpathentry kind="src" path="src"/>
-	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
-	<classpathentry kind="lib" path="lib/wiktionaryidiomclassification.jar"/>
-	<classpathentry kind="lib" path="lib/wiktionaryidiomdetection.jar"/>
-	<classpathentry kind="lib" path="lib/commons-configuration-1.9.jar"/>
-	<classpathentry kind="lib" path="lib/commons-lang3-3.1.jar"/>
-	<classpathentry kind="lib" path="lib/commons-lang-2.6.jar"/>
-	<classpathentry kind="lib" path="lib/commons-logging-1.1.2.jar"/>
-	<classpathentry kind="lib" path="lib/mysql-connector-java-5.1.22-bin.jar"/>
-	<classpathentry kind="output" path="bin"/>
-</classpath>

WiktionaryFileIO/.settings/org.eclipse.jdt.core.prefs

-eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
-org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
-org.eclipse.jdt.core.compiler.compliance=1.6
-org.eclipse.jdt.core.compiler.debug.lineNumber=generate
-org.eclipse.jdt.core.compiler.debug.localVariable=generate
-org.eclipse.jdt.core.compiler.debug.sourceFile=generate
-org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
-org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
-org.eclipse.jdt.core.compiler.source=1.6

WiktionaryFileIO/bin/fileio/CompareAnnotations.class

Binary file removed.

WiktionaryFileIO/bin/fileio/EditableFile.class

Binary file removed.

WiktionaryFileIO/bin/fileio/PlainExamplesFile.class

Binary file removed.

WiktionaryFileIO/bin/fileio/PlainSensesFile.class

Binary file removed.

WiktionaryFileIO/bin/fileio/ProduceFile.class

Binary file removed.

WiktionaryFileIO/bin/fileio/ProduceFileSettings.class

Binary file removed.

WiktionaryFileIO/bin/fileio/ProducesFile$1.class

Binary file removed.

WiktionaryFileIO/bin/fileio/ProducesFile.class

Binary file removed.

WiktionaryFileIO/bin/fileio/RandomSampleFile.class

Binary file removed.

WiktionaryFileIO/bin/fileio/ReadCorrectedLabels.class

Binary file removed.

WiktionaryFileIO/lib/commons-configuration-1.9.jar

Binary file removed.

WiktionaryFileIO/lib/commons-lang-2.6.jar

Binary file removed.

WiktionaryFileIO/lib/commons-lang3-3.1.jar

Binary file removed.

WiktionaryFileIO/lib/commons-logging-1.1.2.jar

Binary file removed.

WiktionaryFileIO/lib/mysql-connector-java-5.1.22-bin.jar

Binary file removed.

WiktionaryFileIO/lib/wiktionaryidiomclassification.jar

Binary file removed.

WiktionaryFileIO/lib/wiktionaryidiomdetection.jar

Binary file removed.

WiktionaryFileIO/src/fileio/CompareAnnotations.java

-package fileio;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Scanner;
-import java.util.Set;
-
-import classifier.model.Sense;
-import dependency.Pair;
-
-public class CompareAnnotations {
-
-	/**
-	 * @param args
-	 */
-	public static void main(String[] args) {
-		
-		if (args.length < 1) {
-			System.err.println("Usage: <full_corrections_path> [<sample_path_1> ... <sample_path_n>]");
-		}
-		Map<String, Pair<Integer, Sense>> fullAnnotations = readAnnotationFile(args[0]);
-		
-		
-		List<Map<String, Pair<Integer, Sense>>> annoList = new ArrayList<Map<String, Pair<Integer, Sense>>>();
-		for (int i = 1; i < args.length; i++) {
-			String filename = args[i];
-			Map<String, Pair<Integer, Sense>> annotations = readAnnotationFile(filename);
-			annoList.add(annotations);
-		}
-		System.out.println("There are " + fullAnnotations.size() + " annotations in the full list");
-		
-		for (int i = 0; i < annoList.size(); i++) {
-			System.out.println("There are " + annoList.get(i).size() + " annotations in list " + i);
-		}
-		
-		int contained = 0;
-		for (Map<String, Pair<Integer, Sense>> l : annoList) {
-			Set<String> toBeRemoved = new HashSet<String>();
-			for (String key : l.keySet()) {
-				if (fullAnnotations.containsKey(key)) {
-					contained++;
-				} else {
-					toBeRemoved.add(key);
-				}
-			}
-			System.out.println("Number contained in both: " + contained);
-			System.out.println("Removing extras....(" + toBeRemoved.size() + ")");
-			for (String removeKey : toBeRemoved) {
-				l.remove(removeKey);
-			}
-			contained = 0;
-		}
-		
-		System.out.println("Checking for un-annotated data points....");
-		Set<String> toBeRemoved = new HashSet<String>();
-		for (int i = 0; i < annoList.size(); i++) {
-			Map<String, Pair<Integer, Sense>> l = annoList.get(i);
-			for (String key : l.keySet()) {
-				Sense s = l.get(key).getSecond();
-				if (s.getLabel() == -2) {
-					System.out.println("List " + i + " has unlabeled data point: " + s.getKey());
-					toBeRemoved.add(key);
-				}
-			}
-		}
-		
-		for (int i = 0; i < annoList.size(); i++) {
-			Map<String, Pair<Integer, Sense>> l = annoList.get(i);
-			for (String removeKey : toBeRemoved) {
-				l.remove(removeKey);
-			}
-			System.out.println("List " + i + " is has " + l.size() + " annotations now");
-		}
-		
-		// All of the key sets will be the same and what we want
-		Set<String> keys = annoList.get(0).keySet(); 
-		
-		// Make a third list that is from my annotations
-		Map<String, Pair<Integer, Sense>> fromFull = new HashMap<String, Pair<Integer, Sense>>();
-		for (String key : keys) {
-			fromFull.put(key, fullAnnotations.get(key));
-		}
-		System.out.println("From full list has " + fromFull.size() + " annotations");
-		annoList.add(fromFull);
-		
-		System.out.println();
-		System.out.println();
-
-		int literals = 0;
-		int idioms = 0;
-		int unknowns = 0;
-		for (Map<String, Pair<Integer, Sense>> l : annoList) {
-			
-			for (String key : l.keySet()) {
-				Sense s = l.get(key).getSecond();
-				if (s.getLabel() == 0) {
-					literals++;
-				} else if (s.getLabel() == 1) {
-					idioms++;
-				} else if (s.getLabel() == -1) {
-					unknowns++;
-				}
-			}
-			System.out.println("Number literals: " + literals);
-			System.out.println("Number idioms: " + idioms);
-			System.out.println("Number unknowns: " + unknowns);
-			System.out.println("Sum: " + (literals + idioms + unknowns));
-			System.out.println();
-			
-			literals = 0;
-			idioms = 0;
-			unknowns = 0;
-		}
-		
-		
-		// num similar -> list of sense keys in this category
-		Map<Integer, List<String>> agreement = new HashMap<Integer, List<String>>();
-		for (String key : keys) {
-			
-			// get all the labels from this data point in each list
-			List<Integer> labels = new ArrayList<Integer>();
-			for (Map<String, Pair<Integer, Sense>> l : annoList) {
-				labels.add(l.get(key).getSecond().getLabel());
-			}
-			
-			int first = labels.get(0);  // should always have at least one entry
-			int numSame = 0;
-			for (int label : labels) {
-				if (first == label) {
-					numSame++;
-				}
-			}
-			
-			if (!agreement.containsKey(numSame)) {
-				agreement.put(numSame, new ArrayList<String>());
-			}
-			agreement.get(numSame).add(key);
-		}
-		
-		int sum = 0;
-		for (int num : agreement.keySet()) {
-			System.out.println("Number with " + num + " agreeing: " + agreement.get(num).size());
-			sum += agreement.get(num).size();
-		}
-		System.out.println("Sum: " + sum);
-		System.out.println();
-		
-		/*System.out.println("Cases with disagreement:");
-		
-		for (int num : agreement.keySet()) {
-			if (num == annoList.size()) {
-				continue;
-			}
-			
-			for (String key : agreement.get(num)) {
-				
-				for (int i = 0; i < annoList.size(); i++) {
-					Map<String, Pair<Integer, Sense>> l = annoList.get(i);
-					System.out.println("List " + i + ":");
-					System.out.println("Original label: " + l.get(key).getFirst());
-					System.out.println(((Sense) l.get(key).getSecond()).getAnnotationString());
-					System.out.println();
-				}
-			}
-		}*/
-		
-	}
-	
-	public static Map<String, Pair<Integer, Sense>> readAnnotationFile(String filename) {
-		// sense key -> <original, sense>
-		Map<String, Pair<Integer, Sense>> annotations = new HashMap<String, Pair<Integer, Sense>>();
-		Scanner scan = null;
-		try {
-			scan = new Scanner(new File(filename));
-		} catch (FileNotFoundException e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		} 
-		while (scan.hasNextLine()) {
-			String line = scan.nextLine().trim();
-			
-			// go and get all the lines associated with this
-			if (line.startsWith("sense key:")) {
-				String senseKey = line.split("key:")[1].trim();
-				String title = scan.nextLine().split("title:")[1].trim();
-				String gloss = scan.nextLine().split("gloss:")[1].trim();
-				String uncleaned = scan.nextLine().split("gloss:")[1].trim();
-				int label = Integer.parseInt(scan.nextLine().split(":")[1].trim());
-				
-				String correctedLine = scan.nextLine().split(":")[1].trim();
-				int corrected = -2;
-				if (correctedLine.length() != 0) {
-					corrected = Integer.parseInt(correctedLine);
-				}
-					String comments = scan.nextLine().split("comments:")[1].trim();
-					Sense s = new Sense(senseKey, title, gloss, uncleaned, corrected);
-					annotations.put(senseKey, new Pair<Integer, Sense>(label, s));
-					
-					if (comments.length() > 0) {
-						s.comments = comments;
-					}
-				
-			}
-		}
-		return annotations;
-	}
-
-}

WiktionaryFileIO/src/fileio/EditableFile.java

-package fileio;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import utilities.MySQLConnection;
-
-public class EditableFile implements ProducesFile {
-
-	@Override
-	public String produce(ProduceFileSettings settings) {
-		List<String> whatToSelect = new ArrayList<String>();
-		whatToSelect.add("sense_key");
-		whatToSelect.add("title");
-		whatToSelect.add("gloss");
-		whatToSelect.add("uncleaned_gloss");
-		whatToSelect.add("label");
-		whatToSelect.add("corrected_label");
-		
-		MySQLConnection moby = MySQLConnection.getInstance("localhost", settings.database, "root", "");
-
-		String strResult = "";
-		// 3. fill with data
-		int current = settings.min;
-					
-		// Go and get the data.
-		String whereClause = settings.whereClause + " LIMIT " + current + 
-				", " + ProduceFile.STEP_SIZE;
-
-		List<String[]> results = moby.selectQuery(whatToSelect, settings.table, whereClause);
-						
-		for (String[] result: results) {
-			String senseKey = result[whatToSelect.indexOf("sense_key")];
-
-			String title = result[whatToSelect.indexOf("title")];
-			int label = Integer.parseInt(result[whatToSelect.indexOf("label")]);
-			String gloss = result[whatToSelect.indexOf("gloss")];
-			String uncleaned = result[whatToSelect.indexOf("uncleaned_gloss")];
-			int corrected = Integer.parseInt(result[whatToSelect.indexOf("corrected_label")]);
-
-				
-			strResult += "sense key: " + senseKey + "\n";
-			strResult += "title: " + title + "\n";
-			strResult += "gloss: " + gloss + "\n";
-			strResult += "uncleaned gloss: " + uncleaned + "\n";
-			strResult += "label: " + label + "\n";
-			strResult += "corrected label: " + corrected + "\n" ;
-			strResult += "comments: \n";
-			strResult += "\n";
-			strResult += "\n";
-
-		}
-					
-		
-		System.out.println(results.size());
-		
-		return strResult;
-	}
-}

WiktionaryFileIO/src/fileio/PlainExamplesFile.java

-package fileio;
-
-import java.io.BufferedWriter;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import classifier.model.Sense;
-import detector.model.Example;
-import features.numeric.Feature;
-
-import utilities.MySQLConnection;
-
-public class PlainExamplesFile implements ProducesFile {
-	public static final int STEP_SIZE = 100000;
-
-	@Override
-	public String produce(ProduceFileSettings settings) {
-		List<String> whatToSelect = new ArrayList<String>();
-		whatToSelect.add("sense_key");
-		whatToSelect.add("title");
-		whatToSelect.add("text");
-		whatToSelect.add(settings.labelType);
-		whatToSelect.add("data_set_classifier");
-		
-		MySQLConnection moby = MySQLConnection.getInstance("localhost", settings.database, "root", "");
-		
-		// 3. fill with data
-		List<Example> points = new ArrayList<Example>();
-		
-		// 3. fill with data
-		int current = settings.min;
-					
-		// Go and get the data.
-		String whereClause = settings.whereClause + " LIMIT " + current + 
-					", " + ProduceFile.STEP_SIZE;
-
-		List<String[]> results = moby.selectQuery(whatToSelect, settings.table, whereClause);
-						
-		for (String[] result: results) {
-			String senseKey = result[whatToSelect.indexOf("sense_key")];
-
-			String title = result[whatToSelect.indexOf("title")];
-			int label = Integer.parseInt(result[whatToSelect.indexOf(settings.labelType)]);
-			String text = result[whatToSelect.indexOf("text")];
-
-			Example e = new Example(label, title, text, senseKey);
-				
-			points.add(e);
-		}
-					
-
-		System.out.println("Done gathering.");
-		System.out.println(results.size());
-		
-		// 4. Output points to String
-		String strResult = "";
-		if (points.size() > 0) {
-			System.out.println("Appending to string...");
-			for (Example dp : points) {
-				strResult += dp.getDetectionString();
-				strResult += "\n";
-			}
-		}
-		System.out.println("Done appending.");
-		
-		return strResult;
-	}
-}

WiktionaryFileIO/src/fileio/PlainSensesFile.java

-package fileio;
-
-import java.util.ArrayList;
-
-import java.util.List;
-
-import classifier.model.Sense;
-import features.numeric.Feature;
-
-import utilities.MySQLConnection;
-
-public class PlainSensesFile implements ProducesFile {
-
-	@Override
-	public String produce(ProduceFileSettings settings) {
-		List<String> whatToSelect = new ArrayList<String>();
-		whatToSelect.add("sense_key");
-		whatToSelect.add("title");
-		whatToSelect.add("gloss");
-		whatToSelect.add("uncleaned_gloss");
-		whatToSelect.add(settings.labelType);
-		whatToSelect.add("data_set");
-		
-		for (int key : Feature.FEATURE_INDEXES.keySet()) {
-			if (settings.features.contains(key)) {
-				whatToSelect.add(Feature.FEATURE_INDEXES.get(key).getDBName());
-			}
-		}
-		
-		
-		MySQLConnection moby = MySQLConnection.getInstance("localhost", settings.database, "root", "");
-		
-		// 3. fill with data
-		List<Sense> points = new ArrayList<Sense>();
-		
-		// 3. fill with data
-		int current = settings.min;
-					
-		// Go and get the data.
-		String whereClause = settings.whereClause + " LIMIT " + current + 
-					", " + ProduceFile.STEP_SIZE;
-
-		List<String[]> results = moby.selectQuery(whatToSelect, settings.table, whereClause);
-						
-		for (String[] result: results) {
-			String senseKey = result[whatToSelect.indexOf("sense_key")];
-
-			String title = result[whatToSelect.indexOf("title")];
-			int label = Integer.parseInt(result[whatToSelect.indexOf(settings.labelType)]);
-			String gloss = result[whatToSelect.indexOf("gloss")];
-			String uncleaned = result[whatToSelect.indexOf("uncleaned_gloss")];
-				
-			Sense dp = new Sense(senseKey, title, gloss, uncleaned, label);
-			for (int key : Feature.FEATURE_INDEXES.keySet()) {
-				if (settings.features.contains(key)) {
-					dp.addFeature(key, Double.parseDouble(
-						result[whatToSelect.indexOf(Feature.FEATURE_INDEXES.get(key).getDBName())]));
-				}
-			}
-			points.add(dp);
-		}
-					
-			
-		System.out.println("Done gathering.");
-		System.out.println(points.size());
-		
-		// 4. Output points to String
-		String strResult = "";
-		if (points.size() > 0) {
-			System.out.println("Appending to string...");
-			for (Sense dp : points) {
-				strResult += dp.getDataString();
-				strResult += "\n";
-			}
-		}
-		System.out.println("Done appending.");
-		
-		return strResult;
-	}
-}

WiktionaryFileIO/src/fileio/ProduceFile.java

-/**
- * Provides an interface to the database to produce text files from it.
- * Parameters are as follows:
- * -outfile: the path to the output file
- * -kind: what kind of file you would like to produce, 
- *	"edit", "nytimes", "random", "plainsense", "plainexample"
- * -db: the mysql database to connect to (assumes hosted at "localhost", user "root" with "" password)
- * -table: the mysql table to draw from
- * -num: The raw number of things that you would like output, used in "edit", "nytimes", and "random"
- * -numexamples: The max number of examples associated with each title you would like, used in "nytimes"
- * -glosstable: the table where the glosses are kept, used in "nytimes"
- * -labeltype: the column heading for the label you would like output, used in "plainsense"
- * -features: the features that you would like output, used in "plainsense", formatted as "#, #, #",
- * 	where all numbers are associated with a feature in the features.numeric package
- */
-
-
-package fileio;
-
-import java.io.BufferedWriter;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Map;
-
-import features.numeric.Feature;
-
-import utilities.CommandLineUtils;
-
-
-public class ProduceFile {
-	public static final String OUTFILE = "-outfile";
-	public static final String KIND = "-kind";
-	public static final String WHERE = "-where";
-	public static final String DB = "-db";
-	public static final String TABLE = "-table";
-	public static final String NUM = "-num";
-	public static final String NUM_EXAMPLES = "-numexamples";
-	public static final String GLOSS_TABLE = "-glosstable";
-	public static final String LABEL_TYPE = "-labeltype";
-	public static final String FEATURES = "-features";
-	
-	public static final int STEP_SIZE = 1000;
-
-	
-	public static void main(String[] args) {
-		
-		Map<String, String> argMap = CommandLineUtils.simpleCommandLineParser(args);
-
-		String outfile = argMap.get(OUTFILE);
-		
-		ProduceFileSettings settings = new ProduceFileSettings();
-		if (argMap.containsKey(WHERE)) {
-			settings.whereClause = argMap.get(WHERE);
-		}
-		settings.database = argMap.get(DB);
-		settings.table = argMap.get(TABLE);
-		
-		if (argMap.containsKey(GLOSS_TABLE)) {
-			settings.glossTable = argMap.get(GLOSS_TABLE);
-		}
-		if (argMap.containsKey(NUM)) {
-			settings.num = Integer.parseInt(argMap.get(NUM));
-		}
-		if (argMap.containsKey(NUM_EXAMPLES)) {
-			settings.numExamples = Integer.parseInt(argMap.get(NUM_EXAMPLES));
-		}
-		
-		if (argMap.containsKey(LABEL_TYPE)) {
-			settings.labelType = argMap.get(LABEL_TYPE);
-		}
-		
-		settings.features = new HashSet<Integer>();
-		if (argMap.containsKey(FEATURES)) {
-			String list = argMap.get(FEATURES);
-			String[] nums = list.split(",");
-			for (String num : nums) {
-				settings.features.add(Integer.parseInt(num.trim()));
-			}
-		} else {
-			for (int i : Feature.FEATURE_INDEXES.keySet()) {
-				settings.features.add(i);
-			}
-		}
-		System.out.println("The produced file will have the following features: " + settings.features);
-		
-		System.out.print("Writing to file...");
-		try {
-			BufferedWriter bw = new BufferedWriter(
-					new FileWriter(outfile));
-			
-			settings.min = 0;
-			String toWrite = ProducesFile.INDEXES.get(argMap.get(KIND)).produce(settings);
-			
-			while (toWrite.length() != 0) {
-				bw.write(toWrite);
-				
-				// if we are doing random, we don't want this
-				if (argMap.get(KIND).equals("random")) {
-					break;
-				}
-				settings.min += STEP_SIZE;
-				toWrite = ProducesFile.INDEXES.get(argMap.get(KIND)).produce(settings);
-				System.out.println("Fetched next batch: " + settings.min);
-				System.out.println("Current string was: " + toWrite.length());
-			}
-			
-			bw.close();
-		} catch (IOException e) {
-			System.out.println("Error with file: " + outfile);
-		}
-		System.out.println("done");
-	
-	}
-}

WiktionaryFileIO/src/fileio/ProduceFileSettings.java

-package fileio;
-
-import java.util.Set;
-
-public class ProduceFileSettings {
-	public String whereClause;
-	public String database;
-	public String table;
-	public String glossTable;
-	public int num;
-	public int numExamples;
-	public String labelType;
-	public Set<Integer> features;
-	public int min;
-}

WiktionaryFileIO/src/fileio/ProducesFile.java

-package fileio;
-
-import java.util.TreeMap;
-
-public interface ProducesFile {
-	public static final TreeMap<String , ProducesFile> INDEXES = new TreeMap<String , ProducesFile>() {/**
-		 * 
-		 */
-		private static final long serialVersionUID = 1L;
-
-	{
-	    put("edit", new EditableFile());
-	    put("random", new RandomSampleFile());
-	    put("plainsense", new PlainSensesFile());
-	    put("plainexample", new PlainExamplesFile());
-
-
-	}};
-	
-	public String produce(ProduceFileSettings settings);
-
-}

WiktionaryFileIO/src/fileio/RandomSampleFile.java

-/**
- * WARNING: this file has not been updated since the last changes were made to ProduceFile
- */
-
-
-package fileio;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import utilities.MySQLConnection;
-
-public class RandomSampleFile implements ProducesFile {
-	public static final int STEP_SIZE = 100000;
-
-	@Override
-	public String produce(ProduceFileSettings settings) {
-		List<String> whatToSelect = new ArrayList<String>();
-		whatToSelect.add("sense_key");
-		whatToSelect.add("title");
-		whatToSelect.add("gloss");
-		whatToSelect.add("uncleaned_gloss");
-		whatToSelect.add("label");
-		whatToSelect.add("data_set");
-		
-		MySQLConnection moby = MySQLConnection.getInstance("localhost", settings.database, "root", "");
-		
-		int number = settings.num;
-
-		String strResult = "";
-		// 3. fill with data
-		int current = 0;
-		while (true) {
-					
-			// Go and get the data.
-			String whereClause = settings.whereClause + " LIMIT " + current + 
-					", " + STEP_SIZE;
-			current += STEP_SIZE;
-
-			List<String[]> results = moby.selectQuery(whatToSelect, settings.table, whereClause);
-			
-			// Select random sense keys that we want from dev and test
-			List<String> devKeys = new ArrayList<String>();
-			List<String> testKeys = new ArrayList<String>();
-			
-			for (String[] result : results) {
-				String dataSet = result[whatToSelect.indexOf("data_set")];
-				String senseKey = result[whatToSelect.indexOf("sense_key")];
-				if (dataSet.equals("dev")) {
-					devKeys.add(senseKey);
-				} else if (dataSet.equals("test")) {
-					testKeys.add(senseKey);
-				}
-			}
-			
-			int numDev = number;//(int) Math.ceil((1.0 / 3.0) * (number));
-			int numTest = 0; //(int) Math.floor((2.0 / 3.0) * (number));
-
-			Collections.shuffle(devKeys);
-			Collections.shuffle(testKeys);
-			
-			System.out.println("Picked " + numDev + " results from dev");
-			System.out.println("Picked " + numTest + " results from test");
-
-			
-			Set<String> keys = new HashSet<String>();
-			for (int i = 0; i < numDev; i++) {
-				keys.add(devKeys.get(i));
-			}
-			for (int i = 0; i < numTest; i++) {
-				keys.add(testKeys.get(i));
-			}
-						
-			for (String[] result: results) {
-				String senseKey = result[whatToSelect.indexOf("sense_key")];
-
-				String title = result[whatToSelect.indexOf("title")];
-				int label = Integer.parseInt(result[whatToSelect.indexOf("label")]);
-				String gloss = result[whatToSelect.indexOf("gloss")];
-				String uncleaned = result[whatToSelect.indexOf("uncleaned_gloss")];
-				
-				if (keys.contains(senseKey)) {
-					strResult += "sense key: " + senseKey + "\n";
-					strResult += "title: " + title + "\n";
-					strResult += "gloss: " + gloss + "\n";
-					strResult += "uncleaned gloss: " + uncleaned + "\n";
-					strResult += "label: " + label + "\n";
-					strResult += "corrected label: \n" ;
-					strResult += "comments: \n";
-					strResult += "\n";
-					strResult += "\n";
-				}
-
-			}
-					
-			if (results.size() < STEP_SIZE) {
-				System.out.println(results.size());
-				break;  // We're done.
-			}
-		}
-		return strResult;
-	}
-}

WiktionaryFileIO/src/fileio/ReadCorrectedLabels.java

-package fileio;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Scanner;
-
-import utilities.MySQLConnection;
-
-
-public class ReadCorrectedLabels {
-	
-	public static void main(String[] args) {
-		
-		String database = args[1];
-		String table = args[2];
-		
-		MySQLConnection moby = MySQLConnection.getInstance("localhost", database, "root", "");
-
-		File f = new File(args[0]);
-		
-		try {
-			Scanner scan = new Scanner(f);
-			Map<String, String> commentUps = new HashMap<String, String>();
-			Map<String, Integer> labelUps = new HashMap<String, Integer>();
-
-			
-			while (scan.hasNextLine()) {
-				String line = scan.nextLine().trim();
-				
-				// go and get all the lines associated with this
-				if (line.startsWith("sense key:")) {
-					String senseKey = line.split("key:")[1].trim();
-					String title = scan.nextLine().split("title:")[1].trim();
-					String gloss = scan.nextLine().split("gloss:")[1].trim();
-					String uncleaned = scan.nextLine().split("gloss:")[1].trim();
-					int label = Integer.parseInt(scan.nextLine().split(":")[1].trim());
-					
-					String correctedLine = scan.nextLine().split(":")[1].trim();
-					if (correctedLine.length() != 0) {
-						int corrected = Integer.parseInt(correctedLine);
-						String comments = scan.nextLine().split("comments:")[1].trim();
-						commentUps.put(senseKey, comments);
-						labelUps.put(senseKey, corrected);
-					}
-				}
-			}
-			
-			System.out.print("Updating...");
-			moby.updateCaseColumnString(table, "comments", "sense_key", commentUps);
-			moby.updateCaseColumnInteger(table, "corrected_label", "sense_key", labelUps);
-			System.out.println("done.");
-
-
-		} catch (IOException e) {
-			System.out.println("Error with file: " + f);
-		}
-	}
-
-}

WiktionaryIdiomClassification/.classpath

-<?xml version="1.0" encoding="UTF-8"?>
-<classpath>
-	<classpathentry kind="src" path="src"/>
-	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
-	<classpathentry kind="lib" path="lib/de.tudarmstadt.ukp.wiktionary-0.16.1.jar"/>
-	<classpathentry kind="lib" path="lib/stanford-corenlp-1.3.5.jar"/>
-	<classpathentry kind="lib" path="lib/ant-1.7.1.jar"/>
-	<classpathentry kind="lib" path="lib/ant-launcher-1.7.1.jar"/>
-	<classpathentry kind="lib" path="lib/wikokit-20120611.jar"/>
-	<classpathentry kind="lib" path="lib/xercesImpl-2.9.1-lucene.jar"/>
-	<classpathentry kind="lib" path="lib/commons-configuration-1.9.jar"/>
-	<classpathentry kind="lib" path="lib/commons-lang-2.6.jar"/>
-	<classpathentry kind="lib" path="lib/commons-lang3-3.1.jar"/>
-	<classpathentry kind="lib" path="lib/gson-2.2.2.jar"/>
-	<classpathentry kind="lib" path="lib/jaws-bin.jar"/>
-	<classpathentry kind="lib" path="lib/timestools.jar"/>
-	<classpathentry kind="lib" path="lib/de.tudarmstadt.ukp.wikipedia.datamachine-0.9.2-jar-with-dependencies.jar"/>
-	<classpathentry kind="lib" path="lib/de.tudarmstadt.ukp.wikipedia.api-0.9.2.jar"/>
-	<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
-	<classpathentry kind="output" path="bin"/>
-</classpath>

WiktionaryIdiomClassification/.settings/org.eclipse.jdt.core.prefs

-eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
-org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
-org.eclipse.jdt.core.compiler.compliance=1.6
-org.eclipse.jdt.core.compiler.debug.lineNumber=generate
-org.eclipse.jdt.core.compiler.debug.localVariable=generate
-org.eclipse.jdt.core.compiler.debug.sourceFile=generate
-org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
-org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
-org.eclipse.jdt.core.compiler.source=1.6

WiktionaryIdiomClassification/bin/.gitignore

-/utilities
-/config
-/features
-/experiments
+/bin

WiktionaryIdiomClassification/bin/classifier/model/ClassifierData.class

Binary file removed.

WiktionaryIdiomClassification/bin/classifier/model/ClassifierEvaluationUtils.class

Binary file removed.

WiktionaryIdiomClassification/bin/classifier/model/ClassifierModel.class

Binary file removed.

WiktionaryIdiomClassification/bin/classifier/model/ClassifierSettings.class

Binary file removed.

WiktionaryIdiomClassification/bin/classifier/model/DataPoint.class

Binary file removed.

WiktionaryIdiomClassification/bin/classifier/model/ProportionalBaseline.class

Binary file removed.

WiktionaryIdiomClassification/bin/classifier/model/Result.class

Binary file removed.

WiktionaryIdiomClassification/bin/classifier/model/Sense.class

Binary file removed.

WiktionaryIdiomClassification/bin/classifiers/ClassificationResult.class

Binary file removed.

WiktionaryIdiomClassification/bin/classifiers/Classifier.class

Binary file removed.

WiktionaryIdiomClassification/bin/classifiers/Perceptron.class

Binary file removed.

WiktionaryIdiomClassification/bin/classifiers/UntrustingPerceptron.class

Binary file removed.

WiktionaryIdiomClassification/bin/config/ClassifierConfigs.class

Binary file removed.

WiktionaryIdiomClassification/bin/config/GeneralConfigs.class

Binary file removed.

WiktionaryIdiomClassification/bin/dependency/DependencyParser.class

Binary file removed.

WiktionaryIdiomClassification/bin/dependency/DependencyTest.class

Binary file removed.

WiktionaryIdiomClassification/bin/dependency/GovernerVerbPOSExtractor.class

Binary file removed.

WiktionaryIdiomClassification/bin/dependency/Pair.class

Binary file removed.

WiktionaryIdiomClassification/bin/dependency/ParseDemo.class

Binary file removed.

WiktionaryIdiomClassification/bin/dependency/ParseLine.class

Binary file removed.

WiktionaryIdiomClassification/bin/distances/Distance$1.class

Binary file removed.

WiktionaryIdiomClassification/bin/distances/Distance.class

Binary file removed.

WiktionaryIdiomClassification/bin/distances/WiktionaryOneLevelAntonym.class

Binary file removed.

WiktionaryIdiomClassification/bin/distances/WiktionarySynonym.class

Binary file removed.

WiktionaryIdiomClassification/bin/distances/WordNetHypernym.class

Binary file removed.

WiktionaryIdiomClassification/bin/distances/WordNetHyponym.class

Binary file removed.

WiktionaryIdiomClassification/bin/distances/WordNetOneLevelAntonym.class

Binary file removed.

WiktionaryIdiomClassification/bin/distances/WordNetSynonym.class

Binary file removed.

WiktionaryIdiomClassification/bin/experiments/BasicApply.class

Binary file removed.

WiktionaryIdiomClassification/bin/experiments/Bootstrapping.class

Binary file removed.

WiktionaryIdiomClassification/bin/experiments/CompareFeatures.class

Binary file removed.

WiktionaryIdiomClassification/bin/experiments/CompareGroups.class

Binary file removed.

WiktionaryIdiomClassification/bin/experiments/Experiment.class

Binary file removed.

WiktionaryIdiomClassification/bin/experiments/ExperimentResult.class

Binary file removed.

WiktionaryIdiomClassification/bin/experiments/GridSearch.class

Binary file removed.

WiktionaryIdiomClassification/bin/experiments/LabelData.class

Binary file removed.

WiktionaryIdiomClassification/bin/experiments/RunClassifierExperimentFromFiles.class

Binary file removed.

WiktionaryIdiomClassification/bin/experiments/RunExperiment.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/AntonymOverlap.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/AverageCapitals.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/AverageHypernymWordNet.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/AverageHyponymWordNet.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/AverageSynonymWordNet.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/ClosestAntonymWiktionary.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/ClosestAntonymWordNet.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/ClosestHypernymWordNet.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/ClosestHyponymWordNet.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/ClosestSynonymWiktionary.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/ClosestSynonymWordNet.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/DifferentSynsetsConnectedByAntonymyWordNet.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/Feature$1.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/Feature.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/LeskSynonymDistance.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/SenseFeatureUtils.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/SimplifiedDefinitionOverlap.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/SynonymOverlap.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/SynonymPhraseDefinitionDistance.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/numeric/dependency/GovernsHeadTFIDF.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/strings/EntryPartOfSpeech.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/strings/Gloss.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/strings/Parse.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/strings/SenseKey.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/strings/Specific.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/strings/StringFeature$1.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/strings/StringFeature.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/strings/Title.class

Binary file removed.

WiktionaryIdiomClassification/bin/features/strings/UncleanedGloss.class

Binary file removed.

WiktionaryIdiomClassification/bin/stats/AnalyzeResults.class

Binary file removed.

WiktionaryIdiomClassification/bin/stats/BasicStats.class

Binary file removed.

WiktionaryIdiomClassification/bin/utilities/ClassifierDataUtils.class

Binary file removed.

WiktionaryIdiomClassification/bin/utilities/CommandLineUtils.class

Binary file removed.

WiktionaryIdiomClassification/bin/utilities/DataUtils.class

Binary file removed.

WiktionaryIdiomClassification/bin/utilities/MySQLConnection.class

Binary file removed.

WiktionaryIdiomClassification/bin/utilities/ParseUtils.class

Binary file removed.

WiktionaryIdiomClassification/bin/utilities/SelectRandomSamplesWithGloss.class

Binary file removed.

WiktionaryIdiomClassification/bin/utilities/StemUtils.class

Binary file removed.

WiktionaryIdiomClassification/bin/utilities/Stemmer.class

Binary file removed.

WiktionaryIdiomClassification/bin/utilities/StopWords.class

Binary file removed.

WiktionaryIdiomClassification/bin/utilities/WikipediaUtils.class

Binary file removed.

WiktionaryIdiomClassification/bin/utilities/WiktionaryUtils.class

Binary file removed.

WiktionaryIdiomClassification/bin/utilities/WordNetUtils.class

Binary file removed.

WiktionaryIdiomClassification/config/classifierconfig.xml

-<?xml version="1.0" encoding="UTF-8"?>
-<!-- config.xml -->
-<config>
-	<MySQL>
-		<classify>
-			<database>wiktionary_data_sets</database>
-			<table>sense_data_scaled</table>
-			<column>data_set</column>
-			<testData>dev</testData>
-		</classify>
-	</MySQL>
-
-	<default>
-		<trainPath></trainPath>
-		<testPath></testPath>
-	    <equalDist>false</equalDist>
-	    <percents></percents>
-		<verbose>true</verbose>
-		<features>1, 2, 3, 4, 6, 7, 8, 10, 11</features>
-		<errorBound>.000001</errorBound>
-		<specificsPath>./specifics/specifics.txt</specificsPath>
-		<classifierType>perceptron</classifierType>
-		<precisionRecallGranularity>.01</precisionRecallGranularity>
-		<cleanTest>false</cleanTest>
-		<testCorrectedLabel>true</testCorrectedLabel>
-		<output>
-			<file>./paperOutput/devCorrectedUncleanedGraphCompare</file>
-			<generalInfo>true</generalInfo>
-			<confidenceList>true</confidenceList>
-			<precisionRecallPoints>true</precisionRecallPoints>
-			<randomErrorAnalysis>100</randomErrorAnalysis>
-		</output>
-		<outputModel>
-			<file>./paperOutputModels/</file>
-		</outputModel>
-	</default>
-	
-    
-    <BasicApply>
-    	<learningRate></learningRate>
-		<iterations></iterations>
-		<modelFile>./paperOutputModels/devUncorrectedUncleanedAll.model</modelFile>
-		
-		<shouldOutputGeneral>true</shouldOutputGeneral>
-		<shouldOutputModel>false</shouldOutputModel>
-    </BasicApply>
-    
-     <Bootstrapping>
-     	<iterations>3</iterations>
-		<cutoffMin>.1</cutoffMin>
-		<cutoffMax>.9</cutoffMax>
-		<cutoffIncrement>.1</cutoffIncrement>
-		
-		<modelFile>./outputModels/devCorrectedCleanedGroups.model</modelFile>
-		
-		<shouldOutputGeneral>true</shouldOutputGeneral>
-		<shouldOutputModel>true</shouldOutputModel>
-    </Bootstrapping>
-    
-    <GridSearch>
-    	<group>groups</group>
-    	<iterationMax>100</iterationMax>
-    	<learningDelta>1</learningDelta>
-    	
-    	<shouldOutputGeneral>true</shouldOutputGeneral>
-    	<shouldOutputModel>true</shouldOutputModel>
-    </GridSearch>
-    
-    <CompareFeatures>
-    	<features>6,8,9,10,14</features>
-
-		<buildUp>true</buildUp>
-		<buildByBest>false</buildByBest>
-    	<iterationMax>100</iterationMax>
-    	<learningDelta>1</learningDelta>
-    	
-    	<shouldOutputGeneral>true</shouldOutputGeneral>
-    	<shouldOutputModel>false</shouldOutputModel>
-    </CompareFeatures>
-    
-     <CompareGroups>
-
-    	<iterationMax>100</iterationMax>
-    	<learningDelta>1</learningDelta>
-    	
-    	<shouldOutputGeneral>false</shouldOutputGeneral>
-    	<shouldOutputModel>false</shouldOutputModel>
-    </CompareGroups>
-    
-    <LabelData>
-    		<modelFile>./paperOutputModels/devCorrectedUncleanedAll.model</modelFile>
-    		<precisionBoundary>.529</precisionBoundary>
-    </LabelData>
-</config>

WiktionaryIdiomClassification/config/dbconfig.xml

-<?xml version="1.0" encoding="UTF-8"?>
-<!-- config.xml -->
-<config>
-	<detector>
-		<lookupDb>wiktionary_data_sets</lookupDb>
-		<dbTable>sense_data_scaled</dbTable>
-	</detector>
-	
-	<SenseFeature>
-		<lookupDb>wiktionary_data_sets</lookupDb>
-	</SenseFeature>
-	
-</config>

WiktionaryIdiomClassification/config/nodbconfig.xml

-<?xml version="1.0" encoding="UTF-8"?>
-<!-- config.xml -->
-<config>
-
-	<default>
-		<trainPath>./data/train.txt</trainPath>
-		<testPath>./data/dev_annotated.txt</testPath>
-	    <equalDist>false</equalDist>
-	    <percents></percents>
-		<verbose>true</verbose>
-		<features>1, 2, 3, 4, 6, 7, 8, 10, 11</features>
-		<errorBound>.000001</errorBound>
-		<specificsPath>./specifics/specifics.txt</specificsPath>
-		<classifierType>perceptron</classifierType>
-		<precisionRecallGranularity>.01</precisionRecallGranularity>
-		<cleanTest>false</cleanTest>
-		<output>
-			<file>./test/devCorrectedUncleanedGraphCompareFromFiles</file>
-			<generalInfo>true</generalInfo>
-			<confidenceList>true</confidenceList>
-			<precisionRecallPoints>true</precisionRecallPoints>
-			<randomErrorAnalysis>100</randomErrorAnalysis>
-		</output>
-		<outputModel>
-			<file>./paperOutputModels/</file>
-		</outputModel>
-	</default>
-	
-    
-    <BasicApply>
-    	<learningRate></learningRate>
-		<iterations></iterations>
-		<modelFile>./paperOutputModels/devUncorrectedUncleanedAll.model</modelFile>
-		
-		<shouldOutputGeneral>true</shouldOutputGeneral>
-		<shouldOutputModel>false</shouldOutputModel>
-    </BasicApply>
-    
-     <Bootstrapping>
-     	<iterations>3</iterations>
-		<cutoffMin>.1</cutoffMin>
-		<cutoffMax>.9</cutoffMax>
-		<cutoffIncrement>.1</cutoffIncrement>
-		
-		<modelFile>./outputModels/devCorrectedCleanedGroups.model</modelFile>
-		
-		<shouldOutputGeneral>true</shouldOutputGeneral>
-		<shouldOutputModel>true</shouldOutputModel>
-    </Bootstrapping>
-    
-    <GridSearch>
-    	<group>groups</group>
-    	<iterationMax>100</iterationMax>
-    	<learningDelta>1</learningDelta>
-    	
-    	<shouldOutputGeneral>true</shouldOutputGeneral>
-    	<shouldOutputModel>true</shouldOutputModel>
-    </GridSearch>
-    
-    <CompareFeatures>
-    	<features>6,8,9,10,14</features>
-
-		<buildUp>true</buildUp>
-		<buildByBest>false</buildByBest>
-    	<iterationMax>100</iterationMax>
-    	<learningDelta>1</learningDelta>
-    	
-    	<shouldOutputGeneral>true</shouldOutputGeneral>
-    	<shouldOutputModel>false</shouldOutputModel>
-    </CompareFeatures>
-    
-     <CompareGroups>
-
-    	<iterationMax>100</iterationMax>
-    	<learningDelta>1</learningDelta>
-    	
-    	<shouldOutputGeneral>false</shouldOutputGeneral>
-    	<shouldOutputModel>false</shouldOutputModel>
-    </CompareGroups>
-    
-    <LabelData>
-    	<modelFile>./paperOutputModels/devCorrectedUncleanedAll.model</modelFile>
-    	<precisionBoundary>.529</precisionBoundary>
-    </LabelData>
-</config>

WiktionaryIdiomClassification/lib/ant-1.7.1.jar

Binary file removed.

WiktionaryIdiomClassification/lib/ant-launcher-1.7.1.jar

Binary file removed.

WiktionaryIdiomClassification/lib/commons-configuration-1.9.jar

Binary file removed.

WiktionaryIdiomClassification/lib/commons-lang-2.6.jar

Binary file removed.

WiktionaryIdiomClassification/lib/commons-lang3-3.1.jar

Binary file removed.

WiktionaryIdiomClassification/lib/de.tudarmstadt.ukp.wikipedia.api-0.9.2.jar

Binary file removed.

WiktionaryIdiomClassification/lib/de.tudarmstadt.ukp.wikipedia.datamachine-0.9.2-jar-with-dependencies.jar

Binary file removed.

WiktionaryIdiomClassification/lib/de.tudarmstadt.ukp.wiktionary-0.16.1.jar

Binary file removed.

WiktionaryIdiomClassification/lib/gson-2.2.2.jar

Binary file removed.

WiktionaryIdiomClassification/lib/jaws-bin.jar

Binary file removed.

WiktionaryIdiomClassification/lib/stanford-corenlp-1.3.5.jar

Binary file removed.

WiktionaryIdiomClassification/lib/timestools.jar

Binary file removed.

WiktionaryIdiomClassification/lib/wikokit-20120611.jar

Binary file removed.

WiktionaryIdiomClassification/lib/xercesImpl-2.9.1-lucene.jar

Binary file removed.

WiktionaryIdiomClassification/specifics/specifics.txt

-accounting
-advertising
-agriculture
-algebra
-analytical chemistry
-anatomy
-anthropology
-archaeology
-arithmetic
-astronomy
-automotive
-aviation
-architecture
-baseball
-basketball
-biblical
-bingo
-biology
-botany
-boxing
-broadcasting
-business
-cartography
-chemistry
-Christianity
-computer science
-computing
-construction
-cricket
-cryptography
-cycling
-dentistry
-design
-disease
-ecology
-economics
-electronics
-ethics
-enzyme
-epidemiology
-espionage
-fatty acid
-fantasy
-fashion
-fencing
-film
-finance
-firearms
-firefighting
-fish
-football
-game theory
-galaxy
-genetics
-geography
-geology
-geometry
-golf
-grammar
-graphtheory
-gymnastics
-gynaecology
-hematology
-ice hockey
-ichthyology
-information theory
-Internet
-Islam
-Judaism
-juggling
-legal
-linear algebra
-linguistics
-logic
-magic
-management
-marketing
-mathematics
-mechanics
-medicine
-medical
-meteorology
-military
-mineralogy
-muscle
-music
-mycology
-nautical
-networking
-neurology
-optics
-organic compound
-pathology
-pharmaceutical drug
-pharmacology
-philosophy
-phonetics
-phonology
-photography
-physics
-physiology
-poker
-politics
-printing
-programming
-protein
-psychology
-radio
-rhetoric
-rope
-rugby
-schools
-sports
-soccer
-sociology
-softball
-software
-star
-statistics
-stochastic processes
-sumo
-taxation
-taxonomy
-telecommunications
-television
-tennis
-thermodynamics
-topology
-trademark
-transport
-trigonometry
-typesetting
-typography
-user interface
-video games
-weapons
-wrestling
-zoology

WiktionaryIdiomClassification/src/classifier/model/ClassifierData.java