Commits

Slavko Zitnik  committed 9014660

CHEMDNER test: unigram feature functions (ffs) only

  • Participants
  • Parent commits 073f35b

Comments (0)

Files changed (1)

File src/main/java/si/zitnik/research/iobie/algorithms/crf/feature/packages/FeatureFunctionPackages.scala

     //part B - level 1
     if (ffLevel <= 1) {
     featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "UGzG", fileNames = Array("gazeteers/GREEK")))
-    featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzG", fileNames = Array("gazeteers/GREEK")))
+    //featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzG", fileNames = Array("gazeteers/GREEK")))
 
     featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "UGzP", fileNames = Array("gazeteers/PERIODICTABLE")))
-    featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzP", fileNames = Array("gazeteers/PERIODICTABLE")))
+    //featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzP", fileNames = Array("gazeteers/PERIODICTABLE")))
 
     featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "UGzB", fileNames = Array("gazeteers/CHEMICALS/brands.txt")))
-    featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzB", fileNames = Array("gazeteers/CHEMICALS/brands.txt")))
+    //featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzB", fileNames = Array("gazeteers/CHEMICALS/brands.txt")))
 
     featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "UGzI", fileNames = Array("gazeteers/CHEMICALS/iupacNames.txt")))
-    featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzI", fileNames = Array("gazeteers/CHEMICALS/iupacNames.txt")))
+    //featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzI", fileNames = Array("gazeteers/CHEMICALS/iupacNames.txt")))
 
     featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "UGzN", fileNames = Array("gazeteers/CHEMICALS/names.txt")))
-    featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzN", fileNames = Array("gazeteers/CHEMICALS/names.txt")))
+    //featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzN", fileNames = Array("gazeteers/CHEMICALS/names.txt")))
 
     featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "UGzS", fileNames = Array("gazeteers/CHEMICALS/synonyms.txt")))
-    featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzS", fileNames = Array("gazeteers/CHEMICALS/synonyms.txt")))
+    //featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzS", fileNames = Array("gazeteers/CHEMICALS/synonyms.txt")))
 
     featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "UGzC", fileNames = Array("gazeteers/CHEMICALS/CTDBase.txt")))
-    featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzC", fileNames = Array("gazeteers/CHEMICALS/CTDBase.txt")))
+    //featureFunctions.add(new GazeteerFeatureFunction(userPredicate = "BGzC", fileNames = Array("gazeteers/CHEMICALS/CTDBase.txt")))
     }
 
     //part C - level 2
     if (ffLevel <= 2) {
     featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.LEMMA, range = -1 to 1, userPredicate = "UBLF").generate())
-    featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.LEMMA, range = -1 to 1, userPredicate = "BBLF").generate())
+    //featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.LEMMA, range = -1 to 1, userPredicate = "BBLF").generate())
     featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.LEMMA, range = -1 to 1, userPredicate = "UULF").generate())
-    featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.LEMMA, range = -1 to 1, userPredicate = "BULF").generate())
+    //featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.LEMMA, range = -1 to 1, userPredicate = "BULF").generate())
     featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.CHUNK, range = -1 to 1, userPredicate = "UBCF").generate())
-    featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.CHUNK, range = -1 to 1, userPredicate = "BBCF").generate())
+    //featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.CHUNK, range = -1 to 1, userPredicate = "BBCF").generate())
     featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.CHUNK, range = -1 to 1, userPredicate = "UUCF").generate())
-    featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.CHUNK, range = -1 to 1, userPredicate = "BUCF").generate())
+    //featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.CHUNK, range = -1 to 1, userPredicate = "BUCF").generate())
 
-    featureFunctions.addAll(new OffsetFeatureFunctionGenerator(Label.POS, -2 to 2, userPredicate = "BOff").generate())
+    //featureFunctions.addAll(new OffsetFeatureFunctionGenerator(Label.POS, -2 to 2, userPredicate = "BOff").generate())
     featureFunctions.add(new UnigramFeatureFunction(Label.POS, "U=POS"))
-    featureFunctions.add(new UnigramFeatureFunction(Label.POS, "B=POS"))
+    //featureFunctions.add(new UnigramFeatureFunction(Label.POS, "B=POS"))
     featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.POS, userPredicate = "UPOS").generate())
     }
 
     featureFunctions.add(new ContainsDashFeatureFunction(userPredicate = "UD"))
     featureFunctions.add(new ContainsDotFeatureFunction(userPredicate = "UDo"))
     featureFunctions.add(new ContainsCommaFeatureFunction(userPredicate = "UC"))
-    featureFunctions.add(new StartsUpperFeatureFunction(-1, userPredicate = "BSUm1"))
-    featureFunctions.add(new StartsUpperFeatureFunction(userPredicate = "BSUm1"))
-    featureFunctions.add(new ContainsTwoDigitsFeatureFunction(userPredicate = "BTD"))
-    featureFunctions.add(new ContainsTwoCapsFeatureFunction(userPredicate = "BTC"))
-    featureFunctions.add(new ContainsDashFeatureFunction(userPredicate = "BD"))
-    featureFunctions.add(new ContainsDotFeatureFunction(userPredicate = "BDo"))
-    featureFunctions.add(new ContainsCommaFeatureFunction(userPredicate = "BC"))
-
-    featureFunctions.addAll(new CharacterNGramFeatureFunctionGenerator(userPredicate = "BCNG").generate())
+    //featureFunctions.add(new StartsUpperFeatureFunction(-1, userPredicate = "BSUm1"))
+    //featureFunctions.add(new StartsUpperFeatureFunction(userPredicate = "BSUm1"))
+    //featureFunctions.add(new ContainsTwoDigitsFeatureFunction(userPredicate = "BTD"))
+    //featureFunctions.add(new ContainsTwoCapsFeatureFunction(userPredicate = "BTC"))
+    //featureFunctions.add(new ContainsDashFeatureFunction(userPredicate = "BD"))
+    //featureFunctions.add(new ContainsDotFeatureFunction(userPredicate = "BDo"))
+    //featureFunctions.add(new ContainsCommaFeatureFunction(userPredicate = "BC"))
+
+    //featureFunctions.addAll(new CharacterNGramFeatureFunctionGenerator(userPredicate = "BCNG").generate())
     featureFunctions.addAll(new CharacterNGramFeatureFunctionGenerator(userPredicate = "UCNG").generate())
     }
 
     //part E
     if (ffLevel <= 4) {
-    featureFunctions.addAll(new PrevNextWordsTokenFeatureFunctionGenerator(userPredicate = "BPN", distanceFromMention = Array(1,2,3), mergeTokensToConstituent = true).generate())
+    //featureFunctions.addAll(new PrevNextWordsTokenFeatureFunctionGenerator(userPredicate = "BPN", distanceFromMention = Array(1,2,3), mergeTokensToConstituent = true).generate())
     featureFunctions.addAll(new PrevNextWordsTokenFeatureFunctionGenerator(userPredicate = "UPN", distanceFromMention = Array(1,2,3), mergeTokensToConstituent = true).generate())
-    featureFunctions.addAll(new PrevNextWordsTokenFeatureFunctionGenerator(userPredicate = "BPNP", labelType = Label.POS, distanceFromMention = Array(1,2,3), mergeTokensToConstituent = true).generate())
+    //featureFunctions.addAll(new PrevNextWordsTokenFeatureFunctionGenerator(userPredicate = "BPNP", labelType = Label.POS, distanceFromMention = Array(1,2,3), mergeTokensToConstituent = true).generate())
     featureFunctions.addAll(new PrevNextWordsTokenFeatureFunctionGenerator(userPredicate = "UPNP", labelType = Label.POS, distanceFromMention = Array(1,2,3), mergeTokensToConstituent = true).generate())
 
-    featureFunctions.addAll(new PrevNextWordsTokenFeatureFunctionGenerator(userPredicate = "BPN1", distanceFromMention = Array(1,2,3), mergeTokensToConstituent = false).generate())
+    //featureFunctions.addAll(new PrevNextWordsTokenFeatureFunctionGenerator(userPredicate = "BPN1", distanceFromMention = Array(1,2,3), mergeTokensToConstituent = false).generate())
     featureFunctions.addAll(new PrevNextWordsTokenFeatureFunctionGenerator(userPredicate = "UPN1", distanceFromMention = Array(1,2,3), mergeTokensToConstituent = false).generate())
-    featureFunctions.addAll(new PrevNextWordsTokenFeatureFunctionGenerator(userPredicate = "BPNP1", labelType = Label.POS, distanceFromMention = Array(1,2,3), mergeTokensToConstituent = false).generate())
+    //featureFunctions.addAll(new PrevNextWordsTokenFeatureFunctionGenerator(userPredicate = "BPNP1", labelType = Label.POS, distanceFromMention = Array(1,2,3), mergeTokensToConstituent = false).generate())
     featureFunctions.addAll(new PrevNextWordsTokenFeatureFunctionGenerator(userPredicate = "UPNP1", labelType = Label.POS, distanceFromMention = Array(1,2,3), mergeTokensToConstituent = false).generate())
     }