Commits

Elias Ponvert committed 7edd5c8

Adding entropy to stats output, computed with the base-2 logarithm (log_2)

Comments (0)

Files changed (3)

java/upparse/model/CombinedProb.java

   }
 
   public void writeStats(PrintStream out, String[] tags, Alpha alpha) {
+    final double log2 = Math.log(2);
     for (int t = 0; t < prob.length; t++) {
       final String tag = tags[t];
       final List<RankedRecord<String>> probRec = new ArrayList<RankedRecord<String>>();
           final String label = "P(" + tag + " => " + word + " " + nextTag + ")";
           final double val = prob[t][nextT][w] * backoffHmm.nonLogTrans(t, nextT);
           final double logProb = Math.log(val);
-          if (val != 0 && !Double.isNaN(val) && !Double.isInfinite(val))
-            sumForEntropy += val * logProb;
+          if (val != 0 && !Double.isNaN(val) && !Double.isInfinite(val)) {
+            final double log2prob = logProb / log2;
+            sumForEntropy += val * log2prob;
+          }
           final RankedRecord<String> rec = new RankedRecord<String>(label, val);
           probRec.add(rec);
         }

java/upparse/model/EmissionProbs.java

 
 
   public void writeStats(final PrintStream out, final Alpha alpha, final String[] tags) {
+    final double log2 = Math.log(2);
     for (int t = 0; t < emiss.length; t++) {
       final String tag = tags[t];
       final List<RankedRecord<String>> emissRec = new ArrayList<RankedRecord<String>>();
         final String label = "P(" + word + "|" + tag + ")";
         final double val = emiss[t][w];
         final double probVal = Math.exp(val);
-        if (!Double.isNaN(val) && !Double.isInfinite(val))
-          sumForEntropy += val * probVal;
+        if (!Double.isNaN(val) && !Double.isInfinite(val)) {
+          final double log2prob = val / log2;
+          sumForEntropy += log2prob * probVal;
+        }
         final RankedRecord<String> rec = new RankedRecord<String>(label, probVal);
         emissRec.add(rec);
       }

java/upparse/model/HMM.java

     final String[] tags = getEncoder().tagNames();
     emiss.writeStats(out, alpha, tags);
     
+    final double log2 = Math.log(2);
     for (int t1 = 0; t1 < trans.length; t1++) {
       final String t1Tag = tags[t1];
       double sumForEntropy = 0;
         final String label = "P(" + t2Tag + "|" + t1Tag + ")";
         final double val = trans[t1][t2];
         final double probVal = Math.exp(val);
-        if (!Double.isNaN(val) && !Double.isInfinite(val))
-          sumForEntropy += val * probVal;
+        if (!Double.isNaN(val) && !Double.isInfinite(val)) {
+          final double log2prob = val / log2;
+          sumForEntropy += log2prob * probVal;
+        }
         final double scaleVal = 100.0 * probVal;
         final String templ = "%s = %.1f\n";
         out.format(templ, label, scaleVal);