Commits

Miki Tebeka  committed 9070dfc

start of threshold

  • Participants
  • Parent commits 81683c3

Comments (0)

Files changed (1)

 import matplotlib.pyplot as plt
 from scipy.io import loadmat
 from sklearn.covariance import EmpiricalCovariance, MinCovDet
+from sklearn.metrics import fbeta_score
 
 
 def p1(x, var, mue):
 
     return xs, ys, z
 
-def anplot(data, fn):
+
+def anplot(data, fn, use_exps=True):
     xs, ys, z = calc_contour(data, lambda x: fn(x))
 
     plt.scatter(data[:,0], data[:,1], marker='x')
-    exps = np.arange(-20, -1, 3)
-    fn = np.vectorize(lambda n: 10**n)
-    plt.contour(xs, ys, z) #, fn(exps))
+    if use_exps:
+        exps = np.arange(-20, -1, 3)
+        fn = np.vectorize(lambda n: 10**n)
+        plt.contour(xs, ys, z, fn(exps))
+    else:
+        plt.contour(xs, ys, z)
+
     plt.grid()
     plt.show()
 
     data = loadmat('ex8/ex8data1.mat')
     X = data['X']
 
-    cov = MinCovDet().fit(X)
-    fn = lambda x: cov.mahalanobis([x])
-    anplot(X, cov.score)
+    #cov = MinCovDet().fit(X)
+    cov = EmpiricalCovariance().fit(X)
+    anplot(X, cov.score, False)
+
+
+def find_threshold(fn, path='ex8/ex8data2.mat'):
+    """Pick the anomaly threshold that maximises F1 on a validation set.
+
+    ``fn`` maps one sample (a row of ``Xval``) to a scalar score.  A sample
+    is predicted anomalous (label 1) when its score is >= the threshold and
+    normal (label 0) otherwise.  ``path`` is the .mat file that holds the
+    ``Xval`` and ``yval`` arrays.
+
+    Returns ``(best_t, best_f)``: the best of 100 evenly spaced candidate
+    thresholds between the smallest and largest score, and its F1 value.
+    Both stay 0 when no candidate reaches an F1 above 0.
+    """
+    raw = loadmat(path)
+    X = raw['Xval']
+    y = raw['yval'].ravel()
+
+    # Score every validation row once; the loop only re-thresholds them.
+    dist = np.fromiter((fn(x) for x in X), float)
+
+    best_f = 0
+    best_t = 0
+    for t in np.linspace(dist.min(), dist.max(), 100):
+        # Threshold the cached scores directly.  Rebinding ``fn`` to a
+        # lambda that itself calls ``fn`` makes the closure resolve to
+        # itself (endless recursion), and np.vectorize would hand it
+        # individual scalars of X rather than whole rows.
+        pred = (dist >= t).astype(int)
+        # ``beta`` is passed by keyword so the call works whether or not
+        # the installed scikit-learn accepts it positionally.
+        f = fbeta_score(y, pred, beta=1.)
+        if f > best_f:
+            best_f = f
+            best_t = t
+
+    return best_t, best_f
+
 
 
 if __name__ == '__main__':
-    anomaly()
-    raw_input()
+    #anomaly()
+    #raw_input()
+
+    data = loadmat('ex8/ex8data1.mat')
+    X = data['X']
+
+    #cov = MinCovDet().fit(X)
+    cov = EmpiricalCovariance().fit(X)
+    fn = lambda x: abs(cov.score(x))
+    t, f = find_threshold(fn)
+    print(t, f)