Commits

Miki Tebeka committed 5fc0db8

multi dim

Comments (0)

Files changed (1)

 from scipy.io import loadmat
 from sklearn.covariance import EmpiricalCovariance, MinCovDet
 from sklearn.metrics import fbeta_score
+from sklearn.decomposition import PCA
 
 
 def p1(x, var, mue):
     anplot(X, cov.score, False)
 
 
-def find_threshold(fn):
-    raw = loadmat('ex8/ex8data1.mat')
+def find_threshold(fn, filename='ex8/ex8data1.mat'):
+    raw = loadmat(filename)
     X = raw['Xval']
     y = raw['yval'].ravel()
 
 
 
 
-def show_threshold():
-
-    data = loadmat('ex8/ex8data1.mat')
+def show_threshold(filename='ex8/ex8data1.mat'):
+    data = loadmat(filename)
     X = data['X']
 
     cov = MinCovDet().fit(X)
     #cov = EmpiricalCovariance().fit(X)
     def dist(x):
         return abs(cov.score(x))
-    t, f = find_threshold(dist)
+    t, f = find_threshold(dist, filename)
     print('threshold: {}\nfscore: {}'.format(t, f))
 
     fn = cov.score
     plt.grid()
     plt.show()
 
+
+def show_threshold2(filename='ex8/ex8data2.mat'):  # Fit MinCovDet on X, print threshold/fscore, then scatter-plot X in 2-D with above-threshold rows in red.
+    data = loadmat(filename)
+    X = data['X']  # 'X' variable of the .mat file; presumably (n_samples, n_features) -- TODO confirm
+
+    cov = MinCovDet().fit(X)
+    #cov = EmpiricalCovariance().fit(X)
+    def dist(x):  # scoring callable handed to find_threshold: absolute value of cov.score
+        return abs(cov.score(x))
+    t, f = find_threshold(dist, filename)  # same file is forwarded; find_threshold loads it again itself
+    print('threshold: {}\nfscore: {}'.format(t, f))
+
+    pca = PCA(2)  # two components, used below only to get x/y coordinates for the scatter plots
+    reduced = pca.fit_transform(X)
+
+    plt.scatter(reduced[:,0], reduced[:,1], marker='x')
+    outliers = []
+    for x in X:  # keep every row of X whose |score| exceeds the threshold t
+        if abs(cov.score(x)) > t:  # NOTE(review): x is a single row here -- confirm cov.score accepts that in the sklearn version in use
+            outliers.append(x)
+    ored = pca.transform(outliers)  # NOTE(review): outliers is an empty list when no row exceeds t -- confirm transform copes with that
+    plt.scatter(ored[:,0], ored[:,1], marker='o', color='red')
+
+    plt.grid()
+    plt.show()
+
 if __name__ == '__main__':  # runs only when the file is executed as a script
     #anomaly()
-    show_threshold()
+    show_threshold2()
     raw_input()  # Python 2 builtin: blocks until a line is read from stdin