# ml-class / ex8.py

 ``` 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116``` ```#!/usr/bin/env python import numpy as np import matplotlib.pyplot as plt from scipy.io import loadmat from sklearn.covariance import EmpiricalCovariance, MinCovDet from sklearn.metrics import fbeta_score def p1(x, var, mue): return (1/np.sqrt(2*np.pi*var))*(np.e**(-(((x-mue)**2)/2*var))) def p(x, mue, var): total = 1 for xi, vari, muei in zip(x, var, mue): total *= p1(xi, vari, muei) return total def calc_contour(data, fn): xs = np.linspace(data[:,0].min(), data[:,0].max(), 100) ys = np.linspace(data[:,1].min(), data[:,1].max(), 100) z = np.zeros(shape=(len(xs), len(ys))) for x, xv in enumerate(xs): for y, yv in enumerate(ys): z[x, y] = fn(np.array([xv, yv])) return xs, ys, z def anplot(data, fn, use_exps=True): xs, ys, z = calc_contour(data, lambda x: fn(x)) plt.scatter(data[:,0], data[:,1], marker='x') if use_exps: exps = np.arange(-20, -1, 3) fn = np.vectorize(lambda n: 10**n) plt.contour(xs, ys, z, fn(exps)) else: plt.contour(xs, ys, z) plt.grid() plt.show() def anomaly(): data = loadmat('ex8/ex8data1.mat') train = data['X'] mue = train.mean(0) var = train.var(0) fn = lambda x: p(x, mue, var) anplot(train, fn) def anomaly_skl(): data = loadmat('ex8/ex8data1.mat') X = data['X'] #cov = MinCovDet().fit(X) cov = EmpiricalCovariance().fit(X) anplot(X, cov.score, False) def find_threshold(fn): raw = loadmat('ex8/ex8data1.mat') X = raw['Xval'] y = raw['yval'].ravel() dists = np.fromiter((fn(x) for x in X), float) best_f = 0 best_t = 0 for t in np.linspace(dists.min(), dists.max(), 100): preds = (dists > t).astype(int) f = fbeta_score(y, preds, 1.) if f > best_f: best_f = f best_t = t return best_t, best_f def show_threshold(): data = loadmat('ex8/ex8data1.mat') X = data['X'] cov = MinCovDet().fit(X) #cov = EmpiricalCovariance().fit(X) def dist(x): return abs(cov.score(x)) t, f = find_threshold(dist) print('threshold: {}\nfscore: {}'.format(t, f)) fn = cov.score xs, ys, z = calc_contour(X, fn) plt.scatter(X[:,0], X[:,1], marker='x') plt.contour(xs, ys, z) oxs, oys = [], [] for x in X: if abs(cov.score(x)) > t: oxs.append(x[0]) oys.append(x[1]) plt.scatter(oxs, oys, marker='o', color='red') plt.grid() plt.show() if __name__ == '__main__': #anomaly() show_threshold() raw_input() ```