# ml-class / ex8.py

 ``` 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97``` ```#!/usr/bin/env python import numpy as np import matplotlib.pyplot as plt from scipy.io import loadmat from sklearn.covariance import EmpiricalCovariance, MinCovDet from sklearn.metrics import fbeta_score def p1(x, var, mue): return (1/np.sqrt(2*np.pi*var))*(np.e**(-(((x-mue)**2)/2*var))) def p(x, mue, var): total = 1 for xi, vari, muei in zip(x, var, mue): total *= p1(xi, vari, muei) return total def calc_contour(data, fn): xs = np.linspace(data[:,0].min(), data[:,0].max(), 100) ys = np.linspace(data[:,1].min(), data[:,1].max(), 100) z = np.zeros(shape=(len(xs), len(ys))) for x, xv in enumerate(xs): for y, yv in enumerate(ys): z[x, y] = fn(np.array([xv, yv])) return xs, ys, z def anplot(data, fn, use_exps=True): xs, ys, z = calc_contour(data, lambda x: fn(x)) plt.scatter(data[:,0], data[:,1], marker='x') if use_exps: exps = np.arange(-20, -1, 3) fn = np.vectorize(lambda n: 10**n) plt.contour(xs, ys, z, fn(exps)) else: plt.contour(xs, ys, z) plt.grid() plt.show() def anomaly(): data = loadmat('ex8/ex8data1.mat') train = data['X'] mue = train.mean(0) var = train.var(0) fn = lambda x: p(x, mue, var) anplot(train, fn) def anomaly_skl(): data = loadmat('ex8/ex8data1.mat') X = data['X'] #cov = MinCovDet().fit(X) cov = EmpiricalCovariance().fit(X) anplot(X, cov.score, False) def find_threshold(fn): raw = loadmat('ex8/ex8data2.mat') X = raw['Xval'] y = raw['yval'].ravel() dist = np.fromiter((fn(x) for x in X), float) best_f = 0 best_t = 0 for t in np.linspace(dist.min(), dist.max(), 100): fn = np.vectorize(lambda x: 0 if fn(x) < t else 1) f = fbeta_score(y, fn(X), 1.) if f > best_f: best_f = f best_t = t return best_t, best_f if __name__ == '__main__': #anomaly() #raw_input() data = loadmat('ex8/ex8data1.mat') X = data['X'] #cov = MinCovDet().fit(X) cov = EmpiricalCovariance().fit(X) fn = lambda x: abs(cov.score(x)) t, f = find_threshold(fn) print(t, f) ```