Jure Žbontar committed be90a2c

canonical correlation

Parent commit: cbc622e


Files changed (3)

 s = np.minimum(np.sum(Y, axis=0), np.sum(1 - Y, axis=0))
 Y = Y[:,s >= 10]
 
-if 1:
+print(X.shape, Y.shape)
+
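+# Method switch (presumably 'o' = Orange-based pipeline, 's' = plain
+# sklearn loop, mirroring the comparison script further below).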
+method = sys.argv[2]
+if method == 'o':
     def parse(params, argv):
         kwargs = {}
         for p, a in zip(params, argv):
             hyperopt(sys.argv[1], params)
         else:
             kwargs = parse(params, sys.argv[2:])
-            model = mtc.BRFitter(mtc.SKFitter(sklearn.linear_model.LogisticRegression(**kwargs)))
+            kwargs = {}
+            model = mtc.BRFitter(mtc.SKClassifierFitter(sklearn.linear_model.LogisticRegression(**kwargs)))
     elif sys.argv[1] == 'rf':
         params = [
             {'type': 'i', 'name': 'n_estimators', 'trans': lambda i: 2**(i + 6)},
             model = mtc.MLPFitter(**kwargs)
 
     np.random.seed(42)
-    print(Orange.evaluation.cross_validation(model, data, mtc.auc_mt, Orange.evaluation.TTVSplit(n_repeats=3)))
+    print(Orange.evaluation.cross_validation(model, data, mtc.auc_mt, Orange.evaluation.TTVSplit(1)))
 
 
-elif 0:
+if method == 's':
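+    # Per-label evaluation: one LogisticRegression per column of Y.
+    # ml_metrics.auc raises ZeroDivisionError when a split contains only
+    # one class, so those labels are skipped via the try/except below.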
     np.random.seed(42)
 
     import ml_metrics
     model = sklearn.linear_model.LogisticRegression()
-    #model = sklearn.ensemble.RandomForestClassifier(n_estimators=32, criterion='gini', max_features=0.1, random_state=42)
-    #model = sklearn.ensemble.GradientBoostingClassifier()
-    #P = np.zeros_like(Y)
     scores_te, scores_va = [], []
     p_tr = 0.6
     p_te = 0.2
             y = Y[:,j]
             model.fit(X[tr], y[tr])
 
-            if np.unique(y[te]).size == 2:
+
+            try:
                 p = model.predict_proba(X[te])[:,1]
                 scores_te.append(ml_metrics.auc(y[te], p))
+            except ZeroDivisionError:
+                pass
 
-            if np.unique(y[va]).size == 2:
+
+            try:
                 p = model.predict_proba(X[va])[:,1]
                 scores_va.append(ml_metrics.auc(y[va], p))
+            except ZeroDivisionError:
+                pass
+
     print(np.mean(scores_te), np.mean(scores_va))
-
-else:
-    np.random.seed(42)
-
-    import ml_metrics
-    import sklearn
-    import copy
-    import time
-
-    model = mtc.SKFitter(sklearn.linear_model.LogisticRegression())
-
-    scores_te, scores_va = [], []
-    p_tr = 0.6
-    p_te = 0.2
-
-    tt = 0
-
-    mt_domain = Orange.data.Domain.from_numpy(X, Y)
-
-    for i in range(1):
-        inds = np.random.permutation(Y.shape[0])
-        n_tr = int(p_tr * Y.shape[0])
-        n_te = int(p_te * Y.shape[0])
-        tr = inds[:n_tr]
-        te = inds[n_tr:n_tr + n_te]
-        va = inds[n_tr + n_te:]
-        for j in range(Y.shape[1]):
-            y = Y[:,j]
-            t = time.time()
-            domain = Orange.data.Domain(mt_domain.attributes, mt_domain.class_vars[j])
-            data = Orange.data.Table(domain, X, y[:,None])
-            tt += time.time() - t
-            print(tt)
-
-            cls = model(data[tr])
-
-            if np.unique(data.Y[te]).size == 2:
-                p = cls(data[te], cls.Probs)[:,1]
-                scores_te.append(ml_metrics.auc(data.Y[te].flat, p))
-
-    print(np.mean(scores_te))
+import numpy as np
+import scipy.linalg
+import sys
+
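+# Appending 'r' gives a default method when the script is run without
+# arguments; with an argument, the extra 'r' is simply never read.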
+sys.argv.append('r')
+np.random.seed(42)
+
+if sys.argv[1] in {'1', '2', '3', '4'}:
+
+    samples = 100
+    n = 1000
+    m = 35
+    nm = n + m
+
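+    # Synthetic check: draw `samples` points from an (n+m)-dimensional
+    # Gaussian. Symmetrizing the random matrix and adding the identity
+    # biases the covariance toward positive definiteness.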
+    cov = np.random.random((nm, nm))
+    cov = (cov + cov.T) / 2 + np.eye(nm)
+
+    data = np.random.multivariate_normal(np.zeros(nm), cov, samples)
+    data -= np.mean(data, axis=0)
+
+    X = data[:,:n]
+    Y = data[:,n:]
+
+    #np.savetxt('data/X', X)
+    #np.savetxt('data/Y', Y)
+
+
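+    # Sample covariance blocks of the centered data: Sxx and Syy are the
+    # within-set covariances, Sxy = Syx.T the cross-covariance.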
+    Sxx = X.T.dot(X) / samples
+    Sxy = X.T.dot(Y) / samples
+    Syx = Y.T.dot(X) / samples
+    Syy = Y.T.dot(Y) / samples
+
+    #Sxx = np.array([[1, .4], [.4, 1]])
+    #Sxy = np.array([[.5, .6], [.3, .4]])
+    #Syx = Sxy.T
+    #Syy = np.array([[1, .2], [.2, 1]])
+
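+    # Ridge-regularized pseudoinverses: the 1e-5 diagonal term keeps the
+    # inverses stable when Sxx is rank-deficient (here n > samples).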
+    Sxx_ = np.linalg.pinv(Sxx + 1e-5 * np.eye(Sxx.shape[0]))
+    Syy_ = np.linalg.pinv(Syy + 1e-5 * np.eye(Syy.shape[0]))
+
+if sys.argv[1] == '1':
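+    # Method 1: symmetric eigenproblem. The eigenvalues of
+    # Sxx^{-1/2} Sxy Syy^{-1} Syx Sxx^{-1/2} are the squared canonical
+    # correlations; mapping the eigenvectors back through Sxx^{-1/2}
+    # gives the x-side weights A.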
+    Sxx_2 = scipy.linalg.sqrtm(Sxx_).real
+
+    u, A = np.linalg.eig(Sxx_2.dot(Sxy).dot(Syy_).dot(Syx).dot(Sxx_2))
+    A = Sxx_2.dot(A.real)
+    B = Syy_.dot(Syx).dot(A)
+
+if sys.argv[1] == '2':
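+    # Method 2: the same spectrum from the non-symmetric product
+    # Sxx^{-1} Sxy Syy^{-1} Syx; eig may return complex eigenvectors
+    # here, hence the .real.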
+    _, A = np.linalg.eig(Sxx_.dot(Sxy).dot(Syy_).dot(Syx))
+    A = A.real
+    B = Syy_.dot(Syx).dot(A)
+
+if sys.argv[1] == '3':
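+    # Method 3: SVD formulation; the singular values of
+    # Syy^{-1/2} Syx Sxx^{-1/2} are the canonical correlations.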
+    Sxx_2 = scipy.linalg.sqrtm(Sxx_).real
+    Syy_2 = scipy.linalg.sqrtm(Syy_).real
+
+    U, _, V = np.linalg.svd(Syy_2.dot(Syx).dot(Sxx_2))
+    A = Sxx_2.dot(V.T)
+    B = Syy_2.dot(U)
+
+if sys.argv[1] == '4':
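+    # Method 4: as method 3, but with Cholesky factors of the inverse
+    # covariances standing in for the costlier matrix square roots.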
+    Rxx_ = np.linalg.cholesky(Sxx_)
+    Ryy_ = np.linalg.cholesky(Syy_)
+
+    U, d, V = np.linalg.svd(Ryy_.T.dot(Syx).dot(Rxx_))
+    A = Rxx_.dot(V.T)
+    B = Ryy_.dot(U)
+
+if sys.argv[1] == 'r':
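+    # Apparently an (unfinished) port of R's stats::cancor(), which
+    # computes CCA from QR decompositions of the centered data; the port
+    # stops shortly after the rank computations.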
+    def center(x, xcenter):
+        # Mirrors R's centering: mutates x in place and returns the
+        # column means actually subtracted.
+        if isinstance(xcenter, bool):
+            if xcenter:
+                xcenter = np.mean(x, axis=0)
+            else:
+                xcenter = np.zeros(x.shape[1])
+        x -= xcenter
+        return xcenter
+
+
+    x = np.array([[1,1.1,0.9], [-1,-1.2,-1.2], [2,2.3,2.4], [-2,-2.4,-2.3], [3,3,3], [4,4,4]])
+    y = np.array([[2,2.1],[-1,-1.2],[1,1.3],[-2,-2.4], [3,3], [4,4]])
+    xcenter = True
+    ycenter = True
+
+    assert x.shape[0] == y.shape[0]
+    nr = x.shape[0]
+
+    ncx = x.shape[1]
+    ncy = y.shape[1]
+
+    assert nr != 0 and ncx != 0 and ncy != 0
+
+    xcenter = center(x, xcenter)
+    ycenter = center(y, ycenter)
+
+    # np.linalg.qr returns (Q, R); the ranks come from the triangular factors.
+    qx, rx = np.linalg.qr(x)
+    qy, ry = np.linalg.qr(y)
+
+    dx = np.linalg.matrix_rank(rx)
+    dy = np.linalg.matrix_rank(ry)
+
+    assert dx > 0
+    assert dy > 0
+
+    d = np.zeros((nr, dy))
+
+
+if sys.argv[1] == 'sk':
+    import sklearn.cross_decomposition
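+    # Reference check: sklearn's CCA is an iterative (NIPALS-style)
+    # implementation, so x_weights_ / y_weights_ are scaled differently
+    # from the closed-form A, B above; the leading canonical correlation
+    # should still be comparable.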
+
+    cca = sklearn.cross_decomposition.CCA(n_components=3)
+    cca.fit(X, Y)
+
+    A = cca.x_weights_
+    B = cca.y_weights_
+
+    #print(cca.x_weights_)
+
+#print(A[:,:3])
+
+
+# print A; print; print B
+
+a = A[:,0]
+b = B[:,0]
+
+# print a; print b
+
+s = a.dot(Sxy).dot(b) / np.sqrt(a.dot(Sxx).dot(a) * b.dot(Syy).dot(b))
+print(s)
+import mtc
+import Orange
+import time
+
+import sklearn.linear_model
+import sklearn.cross_validation
+import sklearn.metrics
+
+import numpy as np
+
+data = Orange.data.Table('emotions')
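+# 'emotions' is a small multi-label benchmark; only its second label
+# column, data.Y[:,1], is evaluated below.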
+
+
+cs = np.logspace(-10, 10, 100, base=2)
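+# Log-spaced grid of inverse regularization strengths C, from 2^-10 to 2^10.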
+
+
+method = 's'
+
+t = time.time()
+if method == 'o':
+    domain = Orange.data.Domain(data.domain.attributes, data.domain.class_vars[0])
+    data = Orange.data.Table(domain, data.X, data.Y[:,1][:,None])
+    for c in cs:
+        fitter = mtc.SKClassifierFitter(sklearn.linear_model.LogisticRegression(C=c))
+        print(c, Orange.evaluation.cross_validation(fitter, data, Orange.evaluation.auc, Orange.evaluation.KFold(10)))
+if method == 's':
+    X, y = data.X, data.Y[:,1]
+    for c in cs:
+        model = sklearn.linear_model.LogisticRegression(C=c)
+        scores = []
+        for tr, te in sklearn.cross_validation.KFold(X.shape[0], 10):
+            model.fit(X[tr], y[tr])
+            p = model.predict_proba(X[te])[:,1]
+            scores.append([sklearn.metrics.roc_auc_score(y[te], p)])
+        print(c, np.mean(scores, axis=0))
+print(time.time() - t)