1. Jure Žbontar
  2. mtc

Commits

Jure Žbontar  committed e1ec452

refactor, FICYREG

  • Participants
  • Parent commits 67e492c
  • Branches default

Comments (0)

Files changed (2)

File cca.py

View file
 np.random.seed(42)
 
 samples = 100
-n = 200
-m = 35
+n = 10
+m = 5
 nm = n + m
 
 cov = np.random.random((nm, nm))
     B = Syy_.dot(Syx).dot(A)
 
 if sys.argv[1] == '2':
-    _, A = np.linalg.eig(Sxx_.dot(Sxy).dot(Syy_).dot(Syx))
+    c, A = np.linalg.eig(Sxx_.dot(Sxy).dot(Syy_).dot(Syx))
     A = A.real
     B = Syy_.dot(Syx).dot(A)
+    print(np.sqrt(c))
 
 if sys.argv[1] == '3':
     Sxx_2 = scipy.linalg.sqrtm(Sxx_).real
 
     A, B = cancor(X, Y)
 
-    print(A)
-    print(B)
-
-
 if sys.argv[1] == 'r2':
     def geigen(Amat, Bmat, Cmat):
         '''matlab geigen function'''
         Cfacinv = np.linalg.inv(Cfac)
         Dmat = Bfacinv.T.dot(Amat).dot(Cfacinv)
         if p >= q:
-            u, values, v = np.linalg.svd(Dmat, full_matrices=False)
+            u, values, v = np.linalg.svd(Dmat)
             Lmat = Bfacinv.dot(u)
+            Lmatinv = u.T.dot(Bfac)
             Mmat = Cfacinv.dot(v.T)
+            Mmatinv = v.dot(Cfac)
         else:
-            u, values, v = np.linalg.svd(Dmat.T, full_matrices=False)
+            u, values, v = np.linalg.svd(Dmat.T)
             Lmat = Bfacinv.dot(v.T)
+            Lmatinv = v.dot(Bfac)
             Mmat = Cfacinv.dot(u)
-        return Lmat, Mmat
+            Mmatinv = u.T.dot(Cfac)
+
+        return Lmat, Lmatinv, Mmat, Mmatinv, values
 
     def rcc(X, Y, lambda1, lambda2):
         '''R package cca function'''
         Cxy = X.T.dot(Y) / (X.shape[0] - 1)
         return geigen(Cxy, Cxx, Cyy)
 
-    A, B = rcc(X, Y, 0.1, 0.1)
+    A, Ainv, B, Binv, values = rcc(X, Y, 0, 0)
+
+    assert np.allclose(A.dot(Ainv), np.eye(A.shape[0]))
+    assert np.allclose(B.dot(Binv), np.eye(B.shape[0]))
 
 
 if sys.argv[1] == 'sk':

File mtc.py

View file
 
 
 # Curds & Whey #
+def curds_whey_fit(X, Y, type='population', rank=0, lambda1=0, lambda2=0, fitter=SKFitter(sklearn.linear_model.LinearRegression(), supports_multiclass=True, regressor=True)):
+    _, _, Rmat, Rmatinv, c = rcc(X, Y, lambda1, lambda2)
+    assert np.allclose(Rmat.dot(Rmatinv), np.eye(Rmat.shape[0]))
+
+    N, p = X.shape
+    q = Y.shape[1]
+    r = p / N
+    c2 = c**2
+    if type == 'population':
+        d = c2 / (c2 + r * (1 - c2))
+    elif type == 'gcv':
+        d = (1 - r) * (c2 - r) / ((1 - r)**2 * c2 + r**2 * (1 - c2))
+    elif type == 'reduced_rank':
+        d = (np.arange(Y.shape[1]) < rank)
+    elif type == 'ficyreg':
+        t = (p - q - 1) / N
+        d = (c2 - t) / (c2 * (1 - t))
+    d[d < 0] = 0
+
+    # TODO: optimize -- manually build domain
+    data = Orange.data.Table(X, Y.dot(Rmat))
+    model = fitter(data)
+    return model, Rmatinv, d
+
+def curds_whey_predict(X, model, Rmatinv, d):
+    return (model(X) * d).dot(Rmatinv)
+
+class CurdsWhey2ClassifierFitter(Orange.classification.Fitter):
+    def __init__(self, type='population', lambda1=0, lambda2=0):
+        self.supports_multiclass = True
+        self.type = type
+        self.lambda1 = lambda1
+        self.lambda2 = lambda2
+
+    def fit(self, X, Y, W):
+        args = curds_whey_fit(X, Y, type=self.type, lambda1=self.lambda1, lambda2=self.lambda2)
+        return CurdsWhey2ClassifierModel(args)
+
+class CurdsWhey2ClassifierModel(Orange.classification.Model):
+    def __init__(self, args):
+        self.args = args
+
+    def predict(self, X):
+        P = np.clip(curds_whey_predict(X, *self.args), 0, 1)
+        return np.dstack((1 - P, P))
+
+
 class CurdsWheyClassifierFitter(Orange.classification.Fitter):
     def __init__(self, type='population'):
         self.supports_multiclass = True
     Cfacinv = np.linalg.inv(Cfac)
     Dmat = Bfacinv.T.dot(Amat).dot(Cfacinv)
     if p >= q:
-        u, values, v = np.linalg.svd(Dmat, full_matrices=False)
+        u, values, v = np.linalg.svd(Dmat)
         Lmat = Bfacinv.dot(u)
+        Lmatinv = u.T.dot(Bfac)
         Mmat = Cfacinv.dot(v.T)
+        Mmatinv = v.dot(Cfac)
     else:
-        u, values, v = np.linalg.svd(Dmat.T, full_matrices=False)
+        u, values, v = np.linalg.svd(Dmat.T)
         Lmat = Bfacinv.dot(v.T)
+        Lmatinv = v.dot(Bfac)
         Mmat = Cfacinv.dot(u)
-    return Lmat, Mmat
+        Mmatinv = u.T.dot(Cfac)
+    return Lmat, Lmatinv, Mmat, Mmatinv, values
 
 def rcc(X, Y, lambda1, lambda2):
     '''R package cca function'''
     Cxx = X.T.dot(X) / (X.shape[0] - 1) + lambda1 * np.eye(X.shape[1])
     Cyy = Y.T.dot(Y) / (X.shape[0] - 1) + lambda2 * np.eye(Y.shape[1])
     Cxy = X.T.dot(Y) / (X.shape[0] - 1)
-    Lmat, Rmat = geigen(Cxy, Cxx, Cyy)
-    return Lmat, Rmat
+    return geigen(Cxy, Cxx, Cyy)
 
 # Reduced-rank regression #
 class ReducedRankClassifierFitter(Orange.classification.Fitter):
-    def __init__(self, rank=2, lambda1=0, lambda2=0,
-                 fitter=SKFitter(sklearn.linear_model.Ridge(), regressor=True, supports_multiclass=True)):
+    def __init__(self, rank=2, lambda1=0, lambda2=0):
         self.supports_multiclass = True
         self.rank = rank
         self.lambda1 = lambda1
         self.lambda2 = lambda2
-        self.fitter = fitter
 
     def fit(self, X, Y, W):
-        _, U = rcc(X, Y, self.lambda1, self.lambda2)
-
-        U_rank = U[:,:self.rank]
-        U_rankinv = np.linalg.pinv(U_rank)
-
-        class_vars = [Orange.data.variable.ContinuousVariable("c{}".format(i)) for i in range(U_rank.shape[1])]
-        domain = Orange.data.Domain(self.domain.attributes, class_vars)
-        data = Orange.data.Table(domain, X, Y.dot(U_rank))
-
-        model = self.fitter(data)
-
-        return ReducedRankClassifierModel(model, U_rankinv)
-
+        args = curds_whey_fit(X, Y, type='reduced_rank', rank=self.rank, lambda1=self.lambda1, lambda2=self.lambda2)
+        return ReducedRankClassifierModel(args)
 
 class ReducedRankClassifierModel(Orange.classification.Model):
-    def __init__(self, model, U_rankinv):
-        self.model = model
-        self.U_rankinv = U_rankinv
-
+    def __init__(self, args):
+        self.args = args
 
     def predict(self, X):
-        P = np.clip(self.model(X).dot(self.U_rankinv), 0, 1)
+        P = np.clip(curds_whey_predict(X, *self.args), 0, 1)
         return np.dstack((1 - P, P))
 
+# Filtered Canonical y-variate Regression #
+class FICYREGClassifierFitter(Orange.classification.Fitter):
+    def __init__(self, lambda1=0, lambda2=0):
+        self.supports_multiclass = True
+        self.lambda1 = lambda1
+        self.lambda2 = lambda2
+
+    def fit(self, X, Y, W):
+        args = curds_whey_fit(X, Y, type='ficyreg', lambda1=self.lambda1, lambda2=self.lambda2)
+        return FICYREGClassifierModel(args)
+
+class FICYREGClassifierModel(Orange.classification.Model):
+    def __init__(self, args):
+        self.args = args
+
+    def predict(self, X):
+        P = np.clip(curds_whey_predict(X, *self.args), 0, 1)
+        return np.dstack((1 - P, P))
+
+
 
 if __name__ == '__main__':
     import sklearn.linear_model
     #print(Orange.evaluation.cross_validation(model, data, Orange.evaluation.CA, Orange.evaluation.KFold()))
 
     data = Orange.data.Table('emotions')
+
+
+
+    #cls = model(data)
+    #print(cls(data, cls.Probs)[:,:,1])
+
+
     #model = BRFitter(SKFitter(sklearn.linear_model.Ridge()))
     #model = PLSClassifierFitter(n_components=2)
     #model = CurdsWheyClassifierFitter()
+    #model = CurdsWhey2ClassifierFitter(lambda1=0.1)
+    #model = ReducedRankClassifierFitter(rank=5, lambda1=0.1, lambda2=0.1)
+    #model = FICYREGClassifierFitter()
+
 
     #model = MTStackFitter(
     #    BRFitter(SKFitter(sklearn.linear_model.LogisticRegression())),
     #    BRFitter(SKFitter(sklearn.linear_model.LogisticRegression()))
     #)
 
-    model = ReducedRankClassifierFitter(rank=5)
-    #model = BRFitter(SKClassifierFitter(sklearn.svm.SVC(probability=True)))
     print(Orange.evaluation.cross_validation(model, data, auc_mt, Orange.evaluation.TTVSplit(n_repeats=5)))
 
     #model = sklearn.linear_model.LogisticRegression()