Commits

Jure Žbontar committed 7eda1d7

stack

  • Participants
  • Parent commits 0530cab

Comments (0)

Files changed (1)

         return np.column_stack([model.predict(X) for model in self.models])
 
 
class sklearnLearner(Orange.classification.Fitter):
    """Adapt a multi-target sklearn estimator to the Orange Fitter interface.

    Parameters
    ----------
    model : sklearn estimator
        Any estimator with fit(X, Y)/predict(X) that supports 2-D targets.
    """

    def __init__(self, model):
        self.supports_multiclass = True
        self.model = model

    def fit(self, X, Y, W=None):
        # W defaults to None for consistency with the other fitters in this
        # file (e.g. MTStackLearner.fit); instance weights are not forwarded
        # because the wrapped estimator is called with (X, Y) only.
        # NOTE(review): fit() mutates the shared self.model instance, so two
        # fits on the same learner overwrite each other's fitted state.
        self.model.fit(X, Y)
        return sklearnModel(self.model)
+
class sklearnModel(Orange.classification.Model):
    """Prediction wrapper around an already-fitted sklearn estimator."""

    def __init__(self, model):
        # Keep a reference to the fitted estimator; no copy is made.
        self.model = model

    def predict(self, X):
        # Delegate straight to the wrapped estimator.
        predictions = self.model.predict(X)
        return predictions
+
 
 # Multilayer perceptron
 def rectified_linear(x):
 #    print(cls(d))
 
 
+
+
 # Curds & Whey
 class CurdsWheyRegressorLearner(Orange.classification.Fitter):
     def __init__(self, type):
 
         return CurdsWheyRegressor(B.dot(A))
 
-
-class CurdsWheyRegressorLearner2(Orange.classification.Fitter):
-    def __init__(self):
-        self.supports_multiclass = True
-
-    def fit(self, X, Y, W=None):
-        cca = sklearn.cross_decomposition.CCA(n_components=2)
-        cca.fit(X, Y)
-
-        print(cca.x_weights_)
-        print(cca.x_scores_)
-        print(cca.x_loadings_)
-        print(cca.x_rotations_)
-
-        return CurdsWheyRegressor(None)
-
-
-
 class CurdsWheyRegressor(Orange.classification.Model):
     def __init__(self, T):
         self.T = T
     def predict(self, X_te):
         return X_te.dot(self.T.T)
 
# MT Stacking
class MTStackLearner(Orange.classification.Fitter):
    """Multi-target stacking: out-of-fold predictions of a base learner are
    used as training inputs for a second-level 'stacker' learner.

    Parameters
    ----------
    model : Fitter
        Base multi-target learner.
    stacker : Fitter
        Second-level learner trained on the base learner's out-of-fold
        predictions.
    cv : iterable of (train, test) index pairs, optional
        Cross-validation split; a 5-fold KFold is built lazily when None.
    """

    def __init__(self, model, stacker, cv=None):
        self.supports_multiclass = True
        self.model = model
        self.stacker = stacker
        self.cv = cv

    def fit(self, X, Y, W=None):
        # Out-of-fold prediction matrix (XX) and matching targets (YY).
        # NOTE(review): zeros_like(Y) inherits Y's dtype -- integer targets
        # would silently truncate float predictions; confirm Y is float.
        XX = np.zeros_like(Y)
        YY = np.zeros_like(Y)

        # Use identity comparison for None, and keep the split in a local
        # instead of mutating self.cv, so the learner stays reusable across
        # data sets of different sizes.
        cv = self.cv
        if cv is None:
            cv = sklearn.cross_validation.KFold(X.shape[0], 5)

        for tr, te in cv:
            cls = self.model.fit(X[tr], Y[tr])
            XX[te] = cls.predict(X[te])
            YY[te] = Y[te]

        # Refit the base model on all data; train the stacker on the
        # out-of-fold predictions against the true targets.
        cls = self.model.fit(X, Y)
        stacker_cls = self.stacker.fit(XX, YY)

        return MTStack(cls, stacker_cls)
+
+
class MTStack(Orange.classification.Model):
    """Stacked multi-target model: base-model predictions are refined by the
    second-level stacker model."""

    def __init__(self, cls, stacker_cls):
        self.cls = cls                  # fitted base model
        self.stacker_cls = stacker_cls  # fitted second-level model

    def predict(self, X):
        # Fix: route the base model's predictions through the stacker.
        # The original printed and returned the raw base predictions,
        # leaving stacker_cls entirely unused; the debug print is dropped.
        return self.stacker_cls(self.cls(X))
+
+
+
class MeanRegressorLearner(Orange.classification.Fitter):
    """Baseline learner: predicts the per-target mean of the training data."""

    def __init__(self):
        self.supports_multiclass = True

    def fit(self, X, Y, W=None):
        # W defaults to None for consistency with the other fitters in this
        # file; instance weights are not used by this baseline.
        return MeanRegressor(np.mean(Y, axis=0))
+
class MeanRegressor(Orange.classification.Model):
    """Constant model: returns the stored per-target mean for every row."""

    def __init__(self, m):
        # m holds one mean value per target column.
        self.m = m

    def predict(self, X):
        # Repeat the mean row once per input instance.
        n_rows = X.shape[0]
        return np.tile(self.m, (n_rows, 1))
+
def make_regression(n_samples=1000, n_features=10, n_targets=5, noise=1.0):
    """Draw a joint Gaussian sample and split it into features and targets.

    Returns an Orange data Table whose X is the first n_features columns and
    Y the remaining n_targets columns of one multivariate-normal draw, so the
    features and targets are correlated through the shared covariance.

    NOTE(review): `noise` is currently unused -- target variability comes
    entirely from the joint covariance, not from additive noise. Kept in the
    signature for interface compatibility.
    """
    n = n_features + n_targets

    # Random symmetric matrix with unit diagonal, used as the covariance.
    # NOTE(review): symmetry alone does not guarantee positive
    # semi-definiteness; np.random.multivariate_normal may warn or produce
    # degenerate draws for such a matrix -- confirm this is acceptable.
    cov = np.random.random((n, n))
    cov = (cov + cov.T) / 2
    np.fill_diagonal(cov, 1)

    d = np.random.multivariate_normal(np.zeros(n), cov, n_samples)
    X = d[:, :n_features]
    Y = d[:, n_features:]

    return Orange.data.Table(X, Y)
+
+
+
 if __name__ == '__main__':
+    import sklearn.datasets
+    import sklearn.linear_model
+    import sklearn.ensemble
+    # Ad-hoc benchmark: score several multi-target regressors on the same
+    # data set via cross_val_score and print the mean score of each.



     d = Orange.data.Table('test-pls')
     #d.X = (d.X - np.mean(d.X, axis=0)) / np.std(d.X, axis=0)

-    model = CurdsWheyRegressorLearner2()
-    model(d)
-
-    #d = np.loadtxt('/home/jure/tmp/water-quality.arff', skiprows=57, delimiter=',')
+    #d = np.loadtxt('/home/jure/tmp/water-quality-nom.arff', skiprows=57, delimiter=',')
     #X = d[:,:16]
     #X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
-
     #d = Orange.data.Table(X, d[:,16:])

+    #X, Y = sklearn.datasets.make_regression(n_samples=1000, n_features=100, n_informative=10, n_targets=5, noise=2.0)
+    #d = Orange.data.Table(X, Y)

-    #import sklearn.linear_model
+
+#    d = make_regression(n_samples=1000, n_features=10, n_targets=5)
+#    print(np.mean(d.X, axis=0))
+
+    #import pylab as plt
+    #plt.subplot(211)
+    #plt.plot(d.X[:,0], d.X[:,1], '.')
     #
-    #model = BRsklearnLearner(sklearn.linear_model.LinearRegression())
+    #plt.subplot(212)
+    #plt.plot(d.Y[:,0], d.Y[:,1], '.')
+    #
+    #plt.show()
+
+    #CurdsWheyRegressorLearner2()(d)
+
+
+
+#    import sys
+#    sys.exit()
+
+
+    model = MTStackLearner(
+        model=BRsklearnLearner(sklearn.linear_model.Ridge(alpha=1)),
+        stacker=BRsklearnLearner(sklearn.linear_model.Ridge(alpha=1)),
+    )
+    print(np.mean(cross_val_score(model, d)))
+
+    model = MeanRegressorLearner()
+    print(np.mean(cross_val_score(model, d)))
+
+    model = sklearnLearner(sklearn.cross_decomposition.PLSRegression(n_components=2))
+    print(np.mean(cross_val_score(model, d)))
+
+    #model = BRsklearnLearner(sklearn.ensemble.RandomForestRegressor(n_estimators=100))
     #print(np.mean(cross_val_score(model, d)))
-    #
-    #model = BRsklearnLearner(sklearn.linear_model.Ridge(alpha=1))
-    #print(np.mean(cross_val_score(model, d)))
-    #
-    #model = CurdsWheyRegressorLearner('population')
-    #print(np.mean(cross_val_score(model, d)))
+
+    model = BRsklearnLearner(sklearn.linear_model.LinearRegression())
+    print(np.mean(cross_val_score(model, d)))
+
+    model = BRsklearnLearner(sklearn.linear_model.Ridge(alpha=0.1))
+    print(np.mean(cross_val_score(model, d)))
+
+    model = CurdsWheyRegressorLearner('population')
+    print(np.mean(cross_val_score(model, d)))