Jure Žbontar / mtc

Commits

Jure Žbontar committed 47ad525

foo

  • Parent commit: 9bb4a7d
  • Branch: default

Files changed (4)

File blaz.py

+#! /usr/bin/env python
+
+import mtc
+import numpy as np
+import sys
+
+import Orange.evaluation
+
+import sklearn.linear_model
+import sklearn.svm
+import sklearn.ensemble
+
+from hyperopt import hyperopt
+
+
+data = sys.argv.pop(1)
+
+if data == '1':
+    X = np.loadtxt('data/D1-2/100-genes-X.tab')
+    Y = np.loadtxt('data/D1-2/100-genes-Y.tab')
+else:
+    sys.exit('unknown dataset: ' + data)  # X, Y would otherwise be undefined below
+
+# remove classes with few positive or negative examples
+s = np.minimum(np.sum(Y, axis=0), np.sum(1 - Y, axis=0))
+Y = Y[:,s >= 10]
+
+data = Orange.data.Table(X, Y)
+if sys.argv[1] == 'lr':
+    params = [
+        {'type': 'c', 'categories': ('l2', 'l1')},  # penalty
+        {'type': 'c', 'categories': (0, 1)},  # dual
+        {'type': 'f', 'trans': lambda i: 2**i},  # C
+        {'type': 'c', 'categories': (1, 0)},  # fit_intercept
+        {'type': 'f', 'trans': lambda i: 3**i},  # intercept_scaling
+        {'type': 'f', 'trans': lambda i: 2**(i - 10)},  # tol
+    ]
+    hyperopt('lr_sk', params)
+
+    # mtc.py (below) renamed BRsklearnLearner to BRFitter/SKFitter
+    model = mtc.BRFitter(mtc.SKFitter(sklearn.linear_model.LogisticRegression()))
+elif sys.argv[1] == 'rf':
+    if sys.argv[2] == 'hyperopt':
+        params = [
+            {'name': 'n_estimators', 'type': 'i', 'trans': lambda i: 2**(i + 6)}, # n_estimators
+            {'type': 'c', 'categories': ('gini', 'entropy')}, # criterion
+            {'type': 'f', 'trans': lambda i: i * 0.02 + 0.1}, # max_features
+        ]
+        hyperopt(sys.argv[1], params)
+
+    else:
+        n_estimators = int(sys.argv[2])
+        criterion = sys.argv[3]
+        max_features = float(sys.argv[4])
+
+        model = mtc.BRFitter(mtc.SKFitter(sklearn.ensemble.RandomForestClassifier(
+            n_estimators=n_estimators,
+            criterion=criterion,
+            max_features=max_features,
+        )))
+elif sys.argv[1] == 'gbm':
+    model = mtc.BRFitter(mtc.SKFitter(sklearn.ensemble.GradientBoostingClassifier()))
+elif sys.argv[1] == 'svm':
+    pass  # placeholder: no SVM model is built yet, so `model` stays undefined here
+cv = Orange.evaluation.CrossValidation(data, model)
+# mt_average_score takes only the metric and returns a scorer f(Y, (values, probs))
+print(1, mtc.mt_average_score(Orange.evaluation.AUC_binary)(data.Y, cv.KFold(5)))
+
+
+#import ml_metrics
+##model = sklearn.linear_model.LogisticRegression()
+#model = sklearn.ensemble.RandomForestClassifier(n_estimators=100, criterion='gini', max_features=0.1)
+##model = sklearn.ensemble.GradientBoostingClassifier()
+#P = np.zeros_like(Y)
+#fold = 0
+#for tr, te in sklearn.cross_validation.KFold(X.shape[0], 5):
+#    for j in range(Y.shape[1]):
+#        y = Y[:,j]
+#        model.fit(X[tr], y[tr])
+#        P[te,j] = model.predict_proba(X[te])[:,1]
+#    fold += 1
+#
+#scores = []
+#for j in range(Y.shape[1]):
+#    scores.append(ml_metrics.auc(Y[:,j], P[:,j]))
+#print(np.mean(scores))

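The calling convention that ties the two scripts together (used by eval() in hyperopt.py below): blaz.py takes the dataset id first, then the method name, then either 'hyperopt' or a concrete list of hyperparameter values. A hypothetical invocation from Python, mirroring what eval() does:

    import subprocess

    # dataset '1', random forest with n_estimators=128, criterion='gini', max_features=0.3
    out = subprocess.check_output(['./blaz.py', '1', 'rf', '128', 'gini', '0.3'])
    print(out.decode())  # a line of two numbers that hyperopt.py parses as (te, va)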
File hyperopt.py

+import subprocess
+import sys
+import shelve
+import signal
+import time
+
+import numpy as np
+#import matplotlib.pyplot as plt
+
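+# NN: a tiny nearest-neighbour surrogate used to rank candidate configurations
+# below; predict() returns, for each query point, the mean objective value of
+# the closest training point(s) (ties at the minimum distance are averaged).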
+class NN:
+    def fit(self, X, y):
+        self.X = X
+        self.y = y
+
+    def predict(self, X_te):
+        y_te = []
+        for x in X_te:
+            d = np.sum((self.X - x)**2, axis=1)
+            y_te.append(np.mean(self.y[d <= np.min(d)]))
+        return np.array(y_te)
+
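+# hyperopt: greedy surrogate-assisted search over an integer coordinate grid.
+# Each coordinate is mapped to a concrete hyperparameter value by x2params
+# ('c' indexes into 'categories', 'i'/'f' go through the 'trans' lambda).
+# The main loop generates all one-step neighbours of visited points, ranks
+# them with the NN surrogate, evaluates the most promising valid candidate,
+# and repeats until interrupted (Ctrl-C prints the best configuration found).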
+def hyperopt(args, params):
+    score_model = NN()
+    def handler(signum, frame):
+        print('\nBEST:', end=' ')
+        eval(args, x2params(x_best))
+        sys.exit()
+    signal.signal(signal.SIGINT, handler)
+
+    def x2params(xs):
+        ys = []
+        for x, p in zip(xs, params):
+            if p['type'] == 'c':
+                ys.append(p['categories'][int(x)])
+            elif p['type'] == 'i':
+                ys.append(int(p['trans'](x)))
+            elif p['type'] == 'f':
+                ys.append(round(p['trans'](x), 12))
+            else:
+                assert False
+        return ys
+
+    def X2dataset(X):
+        X = np.array(X)
+        cols = []
+        for j, p in enumerate(params):
+            if p['type'] == 'c' and len(p['categories']) > 2:
+                cols.append(np.eye(len(p['categories']))[X[:,j]])
+            elif p['type'] in {'i', 'f'} or \
+              (p['type'] == 'c' and len(p['categories']) == 2):
+                cols.append(X[:,j])
+            else:
+                assert False
+        return np.column_stack(cols)
+        
+    # init params
+    X = [[0] * len(params)]
+    for i, param in enumerate(params):
+        if param['type'] == 'c':
+            for j in range(1, len(param['categories'])):
+                x = list(X[0])
+                x[i] = j
+                X.append(x)
+        elif param['type'] in {'i', 'f'}:
+            x = list(X[0])
+            x[i] = 1
+            X.append(x)
+        else:
+            assert False
+    y_score = np.array([eval(args, x2params(x))['va'] for x in X])
+
+    visited = set(map(tuple, X))
+    X_stack = []
+    y_stack = []
+    while True:
+        # candidates
+        candidates = set()
+        for x in X:
+            for j, p in enumerate(params):
+                if p['type'] == 'c':
+                    for i in range(len(p['categories'])):
+                        n = list(x)
+                        n[j] = i
+                        candidates.add(tuple(n))
+                elif p['type'] in {'f', 'i'}:
+                    n1 = list(x)
+                    n2 = list(x)
+                    n1[j] = n1[j] + 1
+                    n2[j] = n2[j] - 1
+                    candidates.add(tuple(n1))
+                    candidates.add(tuple(n2))
+                else:
+                    assert False
+
+        candidates = np.array(list(candidates - visited))
+        np.random.shuffle(candidates)
+        
+        X_te = X2dataset(candidates)
+        X_tr = X2dataset(X)
+        score_model.fit(X_tr, y_score)
+        score = score_model.predict(X_te)
+        x_best = X[np.argmax(y_score)]
+
+        ind = np.argsort(-score)
+        for i in ind:
+            visited.add(tuple(candidates[i]))
+            ps = x2params(candidates[i])
+
+            valid = True
+            for p, param in zip(ps, params):
+                if ('max' in param and p > param['max']):
+                    valid = False
+            if not valid:
+                continue
+                    
+            obj = eval(args, ps)
+            if obj is not None:
+                break
+
+        # append new result
+        X.append(candidates[i])
+        y_score = np.append(y_score, obj['va'])
+
+
+
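+# eval() shells out to ./blaz.py with the method arguments and the concrete
+# parameter values, parses the two scores it prints, and memoizes results on
+# disk in a shelve keyed by repr(params); failed runs are cached as None.
+# Note that the `method` argument is currently unused: method_args taken from
+# argv determines what gets run.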
+eval_cache = shelve.open('pkl/eval_{}.pkl'.format('_'.join(sys.argv[1:])))
+method_args = sys.argv[1:3]
+def eval(method, params):
+    params_key = repr(params)
+    if params_key not in eval_cache:
+        try:
+            t = time.time()
+            o = subprocess.check_output(['./blaz.py'] + method_args + list(map(str, params)), stderr=subprocess.PIPE).decode()
+            te, va = list(map(float, o.replace('[', '').replace(']', '').split()))
+            eval_cache[params_key] = {'te': te, 'va': va, 'time': time.time() - t, 'params': params}
+        except subprocess.CalledProcessError:
+            eval_cache[params_key] = None
+    obj = eval_cache[params_key]
+    if obj:
+        print(obj)
+    return obj
+
+"""
+if sys.argv[1] == 'lr_sk':
+    params = [
+        {'type': 'c', 'categories': ('l2', 'l1')},  # penalty
+        {'type': 'c', 'categories': (0, 1)},  # dual
+        {'type': 'f', 'trans': lambda i: 2**i},  # C
+        {'type': 'c', 'categories': (1, 0)},  # fit_intercept
+        {'type': 'f', 'trans': lambda i: 3**i},  # intercept_scaling
+        {'type': 'f', 'trans': lambda i: 2**(i - 10)},  # tol
+    ]
+    hyperopt('lr_sk', params)
+
+if sys.argv[1] == 'rf_sk':
+    params = [
+        {'name': 'n_estimators', 'type': 'i', 'trans': lambda i: 2**(i + 7)}, # n_estimators
+        {'type': 'c', 'categories': ('gini', 'entropy')}, # criterion
+        {'type': 'f', 'trans': lambda i: i * 0.02 + 0.1}, # max_features
+    ]
+    hyperopt('sk linear_model.LogisticRegression', params)
+
+if sys.argv[1] == 'svm_sk':
+    params = [
+        {'type': 'f', 'trans': lambda i: 2**(3 * i - 5)},  # g
+        {'type': 'f', 'trans': lambda i: 2**(3 * i + 5)},  # c
+    ]
+    hyperopt('svm_sk', params)
+
+if sys.argv[1] == 'mlp_bfgs':
+    params = [
+        {'type': 'i', 'trans': lambda i: 2**(0.5 * i + 4)}, # num_hidden
+        {'type': 'f', 'trans': lambda i: 2**i}, # lambda_
+        {'type': 'i', 'trans': lambda i: 2**(i + 8)}, # maxfun
+    ]
+    hyperopt('mlp_bfgs', params)
+
+if sys.argv[1] == 'gbm_sk':
+    params = [
+        {'type': 'f', 'trans': lambda i: 2**(i - 3)},  # learning_rate
+        {'type': 'i', 'trans': lambda i: 2**(i + 8)},  # n_estimators
+        {'type': 'f', 'trans': lambda i: 0.1 * i + 0.5},  # subsample
+        {'type': 'i', 'trans': lambda i: i + 3},  # max_depth
+        {'type': 'f', 'trans': lambda i: i * 0.02 + 0.1}, # max_features
+    ]
+    hyperopt('gbm_sk', params)
+
+
+if sys.argv[1] == 'mlp_sgd':
+    params = [
+        {'type': 'i', 'trans': lambda i: 2**(0.5 * i + 7)}, # num_hidden
+        {'type': 'f', 'trans': lambda i: 0.2 * i + 0.5}, # dropout
+        {'type': 'f', 'trans': lambda i: 2**(i - 10)}, # lambda_
+        {'type': 'f', 'trans': lambda i: 2**(i - 3)}, # learning_rate
+        {'type': 'i', 'trans': lambda i: 2**(i + 9)}, # iterations
+        {'type': 'f', 'trans': lambda i: 10**(i - 2)}, # scale
+        {'type': 'i', 'trans': lambda i: 2**(i + 3)}, # batch_size
+    ]
+    hyperopt('mlp_sgd', params)
+
+if sys.argv[1] == 'ada_sk':
+    params = [
+        {'type': 'i', 'trans': lambda i: 2**(i + 8), 'max': 2**12}, # n_estimators
+        {'type': 'f', 'trans': lambda i: 2**(i - 1)}, # learning_rate
+        {'type': 'i', 'trans': lambda i: i + 3},  # max_depth
+        {'type': 'c', 'categories': ('SAMME.R', 'SAMME')},  # algorithm
+    ]
+    hyperopt('ada_sk', params)
+
+if sys.argv[1] == 'ert_sk':
+    params = [
+        {'type': 'i', 'trans': lambda i: 2**(i + 7)}, # n_estimators
+        {'type': 'c', 'categories': ('gini', 'entropy')}, # criterion
+        {'type': 'f', 'trans': lambda i: i * 0.02 + 0.1}, # max_features
+    ]
+    hyperopt('ert_sk', params)
+"""
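
To make the parameter encoding concrete, here is a standalone sketch (not part of the commit) of how integer search coordinates map to hyperparameter values, mirroring x2params above:

    params = [
        {'type': 'c', 'categories': ('gini', 'entropy')},  # criterion
        {'type': 'i', 'trans': lambda i: 2**(i + 6)},      # n_estimators
        {'type': 'f', 'trans': lambda i: i * 0.02 + 0.1},  # max_features
    ]

    def x2params(xs, params):
        ys = []
        for x, p in zip(xs, params):
            if p['type'] == 'c':
                ys.append(p['categories'][int(x)])
            elif p['type'] == 'i':
                ys.append(int(p['trans'](x)))
            else:  # 'f'
                ys.append(round(p['trans'](x), 12))
        return ys

    print(x2params([1, 2, 0], params))  # ['entropy', 256, 0.1]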

File main.py


File mtc.py

-import theano
-import theano.tensor as T
+import mtc
+
+import Orange.data
+import Orange.classification
+import Orange.evaluation
+
+import copy
 
 import numpy as np
 
-import Orange.classification
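+# mt_average_score: lift a single-target metric to multi-target data by
+# macro-averaging it over the target columns.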
+def mt_average_score(metric):
+    def f(Y, Y_hat):
+        scores = []
+        for j in range(Y.shape[1]):
+            scores.append(metric(Y[:,j], (Y_hat[0][:,j], Y_hat[1][:,j])))
+        return np.mean(scores)
+    return f
 
-import sklearn.base
-import sklearn.cross_validation
-import sklearn.cross_decomposition
+ca_mt = mt_average_score(Orange.evaluation.ca)
+auc_mt = mt_average_score(Orange.evaluation.auc)
 
+class BRFitter(Orange.classification.Fitter):
+    def __init__(self, learner):
+        self.supports_multiclass = True
+        self.learner = learner
 
-### evaluation ###
-def mse(Y, P):
-    return np.mean((Y - P)**2)
-
-def cross_val_score(model, data, scoring=mse, cv=None):
-    if cv is None:
-        cv = sklearn.cross_validation.KFold(data.X.shape[0], n_folds=5)
-    scores = []
-    for tr, te in cv:
-        cls = model(data[tr])
-        scores.append(scoring(data.Y[te], cls(data[te])))
-    return np.array(scores)
-
-
-### methods ###
-# Adapter
-class BRsklearnLearner(Orange.classification.Fitter):
-    def __init__(self, model):
-        self.supports_multiclass = True
-        self.model = model
-
-    def fit(self, X, Y, W=None):
+    def fit(self, X, Y, W):
         models = []
         for j in range(Y.shape[1]):
-            m = sklearn.base.clone(self.model)
-            m.fit(X, Y[:,j])
-            models.append(m)
-        return BRsklearn(models)
+            m = copy.deepcopy(self.learner)
+            data = Orange.data.Table(X, Y[:,j][:,None])
+            models.append(m(data))
+        return BRModel(models)
 
-class BRsklearn(Orange.classification.Model):
+class BRModel(Orange.classification.Model):
     def __init__(self, models):
         self.models = models
 
     def predict(self, X):
-        return np.column_stack([model.predict(X) for model in self.models])
+        max_card = max(len(c.values) for c in self.domain.class_vars)
+        V = np.zeros((X.shape[0], len(self.domain.class_vars)))
+        P = np.zeros((X.shape[0], len(self.domain.class_vars), max_card))
+        for j, model in enumerate(self.models):
+            V[:,j], P[:,j,:] = model(X, self.ValueProbs)
+        return V, P
 
-
-class sklearnLearner(Orange.classification.Fitter):
+class SKFitter(Orange.classification.Fitter):
     def __init__(self, model):
-        #self.supports_multiclass = True
         self.model = model
 
     def fit(self, X, Y, W):
         self.model.fit(X, Y.ravel())
-        return sklearnModel(self.model)
+        return SKModel(self.model)
 
-class sklearnModel(Orange.classification.Model):
+class SKModel(Orange.classification.Model):
     def __init__(self, model):
         self.model = model
 
     def predict(self, X):
         return self.model.predict_proba(X)
 
 
-# Multilayer perceptron
-def rectified_linear(x):
-    return T.maximum(0.0, x)
+if __name__ == '__main__':
+    import sklearn.linear_model
+    import sklearn.cross_validation
+    import sklearn.metrics
 
-class NeuralNetwork:
-    def __init__(self, input, scale, dropout=None):
-        self.output = self.output_test = input
-        self.scale = scale
-        self.srng = T.shared_randomstreams.RandomStreams(seed=42)
-        self.params = []
-        self.params_init = []
-        self.L2 = 0
+    #data = Orange.data.Table('iris')
+    #data = Orange.data.Table(data.X, data.Y == 0)
+    #model = SKLearner(sklearn.linear_model.LogisticRegression())
+    #print(Orange.evaluation.cross_validation(model, data, Orange.evaluation.CA, Orange.evaluation.KFold()))
 
-        if dropout is not None:
-            self.output *= self.srng.binomial(p=dropout, size=self.output.shape)
-            self.output_test *= dropout
+    data = Orange.data.Table('emotions')
+    model = BRFitter(SKFitter(sklearn.linear_model.LogisticRegression()))
+    print(Orange.evaluation.cross_validation(model, data, auc_mt, Orange.evaluation.TTVSplit()))
 
-    def full(self, n_in, n_out, dropout, activation):
-        W_init = np.random.normal(scale=self.scale, size=(n_in, n_out))
-        b_init = np.zeros(n_out)
-        self.params_init.extend([W_init, b_init])
 
-        W = theano.shared(W_init, borrow=True)
-        b = theano.shared(b_init, borrow=True)
-        self.params.extend([W, b])
+    model = sklearn.linear_model.LogisticRegression()
+    X = data.X
+    Y = data.Y
+    scores = []
+    for tr, te in sklearn.cross_validation.KFold(X.shape[0], 5):
+        for j in range(Y.shape[1]):
+            y = Y[:,j]
+            model.fit(X[tr], y[tr])
+            scores.append(sklearn.metrics.roc_auc_score(y[te], model.predict_proba(X[te])[:, 1]))
+    print(np.mean(scores))
 
-        self.L2 += (W**2).sum()
 
-        self.output = activation(self.output.dot(W) + b)
-        self.output_test = activation(self.output_test.dot(W) + b)
-        if dropout is not None:
-            self.output *= self.srng.binomial(p=dropout, size=self.output.shape)
-            self.output_test *= dropout
-
-class MLPLearner(Orange.classification.Fitter):
-    def __init__(self, layers, dropout, L2_reg, learning_rate, iterations, scale, batch_size):
-        self.supports_multiclass = True
-        self.iterations = iterations
-        self.batch_size = batch_size
-
-        x = T.matrix()
-        y = T.matrix()
-
-        self.model = NeuralNetwork(input=x, scale=scale, dropout=dropout[0])
-
-        for prev, next, drop in zip(layers, layers[1:], dropout[1:]):
-            self.model.full(prev, next, drop, T.nnet.sigmoid)
-
-        out_clipped = T.clip(self.model.output, 1e-15, 1 - 1e-15)
-        cost = T.mean(T.nnet.binary_crossentropy(out_clipped, y)) + L2_reg * self.model.L2 / x.shape[0]
-
-        updates = []
-        for p in self.model.params:
-            updates.append((p, p - learning_rate * T.grad(cost, p)))
-        self.train_model = theano.function(inputs=[x, y], updates=updates)
-        self.get_output = theano.function(inputs=[x], outputs=self.model.output_test)
-
-    def fit(self, X_tr, y_tr, W):
-        # reset params
-        for p, v in zip(self.model.params, self.model.params_init):
-            p.set_value(v)
-
-        epoch = 0
-        while epoch < self.iterations:
-            epoch += 1
-            for i in range(0, X_tr.shape[0] - self.batch_size + 1, self.batch_size):
-                self.train_model(X_tr[i:i + self.batch_size], y_tr[i:i + self.batch_size])
-        return MLPClassifier(self.get_output)
-
-
-class MLPClassifier(Orange.classification.Model):
-    def __init__(self, get_output):
-        self.get_output = get_output
-
-    def predict(self, X_te):
-        return self.get_output(X_te)
-
-
-#if __name__ == '__main__':
-#    d = Orange.data.Table('test-pls')
-#
-#    l = MLPLearner(
-#        layers=[d.X.shape[1], 20, d.Y.shape[1]],
-#        dropout=[0.8, 0.5, None],
-#        L2_reg=0.0001,
-#        learning_rate=0.1,
-#        iterations=100,
-#        scale=0.1,
-#        batch_size=10)
-#    cls = l(d)
-#    print(cls(d))
-
-
-
-
-# Curds & Whey
-class CurdsWheyRegressorLearner(Orange.classification.Fitter):
-    def __init__(self, type):
-        self.supports_multiclass = True
-        self.type = type
-
-    def fit(self, X, Y, W=None):
-        N, p = X.shape
-        r = float(p) / N
-
-        YY_ = np.linalg.inv(Y.T.dot(Y))
-        XX_ = np.linalg.inv(X.T.dot(X))
-        Q = YY_.dot(Y.T).dot(X).dot(XX_).dot(X.T).dot(Y)
-        c2, T = np.linalg.eig(Q)
-        T = T.T
-
-        if self.type == 'population':
-            D = np.diag(c2 / (c2 + r * (1 - c2)))
-        elif self.type == 'gcv':
-            D = np.diag((1 - r) * (c2 - r) / ((1 - r)**2 * c2 + r**2 * (1 - c2)))
-        D[D < 0] = 0
-
-        B = np.linalg.inv(T).dot(D).dot(T)
-        A = XX_.dot(X.T).dot(Y).T
-
-        return CurdsWheyRegressor(B.dot(A))
-
-class CurdsWheyRegressor(Orange.classification.Model):
-    def __init__(self, T):
-        self.T = T
-
-    def predict(self, X_te):
-        return X_te.dot(self.T.T)
-
-# MT Stacking
-class MTStackLearner(Orange.classification.Fitter):
-    def __init__(self, model, stacker, cv=None):
-        self.supports_multiclass = True
-        self.model = model
-        self.stacker = stacker
-        self.cv = cv
-
-    def fit(self, X, Y, W=None):
-        XX = np.zeros_like(Y)
-        YY = np.zeros_like(Y)
-
-        if self.cv == None:
-            self.cv = sklearn.cross_validation.KFold(X.shape[0], 5)
-
-        for tr, te in self.cv:
-            cls = self.model.fit(X[tr], Y[tr])
-            XX[te] = cls.predict(X[te])
-            YY[te] = Y[te]
-
-        cls = self.model.fit(X, Y)
-        stacker_cls = self.stacker.fit(XX, YY)
-
-        return MTStack(cls, stacker_cls)
-
-
-class MTStack(Orange.classification.Model):
-    def __init__(self, cls, stacker_cls):
-        self.cls = cls
-        self.stacker_cls = stacker_cls
-
-    def predict(self, X):
-        return self.stacker_cls.predict(self.cls.predict(X))
-
-
-
-class MeanRegressorLearner(Orange.classification.Fitter):
-    def __init__(self):
-        self.supports_multiclass = True
-
-    def fit(self, X, Y, W):
-        return MeanRegressor(np.mean(Y, axis=0))
-
-class MeanRegressor(Orange.classification.Model):
-    def __init__(self, m):
-        self.m = m
-
-    def predict(self, X):
-        return np.tile(self.m, (X.shape[0], 1))
-
-def make_regression(n_samples=1000, n_features=10, n_targets=5, noise=1.0):
-    n = n_features + n_targets
-    cov = np.random.random((n, n))
-    cov = (cov + cov.T) / 2
-    np.fill_diagonal(cov, 1)
-
-    d = np.random.multivariate_normal(np.zeros(n), cov, n_samples)
-    X = d[:,:n_features]
-    Y = d[:,n_features:]
-
-
-    #cov = np.random.random((n_features, n_features))
-    #cov = (cov + cov.T) / 2
-    #np.fill_diagonal(cov, 1)
-    #
-    #X = np.random.multivariate_normal(np.zeros(n_features), cov, n_samples)
-    #T = np.random.normal(size=(n_features, n_targets))
-    #Y = np.dot(X, T) + np.random.normal(scale=noise, size=(n_samples, n_targets))
-
-    #print(cov)
-    #print(np.dot(X.T, Y) / n_samples)
-
-    return Orange.data.Table(X, Y)
-
-
-
-if __name__ == '__main__':
-    import sklearn.datasets
-    import sklearn.linear_model
-    import sklearn.ensemble
-
-
-
-
-    #d.X = (d.X - np.mean(d.X, axis=0)) / np.std(d.X, axis=0)
-
-    #d = np.loadtxt('/home/jure/tmp/water-quality-nom.arff', skiprows=57, delimiter=',')
-    #X = d[:,:16]
-    #X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
-    #d = Orange.data.Table(X, d[:,16:])
-
-    #X, Y = sklearn.datasets.make_regression(n_samples=1000, n_features=100, n_informative=10, n_targets=5, noise=2.0)
-    #d = Orange.data.Table(X, Y)
-
-
-#    d = make_regression(n_samples=1000, n_features=10, n_targets=5)
-#    print(np.mean(d.X, axis=0))
-
-    #import pylab as plt
-    #plt.subplot(211)
-    #plt.plot(d.X[:,0], d.X[:,1], '.')
-    #
-    #plt.subplot(212)
-    #plt.plot(d.Y[:,0], d.Y[:,1], '.')
-    #
-    #plt.show()
-
-    #CurdsWheyRegressorLearner2()(d)
-
-
-
-#    import sys
-#    sys.exit()
-
-
-    #d = Orange.data.Table('test-pls')
-    #model = MTStackLearner(
-    #    model=BRsklearnLearner(sklearn.linear_model.Ridge(alpha=1)),
-    #    stacker=BRsklearnLearner(sklearn.linear_model.Ridge(alpha=1)),
-    #)
-    #print(np.mean(cross_val_score(model, d)))
-
-    import ml_metrics
-    import Orange.evaluation
-    import sklearn.metrics
-
-
-    d = Orange.data.Table('monks-1')
-
-    model = sklearnLearner(sklearn.linear_model.LogisticRegression())
-    cv = Orange.evaluation.CrossValidation(d, model)
-    p = cv.KFold(5)[1]
-    #print(p)
-    print("orange    ", Orange.evaluation.AUC_binary(d, p))
-    print("ml_metrics", ml_metrics.auc(d.Y.ravel(), p[:,1]))
-    print("sklearn   ", sklearn.metrics.roc_auc_score(d.Y.ravel(), p[:,1]))
-
-
-
-
-    #X, y = d.X, d.Y.ravel()
-    #p = np.zeros_like(y)
-    #model = sklearn.linear_model.LogisticRegression()
-    #
-    #for tr, te in sklearn.cross_validation.KFold(X.shape[0], 5):
-    #    model.fit(X[tr], y[tr])
-    #    p[te] = model.predict_proba(X[te])[:,1]
-    #print(ml_metrics.auc(y, p))
-
-
-
-
-
-    #model = MeanRegressorLearner()
-    #print(np.mean(cross_val_score(model, d)))
-    #
-    #model = sklearnLearner(sklearn.cross_decomposition.PLSRegression(n_components=2))
-    #print(np.mean(cross_val_score(model, d)))
-    #
-    ##model = BRsklearnLearner(sklearn.ensemble.RandomForestRegressor(n_estimators=100))
-    ##print(np.mean(cross_val_score(model, d)))
-    #
-    #model = BRsklearnLearner(sklearn.linear_model.LinearRegression())
-    #print(np.mean(cross_val_score(model, d)))
-    #
-    #model = BRsklearnLearner(sklearn.linear_model.Ridge(alpha=0.1))
-    #print(np.mean(cross_val_score(model, d)))
-    #
-    #model = CurdsWheyRegressorLearner('population')
-    #print(np.mean(cross_val_score(model, d)))
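
A minimal standalone sketch of the binary-relevance idea implemented by BRFitter/BRModel above, written directly against scikit-learn (a hypothetical example; the Orange wrappers in this commit add domain and value/probability handling on top):

    import numpy as np
    import sklearn.base
    import sklearn.linear_model

    class BinaryRelevance:
        def __init__(self, base):
            self.base = base

        def fit(self, X, Y):
            # one independently trained clone of the base model per target column
            self.models_ = [sklearn.base.clone(self.base).fit(X, Y[:, j])
                            for j in range(Y.shape[1])]
            return self

        def predict_proba(self, X):
            # column j holds P(y_j = 1)
            return np.column_stack([m.predict_proba(X)[:, 1] for m in self.models_])

    X = np.random.rand(100, 5)
    Y = (np.random.rand(100, 3) > 0.5).astype(int)
    br = BinaryRelevance(sklearn.linear_model.LogisticRegression()).fit(X, Y)
    print(br.predict_proba(X).shape)  # (100, 3)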