Commits

Jure Žbontar committed c9cb11d

run tests

Comments (0)

Files changed (2)

 import sklearn.linear_model
 import sklearn.svm
 import sklearn.ensemble
+import sklearn.tree
 
 from hyperopt import hyperopt
 
     X = np.loadtxt('data/D1-2/100-genes-X.tab')
     Y = np.loadtxt('data/D1-2/100-genes-Y.tab')
 
+if data == '2':
+    X = np.loadtxt('data/D2-2/1000-genes-X.tab')
+    Y = np.loadtxt('data/D2-2/1000-genes-Y.tab')
+
 
 X = X[:,np.std(X, axis=0) > 1e-7]
 X = (X - np.mean(X, axis=0)) / (np.std(X, axis=0))
 s = np.minimum(np.sum(Y, axis=0), np.sum(1 - Y, axis=0))
 Y = Y[:,s >= 10]
 
-print(X.shape, Y.shape)
+#method = sys.argv.pop(1)
+#if method == 'o':
 
-method = sys.argv[2]
-if method == 'o':
-    def parse(params, argv):
-        kwargs = {}
-        for p, a in zip(params, argv):
-            if p['type'] == 'f':
-                kwargs[p['name']] = float(a)
-            elif p['type'] == 'i':
+def parse(params, argv):
+    kwargs = {}
+    for p, a in zip(params, argv):
+        if p['type'] == 'f':
+            kwargs[p['name']] = float(a)
+        elif p['type'] == 'i':
+            kwargs[p['name']] = int(a)
+        elif p['type'] == 'c':
+            val = p['categories'][0]
+            if isinstance(val, str):
+                kwargs[p['name']] = a
+            elif isinstance(val, int):
                 kwargs[p['name']] = int(a)
-            elif p['type'] == 'c':
-                val = p['categories'][0]
-                if isinstance(val, str):
-                    kwargs[p['name']] = a
-                elif isinstance(val, int):
-                    kwargs[p['name']] = int(a)
-                else:
-                    assert False
             else:
                 assert False
-        return kwargs
+        else:
+            assert False
+    return kwargs
 
-    data = Orange.data.Table(X, Y)
-    if sys.argv[1] == 'lr':
-        params = [
-            {'type': 'c', 'name': 'penalty', 'categories': ('l2', 'l1')},
-            {'type': 'c', 'name': 'dual', 'categories': (0, 1)},
-            {'type': 'f', 'name': 'C', 'trans': lambda i: 2**i},
-            {'type': 'c', 'name': 'fit_intercept', 'categories': (1, 0)},
-            {'type': 'f', 'name': 'intercept_scaling', 'trans': lambda i: 3**i},
-            {'type': 'f', 'name': 'tol', 'trans': lambda i: 2**(i - 10)},
-        ]
-        if sys.argv[2] == 'hyperopt':
-            hyperopt(sys.argv[1], params)
-        else:
-            kwargs = parse(params, sys.argv[2:])
-            kwargs = {}
-            model = mtc.BRFitter(mtc.SKClassifierFitter(sklearn.linear_model.LogisticRegression(**kwargs)))
-    elif sys.argv[1] == 'rf':
-        params = [
-            {'type': 'i', 'name': 'n_estimators', 'trans': lambda i: 2**(i + 6)},
-            {'type': 'c', 'name': 'criterion', 'categories': ('gini', 'entropy')},
-            {'type': 'f', 'name': 'max_features', 'trans': lambda i: i * 0.02 + 0.1},
-        ]
-        if sys.argv[2] == 'hyperopt':
-            hyperopt(sys.argv[1], params)
-        else:
-            kwargs = parse(params, sys.argv[2:])
-            model = mtc.BRFitter(mtc.SKFitter(sklearn.ensemble.RandomForestClassifier(random_state=42, **kwargs)))
-    elif sys.argv[1] == 'gbm':
-        params = [
-            {'type': 'f', 'name': 'learning_rate', 'trans': lambda i: 2**(i - 3)},
-            {'type': 'i', 'name': 'n_estimators', 'trans': lambda i: 2**(i + 8)},
-            {'type': 'f', 'name': 'subsample', 'trans': lambda i: 0.1 * i + 0.5},
-            {'type': 'i', 'name': 'max_depth', 'trans': lambda i: i + 3},
-            {'type': 'f', 'name': 'max_features', 'trans': lambda i: i * 0.02 + 0.1},
-        ]
-        if sys.argv[2] == 'hyperopt':
-            hyperopt(sys.argv[1], params)
-        else:
-            kwargs = parse(params, sys.argv[2:])
-            model = mtc.BRFitter(mtc.SKFitter(sklearn.ensemble.GradientBoostingClassifier(**kwargs)))
-    elif sys.argv[1] == 'mlp':
-        params = [
-            {'type': 'i', 'name': 'num_hidden', 'trans': lambda i: 2**(0.5 * i + 7)},
-            #{'type': 'f', 'name': 'dropout1', 'trans': lambda i: 0.2 * i + 0.8},
-            #{'type': 'f', 'name': 'dropout2', 'trans': lambda i: 0.2 * i + 0.5},
-            {'type': 'f', 'name': 'L2_reg', 'trans': lambda i: 2**(i - 10)},
-            {'type': 'f', 'name': 'learning_rate', 'trans': lambda i: 2**(i - 3)},
-            {'type': 'i', 'name': 'iterations', 'trans': lambda i: 2**(i + 9)},
-            {'type': 'f', 'name': 'scale', 'trans': lambda i: 10**(i - 2)},
-            {'type': 'i', 'name': 'batch_size', 'trans': lambda i: 2**(i + 3)},
-        ]
-        if sys.argv[2] == 'hyperopt':
-            hyperopt(sys.argv[1], params)
-        else:
-            kwargs = parse(params, sys.argv[2:])
-            kwargs['layers'] = (X.shape[1], kwargs['num_hidden'], Y.shape[1])
-            kwargs['dropout'] = [1, 1, 1]
-            del kwargs['num_hidden']
-            model = mtc.MLPFitter(**kwargs)
+data = Orange.data.Table(X, Y)
+if sys.argv[1] == 'lr':
+    params = [
+        {'type': 'c', 'name': 'penalty', 'categories': ('l2', 'l1')},
+        {'type': 'c', 'name': 'dual', 'categories': (0, 1)},
+        {'type': 'f', 'name': 'C', 'trans': lambda i: 2**i},
+        {'type': 'c', 'name': 'fit_intercept', 'categories': (1, 0)},
+        {'type': 'f', 'name': 'intercept_scaling', 'trans': lambda i: 3**i},
+        {'type': 'f', 'name': 'tol', 'trans': lambda i: 2**(i - 10)},
+    ]
+    if sys.argv[2] == 'hyperopt':
+        hyperopt(sys.argv[1], params)
+    else:
+        kwargs = parse(params, sys.argv[2:])
+        #kwargs = {}
+        model = mtc.BRFitter(mtc.SKFitter(sklearn.linear_model.LogisticRegression(**kwargs)))
+elif sys.argv[1] == 'linr':
+    model = mtc.SKFitter(sklearn.linear_model.LinearRegression(), supports_multiclass=True)
+elif sys.argv[1] == 'svm':
+    params = [
+        {'type': 'f', 'name': 'gamma', 'trans': lambda i: 2**(3 * i - 5)},  # g
+        {'type': 'f', 'name': 'C', 'trans': lambda i: 2**(3 * i + 5)},  # c
+    ]
+    if sys.argv[2] == 'hyperopt':
+        hyperopt(sys.argv[1], params)
+    else:
+        kwargs = parse(params, sys.argv[2:])
+        model = mtc.BRFitter(mtc.SKFitter(sklearn.svm.SVC(probability=True, **kwargs)))
+elif sys.argv[1] == 'ada':
+    params = [
+        {'type': 'i', 'name': 'n_estimators', 'trans': lambda i: 2**(i + 8), 'max': 2**12}, # n_estimators
+        {'type': 'f', 'name': 'learning_rate', 'trans': lambda i: 2**(i - 1)}, # learning_rate
+        {'type': 'i', 'name': 'max_depth', 'trans': lambda i: i + 3},  # max_depth
+        {'type': 'c', 'name': 'algorithm', 'categories': ('SAMME.R', 'SAMME')},  # algorithm
+    ]
+    if sys.argv[2] == 'hyperopt':
+        hyperopt(sys.argv[1], params)
+    else:
+        kwargs = parse(params, sys.argv[2:])
+        tree = sklearn.tree.DecisionTreeClassifier(max_depth=kwargs['max_depth'], min_samples_leaf=1)
+        del kwargs['max_depth']
+        model = mtc.BRFitter(mtc.SKFitter(sklearn.ensemble.AdaBoostClassifier(**kwargs)))
+elif sys.argv[1] == 'ridge':
+    params = [
+        {'type': 'f', 'name': 'alpha', 'trans': lambda i: 2**i},
+    ]
+    if sys.argv[2] == 'hyperopt':
+        hyperopt(sys.argv[1], params)
+    else:
+        kwargs = parse(params, sys.argv[2:])
+        model = mtc.BRFitter(mtc.SKFitter(sklearn.linear_model.Ridge(**kwargs)))
+elif sys.argv[1] == 'rf':
+    params = [
+        {'type': 'i', 'name': 'n_estimators', 'trans': lambda i: 2**(i + 6)},
+        {'type': 'c', 'name': 'criterion', 'categories': ('gini', 'entropy')},
+        {'type': 'f', 'name': 'max_features', 'trans': lambda i: i * 0.02 + 0.1},
+    ]
+    if sys.argv[2] == 'hyperopt':
+        hyperopt(sys.argv[1], params)
+    else:
+        kwargs = parse(params, sys.argv[2:])
+        model = mtc.BRFitter(mtc.SKFitter(sklearn.ensemble.RandomForestClassifier(random_state=42, **kwargs)))
+elif sys.argv[1] == 'gbm':
+    params = [
+        {'type': 'f', 'name': 'learning_rate', 'trans': lambda i: 2**(i - 3)},
+        {'type': 'i', 'name': 'n_estimators', 'trans': lambda i: 2**(i + 8)},
+        {'type': 'f', 'name': 'subsample', 'trans': lambda i: 0.1 * i + 0.5},
+        {'type': 'i', 'name': 'max_depth', 'trans': lambda i: i + 3},
+        {'type': 'f', 'name': 'max_features', 'trans': lambda i: i * 0.02 + 0.1},
+    ]
+    if sys.argv[2] == 'hyperopt':
+        hyperopt(sys.argv[1], params)
+    else:
+        kwargs = parse(params, sys.argv[2:])
+        model = mtc.BRFitter(mtc.SKFitter(sklearn.ensemble.GradientBoostingClassifier(**kwargs)))
+elif sys.argv[1] == 'mlp':
+    params = [
+        {'type': 'i', 'name': 'num_hidden', 'trans': lambda i: 2**(0.5 * i + 7)},
+        #{'type': 'f', 'name': 'dropout1', 'trans': lambda i: 0.2 * i + 0.8},
+        #{'type': 'f', 'name': 'dropout2', 'trans': lambda i: 0.2 * i + 0.5},
+        {'type': 'f', 'name': 'L2_reg', 'trans': lambda i: 2**(i - 10)},
+        {'type': 'f', 'name': 'learning_rate', 'trans': lambda i: 2**(i - 3)},
+        {'type': 'i', 'name': 'iterations', 'trans': lambda i: 2**(i + 9)},
+        {'type': 'f', 'name': 'scale', 'trans': lambda i: 10**(i - 2)},
+        {'type': 'i', 'name': 'batch_size', 'trans': lambda i: 2**(i + 3)},
+    ]
+    if sys.argv[2] == 'hyperopt':
+        hyperopt(sys.argv[1], params)
+    else:
+        kwargs = parse(params, sys.argv[2:])
+        kwargs['layers'] = (X.shape[1], kwargs['num_hidden'], Y.shape[1])
+        kwargs['dropout'] = [1, 1, 1]
+        del kwargs['num_hidden']
+        model = mtc.MLPFitter(**kwargs)
 
-    np.random.seed(42)
-    print(Orange.evaluation.cross_validation(model, data, mtc.auc_mt, Orange.evaluation.TTVSplit(1)))
+elif sys.argv[1] == 'pls':
+    params = [
+        {'type': 'i', 'name': 'n_components', 'trans': lambda i: 5 * i + 15}
+    ]
+    if sys.argv[2] == 'hyperopt':
+        hyperopt(sys.argv[1], params)
+    else:
+        kwargs = parse(params, sys.argv[2:])
+        model = mtc.PLSClassifierFitter(**kwargs)
 
+elif sys.argv[1] == 'cw':
+    params = [
+        {'type': 'c', 'name': 'type', 'categories': ('population', 'gcv')},
+        {'type': 'f', 'name': 'lambda1', 'trans': lambda i: 2**(i - 3)},
+        {'type': 'f', 'name': 'lambda2', 'trans': lambda i: 2**(i - 3)},
+    ]
+    if sys.argv[2] == 'hyperopt':
+        hyperopt(sys.argv[1], params)
+    else:
+        kwargs = parse(params, sys.argv[2:])
+        model = mtc.CurdsWhey2ClassifierFitter(**kwargs)
 
-if method == 's':
-    np.random.seed(42)
+elif sys.argv[1] == 'stack':
+    params = [
+        {'type': 'f', 'name': 'C', 'trans': lambda i: 2**i},
+    ]
+    if sys.argv[2] == 'hyperopt':
+        hyperopt(sys.argv[1], params)
+    else:
+        kwargs = parse(params, sys.argv[2:])
+        m1 = mtc.BRFitter(mtc.SKFitter(sklearn.ensemble.RandomForestClassifier(n_estimators=512)))
+        m2 = mtc.BRFitter(mtc.SKFitter(sklearn.linear_model.LogisticRegression()))
+        model = mtc.MTStackFitter(m1, m2)
 
-    import ml_metrics
-    model = sklearn.linear_model.LogisticRegression()
-    scores_te, scores_va = [], []
-    p_tr = 0.6
-    p_te = 0.2
-    for i in range(1):
-        inds = np.random.permutation(Y.shape[0])
-        n_tr = int(p_tr * Y.shape[0])
-        n_te = int(p_te * Y.shape[0])
-        tr = inds[:n_tr]
-        te = inds[n_tr:n_tr + n_te]
-        va = inds[n_tr + n_te:]
-        for j in range(Y.shape[1]):
-            y = Y[:,j]
-            model.fit(X[tr], y[tr])
+elif sys.argv[1] == 'rrr':
+    params = [
+        {'type': 'i', 'name': 'rank', 'trans': lambda i: 5 * i + 15},
+        {'type': 'f', 'name': 'lambda1', 'trans': lambda i: 2**(i - 3)},
+        {'type': 'f', 'name': 'lambda2', 'trans': lambda i: 2**(i - 3)},
+    ]
+    if sys.argv[2] == 'hyperopt':
+        hyperopt(sys.argv[1], params)
+    else:
+        kwargs = parse(params, sys.argv[2:])
+        model = mtc.ReducedRankClassifierFitter(**kwargs)
 
+elif sys.argv[1] == 'ficyreg':
+    params = [
+        {'type': 'f', 'name': 'lambda1', 'trans': lambda i: 2**(i - 3)},
+        {'type': 'f', 'name': 'lambda2', 'trans': lambda i: 2**(i - 3)},
+    ]
+    if sys.argv[2] == 'hyperopt':
+        hyperopt(sys.argv[1], params)
+    else:
+        kwargs = parse(params, sys.argv[2:])
+        model = mtc.FICYREGClassifierFitter(**kwargs)
 
-            try:
-                p = model.predict_proba(X[te])[:,1]
-                scores_te.append(ml_metrics.auc(y[te], p))
-            except ZeroDivisionError:
-                pass
+np.random.seed(42)
+print(Orange.evaluation.cross_validation(model, data, mtc.auc_mt, Orange.evaluation.TTVSplit(3)))
 
 
-            try:
-                p = model.predict_proba(X[va])[:,1]
-                scores_va.append(ml_metrics.auc(y[va], p))
-            except ZeroDivisionError:
-                pass
-
-    print(np.mean(scores_te), np.mean(scores_va))
+#if method == 's':
+#    np.random.seed(42)
+#
+#    import ml_metrics
+#    model = sklearn.linear_model.LogisticRegression()
+#    scores_te, scores_va = [], []
+#    p_tr = 0.6
+#    p_te = 0.2
+#    for i in range(1):
+#        inds = np.random.permutation(Y.shape[0])
+#        n_tr = int(p_tr * Y.shape[0])
+#        n_te = int(p_te * Y.shape[0])
+#        tr = inds[:n_tr]
+#        te = inds[n_tr:n_tr + n_te]
+#        va = inds[n_tr + n_te:]
+#        for j in range(Y.shape[1]):
+#            y = Y[:,j]
+#            model.fit(X[tr], y[tr])
+#
+#
+#            try:
+#                p = model.predict_proba(X[te])[:,1]
+#                scores_te.append(ml_metrics.auc(y[te], p))
+#            except ZeroDivisionError:
+#                pass
+#
+#
+#            try:
+#                p = model.predict_proba(X[va])[:,1]
+#                scores_va.append(ml_metrics.auc(y[va], p))
+#            except ZeroDivisionError:
+#                pass
+#
+#    print(np.mean(scores_te), np.mean(scores_va))
 
 
 # Curds & Whey #
-def curds_whey_fit(X, Y, type='population', rank=0, lambda1=0, lambda2=0, fitter=SKFitter(sklearn.linear_model.LinearRegression(), supports_multiclass=True, regressor=True)):
+def curds_whey_fit(X, Y, type='population', rank=0, lambda1=0, lambda2=0,
+                   fitter=SKFitter(sklearn.linear_model.Ridge(), supports_multiclass=True, regressor=True)):
     _, _, Rmat, Rmatinv, c = rcc(X, Y, lambda1, lambda2)
     assert np.allclose(Rmat.dot(Rmatinv), np.eye(Rmat.shape[0]))