Commits

Jure Žbontar committed 09d951b

documentation

  • Participants
  • Parent commits 49b380e

Comments (0)

Files changed (2)

File doc/index.rst

 .. function :: ca_mt(actual, predicted)
 .. function :: auc_mt(actual, predicted)
 
+---------
+
+.. autofunction:: rcc
+
 Methods
 --------
 
 .. autoclass :: BRFitter
+.. autoclass :: MLPFitter
+.. autoclass :: PLSClassifierFitter
+.. autoclass :: CurdsWheyClassifierFitter
 
 Indices and tables
 ==================
 
     .. code:: python
 
-	import numpy as np
-	import mtc
-	import Orange
-	import Orange.classification.logistic_regression
-
-	def my_metric(actual, predicted):
-	    # predicted is a tuple of predicted values and predicted probabilites
-	    return np.mean(actual == predicted[0])
-
-	data = Orange.data.Table('emotions')
-	fitter = mtc.BRFitter(Orange.classification.logistic_regression.LogisticRegressionLearner())
-	model = fitter(data)
-	predicted = model(data, model.ValueProbs)
-	my_mtc_metric = mtc.mt_average_score(my_metric)
-	print(my_mtc_metric(data.Y, predicted))
+	>>> import numpy as np
+	>>> import mtc
+	>>> import Orange
+	>>> import Orange.classification.logistic_regression
+	>>> def my_metric(actual, predicted):
+	...     # predicted is a tuple of predicted values and predicted probabilities
+	...     return np.mean(actual == predicted[0])
+        ...
+	>>> data = Orange.data.Table('emotions')
+	>>> fitter = mtc.BRFitter(Orange.classification.logistic_regression.LogisticRegressionLearner())
+	>>> model = fitter(data)
+	>>> predicted = model(data, model.ValueProbs)
+	>>> my_mtc_metric = mtc.mt_average_score(my_metric)
+	>>> my_mtc_metric(data.Y, predicted)
+        0.815626756605
 
 
     '''
 
 # Binary Relevance #
 class BRFitter(Orange.classification.Fitter):
-    def __init__(self, learner):
+    '''Binary relevance method.
+
+    For a multitarget problem with `n` classes, train `n` independent models, one for each class.
+
+    :param fitter: A single-target fitter (e.g. `Orange.classification.logistic_regression.LogisticRegressionLearner()`).
+    :type fitter: Orange.classification.Fitter
+
+    .. code::
+
+        >>> import mtc
+        >>> import Orange.classification.logistic_regression
+        >>> data = Orange.data.Table('emotions')
+        >>> fitter = mtc.BRFitter(Orange.classification.logistic_regression.LogisticRegressionLearner())
+        >>> model = fitter(data)
+        >>> model(data)
+        [[ 0.  0.  1.  0.  1.  0.]
+         [ 1.  0.  0.  0.  0.  1.]
+         [ 0.  0.  0.  0.  0.  1.]
+         ..., 
+         [ 0.  0.  1.  1.  1.  0.]
+         [ 0.  0.  0.  0.  0.  1.]
+         [ 0.  1.  1.  0.  0.  0.]]
+    '''
+
+    def __init__(self, fitter):
         self.supports_multiclass = True
-        self.learner = learner
+        self.fitter = fitter
 
     def fit(self, X, Y, W):
         models = []
 
         for j in range(Y.shape[1]):
-            m = copy.deepcopy(self.learner)
+            m = copy.deepcopy(self.fitter)
 
             # Optimization -- building a Table from numpy is slow if not given a domain
             domain = Orange.data.Domain(self.domain.attributes, self.domain.class_vars[j])
             self.output_test *= dropout
 
 class MLPFitter(Orange.classification.Fitter):
+    '''Multilayer Perceptron
+
+    Implements multilayer perceptrons with dropout, L2 regularization
+    and sigmoid activation function. The weights are trained using
+    mini-batch stochastic gradient descent. Requires 
+    `Theano 0.6 <http://deeplearning.net/software/theano/>`_.
+
+    Please make sure the features are on the same scale.
+
+    :param layers: The number of units on each layer (including the number of units on the input and output layers). This parameter determines the number of hidden layers.
+    :type layers: list
+    :param dropout: The amount of dropout used per layer. A value of 0 implies no dropout and 1 implies we drop everything.
+    :type dropout: list
+    :param L2_reg: The amount of L2 regularization.
+    :type L2_reg: float
+    :param learning_rate: Learning rate in stochastic gradient descent.
+    :type learning_rate: float
+    :param iterations: Number of iterations of stochastic gradient descent. No other stopping mechanism is implemented at this time.
+    :type iterations: int
+    :param scale: Use normal distribution N(0, scale) to randomly initialize the weights.
+    :type scale: float
+    :param batch_size: Size of a batch in stochastic gradient descent.
+    :type batch_size: int
+
+    .. code::
+
+        >>> import numpy as np
+        >>> import mtc
+        >>> import Orange
+        >>> data = Orange.data.Table('emotions')
+        >>> data.X = (data.X - np.mean(data.X, axis=0)) / np.mean(data.X, axis=0)  # Don't forget to normalize your data
+        >>> fitter = mtc.MLPFitter([data.X.shape[1], 50, data.Y.shape[1]], [0.8, 0.5, 1], 0.0001, 0.5, 500, 0.1, 10)
+        >>> model = fitter(data)
+        >>> model(data)
+        [[0 0 1 0 0 0]
+         [1 0 0 0 0 1]
+         [0 0 0 0 0 1]
+         ..., 
+         [0 0 1 1 1 0]
+         [0 0 0 0 0 1]
+         [0 1 1 0 0 0]]
+    '''
+
     def __init__(self, layers, dropout, L2_reg, learning_rate, iterations, scale, batch_size):
         self.supports_multiclass = True
         self.iterations = iterations
 
 # Partial Least Squares Regression #
 def PLSClassifierFitter(**kwargs):
+    '''Partial least squares classifier
+
+    Wraps `sklearn.cross_decomposition.PLSRegression <http://scikit-learn.org/stable/modules/generated/sklearn.cross_decomposition.PLSRegression.html>`_.
+
+    :param n_components: Number of components to keep.
+    :type n_components: int
+    :param scale: whether to scale the data.
+    :type scale: boolean
+    :param max_iter: the maximum number of iterations of the NIPALS inner loop.
+    :type max_iter: int
+    :param tol: Tolerance used in the iterative algorithm, default 1e-06.
+    :type tol: non-negative real
+
+    .. code::
+
+        >>> import numpy as np
+        >>> import mtc
+        >>> import Orange
+        >>> data = Orange.data.Table('emotions')
+        >>> fitter = mtc.PLSClassifierFitter(n_components=5)
+        >>> model = fitter(data)
+        >>> model(data)
+        [[0 0 1 0 0 0]
+         [1 0 0 0 0 1]
+         [0 0 0 0 0 1]
+         ..., 
+         [0 0 1 1 1 0]
+         [0 0 0 0 0 0]
+         [0 1 1 0 0 0]]
+
+    '''
     fitter = SKFitter(sklearn.cross_decomposition.PLSRegression(**kwargs), supports_multiclass=True)
     fitter.supports_multiclass = True
     return fitter
 def curds_whey_predict(X, model, Rmatinv, d):
     return (model(X) * d).dot(Rmatinv)
 
-class CurdsWhey2ClassifierFitter(Orange.classification.Fitter):
+class CurdsWheyClassifierFitter(Orange.classification.Fitter):
     def __init__(self, type='population', lambda1=0, lambda2=0):
         self.supports_multiclass = True
         self.type = type
 
     def fit(self, X, Y, W):
         args = curds_whey_fit(X, Y, type=self.type, lambda1=self.lambda1, lambda2=self.lambda2)
-        return CurdsWhey2ClassifierModel(args)
+        return CurdsWheyClassifierModel(args)
 
-class CurdsWhey2ClassifierModel(Orange.classification.Model):
+class CurdsWheyClassifierModel(Orange.classification.Model):
     def __init__(self, args):
         self.args = args
 
         return np.dstack((1 - P, P))
 
 
-class CurdsWheyClassifierFitter(Orange.classification.Fitter):
+class _CurdsWheyClassifierFitter(Orange.classification.Fitter):
     def __init__(self, type='population'):
         self.supports_multiclass = True
         self.type = type
         B = np.linalg.inv(T).dot(D).dot(T)
         A = XX_.dot(X.T).dot(Y).T
 
-        return CurdsWheyClassifierModel(B.dot(A))
+        return _CurdsWheyClassifierModel(B.dot(A))
 
-class CurdsWheyClassifierModel(Orange.classification.Model):
+class _CurdsWheyClassifierModel(Orange.classification.Model):
     def __init__(self, T):
         self.T = T
 
     return Lmat, Lmatinv, Mmat, Mmatinv, values
 
 def rcc(X, Y, lambda1, lambda2):
-    '''R package cca function'''
+    '''Regularized Canonical Correlation Analysis
+
+    Rewrite of the `rcc` function in `CCA package <http://cran.r-project.org/web/packages/CCA/index.html>`_ for R.
+
+    :param X: matrix containing the X coordinates
+    :type X: array-like, shape = [n_samples, n_features]
+    :param Y: matrix containing the Y coordinates
+    :type Y: array-like, shape = [n_samples, n_responses]
+    :param lambda1: Regularization parameter for X
+    :type lambda1: non-negative real
+    :param lambda2: Regularization parameter for Y
+    :type lambda2: non-negative real
+    :returns: (X_weights, X_weights_inv, Y_weights, Y_weights_inv, correlations)
+
+    .. code::
+
+        >>> import mtc
+        >>> import Orange
+        >>> data = Orange.data.Table('emotions')
+        >>> X_weights, _, Y_weights, _, _ = mtc.rcc(data.X, data.Y, 0.1, 0.1)
+    '''
     #xcenter = X.mean(axis=0)
     #ycenter = Y.mean(axis=0)
     #X = X - xcenter
     #print(Orange.evaluation.cross_validation(model, data, Orange.evaluation.CA, Orange.evaluation.KFold()))
 
     data = Orange.data.Table('emotions')
-
-
+    data.X = (data.X - np.mean(data.X, axis=0)) / np.mean(data.X, axis=0)
 
     #cls = model(data)
     #print(cls(data, cls.Probs)[:,:,1])
 
 
-    #model = BRFitter(SKFitter(sklearn.linear_model.Ridge()))
+    #model = BRFitter(SKFitter(sklearn.linear_model.LogisticRegression()))
+    model = MLPFitter([data.X.shape[1], 50, data.Y.shape[1]], [0.8, 0.5, 1], 0.0, 0.5, 500, 0.1, 5)
     #model = PLSClassifierFitter(n_components=2)
     #model = CurdsWheyClassifierFitter()
     #model = CurdsWhey2ClassifierFitter(lambda1=0.1)