Commits

Aleš Erjavec committed e4b810f

Added 'multinomial_treatment' parameter to LIBLINEAR-derived learners.

  • Parent commits b02e47b


Files changed (3)

File Orange/classification/logreg.py

 import Orange
 from Orange.utils import deprecated_keywords, deprecated_members
 from Orange.data import preprocess
+from Orange.data.continuization import DomainContinuizer
 import decimal
 import math
 
 Logistic regression learner from LIBLINEAR
 """
 
-from Orange.data import preprocess
+
 class LibLinearLogRegLearner(Orange.core.LinearLearner):
     """A logistic regression learner from `LIBLINEAR`_.
-    
+
     Supports L2 regularized learning.
-    
+
     .. _`LIBLINEAR`: http://www.csie.ntu.edu.tw/~cjlin/liblinear/
-     
+
     """
 
     L2R_LR = Orange.core.LinearLearner.L2R_LR
     __new__ = Orange.utils._orange__new__(base=Orange.core.LinearLearner)
 
     def __init__(self, solver_type=L2R_LR, C=1.0, eps=0.01, normalization=True,
-            bias=-1.0, **kwargs):
+            bias=-1.0, multinomial_treatment=DomainContinuizer.NValues,
+            **kwargs):
         """
-        :param solver_type: One of the following class constants: 
+        :param solver_type: One of the following class constants:
             ``L2_LR``, ``L2_LR_DUAL``, ``L1R_LR``.
-        
-        :param C: Regularization parameter (default 1.0). Higher values of C mean 
-            less regularization (C is a coefficient for the loss function).
+
+        :param C: Regularization parameter (default 1.0). Higher values of
+            C mean less regularization (C is a coefficient for the loss
+            function).
         :type C: float
-        
+
         :param eps: Stopping criteria (default 0.01)
         :type eps: float
-        
+
         :param normalization: Normalize the input data prior to learning
             (default True)
         :type normalization: bool
 
         :param bias: If positive, use it as a bias (default -1).
         :type bias: float
-        
+
+        :param multinomial_treatment: Defines how to handle multinomial
+            features for learning. It can be one of the
+            :class:`~.DomainContinuizer` `multinomial_treatment`
+            constants (default: `DomainContinuizer.NValues`).
+
+        :type multinomial_treatment: int
+
+        .. versionadded:: 2.6.1
+            Added `multinomial_treatment`
+
         """
         self.solver_type = solver_type
         self.C = C
         self.eps = eps
         self.normalization = normalization
         self.bias = bias
+        self.multinomial_treatment = multinomial_treatment
 
         for name, value in kwargs.items():
             setattr(self, name, value)
             raise TypeError("Can only learn a discrete class.")
 
         if data.domain.has_discrete_attributes(False) or self.normalization:
-            dc = Orange.data.continuization.DomainContinuizer()
-            dc.multinomial_treatment = dc.NValues
+            dc = DomainContinuizer()
+            dc.multinomial_treatment = self.multinomial_treatment
             dc.class_treatment = dc.Ignore
             dc.continuous_treatment = \
                     dc.NormalizeByVariance if self.normalization else dc.Leave
-            c_domain = dc(data) 
+            c_domain = dc(data)
             data = data.translate(c_domain)
         return super(LibLinearLogRegLearner, self).__call__(data, weight_id)
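
For context (not part of this commit's diff), a minimal usage sketch of the new parameter on the LIBLINEAR logistic regression learner. It assumes the bundled "lenses" dataset, which the new unit test also uses and which contains only discrete features:

    import Orange
    from Orange.data.continuization import DomainContinuizer
    from Orange.classification.logreg import LibLinearLogRegLearner

    # "lenses" has only discrete features, so multinomial_treatment decides
    # how many columns LIBLINEAR actually sees after continuization.
    data = Orange.data.Table("lenses")

    # NValues (the default): features with more than two values get one
    # indicator column per value; two-valued features stay a single column.
    learner = LibLinearLogRegLearner(
        multinomial_treatment=DomainContinuizer.NValues)
    classifier = learner(data)

    # FrequentIsBase: the most frequent value of each feature is taken as the
    # base, so every feature contributes (number of values - 1) columns.
    learner.multinomial_treatment = DomainContinuizer.FrequentIsBase
    classifier = learner(data)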

File Orange/classification/svm/__init__.py

                         SVMClassifierSparse as _SVMClassifierSparse
 
 from Orange.data import preprocess
+from Orange.data.preprocess import DomainContinuizer
 
 from Orange import feature as variable
 
     __new__ = _orange__new__(base=Orange.core.LinearLearner)
 
     def __init__(self, solver_type=L2R_L2LOSS_DUAL, C=1.0, eps=0.01,
-                 bias=1.0, normalization=True, **kwargs):
+                 bias=1.0, normalization=True,
+                 multinomial_treatment=DomainContinuizer.NValues, **kwargs):
         """
         :param solver_type: One of the following class constants:
             ``L2R_L2LOSS_DUAL``, ``L2R_L2LOSS``,
             to learning (default ``True``)
         :type normalization: bool
 
-        .. note:: If the training data contains discrete features they are
-            replaced by indicator columns one for each value of the feature
-            regardless of the value of `normalization`. This is different
-            then in :class:`SVMLearner` where this is done only if
+        :param multinomial_treatment: Defines how to handle multinomial
+            features for learning. It can be one of the
+            :class:`~.DomainContinuizer` `multinomial_treatment`
+            constants (default: `DomainContinuizer.NValues`).
+
+        :type multinomial_treatment: int
+
+        .. versionadded:: 2.6.1
+            Added `multinomial_treatment`
+
+        .. note:: By default, if the training data contains discrete features,
+            they are replaced by indicator columns, one for each value of the
+            feature, regardless of the value of `normalization`. This differs
+            from :class:`SVMLearner`, where the replacement is done only if
             `normalization` is ``True``.
 
         Example
         self.C = C
         self.bias = bias
         self.normalization = normalization
+        self.multinomial_treatment = multinomial_treatment
 
         for name, val in kwargs.items():
             setattr(self, name, val)
+
         if self.solver_type not in [self.L2R_L2LOSS_DUAL, self.L2R_L2LOSS,
                 self.L2R_L1LOSS_DUAL, self.L1R_L2LOSS]:
-            warnings.warn("""\
-Deprecated 'solver_type', use
-'Orange.classification.logreg.LibLinearLogRegLearner'
-to build a logistic regression model using LIBLINEAR.
-""",
-                DeprecationWarning)
+            warnings.warn(
+                "Deprecated 'solver_type', use "
+                "'Orange.classification.logreg.LibLinearLogRegLearner' "
+                "to build a logistic regression model using LIBLINEAR.",
+                DeprecationWarning
+            )
 
     def __call__(self, data, weight_id=None):
         if not isinstance(data.domain.class_var, variable.Discrete):
             raise TypeError("Can only learn a discrete class.")
 
         if data.domain.has_discrete_attributes(False) or self.normalization:
-            dc = Orange.data.continuization.DomainContinuizer()
-            dc.multinomial_treatment = dc.NValues
+            dc = DomainContinuizer()
+            dc.multinomial_treatment = self.multinomial_treatment
             dc.class_treatment = dc.Ignore
             dc.continuous_treatment = \
                     dc.NormalizeBySpan if self.normalization else dc.Leave
     __new__ = _orange__new__(base=Orange.core.LinearLearner)
 
     def __init__(self, C=1.0, eps=0.01, bias=1.0,
-                 normalization=True, **kwargs):
+                 normalization=True,
+                 multinomial_treatment=DomainContinuizer.NValues,
+                 **kwargs):
         """\
         :param C: Regularization parameter (default 1.0)
         :type C: float
             (default True)
         :type normalization: bool
 
+        :param multinomial_treatment: Defines how to handle multinomial
+            features for learning. It can be one of the
+            :class:`~.DomainContinuizer` `multinomial_treatment`
+            constants (default: `DomainContinuizer.NValues`).
+
+        :type multinomial_treatment: int
+
+        .. versionadded:: 2.6.1
+            Added `multinomial_treatment`
+
         """
         self.C = C
         self.eps = eps
         self.bias = bias
         self.normalization = normalization
+        self.multinomial_treatment = multinomial_treatment
         for name, val in kwargs.items():
             setattr(self, name, val)
 
             raise TypeError("Can only learn a discrete class.")
 
         if data.domain.has_discrete_attributes(False) or self.normalization:
-            dc = Orange.data.continuization.DomainContinuizer()
-            dc.multinomial_treatment = dc.NValues
+            dc = DomainContinuizer()
+            dc.multinomial_treatment = self.multinomial_treatment
             dc.class_treatment = dc.Ignore
             dc.continuous_treatment = \
                     dc.NormalizeBySpan if self.normalization else dc.Leave
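
Likewise (not part of the diff), a sketch of passing the same parameter to LinearSVMLearner; here DomainContinuizer comes from Orange.data.preprocess, as imported above, and ReportError is shown as a way to refuse discrete features outright:

    import Orange
    from Orange.classification.svm import LinearSVMLearner
    from Orange.data.preprocess import DomainContinuizer

    data = Orange.data.Table("lenses")

    # FrequentIsBase keeps the continuized domain smaller than the default
    # NValues encoding (see the assertions in the new test below).
    learner = LinearSVMLearner(
        multinomial_treatment=DomainContinuizer.FrequentIsBase)
    classifier = learner(data)

    # ReportError refuses to continuize data such as lenses; the new test
    # expects Orange.core.KernelException to be raised in that case.
    strict = LinearSVMLearner(
        multinomial_treatment=DomainContinuizer.ReportError)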

File Orange/testing/unit/tests/test_linear.py

 from Orange.testing import testing
 from Orange.testing.testing import datasets_driven
 from Orange.classification.svm import LinearSVMLearner
+from Orange.data.preprocess import DomainContinuizer
 try:
     import unittest2 as unittest
 except:
 import numpy as np
 
 
+def clone(obj):
+    return cPickle.loads(cPickle.dumps(obj))
+
+
 def decision_values(classifier, instance):
     """Return the decision values (numpy.array) for classifying `instance`.
     """
 @testing.test_on_data
 def test_learner_with_bias_on(self, dataset):
     learner = self.learner
-    learner_b = cPickle.loads(cPickle.dumps(learner))
+    learner_b = clone(learner)
     learner_b.bias = 1
     try:
         self.learner = learner_b
         classify_from_weights_test(self, classifier, data)
 
 
+def multinomial_test(self):
+    data = Orange.data.Table("lenses")
+    learner = clone(self.learner)
+    learner.multinomial_treatment = DomainContinuizer.NValues
+    classifier = learner(data)
+    self.assertEqual(len(classifier.domain), 7)
+
+    learner.multinomial_treatment = DomainContinuizer.FrequentIsBase
+    classifier = learner(data)
+    self.assertEqual(len(classifier.domain), 6)
+
+    learner.multinomial_treatment = DomainContinuizer.ReportError
+    with self.assertRaises(Orange.core.KernelException):
+        classifier = learner(data)
+
+
 @datasets_driven(datasets=testing.CLASSIFICATION_DATASETS)
 class TestLinearSVMLearnerL2R_L2LOSS_DUAL(testing.LearnerTestCase):
     LEARNER = LinearSVMLearner(solver_type=LinearSVMLearner.L2R_L2LOSS_DUAL)
     test_learner_on = test_learner_on
     test_learner_with_bias_on = test_learner_with_bias_on
     test_missing_instances = missing_instances_test
+    test_multinomial = multinomial_test
 
 
 @datasets_driven(datasets=testing.CLASSIFICATION_DATASETS)
     test_learner_on = test_learner_on
     test_learner_with_bias_on = test_learner_with_bias_on
     test_missing_instances = missing_instances_test
+    test_multinomial = multinomial_test
 
 
 @datasets_driven(datasets=testing.CLASSIFICATION_DATASETS)
     test_learner_on = test_learner_on
     test_learner_with_bias_on = test_learner_with_bias_on
     test_missing_instances = missing_instances_test
+    test_multinomial = multinomial_test
 
 
 @datasets_driven(datasets=testing.CLASSIFICATION_DATASETS)
     test_learner_on = test_learner_on
     test_learner_with_bias_on = test_learner_with_bias_on
     test_missing_instances = missing_instances_test
+    test_multinomial = multinomial_test
 
 
 @datasets_driven(datasets=testing.CLASSIFICATION_DATASETS)
     test_learner_on = test_learner_on
     test_learner_with_bias_on = test_learner_with_bias_on
     test_missing_instances = missing_instances_test
+    test_multinomial = multinomial_test
 
 
 @datasets_driven(datasets=testing.CLASSIFICATION_DATASETS)
     test_learner_on = test_learner_on
     test_learner_with_bias_on = test_learner_with_bias_on
     test_missing_instances = missing_instances_test
+    test_multinomial = multinomial_test
 
 
 if __name__ == "__main__":