Commits

Aleš Erjavec committed dfa6d31

Preserve the domain's meta attributes and class_vars.

  • Participants
  • Parent commits 94e1207

Comments (0)

Files changed (3)

Orange/feature/selection.py

 
 from operator import itemgetter
 
+import Orange.data
 import Orange.core as orange
 
 from Orange.feature.scoring import score_all
 
 
+def _select_features_subset(data, features):
+    """Select the `features` from the `data`.
+
+    .. note::
+        The `features` must be a subset of the `data.domain.features`.
+
+    """
+    def as_descriptor(arg):
+        """Ensure `arg` is an descriptor from `data.domain`"""
+        return data.domain[arg]
+
+    features = map(as_descriptor, features)
+    domain = Orange.data.Domain(features, data.domain.class_var,
+                                class_vars=data.domain.class_vars)
+    domain.add_metas(data.domain.get_metas())
+    return Orange.data.Table(domain, data)
+
+
 def top_rated(scores, n, highest_best=True):
     """Return n top-rated features from the list of scores.
 
     :param n: number of features to select
     :type n: int
     :rtype: :obj:`Orange.data.Table`
+
     """
-    return data.select(top_rated(scores, n) + [data.domain.classVar.name])
+    features = top_rated(scores, n)
+    return _select_features_subset(data, features)
 
 selectBestNAtts = select
 select_best_n = select
     :type threshold: float
     :rtype: :obj:`Orange.data.Table`
     """
-    return data.select(above_threshold(scores, threshold) + \
-                       [data.domain.classVar.name])
+    features = above_threshold(scores, threshold)
+    return _select_features_subset(data, features)
 
 selectAttsAboveThresh = select_above_threshold
 

Orange/testing/unit/tests/test_feature_selection.py

                                                     self.scores, threshold)
         self.assertEqual(above, [a.name for a in new_data.domain.attributes])
         self.assertEqual(new_data.domain.class_var, self.data.domain.class_var)
-        
-        
-        
-        
-        
-    
+
+    def test_select_features_subset(self):
+        data = Orange.data.Table("lenses")
+
+        d1 = selection._select_features_subset(data, [])
+        self.assertSequenceEqual(d1.domain.features, [])
+        self.assertIs(d1.domain.class_var, data.domain.class_var)
+
+        d1 = selection._select_features_subset(data, [data.domain[0]])
+        self.assertSequenceEqual(d1.domain.features, [data.domain[0]])
+        self.assertIs(d1.domain.class_var, data.domain.class_var)
+
+        domain = Orange.data.Domain(data.domain.features[:2],
+                                    data.domain.class_var,
+                                    class_vars=[data.domain.features[2]])
+        domain.add_metas({-1, data.domain.features[3]})
+        data = Orange.data.Table(domain, data)
+
+        d1 = selection._select_features_subset(data, [data.domain[0]])
+        self.assertSequenceEqual(d1.domain.features, [data.domain[0]])
+        self.assertIs(d1.domain.class_var, data.domain.class_var)
+        self.assertSequenceEqual(d1.domain.class_vars, data.domain.class_vars)
+        self.assertEqual(d1.domain.get_metas(), data.domain.get_metas())
+
 if __name__ == "__main__":
     unittest.main()
-    

docs/reference/rst/Orange.feature.selection.rst

 The module also includes a learner that incorporates feature subset
 selection.
 
+
+.. versionchanged:: 2.7.1
+   `select`, `select_above_threshold` and `select_relief` now preserve
+   the domain's meta attributes and `class_vars`.
+
 --------------------------------------
 Functions for feature subset selection
 --------------------------------------