Commits

Aleš Erjavec  committed 356c325

Removed 'Earth' code from Orange (moved to 'orangecontrib.earth' package).

  • Participants
  • Parent commits 030195b

Comments (0)

Files changed (22)

File Orange/OrangeWidgets/Data/OWRank.py

 
 import Orange
 from Orange.feature import scoring
-from Orange.regression import earth
 from Orange.classification import svm
 from Orange.ensemble import forest
 
 
 
 MEASURE_PARAMS = {
-    earth.ScoreEarthImportance: [
-        {"name": "t",
-         "type": int,
-         "display_name": "Num. models.",
-         "range": (1, 20),
-         "default": 10,
-         "doc": "Number of models to train for feature scoring."},
-        {"name": "terms",
-         "type": int,
-         "display_name": "Max. num of terms",
-         "range": (3, 200),
-         "default": 10,
-         "doc": "Maximum number of terms in the forward pass"},
-        {"name": "degree",
-         "type": int,
-         "display_name": "Max. term degree",
-         "range": (1, 3),
-         "default": 2,
-         "doc": "Maximum degree of terms included in the model."}
-    ],
     scoring.Relief: [
         {"name": "k",
          "type": int,
     score_meta(
         "Random Forests", "RF", forest.ScoreFeature,
         params=MEASURE_PARAMS[forest.ScoreFeature]),
-    score_meta(
-        "Earth Importance", "Earth imp.", earth.ScoreEarthImportance,
-        params=MEASURE_PARAMS[earth.ScoreEarthImportance],
-    )
 ]
 
 _DEFAULT_SELECTED = set(m.name for m in SCORES[:6])

File Orange/OrangeWidgets/Regression/OWEarth.py

-"""
-<name>Earth</name>
-<description>Multivariate Adaptive Regression Splines (MARS)</description>
-<category>Regression</category>
-<icon>icons/EarthMars.svg</icon>
-<priority>100</priority>
-<tags>MARS, Multivariate, Adaptive, Regression, Splines</tags>
-"""
-
-from OWWidget import *
-import OWGUI
-import Orange
-
-from Orange.regression import earth
-from orngWrap import PreprocessedLearner
-
-
-class OWEarth(OWWidget):
-    settingsList = ["name", "degree", "terms", "penalty"]
-
-    def __init__(self, parent=None, signalManager=None,
-                 title="Earth"):
-        OWWidget.__init__(self, parent, signalManager, title,
-                          wantMainArea=False)
-
-        self.inputs = [("Data", Orange.data.Table, self.set_data),
-                       ("Preprocessor", PreprocessedLearner,
-                        self.set_preprocessor)]
-
-        self.outputs = [("Learner", earth.EarthLearner, Default),
-                        ("Predictor", earth.EarthClassifier, Default),
-                        ("Basis Matrix", Orange.data.Table)]
-
-        self.name = "Earth Learner"
-        self.degree = 1
-        self.terms = 21
-        self.penalty = 2
-
-        self.loadSettings()
-
-        #####
-        # GUI
-        #####
-
-        OWGUI.lineEdit(self.controlArea, self, "name",
-                       box="Learner/Classifier Name",
-                       tooltip="Name for the learner/predictor")
-
-        box = OWGUI.widgetBox(self.controlArea, "Forward Pass", addSpace=True)
-        OWGUI.spin(box, self, "degree", 1, 3, step=1,
-                   label="Max. term degree",
-                   tooltip="Maximum degree of the terms derived "
-                           "(number of hinge functions).")
-        s = OWGUI.spin(box, self, "terms", 1, 200, step=1,
-                       label="Max. terms",
-                       tooltip="Maximum number of terms derived in the "
-                               "forward pass.")
-        s.control.setSpecialValueText("Automatic")
-
-        box = OWGUI.widgetBox(self.controlArea, "Pruning Pass", addSpace=True)
-        OWGUI.doubleSpin(box, self, "penalty", min=0.0, max=10.0, step=0.25,
-                   label="Knot penalty")
-
-        OWGUI.button(self.controlArea, self, "&Apply",
-                     callback=self.apply)
-
-        self.data = None
-        self.preprocessor = None
-        self.resize(300, 200)
-
-        self.apply()
-
-    def set_data(self, data=None):
-        self.data = data
-
-    def set_preprocessor(self, pproc=None):
-        self.preprocessor = pproc
-
-    def handleNewSignals(self):
-        self.apply()
-
-    def apply(self):
-        learner = earth.EarthLearner(
-            degree=self.degree,
-            terms=self.terms if self.terms >= 2 else None,
-            penalty=self.penalty,
-            name=self.name)
-
-        predictor = None
-        basis_matrix = None
-        if self.preprocessor:
-            learner = self.preprocessor.wrapLearner(learner)
-
-        self.error(0)
-        if self.data is not None:
-            try:
-                predictor = learner(self.data)
-                predictor.name = self.name
-            except Exception, ex:
-                self.error(0, "An error during learning: %r" % ex)
-
-            if predictor is not None:
-                base_features = predictor.base_features()
-                basis_domain = Orange.data.Domain(
-                    base_features,
-                    self.data.domain.class_var,
-                    self.data.domain.class_vars)
-                basis_domain.add_metas(self.data.domain.get_metas())
-                basis_matrix = Orange.data.Table(basis_domain, self.data)
-
-        self.send("Learner", learner)
-        self.send("Predictor", predictor)
-        self.send("Basis Matrix", basis_matrix)
-
-    def sendReport(self):
-        self.reportSettings(
-            "Learning parameters",
-            [("Degree", self.degree),
-             ("Terms", self.terms if self.terms >= 2 else "Automatic"),
-             ("Knot penalty", "%.2f" % self.penalty)
-             ])
-
-        self.reportData(self.data)
-
-if __name__ == "__main__":
-    app = QApplication(sys.argv)
-    w = OWEarth()
-    w.set_data(Orange.data.Table("auto-mpg"))
-    w.show()
-    app.exec_()
-    w.saveSettings()

File Orange/__init__.py

 
 _import("regression")
 _import("regression.base")
-_import("regression.earth")
 _import("regression.lasso")
 _import("regression.linear")
 _import("regression.mean")

File Orange/regression/earth.py

-"""\
-====================================================
-Multivariate Adaptive Regression Splines (``earth``)
-====================================================
-
-.. index:: regression, linear model
-
-`Multivariate adaptive regression splines (MARS)`_ is a non-parametric
-regression method that extends a linear model with non-linear
-interactions.
-
-This module borrows the implementation of the technique from the `Earth R
-package`_ by Stephen Milborrow.
-
-.. _`Multivariate adaptive regression splines (MARS)`:
-        http://en.wikipedia.org/wiki/Multivariate_adaptive_regression_splines
-
-.. _`Earth R package`: http://cran.r-project.org/web/packages/earth/index.html
-
-Example ::
-
-    >>> import Orange
-    >>> data = Orange.data.Table("housing")
-    >>> c = Orange.regression.earth.EarthLearner(data, degree=2, terms=10)
-    >>> print c
-    MEDV =
-       23.587
-       +11.896 * max(0, RM - 6.431)
-       +1.142 * max(0, 6.431 - RM)
-       -0.612 * max(0, LSTAT - 6.120)
-       -228.795 * max(0, NOX - 0.647) * max(0, RM - 6.431)
-       +0.023 * max(0, TAX - 307.000) * max(0, 6.120 - LSTAT)
-       +0.029 * max(0, 307.000 - TAX) * max(0, 6.120 - LSTAT)
-
-
-.. autoclass:: EarthLearner
-    :members:
-
-.. autoclass:: EarthClassifier
-    :members:
-
-
-Utility functions
------------------
-
-.. autofunction:: gcv
-
-.. autofunction:: plot_evimp
-
-.. autofunction:: bagged_evimp
-
-.. autoclass:: ScoreEarthImportance
-
-"""
-
-import Orange
-from Orange.feature import Discrete, Continuous
-from Orange.data import Table, Domain
-from Orange.data.preprocess import DomainContinuizer
-
-import numpy
-
-
-def is_discrete(var):
-    return isinstance(var, Discrete)
-
-
-def is_continuous(var):
-    return isinstance(var, Continuous)
-
-
-def expand_discrete(var):
-    """ Expand a discrete variable ``var`` returning one continuous indicator
-    variable for each value of ``var`` (if the number of values is greater
-    than 2, else return only one indicator variable).
-
-    """
-    if len(var.values) > 2:
-        values = var.values
-    elif len(var.values) == 2:
-        values = var.values[-1:]
-    else:
-        values = var.values[:1]
-    new_vars = []
-    for value in values:
-        new = Continuous("{0}={1}".format(var.name, value))
-        new.get_value_from = cls = Orange.core.ClassifierFromVar(whichVar=var)
-        cls.transformer = Orange.core.Discrete2Continuous()
-        cls.transformer.value = int(Orange.core.Value(var, value))
-        new.source_variable = var
-        new_vars.append(new)
-    return new_vars
-
-
-def select_attrs(table, features, class_var=None,
-                 class_vars=None, metas=None):
-    """ Select only the given ``features`` from the ``table``.
-    """
-    if class_vars is None:
-        domain = Domain(features, class_var)
-    else:
-        domain = Domain(features, class_var, class_vars=class_vars)
-    if metas:
-        domain.add_metas(metas)
-    return Table(domain, table)
-
-
-class EarthLearner(Orange.regression.base.BaseRegressionLearner):
-    """Earth learner class. Supports both regression and classification
-    problems. For classification, class values are expanded into
-    continuous indicator columns (one for each value if the number of
-    values is greater than 2), and a multi-response model is fit to these
-    new columns. The resulting classifier then computes response
-    values on new instances to select the final predicted class.
-
-    """
-    def __new__(cls, instances=None, weight_id=None, **kwargs):
-        self = Orange.regression.base.BaseRegressionLearner.__new__(cls)
-        if instances is not None:
-            self.__init__(**kwargs)
-            return self.__call__(instances, weight_id)
-        else:
-            return self
-
-    def __init__(self, degree=1, terms=21, penalty=None, thresh=1e-3,
-                 min_span=0, new_var_penalty=0, fast_k=20, fast_beta=1,
-                 pruned_terms=None, scale_resp=True, store_instances=True,
-                **kwds):
-        """Initialize the learner instance.
-
-        :param degree: Maximum degree (num. of hinge functions per term)
-            of the terms in the model (default: 1).
-        :type degree: int
-        :param terms: Maximum number of terms in the forward pass
-                (default: 21).  If set to ``None``, ``min(200, max(20, 2
-                * n_attributes)) + 1`` will be used, like the default
-                setting in earth R package.
-        :type terms: int
-        :param penalty: Penalty for hinges in the GCV computation (used
-            in the pruning pass). Default is 3.0 if ``degree`` is above 1,
-            and 2.0 otherwise.
-        :type penalty: float
-        :param thresh: Threshold for RSS decrease in the forward pass
-            (default: 0.001).
-        :type thresh: float
-        :param min_span: TODO.
-        :param new_var_penalty: Penalty for introducing a new variable
-            in the model during the forward pass (default: 0).
-        :type new_var_penalty: float
-        :param fast_k: Fast k.
-        :param fast_beta: Fast beta.
-        :param pruned_terms: Maximum number of terms in the model after
-            pruning (default: ``None``, no limit).
-        :type pruned_terms: int
-        :param scale_resp: Scale responses prior to forward pass (default:
-            ``True``); ignored for models with multiple responses.
-        :type scale_resp: bool
-        :param store_instances: Store training instances in the model
-            (default: ``True``).
-        :type store_instances: bool
-
-        .. todo:: min_span, pruning_method (need Leaps-like functionality,
-            currently only eval_subsets_using_xtx is implemented).
-
-        """
-
-        super(EarthLearner, self).__init__()
-
-        self.degree = degree
-        self.terms = terms
-        if penalty is None:
-            penalty = 3 if degree > 1 else 2
-        self.penalty = penalty
-        self.thresh = thresh
-        self.min_span = min_span
-        self.new_var_penalty = new_var_penalty
-        self.fast_k = fast_k
-        self.fast_beta = fast_beta
-        self.pruned_terms = pruned_terms
-        self.scale_resp = scale_resp
-        self.store_instances = store_instances
-        self.__dict__.update(kwds)
-
-        self.continuizer.class_treatment = DomainContinuizer.Ignore
-
-    def __call__(self, instances, weight_id=None):
-        expanded_class = None
-        multitarget = False
-
-        if instances.domain.class_var:
-            instances = self.impute_table(instances)
-            instances = self.continuize_table(instances)
-
-            if is_discrete(instances.domain.class_var):
-                # Expand a discrete class with indicator columns
-                expanded_class = expand_discrete(instances.domain.class_var)
-                y_table = select_attrs(instances, expanded_class)
-                (y, ) = y_table.to_numpy_MA("A")
-                (x, ) = instances.to_numpy_MA("A")
-            elif is_continuous(instances.domain.class_var):
-                x, y, _ = instances.to_numpy_MA()
-                y = y.reshape((-1, 1))
-            else:
-                raise ValueError("Cannot handle the response.")
-        elif instances.domain.class_vars:
-            # Multi-target domain
-            if not all(map(is_continuous, instances.domain.class_vars)):
-                raise TypeError("Only continuous multi-target classes are supported.")
-            x_table = select_attrs(instances, instances.domain.attributes)
-            y_table = select_attrs(instances, instances.domain.class_vars)
-
-            # Impute and continuize only the x_table
-            x_table = self.impute_table(x_table)
-            x_table = self.continuize_table(x_table)
-            domain = Domain(x_table.domain.attributes,
-                            class_vars=instances.domain.class_vars)
-
-            (x, ) = x_table.to_numpy_MA("A")
-            (y, ) = y_table.to_numpy_MA("A")
-
-            multitarget = True
-        else:
-            raise ValueError("Class variable expected.")
-
-        # check for non-finite values in y.
-        if not numpy.isfinite(y).all():
-            raise ValueError("Non-finite values present in Y")
-
-        # mask non-finite values in x.
-        x = numpy.ma.masked_invalid(x, copy=False)
-
-        if self.scale_resp and y.shape[1] == 1:
-            sy = y - numpy.ma.mean(y, axis=0)
-            sy = sy / numpy.ma.std(sy, axis=0)
-        else:
-            sy = y
-
-        # replace masked values with means.
-        if numpy.ma.is_masked(sy):
-            mean_sy = numpy.ma.mean(sy, axis=0)
-            sy = numpy.where(sy.mask, mean_sy, sy)
-
-        if numpy.ma.is_masked(x):
-            mean_x = numpy.ma.mean(x, axis=0)
-            x = numpy.where(x.mask, mean_x, x)
-
-        terms = self.terms
-        if terms is None:
-            # Automatic maximum number of terms
-            terms = min(200, max(20, 2 * x.shape[1])) + 1
-
-        n_terms, used, bx, dirs, cuts = forward_pass(x, sy,
-            degree=self.degree, terms=terms, penalty=self.penalty,
-            thresh=self.thresh, fast_k=self.fast_k, fast_beta=self.fast_beta,
-            new_var_penalty=self.new_var_penalty)
-
-        # discard unused terms from bx, dirs, cuts
-        bx = bx[:, used]
-        dirs = dirs[used, :]
-        cuts = cuts[used, :]
-
-        # pruning
-        used, subsets, rss_per_subset, gcv_per_subset = \
-            pruning_pass(bx, y, self.penalty,
-                         pruned_terms=self.pruned_terms)
-
-        # Fit betas
-        bx_used = bx[:, used]
-        betas, res, rank, s = numpy.linalg.lstsq(bx_used, y)
-
-        return EarthClassifier(instances.domain, used, dirs, cuts, betas.T,
-                               subsets, rss_per_subset, gcv_per_subset,
-                               instances=instances if self.store_instances else None,
-                               multitarget=multitarget,
-                               expanded_class=expanded_class
-                               )
-
-
-def soft_max(values):
-    values = numpy.asarray(values)
-    return numpy.exp(values) / numpy.sum(numpy.exp(values))
-
-
-class EarthClassifier(Orange.core.ClassifierFD):
-    """ Earth classifier.
-    """
-    def __init__(self, domain, best_set, dirs, cuts, betas, subsets=None,
-                 rss_per_subset=None, gcv_per_subset=None, instances=None,
-                 multitarget=False, expanded_class=None,
-                 original_domain=None, **kwargs):
-        self.multitarget = multitarget
-        self.domain = domain
-        self.class_var = domain.class_var
-        if self.multitarget:
-            self.class_vars = domain.class_vars
-
-        self.best_set = best_set
-        self.dirs = dirs
-        self.cuts = cuts
-        self.betas = betas
-        self.subsets = subsets
-        self.rss_per_subset = rss_per_subset
-        self.gcv_per_subset = gcv_per_subset
-        self.instances = instances
-        self.expanded_class = expanded_class
-        self.original_domain = original_domain
-        self.__dict__.update(kwargs)
-
-    def __call__(self, instance, result_type=Orange.core.GetValue):
-        if self.multitarget and self.domain.class_vars:
-            resp_vars = list(self.domain.class_vars)
-        elif is_discrete(self.class_var):
-            resp_vars = self.expanded_class
-        else:
-            resp_vars = [self.class_var]
-
-        vals = self.predict(instance)
-        vals = [var(val) for var, val in zip(resp_vars, vals)]
-
-        from Orange.statistics.distribution import Distribution
-
-        if not self.multitarget and is_discrete(self.class_var):
-            dist = Distribution(self.class_var)
-            if len(self.class_var.values) == 2:
-                probs = [1 - float(vals[0]), float(vals[0])]
-            else:
-                probs = soft_max(map(float, vals))
-
-            for val, p in zip(self.class_var.values, probs):
-                dist[val] = p
-            value = dist.modus()
-            vals, probs = [value], [dist]
-        else:
-            probs = []
-            for var, val in zip(resp_vars, vals):
-                dist = Distribution(var)
-                dist[val] = 1.0
-                probs.append(dist)
-
-        if not self.multitarget:
-            vals, probs = vals[0], probs[0]
-
-        if result_type == Orange.core.GetValue:
-            return vals
-        elif result_type == Orange.core.GetBoth:
-            return vals, probs
-        else:
-            return probs
-
-    def base_matrix(self, instances=None):
-        """Return the base matrix (bx) of the Earth model for the table.
-        If table is not supplied, the base matrix of the training instances
-        is returned.
-        Base matrix is a len(instances) x num_terms matrix of computed values
-        of terms in the model (not multiplied by beta) for each instance.
-
-        :param instances: Input instances for the base matrix.
-        :type instances: :class:`Orange.data.Table`
-
-        """
-        if instances is None:
-            instances = self.instances
-        instances = select_attrs(instances, self.domain.attributes)
-        (data,) = instances.to_numpy_MA("A")
-        bx = base_matrix(data, self.best_set, self.dirs, self.cuts)
-        return bx
-
-    def base_features(self):
-        """Return a list of features for the included Earth terms.
-        The attributes can be used in Orange's domain translation
-        (i.e. they define the proper ``get_value_from`` functions).
-
-        """
-        terms = []
-        dirs = self.dirs[self.best_set]
-        cuts = self.cuts[self.best_set]
-        # For faster domain translation all the features share
-        # this _instance_cache.
-        _instance_cache = {}
-        for dir, cut in zip(dirs[1:], cuts[1:]):  # Drop the intercept (first column)
-            hinge = [_format_knot(self, attr.name, dir1, cut1) \
-                     for (attr, dir1, cut1) in \
-                     zip(self.domain.attributes, dir, cut) \
-                     if dir1 != 0.0]
-            term_name = " * ".join(hinge)
-            term = Orange.feature.Continuous(term_name)
-            term.get_value_from = term_computer(
-                term, self.domain, dir, cut,
-                _instance_cache=_instance_cache
-            )
-
-            terms.append(term)
-        return terms
-
-    def predict(self, instance):
-        """ Predict the response value(s)
-
-        :param instance: Data instance
-        :type instance: :class:`Orange.data.Instance`
-
-        """
-        data = Orange.data.Table(self.domain, [instance])
-        bx = self.base_matrix(data)
-        bx_used = bx[:, self.best_set]
-        vals = numpy.dot(bx_used, self.betas.T).ravel()
-        return vals
-
-    def used_attributes(self, term=None):
-        """Return the used terms for term (index). If no term is given,
-        return all attributes in the model.
-
-        :param term: term index
-        :type term: int
-
-        """
-        if term is None:
-            return reduce(set.union, [self.used_attributes(i) \
-                                      for i in range(self.best_set.size)],
-                          set())
-
-        attrs = self.domain.attributes
-
-        used_mask = self.dirs[term, :] != 0.0
-        return [a for a, u in zip(attrs, used_mask) if u]
-
-    def evimp(self, used_only=True):
-        """ Return the estimated variable importances.
-
-        :param used_only: if True return only used attributes
-
-        """
-        return evimp(self, used_only)
-
-    def __reduce__(self):
-        return (type(self), (self.domain, self.best_set, self.dirs,
-                            self.cuts, self.betas),
-                dict(self.__dict__))
-
-    def to_string(self, percision=3, indent=3):
-        """ Return a string representation of the model.
-        """
-        return format_model(self, percision, indent)
-
-    def __str__(self):
-        return self.to_string()
-
-"""
-Utility functions
------------------
-"""
-
-
-def base_matrix(data, best_set, dirs, cuts):
-    """ Return the base matrix for the earth model.
-
-    :param data: Input data
-    :type data: :class:`numpy.ndarray`
-
-    :param best_set: An array of booleans indicating used terms.
-    :type best_set: :class:`numpy.ndarray`
-
-    :param dirs: Earth model's dirs members
-    :type dirs: :class:`numpy.ndarray`
-
-    :param cuts: Earth model's cuts members
-    :type cuts: :class:`numpy.ndarray`
-
-    """
-    data = numpy.asarray(data)
-    best_set = numpy.asarray(best_set)
-    dirs = numpy.asarray(dirs)
-    cuts = numpy.asarray(cuts)
-
-    bx = numpy.zeros((data.shape[0], best_set.shape[0]))
-    bx[:, 0] = 1.0  # The intercept
-    for termi in range(1, best_set.shape[0]):
-        term_dirs = dirs[termi]
-        term_cuts = cuts[termi]
-
-        dir_p1 = numpy.where(term_dirs == 1)[0]
-        dir_m1 = numpy.where(term_dirs == -1)[0]
-        dir_2 = numpy.where(term_dirs == 2)[0]
-
-        x1 = data[:, dir_p1] - term_cuts[dir_p1]
-        x2 = term_cuts[dir_m1] - data[:, dir_m1]
-        x3 = data[:, dir_2]
-
-        x1 = numpy.where(x1 > 0.0, x1, 0.0)
-        x2 = numpy.where(x2 > 0.0, x2, 0.0)
-
-        X = numpy.hstack((x1, x2, x3))
-        X = numpy.cumprod(X, axis=1)
-        bx[:, termi] = X[:, -1] if X.size else 0.0
-
-    return bx
-
-
-def gcv(rss, n, n_effective_params):
-    """ Return the generalized cross validation.
-
-    .. math:: gcv = rss / (n * (1 - NumEffectiveParams / n) ^ 2)
-
-    :param rss: Residual sum of squares.
-    :param n: Number of training instances.
-    :param n_effective_params: Number of effective parameters.
-
-    """
-    return  rss / (n * (1. - n_effective_params / n) ** 2)
-
-
-class term_computer(Orange.core.ClassifierFD):
-    """A utility class for computing basis terms. Can be used as
-    a :obj:`~Orange.feature.Descriptor.get_value_from` member.
-
-    """
-    def __init__(self, term_var=None, domain=None, dir=None, cut=None,
-                 _instance_cache=None):
-        self.class_var = term_var
-        self.domain = domain
-
-        self.dir = dir
-        self.cut = cut
-
-        if dir is not None:
-            self.mask = self.dir != 0
-            self.masked_dir = self.dir[self.mask]
-            self.masked_cut = self.cut[self.mask]
-        else:
-            # backcompat. with old pickled format.
-            self.mask = self.masked_dir = self.masked_cut = None
-
-        self._instance_cache = _instance_cache
-
-    def __call__(self, instance, return_what=Orange.core.GetValue):
-        instance = self._instance_as_masked_array(instance)
-
-        if self.mask is None:
-            self.mask = self.dir != 0
-            self.masked_dir = self.dir[self.mask]
-            self.masked_cut = self.cut[self.mask]
-
-        values = instance[self.mask]
-        if numpy.ma.is_masked(values):
-            # Can't compute the term.
-            return self.class_var("?")
-
-        # Works faster with plain arrays
-        values = numpy.array(values)
-        values -= self.masked_cut
-        values *= self.masked_dir
-
-        values[values < 0] = 0
-        value = numpy.prod(values)
-
-        return self.class_var(value)
-
-    def _instance_as_masked_array(self, instance):
-        array = None
-        if self._instance_cache is not None:
-            array = self._instance_cache.get(instance, None)
-
-        if array is None:
-            table = Orange.data.Table(self.domain, [instance])
-            (array,) = table.to_numpy_MA("A")
-            array = array[0]
-
-            if self._instance_cache is not None:
-                self._instance_cache.clear()
-                self._instance_cache[instance] = array
-        return array
-
-    def __reduce__(self):
-        return (type(self), (self.class_var, self.domain, self.dir, self.cut),
-                dict(self.__dict__.items()))
-
-
-"""
-Multi-label utility functions
-"""
-
-
-"""
-ctypes interface to ForwardPass and EvalSubsetsUsingXtx.
-"""
-
-import ctypes
-from numpy import ctypeslib
-import orange
-
-_c_orange_lib = ctypeslib.load_library(orange.__file__, "")
-_c_forward_pass_ = _c_orange_lib.EarthForwardPass
-
-_c_forward_pass_.argtypes = \
-    [ctypes.POINTER(ctypes.c_int),  # pnTerms:
-     ctypeslib.ndpointer(dtype=ctypes.c_bool, ndim=1),  # FullSet
-     ctypeslib.ndpointer(dtype=ctypes.c_double, ndim=2, flags="F_CONTIGUOUS"), # bx
-     ctypeslib.ndpointer(dtype=ctypes.c_int, ndim=2, flags="F_CONTIGUOUS"),    # Dirs
-     ctypeslib.ndpointer(dtype=ctypes.c_double, ndim=2, flags="F_CONTIGUOUS"), # Cuts
-     ctypeslib.ndpointer(dtype=ctypes.c_int, ndim=1),  # nFactorsInTerms
-     ctypeslib.ndpointer(dtype=ctypes.c_int, ndim=1),  # nUses
-     ctypeslib.ndpointer(dtype=ctypes.c_double, ndim=2, flags="F_CONTIGUOUS"), # x
-     ctypeslib.ndpointer(dtype=ctypes.c_double, ndim=2, flags="F_CONTIGUOUS"), # y
-     ctypeslib.ndpointer(dtype=ctypes.c_double, ndim=1), # Weights
-     ctypes.c_int,  # nCases
-     ctypes.c_int,  # nResp
-     ctypes.c_int,  # nPred
-     ctypes.c_int,  # nMaxDegree
-     ctypes.c_int,  # nMaxTerms
-     ctypes.c_double,   # Penalty
-     ctypes.c_double,   # Thresh
-     ctypes.c_int,  # nFastK
-     ctypes.c_double,   # FastBeta
-     ctypes.c_double,   # NewVarPenalty
-     ctypeslib.ndpointer(dtype=ctypes.c_int, ndim=1),  # LinPreds
-     ctypes.c_bool, # UseBetaCache
-     ctypes.c_char_p    # sPredNames
-     ]
-
-
-def forward_pass(x, y, degree=1, terms=21, penalty=None, thresh=0.001,
-                  fast_k=21, fast_beta=1, new_var_penalty=2):
-    """ Do earth forward pass.
-    """
-    x = numpy.asfortranarray(x, dtype=ctypes.c_double)
-    y = numpy.asfortranarray(y, dtype=ctypes.c_double)
-    if x.shape[0] != y.shape[0]:
-        raise ValueError("First dimensions of x and y must be the same.")
-    if y.ndim == 1:
-        y = y.reshape((-1, 1), order="F")
-    if penalty is None:
-        penalty = 2
-    n_cases = x.shape[0]
-    n_preds = x.shape[1]
-
-    n_resp = y.shape[1] if y.ndim == 2 else y.shape[0]
-
-    # Output variables
-    n_term = ctypes.c_int()
-    full_set = numpy.zeros((terms,), dtype=ctypes.c_bool, order="F")
-    bx = numpy.zeros((n_cases, terms), dtype=ctypes.c_double, order="F")
-    dirs = numpy.zeros((terms, n_preds), dtype=ctypes.c_int, order="F")
-    cuts = numpy.zeros((terms, n_preds), dtype=ctypes.c_double, order="F")
-    n_factors_in_terms = numpy.zeros((terms,), dtype=ctypes.c_int, order="F")
-    n_uses = numpy.zeros((n_preds,), dtype=ctypes.c_int, order="F")
-    weights = numpy.ones((n_cases,), dtype=ctypes.c_double, order="F")
-    lin_preds = numpy.zeros((n_preds,), dtype=ctypes.c_int, order="F")
-    use_beta_cache = True
-
-    # These tests are performed in ForwardPass, and if they fail the function
-    # calls exit. So we must check it here and raise an exception to avoid a
-    # process shutdown.
-    if n_cases < 8:
-        raise ValueError("Need at least 8 data instances.")
-    if n_cases > 1e8:
-        raise ValueError("To many data instances.")
-    if n_resp < 1:
-        raise ValueError("No response column.")
-    if n_resp > 1e6:
-        raise ValueError("To many response columns.")
-    if n_preds < 1:
-        raise ValueError("No predictor columns.")
-    if n_preds > 1e5:
-        raise ValueError("To many predictor columns.")
-    if degree <= 0 or degree > 100:
-        raise ValueError("Invalid 'degree'.")
-    if terms < 3 or terms > 10000:
-        raise ValueError("'terms' must be in >= 3 and <= 10000.")
-    if penalty < 0 and penalty != -1:
-        raise ValueError("Invalid 'penalty' (the only legal negative value is -1).")
-    if penalty > 1000:
-        raise ValueError("Invalid 'penalty' (must be <= 1000).")
-    if thresh < 0.0 or thresh >= 1.0:
-        raise ValueError("Invalid 'thresh' (must be in [0.0, 1.0) ).")
-    if fast_beta < 0 or fast_beta > 1000:
-        raise ValueError("Invalid 'fast_beta' (must be in [0, 1000] ).")
-    if new_var_penalty < 0 or new_var_penalty > 10:
-        raise ValueError("Invalid 'new_var_penalty' (must be in [0, 10] ).")
-    if (numpy.var(y, axis=0) <= 1e-8).any():
-        raise ValueError("Variance of y is zero (or near zero).")
-
-    _c_forward_pass_(ctypes.byref(n_term), full_set, bx, dirs, cuts,
-                     n_factors_in_terms, n_uses, x, y, weights, n_cases,
-                     n_resp, n_preds, degree, terms, penalty, thresh,
-                     fast_k, fast_beta, new_var_penalty, lin_preds,
-                     use_beta_cache, None)
-    return n_term.value, full_set, bx, dirs, cuts
-
-
-_c_eval_subsets_xtx = _c_orange_lib.EarthEvalSubsetsUsingXtx
-
-_c_eval_subsets_xtx.argtypes = \
-    [ctypeslib.ndpointer(dtype=ctypes.c_bool, ndim=2, flags="F_CONTIGUOUS"),   # PruneTerms
-     ctypeslib.ndpointer(dtype=ctypes.c_double, ndim=1),   # RssVec
-     ctypes.c_int,  # nCases
-     ctypes.c_int,  # nResp
-     ctypes.c_int,  # nMaxTerms
-     ctypeslib.ndpointer(dtype=ctypes.c_double, ndim=2, flags="F_CONTIGUOUS"),  # bx
-     ctypeslib.ndpointer(dtype=ctypes.c_double, ndim=2, flags="F_CONTIGUOUS"),  # y
-     ctypeslib.ndpointer(dtype=ctypes.c_double, ndim=1)  # WeightsArg
-     ]
-
-_c_eval_subsets_xtx.restype = ctypes.c_int
-
-
-def subset_selection_xtx(X, Y):
-    """ Subsets selection using EvalSubsetsUsingXtx in the Earth package.
-    """
-    X = numpy.asfortranarray(X, dtype=ctypes.c_double)
-    Y = numpy.asfortranarray(Y, dtype=ctypes.c_double)
-    if Y.ndim == 1:
-        Y = Y.reshape((-1, 1), order="F")
-
-    if X.shape[0] != Y.shape[0]:
-        raise ValueError("First dimensions of bx and y must be the same")
-
-    var_count = X.shape[1]
-    resp_count = Y.shape[1]
-    cases = X.shape[0]
-    subsets = numpy.zeros((var_count, var_count), dtype=ctypes.c_bool,
-                              order="F")
-    rss_vec = numpy.zeros((var_count,), dtype=ctypes.c_double, order="F")
-    weights = numpy.ones((cases,), dtype=ctypes.c_double, order="F")
-
-    rval = _c_eval_subsets_xtx(subsets, rss_vec, cases, resp_count, var_count,
-                        X, Y, weights)
-    if rval == 1:
-        raise numpy.linalg.LinAlgError("Lin. dep. terms in X")
-    elif rval == 2:
-        raise Exception("Trying to prune the intercept.")
-    elif rval != 0:
-        raise Exception("Error %i" % rval)
-
-    subsets_ind = numpy.zeros((var_count, var_count), dtype=int)
-    for i, used in enumerate(subsets.T):
-        subsets_ind[i, :i + 1] = numpy.where(used)[0]
-
-    return subsets_ind, rss_vec
-
-
-def subset_selection_xtx_numpy(X, Y):
-    """ A numpy implementation of EvalSubsetsUsingXtx in the Earth package.
-    Using numpy.linalg.lstsq
-
-    """
-    X = numpy.asarray(X)
-    Y = numpy.asarray(Y)
-
-    var_count = X.shape[1]
-    rss_vec = numpy.zeros(var_count)
-    working_set = range(var_count)
-    subsets = numpy.zeros((var_count, var_count), dtype=int)
-
-    for subset_size in reversed(range(var_count)):
-        subsets[subset_size, :subset_size + 1] = working_set
-        X_work = X[:, working_set]
-        b, res, rank, s = numpy.linalg.lstsq(X_work, Y)
-        if res.size > 0:
-            rss_vec[subset_size] = numpy.sum(res)
-        else:
-            rss_vec[subset_size] = numpy.sum((Y - numpy.dot(X_work, b)) ** 2)
-
-        XtX = numpy.dot(X_work.T, X_work)
-        iXtX = numpy.linalg.pinv(XtX)
-        diag = numpy.diag(iXtX).reshape((-1, 1))
-
-        if subset_size == 0:
-            break
-
-        delta_rss = b ** 2 / diag
-        delta_rss = numpy.sum(delta_rss, axis=1)
-        delete_i = numpy.argmin(delta_rss[1:]) + 1  # Keep the intercept
-        del working_set[delete_i]
-    return subsets, rss_vec
-
-
-def subset_selection_xtx2(X, Y):
-    """ Another implementation (this uses qr decomp).
-    """
-    from Orange.misc import linalg
-    X = numpy.asfortranarray(X, dtype=ctypes.c_double)
-    Y = numpy.asfortranarray(Y, dtype=ctypes.c_double)
-    col_count = X.shape[1]
-    working_set = range(col_count)
-    subsets = numpy.zeros((col_count, col_count), dtype=int)
-    rss_vec = numpy.zeros((col_count,))
-    QR, k, _, jpvt = linalg.qr_decomp(X)
-
-    if k < col_count:
-        # remove jpvt[k:] from the work set. Will have zero 
-        # entries in the subsets matrix, and inf rss
-        for i in sorted(jpvt[k:], reverse=True):
-            del working_set[i]
-            rss_vec[len(working_set)] = float("inf")
-        col_count = len(working_set)
-
-    for subset_size in reversed(range(col_count)):
-        subsets[subset_size, :subset_size + 1] = working_set
-        X_work = X[:, working_set]
-        b, rsd, rank = linalg.qr_lstsq(X_work, Y)
-        rss_vec[subset_size] = numpy.sum(rsd ** 2)
-        XtX = numpy.dot(X_work.T, X_work)
-        iXtX = numpy.linalg.pinv(XtX)
-        diag = numpy.diag(iXtX)
-
-        if subset_size == 0:
-            break
-
-        delta_rss = b ** 2 / diag
-        delete_i = numpy.argmin(delta_rss[1:]) + 1  # Keep the intercept
-        del working_set[delete_i]
-    return subsets, rss_vec
-
-
-def pruning_pass(bx, y, penalty, pruned_terms=-1):
-    """ Do pruning pass
-
-    .. todo:: pruned_terms, Leaps
-
-    """
-    try:
-        subsets, rss_vec = subset_selection_xtx(bx, y)
-    except numpy.linalg.LinAlgError:
-        subsets, rss_vec = subset_selection_xtx_numpy(bx, y)
-
-    cases, terms = bx.shape
-    n_effective_params = numpy.arange(terms) + 1.0
-    n_effective_params += penalty * (n_effective_params - 1.0) / 2.0
-
-    gcv_vec = gcv(rss_vec, cases, n_effective_params)
-
-    min_i = numpy.argmin(gcv_vec)
-    used = numpy.zeros((terms), dtype=bool)
-
-    used[subsets[min_i, :min_i + 1]] = True
-
-    return used, subsets, rss_vec, gcv_vec
-
-"""
-Printing functions.
-"""
-
-
-def format_model(model, percision=3, indent=3):
-    """ Return a formated string representation of the earth model.
-    """
-    if model.multitarget:
-        r_vars = list(model.domain.class_vars)
-    elif is_discrete(model.class_var):
-        r_vars = model.expanded_class
-    else:
-        r_vars = [model.class_var]
-
-    r_names = [v.name for v in r_vars]
-    betas = model.betas
-
-    resp = []
-    for name, betas in zip(r_names, betas):
-        resp.append(_format_response(model, name, betas,
-                                     percision, indent))
-    return "\n\n".join(resp)
-
-
-def _format_response(model, resp_name, betas, percision=3, indent=3):
-    header = "%s =" % resp_name
-    indent = " " * indent
-    fmt = "%." + str(percision) + "f"
-    terms = [([], fmt % betas[0])]
-    beta_i = 0
-    for i, used in enumerate(model.best_set[1:], 1):
-        if used:
-            beta_i += 1
-            beta = fmt % abs(betas[beta_i])
-            knots = [_format_knot(model, attr.name, d, c, percision) \
-                     for d, c, attr in \
-                     zip(model.dirs[i], model.cuts[i], model.domain.attributes) \
-                     if d != 0]
-            term_attrs = [a for a, d in zip(model.domain.attributes, model.dirs[i]) \
-                          if d != 0]
-            term_attrs = sorted(term_attrs)
-            sign = "-" if betas[beta_i] < 0 else "+"
-            if knots:
-                terms.append((term_attrs,
-                              sign + " * ".join([beta] + knots)))
-            else:
-                terms.append((term_attrs, sign + beta))
-    # Sort by len(term_attrs), then by term_attrs
-    terms = sorted(terms, key=lambda t: (len(t[0]), t[0]))
-    return "\n".join([header] + [indent + t for _, t in terms])
-
-
-def _format_knot(model, name, dir, cut, percision=3):
-    fmt = "%%.%if" % percision
-    if dir == 1:
-        txt = ("max(0, %s - " + fmt + ")") % (name, cut)
-    elif dir == -1:
-        txt = ("max(0, " + fmt + " - %s)") % (cut, name)
-    elif dir == 2:
-        txt = name
-    return txt
-
-
-"""\
-Variable importance estimation
-------------------------------
-"""
-
-from collections import defaultdict
-
-
-def collect_source(vars):
-    """ Given a list of variables ``var``, return a mapping from source
-    variables (``source_variable`` or ``get_value_from.variable`` members)
-    back to the variables in ``vars`` (assumes the default preprocessor in
-    EarthLearner).
-
-    """
-    source = defaultdict(list)
-    for var in vars:
-        svar = None
-        if var.source_variable:
-            source[var.source_variable].append(var)
-        elif isinstance(var.get_value_from, Orange.core.ClassifierFromVar):
-            source[var.get_value_from.variable].append(var)
-        elif isinstance(var.get_value_from, Orange.core.ImputeClassifier):
-            source[var.get_value_from.classifier_from_var.variable].append(var)
-        else:
-            source[var].append(var)
-    return dict(source)
-
-
-def map_to_source_var(var, sources):
-    """
-    """
-    if var in sources:
-        return var
-    elif var.source_variable in sources:
-        return var.source_variable
-    elif isinstance(var.get_value_from, Orange.core.ClassifierFromVar):
-        return map_to_source_var(var.get_value_from.variable, sources)
-    elif isinstance(var.get_value_from, Orange.core.ImputeClassifier):
-        var = var.get_value_from.classifier_from_var.variable
-        return map_to_source_var(var, sources)
-    else:
-        return None
-
-
-def evimp(model, used_only=True):
-    """ Return the estimated variable importance for the model.
-
-    :param model: Earth model.
-    :type model: `EarthClassifier`
-
-    """
-    if model.subsets is None:
-        raise ValueError("No subsets. Use the learner with 'prune=True'.")
-
-    subsets = model.subsets
-    n_subsets = numpy.sum(model.best_set)
-
-    rss = -numpy.diff(model.rss_per_subset)
-    gcv = -numpy.diff(model.gcv_per_subset)
-    attributes = list(model.domain.variables)
-
-    attr2ind = dict(zip(attributes, range(len(attributes))))
-    importances = numpy.zeros((len(attributes), 4))
-    importances[:, 0] = range(len(attributes))
-
-    for i in range(1, n_subsets):
-        term_subset = subsets[i, :i + 1]
-        used_attributes = reduce(set.union, [model.used_attributes(term) \
-                                             for term in term_subset], set())
-        for attr in used_attributes:
-            importances[attr2ind[attr]][1] += 1.0
-            importances[attr2ind[attr]][2] += gcv[i - 1]
-            importances[attr2ind[attr]][3] += rss[i - 1]
-    imp_min = numpy.min(importances[:, [2, 3]], axis=0)
-    imp_max = numpy.max(importances[:, [2, 3]], axis=0)
-
-    #Normalize importances.
-    importances[:, [2, 3]] = 100.0 * (importances[:, [2, 3]] \
-                            - [imp_min]) / ([imp_max - imp_min])
-
-    importances = list(importances)
-    # Sort by n_subsets and gcv.
-    importances = sorted(importances, key=lambda row: (row[1], row[2]),
-                         reverse=True)
-    importances = numpy.array(importances)
-
-    if used_only:
-        importances = importances[importances[:, 1] > 0.0]
-
-    res = [(attributes[int(row[0])], tuple(row[1:])) for row in importances]
-    return res
-
-
-def plot_evimp(evimp):
-    """ Plot the variable importances as returned from
-    :obj:`EarthClassifier.evimp` call.
-
-    ::
-
-        import Orange
-        data = Orange.data.Table("housing")
-        c = Orange.regression.earth.EarthLearner(data, degree=3)
-        Orange.regression.earth.plot_evimp(c.evimp())
-
-    .. image:: files/earth-evimp.png
-
-    The left axis is the nsubsets measure and on the right are the normalized
-    RSS and GCV.
-
-    """
-    from Orange.ensemble.bagging import BaggedClassifier
-    if isinstance(evimp, EarthClassifier):
-        evimp = evimp.evimp()
-    elif isinstance(evimp, BaggedClassifier):
-        evimp = bagged_evimp(evimp)
-
-    import pylab
-    fig = pylab.figure()
-    axes1 = fig.add_subplot(111)
-    attrs = [a for a, _ in evimp]
-    imp = [s for _, s in evimp]
-    imp = numpy.array(imp)
-    X = range(len(attrs))
-    l1 = axes1.plot(X, imp[:, 0], "b-", label="nsubsets")
-    axes2 = axes1.twinx()
-
-    l2 = axes2.plot(X, imp[:, 1], "g-", label="gcv")
-    l3 = axes2.plot(X, imp[:, 2], "r-", label="rss")
-
-    x_axis = axes1.xaxis
-    x_axis.set_ticks(X)
-    x_axis.set_ticklabels([a.name for a in attrs], rotation=90)
-
-    axes1.yaxis.set_label_text("nsubsets")
-    axes2.yaxis.set_label_text("normalized gcv or rss")
-
-    axes1.legend((l1[0], l2[0], l3[0]), ("nsubsets", "gcv", "rss"))
-
-    axes1.set_title("Variable importance")
-    fig.show()
-
-
-def bagged_evimp(classifier, used_only=True):
-    """ Extract combined (average) evimp from an instance of BaggedClassifier
-
-    Example::
-
-        from Orange.ensemble.bagging import BaggedLearner
-        bc = BaggedLearner(EarthLearner(degree=3, terms=10), data)
-        bagged_evimp(bc)
-
-    """
-    def assert_type(object, class_):
-        if not isinstance(object, class_):
-            raise TypeError("Instance of %r expected." % (class_))
-
-    from Orange.ensemble.bagging import BaggedClassifier
-
-    assert_type(classifier, BaggedClassifier)
-    bagged_imp = defaultdict(list)
-    attrs_by_name = defaultdict(list)
-    for c in classifier.classifiers:
-        assert_type(c, EarthClassifier)
-        imp = evimp(c, used_only=used_only)
-        for attr, score in imp:
-            bagged_imp[attr.name].append(score)  # map by name
-            attrs_by_name[attr.name].append(attr)
-
-    for attr, scores in bagged_imp.items():
-        scores = numpy.average(scores, axis=0)
-        bagged_imp[attr] = tuple(scores)
-
-    bagged_imp = sorted(bagged_imp.items(),
-                        key=lambda t: (t[1][0], t[1][1]),
-                        reverse=True)
-
-    bagged_imp = [(attrs_by_name[name][0], scores) for name, scores in bagged_imp]
-
-    if used_only:
-        bagged_imp = [(a, r) for a, r in bagged_imp if r[0] > 0]
-    return bagged_imp
-
-"""
-High level interface for measuring variable importance
-(compatible with Orange.feature.scoring module).
-
-"""
-from Orange.feature import scoring
-
-
-class ScoreEarthImportance(scoring.Score):
-    """ A subclass of :class:`Orange.feature.scoring.Score` that.
-    scores features based on their importance in the Earth
-    model using ``bagged_evimp``.
-
-    """
-    # Return types
-    NSUBSETS = 0
-    RSS = 1
-    GCV = 2
-
-    handles_discrete = True
-    handles_continuous = True
-    computes_thresholds = False
-    needs = scoring.Score.Generator
-
-    def __new__(cls, attr=None, data=None, weight_id=None, **kwargs):
-        self = scoring.Score.__new__(cls)
-        if attr is not None and data is not None:
-            self.__init__(**kwargs)
-            # TODO: Should raise a warning, about caching
-            return self.__call__(attr, data, weight_id)
-        elif not attr and not data:
-            return self
-        else:
-            raise ValueError("Both 'attr' and 'data' arguments expected.")
-
-    def __init__(self, t=10, degree=2, terms=10, score_what="nsubsets",
-                 cached=True):
-        """
-        :param t: Number of earth models to train on the data
-            (using BaggedLearner).
-
-        :param score_what: What to return as a score.
-            Can be one of: "nsubsets", "rss", "gcv" or class constants
-            NSUBSETS, RSS, GCV.
-
-        """
-        self.t = t
-        self.degree = degree
-        self.terms = terms
-        if isinstance(score_what, basestring):
-            score_what = {"nsubsets": self.NSUBSETS, "rss": self.RSS,
-                          "gcv": self.GCV}.get(score_what, None)
-
-        if score_what not in range(3):
-            raise ValueError("Invalid  'score_what' parameter.")
-
-        self.score_what = score_what
-        self.cached = cached
-        self._cache_ref = None
-        self._cached_evimp = None
-
-    def __call__(self, attr, data, weight_id=None):
-        ref = self._cache_ref
-        if ref is not None and ref is data:
-            evimp = self._cached_evimp
-        else:
-            from Orange.ensemble.bagging import BaggedLearner
-            bc = BaggedLearner(EarthLearner(degree=self.degree,
-                            terms=self.terms), t=self.t)(data, weight_id)
-            evimp = bagged_evimp(bc, used_only=False)
-            self._cache_ref = data
-            self._cached_evimp = evimp
-
-        evimp = dict(evimp)
-        score = evimp.get(attr, None)
-
-        if score is None:
-            source = collect_source(evimp.keys())
-            if attr in source:
-                # Return average of source var scores
-                return numpy.average([evimp[v][self.score_what] \
-                                      for v in source[attr]])
-            else:
-                raise ValueError("Attribute %r not in the domain." % attr)
-        else:
-            return score[self.score_what]
-
-
-class ScoreRSS(scoring.Score):
-
-    handles_discrete = False
-    handles_continuous = True
-    computes_thresholds = False
-
-    def __new__(cls, attr=None, data=None, weight_id=None, **kwargs):
-        self = scoring.Score.__new__(cls)
-        if attr is not None and data is not None:
-            self.__init__(**kwargs)
-            # TODO: Should raise a warning, about caching
-            return self.__call__(attr, data, weight_id)
-        elif not attr and not data:
-            return self
-        else:
-            raise ValueError("Both 'attr' and 'data' arguments expected.")
-
-    def __init__(self):
-        self._cache_data = None
-        self._cache_rss = None
-
-    def __call__(self, attr, data, weight_id=None):
-        ref = self._cache_data
-        if ref is not None and ref is data:
-            rss = self._cache_rss
-        else:
-            x, y = data.to_numpy_MA("1A/c")
-            try:
-                subsets, rss = subset_selection_xtx2(x, y)
-            except numpy.linalg.LinAlgError:
-                subsets, rss = subset_selection_xtx_numpy(x, y)
-            rss_diff = -numpy.diff(rss)
-            rss = numpy.zeros_like(rss)
-            for s_size in range(1, subsets.shape[0]):
-                subset = subsets[s_size, :s_size + 1]
-                rss[subset] += rss_diff[s_size - 1]
-            rss = rss[1:]  # Drop the intercept
-            self._cache_data = data
-            self._cache_rss = rss
-
-        index = list(data.domain.attributes).index(attr)
-        return rss[index]

File Orange/testing/unit/tests/test_earth.py

-import Orange
-from Orange.testing import testing
-from Orange.testing.testing import datasets_driven, test_on_data
-from Orange.regression import earth
-import numpy
-
-try:
-    import unittest2 as unittest
-except:
-    import unittest
-
-
-@datasets_driven(datasets=testing.REGRESSION_DATASETS + \
-                 testing.CLASSIFICATION_DATASETS)
-class TestEarthLearner(testing.LearnerTestCase):
-
-    def setUp(self):
-        self.learner = earth.EarthLearner(degree=2, terms=10)
-
-    @test_on_data
-    def test_learner_on(self, dataset):
-        if len(dataset) < 30:
-            raise unittest.SkipTest("Not enough examples.")
-        testing.LearnerTestCase.test_learner_on(self, dataset)
-        str = self.classifier.to_string()
-        evimp = self.classifier.evimp()
-
-        # Test base_features (make sure the domain translation works)
-        basis_features = self.classifier.base_features()
-        basis_domain = Orange.data.Domain(basis_features, None)
-        basis_table = Orange.data.Table(basis_domain, dataset)
-        basis_matrix = self.classifier.base_matrix(dataset)
-        # Filter best set
-        basis_matrix = basis_matrix[:, self.classifier.best_set]
-        # Remove intercept
-        basis_matrix = basis_matrix[:, 1:]
-        basis_matrix_a = basis_table.to_numpy_MA("A")[0]
-        # Fill unknowns
-        basis_matrix[basis_matrix_a.mask] = 0
-        basis_matrix_a = basis_matrix_a.filled(0)
-        diff = basis_matrix - basis_matrix_a
-        self.assertAlmostEqual(numpy.max(diff), 0, places=3)
-
-    @test_on_data
-    def test_bagged_evimp(self, dataset):
-        from Orange.ensemble.bagging import BaggedLearner
-        bagged_learner = BaggedLearner(earth.EarthLearner(terms=10, degree=2),
-                                       t=5)
-
-        bagged_classifier = bagged_learner(dataset)
-        evimp = earth.bagged_evimp(bagged_classifier, used_only=False)
-
-
-@datasets_driven(datasets=testing.REGRESSION_DATASETS + \
-                 testing.CLASSIFICATION_DATASETS)
-class TestScoreEarthImportance(testing.MeasureAttributeTestCase):
-    def setUp(self):
-        from Orange.regression.earth import ScoreEarthImportance
-        self.measure = ScoreEarthImportance(t=5, score_what="rss")
-
-
-@datasets_driven(datasets=["multitarget-synthetic"])
-class TestEarthMultitarget(unittest.TestCase):
-    @test_on_data
-    def test_multi_target_on_data(self, dataset):
-        self.learner = earth.EarthLearner(degree=2, terms=10)
-
-        self.predictor = self.multi_target_test(self.learner, dataset)
-
-        self.assertTrue(bool(self.predictor.multitarget))
-
-        s = str(self.predictor)
-        self.assertEqual(s, self.predictor.to_string())
-        self.assertNotEqual(s, self.predictor.to_string(3, 6))
-
-    def multi_target_test(self, learner, data):
-        indices = Orange.data.sample.SubsetIndices2(p0=0.3)(data)
-        learn = data.select(indices, 1)
-        test = data.select(indices, 0)
-
-        predictor = learner(learn)
-        self.assertIsInstance(predictor, Orange.classification.Classifier)
-        self.multi_target_predictor_interface(predictor, learn.domain)
-
-        from Orange.evaluation import testing as _testing
-
-        r = _testing.test_on_data([predictor], test)
-
-        def all_values(vals):
-            for v in vals:
-                self.assertIsInstance(v, Orange.core.Value)
-
-        def all_dists(dist):
-            for d in dist:
-                self.assertIsInstance(d, Orange.core.Distribution)
-
-        for ex in test:
-            preds = predictor(ex, Orange.core.GetValue)
-            all_values(preds)
-
-            dist = predictor(ex, Orange.core.GetProbabilities)
-            all_dists(dist)
-
-            preds, dist = predictor(ex, Orange.core.GetBoth)
-            all_values(preds)
-            all_dists(dist)
-
-            for d in dist:
-                if isinstance(d, Orange.core.ContDistribution):
-                    dist_sum = sum(d.values())
-                else:
-                    dist_sum = sum(d)
-
-                self.assertGreater(dist_sum, 0.0)
-                self.assertLess(abs(dist_sum - 1.0), 1e-3)
-
-        return predictor
-
-    def multi_target_predictor_interface(self, predictor, domain):
-        self.assertTrue(hasattr(predictor, "class_vars"))
-        self.assertIsInstance(predictor.class_vars, (list, Orange.core.VarList))
-        self.assertTrue(all(c1 == c2 for c1, c2 in \
-                            zip(predictor.class_vars, domain.class_vars)))
-
-
-def load_tests(loader, tests, ignore):
-    import doctest
-    tests.addTests(doctest.DocTestSuite(earth))
-    return tests
-
-
-if __name__ == "__main__":
-    unittest.main()
-

File docs/reference/rst/Orange.regression.earth.rst

-.. automodule:: Orange.regression.earth

File docs/reference/rst/Orange.regression.rst

    Orange.regression.linear
    Orange.regression.lasso
    Orange.regression.pls
-   Orange.regression.earth
    Orange.regression.tree
    Orange.regression.mean
 

File docs/tutorial/rst/code/regression-cv.py

 
 lin = Orange.regression.linear.LinearRegressionLearner()
 lin.name = "lin"
-earth = Orange.regression.earth.EarthLearner()
-earth.name = "mars"
+rf = Orange.ensemble.forest.RandomForestLearner()
+rf.name = "rf"
 tree = Orange.regression.tree.TreeLearner(m_pruning = 2)
 tree.name = "tree"
 
-learners = [lin, earth, tree]
+learners = [lin, rf, tree]
 
 res = Orange.evaluation.testing.cross_validation(learners, data, folds=5)
 rmse = Orange.evaluation.scoring.RMSE(res)

File docs/tutorial/rst/code/regression-other.py

 
 lin = Orange.regression.linear.LinearRegressionLearner(train)
 lin.name = "lin"
-earth = Orange.regression.earth.EarthLearner(train)
-earth.name = "mars"
+rf = Orange.ensemble.forest.RandomForestLearner(train)
+rf.name = "rf"
 tree = Orange.regression.tree.TreeLearner(train)
 tree.name = "tree"
 
-models = [lin, earth, tree]
+models = [lin, rf, tree]
 
 print "y    " + " ".join("%-4s" % l.name for l in models)
 for d in test[:3]:

File docs/tutorial/rst/regression.rst

 Following is initialization of few other regressors and their prediction of the first five data instances in housing price data set:
 
 .. index::
-   single: regression; mars
    single: regression; linear
 
 .. literalinclude:: code/regression-other.py
 
 Looks like the housing prices are not that hard to predict::
 
-   y    lin  mars tree
-   21.4 24.8 23.0 20.1
-   15.7 14.4 19.0 17.3
-   36.5 35.7 35.6 33.8
+   y    lin  rf   tree
+   12.7 11.3 15.3 19.1
+   13.8 20.2 14.1 13.1
+   19.3 20.8 20.7 23.3
+
 
 Cross Validation
 ----------------
 .. index: 
    single: regression; root mean squared error
 
-`MARS <http://en.wikipedia.org/wiki/Multivariate_adaptive_regression_splines>`_ has the lowest root mean squared error::
+Random forest has the lowest root mean squared error::
 
    Learner  RMSE
    lin      4.83
-   mars     3.84
+   rf       3.73
    tree     5.10
-

File docs/widgets/rst/index.rst

 
    regression/mean.rst
    regression/linear.rst
-   regression/earth.rst
    regression/regressiontree.rst
    regression/regressiontreegraph.rst
    regression/pade.rst
     "lib_cfg", ["libraries", "library_dirs", "include_dirs"])
 
 site_cfg = namedtuple(
-    "site_cfg", ["libsvm", "liblinear", "blas", "R", "qhull"])
+    "site_cfg", ["libsvm", "liblinear", "blas", "qhull"])
 
 
 def libs_parse(text):
     libsvm = parse_lib_opt(parser, "libsvm")
     liblinear = parse_lib_opt(parser, "liblinear")
     blas = parse_lib_opt(parser, "blas")
-    R = parse_lib_opt(parser, "R")
     qhull = parse_lib_opt(parser, "qhull")
 
-    return site_cfg(libsvm, liblinear, blas, R, qhull)
+    return site_cfg(libsvm, liblinear, blas, qhull)
 
 
 # Get the command for building orangeqt extension from
 else:
     orange_sources += get_source_files("source/orange/blas/", "c")
 
-if site.R:
-    # Link external R library (for linpack)
-    orange_libraries += site.R.libraries
-    orange_library_dirs += site.R.library_dirs
-else:
-    orange_sources += get_source_files("source/orange/linpack/", "c")
-
 if site.liblinear:
     # Link external LIBLINEAR library
     orange_libraries += site.liblinear.libraries

File source/orange/Makefile

 #########################################################################
 
 ifndef BLAS_LIB
-BLAS_OBJECTS = obj/daxpy.o obj/ddot.o obj/dnrm2.o obj/dscal.o obj/dcopy.o
+BLAS_OBJECTS = obj/daxpy.o obj/ddot.o obj/dnrm2.o obj/dscal.o
 else
 BLAS_OBJECTS =
 LINKOPTIONS += -l$(BLAS_LIB)
 
 obj/%.o : libsvm/%.cpp libsvm/%.h
 	$(COMPILER) $(COMPILEOPTIONS) -c $< -o $@
-	
 
-LINPACK_OBJECTS = obj/dqrsl.o obj/dqrdc2.o obj/dtrsl.o obj/linpack.o
 
-obj/%.o : linpack/%.c linpack/linpack.h
-	$(CCOMPILER) $(COMPILEOPTIONS) -c $< -o $@
-
-ALL_OBJECTS = $(ORANGE_OBJECTS) $(BLAS_OBJECTS) $(LINPACK_OBJECTS) $(LIBLINEAR_OBJECTS) $(LIBSVM_OBJECTS)
+ALL_OBJECTS = $(ORANGE_OBJECTS) $(BLAS_OBJECTS) $(LIBLINEAR_OBJECTS) $(LIBSVM_OBJECTS)
 
 $(OLD)/orange.so:	ppp/stamp px/stamp $(ALL_OBJECTS)
 	$(LINKER) $(ALL_OBJECTS) $(LINKOPTIONS) -o $(OLD)/orange.so

File source/orange/Orange.vcproj

 				>
 			</File>
 			<File
-				RelativePath=".\earth.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\earth.hpp"
-				>
-			</File>
-			<File
 				RelativePath="errors.cpp"
 				>
 			</File>
 					>
 				</File>
 				<File
-					RelativePath=".\blas\dcopy.c"
-					>
-				</File>
-				<File
 					RelativePath=".\blas\ddot.c"
 					>
 				</File>
 				</File>
 			</Filter>
 			<Filter
-				Name="linpack"
-				>
-				<File
-					RelativePath=".\linpack\dqrdc2.c"
-					>
-				</File>
-				<File
-					RelativePath=".\linpack\dqrsl.c"
-					>
-				</File>
-				<File
-					RelativePath=".\linpack\dtrsl.c"
-					>
-				</File>
-				<File
-					RelativePath=".\linpack\linpack.c"
-					>
-				</File>
-				<File
-					RelativePath=".\linpack\linpack.h"
-					>
-				</File>
-			</Filter>
-			<Filter
 				Name="liblinear"
 				>
 				<File

File source/orange/earth.cpp

-// This code is derived from code in the Rational Fortran file dmarss.r which is
-// part of the R and S mda package by Hastie and Tibshirani.
-// Comments containing "TODO" mark known issues
-//
-// See the R earth documentation for descriptions of the principal data structures.
-// See also www.milbo.users.sonic.net.
-//
-// Stephen Milborrow Feb 2007 Petaluma
-//
-//-----------------------------------------------------------------------------
-// ...
-//-----------------------------------------------------------------------------
-// References:
-//
-// HastieTibs: Trevor Hastie and Robert Tibshirani
-//      S library mda version 0.3.2 dmarss.r Ratfor code
-//      Modifications for R by Kurt Hornik, Friedrich Leisch, Brian Ripley
-//
-// FriedmanMars: Multivariate Adaptive Regression Splines (with discussion)
-//      Annals of Statistics 19/1, 1--141, 1991
-//
-// FriedmanFastMars: Friedman "Fast MARS"
-//      Dep. of Stats. Stanford, Tech Report 110, May 1993
-//
-// Miller: Alan Miller (2nd ed. 2002) Subset Selection in Regression
-//
-//-----------------------------------------------------------------------------
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// A copy of the GNU General Public License is available at
-// http://www.r-project.org/Licenses
-//
-//-----------------------------------------------------------------------------
-
-/*
-    This file is part of Orange.
-
-    Copyright 1996-2011 Faculty of Computer and Information Science, University of Ljubljana
-    Contact: janez.demsar@fri.uni-lj.si
-
-    Orange is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    Orange is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with Orange.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/*
- 	 Changes to earth.c from earth R package:
- 	 - Added defines for STANDALONE, USING_BLAS, _DEBUG
- 	 - Removed  #include <crtdbg.h> for windows
- 	 - Fix defines for ISNAN and FINITE to work on non MSC compilers
- 	 - Removed debugging code for windows
- 	 - Removed definitions of bool, true false
- 	 - Define _C_ as "C" for all compilers
- 	 - Define c linkage for error, xerbla
- 	 - Replaced POS_INF static global variable with numeric_limits<double>::infinity()
- 	 - Added #include <limits>
- 	 - Changed include of earth.h to earth.ppp and moved it before the module level defines
- 	 - Changed EvalSubsetsUsingXtX to return an error code if lin. dep. terms in bx
-
-	- TODO: Move global vars inside the functions using them (most are local)
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <string.h>
-#include <float.h>
-#include <math.h>
-#include <limits>
-
-#include "earth.ppp"
-
-#define STANDALONE 1
-#define USING_BLAS 1
-#define _DEBUG 0
-
-#if !STANDALONE
-#define USING_R 1
-#endif // STANDALONE
-
-
-#if _MSC_VER && _DEBUG
-    #include <crtdbg.h> // microsoft malloc debugging library
-#endif
-
-#if _MSC_VER            // microsoft
-    #define _C_ "C"
-    #if _DEBUG          // debugging enabled?
-        // disable warning: too many actual params for macro (for malloc1 and calloc1)
-        #pragma warning(disable: 4002)
-    #endif
-#else
-    #define _C_ "C"
-//    #ifndef bool
-//        typedef int bool;
-//        #define false 0
-//        #define true  1
-//    #endif
-#endif
-
-#if USING_R             // R with gcc
-    #include "R.h"
-    #include "Rinternals.h" // needed for Allowed function handling
-    #include "allowed.h"
-    #define printf Rprintf
-    #define FINITE(x) R_FINITE(x)
-    #define ASSERT(x)   \
-        if (!(x)) error("internal assertion failed in file %s line %d: %s\n", \
-                        __FILE__, __LINE__, #x)
-#else
-    #define warning printf
-    extern "C" { void error(const char *args, ...); }
-	#ifdef _MSC_VER
-		#define ISNAN(x)  _isnan(x)
-		#define FINITE(x) _finite(x)
-	#else
-		#define ISNAN(x)  isnan(x)
-		#define FINITE(x) finite(x)
-	#endif // _MSC_VER
-
-    #define ASSERT(x)   \
-        if (!(x)) error("internal assertion failed in file %s line %d: %s\n", \
-                        __FILE__, __LINE__, #x)
-#endif
-
-//#include "earth.h"
-
-extern _C_ int dqrdc2_(double *x, int *ldx, int *n, int *p,
-                        double *tol, int *rank,
-                        double *qraux, int *pivot, double *work);
-
-extern _C_ int dqrsl_(double *x, int *ldx, int *n, int *k,
-                        double *qraux, double *y,
-                        double *qy, double *qty, double *b,
-                        double *rsd, double *xb, int *job, int *info);
-
-extern _C_ void dtrsl_(double *t, int *ldt, int *n, double *b, int *job, int *info);
-
-extern _C_ void daxpy_(const int *n, const double *alpha,
-                        const double *dx, const int *incx,
-                        double *dy, const int *incy);
-
-extern _C_ double ddot_(const int *n,
-                        const double *dx, const int *incx,
-                        const double *dy, const int *incy);
-
-#define sq(x)       ((x) * (x))
-#ifndef max
-#define max(a,b)    (((a) > (b)) ? (a) : (b))
-#endif
-#ifndef min
-#define min(a,b)    (((a) < (b)) ? (a) : (b))
-#endif
-
-#define INLINE      inline
-#define USE_BLAS    1     // 1 is faster (tested on Windows XP Pentium with R BLAS)
-                          // also, need USE_BLAS to use bxOrthCenteredT
-
-#define FAST_MARS   1     // 1 to use techniques in FriedmanFastMars (see refs)
-
-#define IOFFSET     1     // printfs only: 1 to convert 0-based indices to 1-based in printfs
-                          // use 0 for C style indices in messages to the user
-
-static const char   *VERSION    = "version 3.2-0"; // change if you modify this file!
-static const double BX_TOL      = 0.01;
-static const double QR_TOL      = 0.01;
-static const double MIN_GRSQ    = -10.0;
-static const double ALMOST_ZERO = 1e-10;
-static const int    ONE         = 1;        // parameter for BLAS routines
-#if _MSC_VER                                // microsoft compiler
-static const double ZERO        = 0.0;
-//static const double POS_INF     = (1.0 / ZERO);
-static const double POS_INF  	= std::numeric_limits<double>::infinity();
-#else
-//static const double POS_INF     = (1.0 / 0.0);
-static const double POS_INF  	= std::numeric_limits<double>::infinity();
-#endif
-static const int    MAX_DEGREE  = 100;
-
-// Poor man's array indexing -- not pretty, but pretty useful.
-//
-// Note that we use column major ordering. C programs usually use row major
-// ordering but we don't here because the functions in this file are called
-// by R and call Fortran routines which use column major ordering.
-
-#define Dirs_(iTerm,iPred)      Dirs[(iTerm) + (iPred)*(nMaxTerms)]
-#define Cuts_(iTerm,iPred)      Cuts[(iTerm) + (iPred)*(nMaxTerms)]
-
-#define bx_(iCase,iTerm)                bx             [(iCase) + (iTerm)*(nCases)]
-#define bxOrth_(iCase,iTerm)            bxOrth         [(iCase) + (iTerm)*(nCases)]
-#define bxOrthCenteredT_(iTerm,iCase)   bxOrthCenteredT[(iTerm) + (iCase)*(nMaxTerms)]
-#define x_(iCase,iPred)                 x              [(iCase) + (iPred)*(nCases)]
-#define xOrder_(iCase,iPred)            xOrder         [(iCase) + (iPred)*(nCases)]
-#define y_(iCase,iResp)                 y              [(iCase) + (iResp)*(nCases)]
-#define Residuals_(iCase,iResp)         Residuals      [(iCase) + (iResp)*(nCases)]
-#define ycboSum_(iTerm,iResp)           ycboSum        [(iTerm) + (iResp)*(nMaxTerms)]
-#define Betas_(iTerm,iResp)             Betas          [(iTerm) + (iResp)*(nUsedCols)]
-
-// Global copies of some input parameters.  These stay constant for the entire MARS fit.
-static double TraceGlobal;      // copy of Trace parameter
-static int nMinSpanGlobal;      // copy of nMinSpan parameter
-
-static void FreeBetaCache(void);
-static char *sFormatMemSize(const unsigned MemSize, const bool Align);
-
-//-----------------------------------------------------------------------------
-// malloc and its friends are redefined (a) so under Microsoft C using
-// crtdbg.h we can easily track alloc errors and (b) so FreeR() doesn't
-// re-free any freed blocks and (c) so out of memory conditions are
-// immediately detected.
-// So DON'T USE free, malloc, and calloc.  Use free1, malloc1, and calloc1 instead.
-
-// free1 is a macro so we can zero p
-#define free1(p) { if (p) free(p); p = NULL; }
-
-#if _MSC_VER && _DEBUG  // microsoft C and debugging enabled?
-
-#define malloc1(size) _malloc_dbg((size), _NORMAL_BLOCK, __FILE__, __LINE__)
-#define calloc1(num, size) \
-                      _calloc_dbg((num), (size), _NORMAL_BLOCK, __FILE__, __LINE__)
-#else
-static void *malloc1(size_t size, const char *args, ...)
-{
-    void *p = malloc(size);
-    if (!p || TraceGlobal == 1.5) {
-        if (args == NULL)
-            printf("malloc %s\n", sFormatMemSize(size, true));
-        else {
-            char s[100];
-            va_list p;
-            va_start(p, args);
-            vsprintf(s, args, p);
-            va_end(p);
-            printf("malloc %s: %s\n", sFormatMemSize(size, true), s);
-        }
-        fflush(stdout);
-    }
-    if (!p)
-        error("Out of memory (could not allocate %s)", sFormatMemSize(size, false));
-    return p;
-}
-
-static void *calloc1(size_t num, size_t size, const char *args, ...)
-{
-    void *p = calloc(num, size);
-    if (!p || TraceGlobal == 1.5) {
-        if (args == NULL)
-            printf("calloc %s\n", sFormatMemSize(size, true));
-        else {
-            char s[100];
-            va_list p;
-            va_start(p, args);
-            vsprintf(s, args, p);
-            va_end(p);
-            printf("calloc %s: %s\n", sFormatMemSize(size, true), s);
-        }
-        fflush(stdout);