# HG changeset patch
# User markotoplak
# Date 1380886518 7200
# Node ID ba8bc7d59e7a92a8e8b9998917c07f0c88064b67
# Parent a3f2e290432ea2c02af07eab6b527037c8cda0de
Updates to documentation.
diff --git a/README.rst b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -1,9 +1,8 @@
Orange Reliability
==================
Orange Reliability is an addon for Orange_ data mining software package. It
extends Orange by providing functionality to estimate reliability of individual
regression and classification predictions.
+Orange Reliability is an add-on for the Orange_ data mining software package that
+enables the estimation of reliabilities for individual predictions.
.. _Orange: http://orange.biolab.si/
diff --git a/docs/rst/Orange.evaluation.reliability.rst b/docs/rst/Orange.evaluation.reliability.rst
--- a/docs/rst/Orange.evaluation.reliability.rst
+++ b/docs/rst/Orange.evaluation.reliability.rst
@@ -167,3 +167,7 @@
.. [Bosnic2010] Bosnić, Z., Kononenko, I. (2010) `Automatic selection of reliability estimates for individual regression predictions. `_ *The Knowledge Engineering Review* 25(1), pp. 27-47.
.. [Pevec2011] Pevec, D., Štrumbelj, E., Kononenko, I. (2011) `Evaluating Reliability of Single Classifications of Neural Networks. `_ *Adaptive and Natural Computing Algorithms*, 2011, pp. 22-30.
+
+.. [Wolpert1992] Wolpert, David H. (1992) `Stacked generalization.` *Neural Networks*, Vol. 5, 1992, pp. 241-259.
+
+.. [Dzeroski2004] Dzeroski, S. and Zenko, B. (2004) `Is combining classifiers with stacking better than selecting the best one?` *Machine Learning*, Vol. 54, 2004, pp. 255-273.
diff --git a/docs/rst/index.rst b/docs/rst/index.rst
--- a/docs/rst/index.rst
+++ b/docs/rst/index.rst
@@ -1,9 +1,8 @@
Orange Reliability documentation
================================
Orange Reliability is an addon for Orange_ data mining software package. It
extends Orange by providing functionality to estimate reliability of individual
regression and classification predictions.
+Orange Reliability is an add-on for the Orange_ data mining software package that
+enables the estimation of reliabilities for individual predictions.
.. _Orange: http://orange.biolab.si/
diff --git a/orangecontrib/reliability/__init__.py b/orangecontrib/reliability/__init__.py
--- a/orangecontrib/reliability/__init__.py
+++ b/orangecontrib/reliability/__init__.py
@@ -275,24 +275,20 @@
class SensitivityAnalysis:
"""
 :param e: List of possible :math:`\epsilon` values for SAvar and SAbias
 reliability estimates.
+ :param e: Values of :math:`\epsilon`.
:type e: list of floats
:rtype: :class:`Orange.evaluation.reliability.SensitivityAnalysisClassifier`
 To estimate the reliability of prediction for given instance,
 the learning set is extended with this instance, labeled with
 :math:`K + \epsilon (l_{max}  l_{min})`,
 where :math:`K` denotes the initial prediction,
 :math:`\epsilon` is sensitivity parameter and :math:`l_{min}` and
 :math:`l_{max}` denote lower and the upper bound of the learning
 instances' labels. After computing different sensitivity predictions
 using different values of :math:`\epsilon`, the prediction are combined
 into SAvar and SAbias. SAbias can be used in a signed or absolute form.
+ To estimate the reliability of prediction for a given instance,
+ the learning set is extended with that instance with the label changed to
+ :math:`K + \epsilon (l_{max} - l_{min})` (:math:`K` is the initial prediction,
+ :math:`\epsilon` a sensitivity parameter, and :math:`l_{min}` and
+ :math:`l_{max}` the lower and upper bounds of labels on training data).
+ Results for multiple values of :math:`\epsilon` are combined
+ into SAvar and SAbias. SAbias can be used either in a signed or absolute form.
:math:`SAvar = \\frac{\sum_{\epsilon \in E}(K_{\epsilon} - K_{-\epsilon})}{|E|}`

:math:`SAbias = \\frac{\sum_{\epsilon \in E} (K_{\epsilon} - K) + (K_{-\epsilon} - K)}{2 |E|}`
@@ -363,13 +359,9 @@
:rtype: :class:`Orange.evaluation.reliability.ReferenceExpectedErrorClassifier`
 Reference reliability estimation method for classification [Pevec2011]_:
+ Reference estimate for classification: :math:`O_{ref} = 2 (\hat y - \hat y ^2) = 2 \hat y (1-\hat y)`, where :math:`\hat y` is the estimated probability of the predicted class [Pevec2011]_.
 :math:`O_{ref} = 2 (\hat y  \hat y ^2) = 2 \hat y (1\hat y)`,

 where :math:`\hat y` is the estimated probability of the predicted class.

 Note that for this method, in contrast with all others, a greater estimate means lower reliability (greater expected error).
+ A greater estimate means a greater expected error.
"""
def __init__(self, name="reference"):
@@ -396,36 +388,36 @@
:param m: Number of bagging models to be used with BAGV estimate
:type m: int
+ :param for_instances: Optional. If test instances
+ are given as a parameter, this class can compute their reliabilities
+ on the fly, which saves memory.
+
+ :type for_instances: Orange.data.Table
+
:rtype: :class:`Orange.evaluation.reliability.BaggingVarianceClassifier`
 :math:`m` different bagging models are constructed and used to estimate
 the value of dependent variable for a given instance. In regression,
 the variance of those predictions is used as a prediction reliability
 estimate.
+
+ :math:`m` different bagging models are used to estimate
+ the value of dependent variable for a given instance. For regression,
+ the variance of predictions is a reliability
+ estimate:
 :math:`BAGV = \\frac{1}{m} \sum_{i=1}^{m} (K_i  K)^2`

 where :math:`K = \\frac{\sum_{i=1}^{m} K_i}{m}` and :math:`K_i` are
 predictions of individual constructed models. Note that a greater value
 implies greater error.
+ :math:`BAGV = \\frac{1}{m} \sum_{i=1}^{m} (K_i - K)^2`, where
+ :math:`K = \\frac{\sum_{i=1}^{m} K_i}{m}` and :math:`K_i` are
+ predictions of individual models.
For classification, 1 minus the average Euclidean distance between class
probability distributions predicted by the model, and distributions
 predicted by the individual bagged models, is used as the BAGV reliability
 measure. Note that in this case a greater value implies a better
+ predicted by the individual bagged models, is the BAGV reliability
+ measure. In this case, a greater value implies a better
prediction.
 This reliability measure can run out of memory fast if individual classifiers
 use a lot of memory, as it build m of them, thereby using :math:`m` times memory
 for a single classifier. If instances for measuring predictions
 are given as a parameter, this class can only compute their reliability,
 which saves memory.

+ This reliability measure can run out of memory if individual classifiers themselves
+ use a lot of memory; it needs :math:`m` times memory
+ for a single classifier.
"""
def __init__(self, m=50, name="bv", randseed=0, for_instances=None):
 """
 for_instances:
 """
+
self.m = m
self.name = name
self.select_with_repeat = Orange.core.MakeRandomIndicesMultiple()
@@ -500,39 +492,28 @@
class LocalCrossValidation:
"""
 :param k: Number of nearest neighbours used in LCV estimate
+ :param k: Number of nearest neighbours used. Default: 0, which denotes
+ 1/20 of data set size (or 5, whichever is greater).
:type k: int
 :param distance: function that computes a distance between two discrete
+ :param distance: Function that computes a distance between two discrete
distributions (used only in classification problems). The default
is Hellinger distance.
:type distance: function
 :param distance_weighted: for classification reliability estimation,
+ :param distance_weighted: For classification,
use an average distance between distributions, weighted by :math:`e^{-d}`,
where :math:`d` is the distance between predicted instance and the
neighbour.
:rtype: :class:`Orange.evaluation.reliability.LocalCrossValidationClassifier`
 :math:`k` nearest neighbours to the given instance are found and put in
 a separate data set. On this data set, a leaveoneout validation is
 performed. Reliability estimate for regression is then the distance
 weighted absolute prediction error. In classification, 1 minus the average
+ Leave-one-out validation is
+ performed on :math:`k` nearest neighbours to the given instance.
+ Reliability estimate for regression is then the distance
+ weighted absolute prediction error. For classification, it is 1 minus the average
distance between the predicted class probability distribution and the
(trivial) probability distributions of the nearest neighbour.

 If a special value 0 is passed as :math:`k` (as is by default),
 it is set as 1/20 of data set size (or 5, whichever is greater).

 Summary of the algorithm for regression:

 1. Determine the set of k nearest neighours :math:`N = { (x_1, c_1),...,
 (x_k, c_k)}`.
 2. On this set, compute leaveoneout predictions :math:`K_i` and
 prediction errors :math:`E_i =  C_i  K_i `.
 3. :math:`LCV(x) = \\frac{ \sum_{(x_i, c_i) \in N} d(x_i, x) * E_i }{ \sum_{(x_i, c_i) \in N} d(x_i, x) }`

"""
def __init__(self, k=0, distance=hellinger_dist, distance_weighted=True, name="lcv"):
self.k = k
@@ -602,7 +583,7 @@
class CNeighbours:
"""
 :param k: Number of nearest neighbours used in CNK estimate
+ :param k: Number of nearest neighbours.
:type k: int
:param distance: function that computes a distance between two discrete
@@ -612,20 +593,13 @@
:rtype: :class:`Orange.evaluation.reliability.CNeighboursClassifier`
 For regression, CNK is defined for an unlabeled instance as a difference
 between average label of its nearest neighbours and its prediction. CNK
 can be used as a signed or absolute estimate.

 :math:`CNK = \\frac{\sum_{i=1}^{k}C_i}{k}  K`

 where :math:`k` denotes number of neighbors, C :sub:`i` denotes neighbours'
 labels and :math:`K` denotes the instance's prediction. Note that a greater
 value implies greater prediction error.
+ For regression, CNK is defined as the difference
+ between the average label of its nearest neighbours and the prediction. CNK
+ can be either signed or absolute. A greater value implies greater prediction error.
For classification, CNK is equal to 1 minus the average distance between
predicted class distribution and (trivial) class distributions of the
 $k$ nearest neighbours from the learning set. Note that in this case
 a greater value implies better prediction.
+ $k$ nearest neighbours from the learning set. A greater value implies better prediction.
"""
def __init__(self, k=5, distance=hellinger_dist, name = "cnk"):
@@ -904,8 +878,9 @@
"""
This methods develops a model that integrates reliability estimates
 from all available reliability scoring techniques. To develop such
 model it needs to performs internal crossvalidation, similarly to :class:`ICV`.
+ from all available reliability scoring techniques (see [Wolpert1992]_ and [Dzeroski2004]_). It
+ performs internal cross-validation and therefore takes roughly the same time
+ as :class:`ICV`.
:param stack_learner: a data modelling method. Default (if None): unregularized linear regression with prior normalization.
:type stack_learner: :obj:`Orange.classification.Learner`
@@ -917,7 +892,7 @@
:type box_learner: :obj:`int`
:param save_data: If True, save the data used for training the
 model for intergration into resulting classifier's .data attribute (default False).
+ integration model into resulting classifier's .data attribute (default False).
:type box_learner: :obj:`bool`
"""
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
NAME = 'OrangeReliability'
DOCUMENTATION_NAME = 'Orange Reliability'
VERSION = '0.2.13'
+VERSION = '0.2.14'
DESCRIPTION = 'Orange Reliability addon for Orange data mining software package.'
LONG_DESCRIPTION = open(os.path.join(os.path.dirname(__file__), 'README.rst')).read()