Commits

Chris Mutel committed df289df

Add discrete uniform distribution

Comments (0)

Files changed (5)

 
 setup(
     name='bw-stats-toolkit',
-    version='0.6',
+    version='0.7',
     author='Chris Mutel',
     author_email='cmutel@gmail.com',
     url='https://bitbucket.org/cmutel/bw-stats-toolkit',

stats_toolkit/distributions/__init__.py

 from geometric import UniformUncertainty, TriangularUncertainty
 from bernoulli import BernoulliUncertainty
 from beta import BetaUncertainty
+from discrete_uniform import DiscreteUniform

stats_toolkit/distributions/discrete_uniform.py

+# -*- coding: utf-8 -*-
+from base import UncertaintyBase
+from numpy import isnan, array, random, vstack, zeros
+from scipy import stats
+from stats_toolkit.utils import one_row_params_array
+from stats_toolkit.errors import InvalidParamsError,\
+    ImproperBoundsError
+
+
+class DiscreteUniform(UncertaintyBase):
+    """Discrete uniform distribution over the integers from ``minimum`` up to,
+    but excluding, ``maximum``.
+
+    Sampling uses ``numpy.random.randint`` and the CDF/PPF use
+    ``scipy.stats.randint``; both treat the lower bound as inclusive and the
+    upper bound as exclusive. A ``minimum`` of NaN is treated as 0.
+    """
+    id = 7
+    description = "Discrete uniform uncertainty"
+
+    @classmethod
+    def validate(cls, params):
+        """Check the bounds in ``params`` without modifying it.
+
+        Raises ``InvalidParamsError`` if any ``maximum`` is NaN, and
+        ``ImproperBoundsError`` unless ``minimum < maximum`` everywhere.
+        """
+        if isnan(params['maximum']).any():
+            raise InvalidParamsError("Maximum values must always be defined.")
+        # The bounds must satisfy minimum < maximum (strictly).
+        improper = params['minimum'] >= params['maximum']
+        # A NaN minimum compares False above, but it is replaced by 0 before
+        # use (see _fix_minimum), so such rows also need maximum > 0.
+        improper = improper | (
+            isnan(params['minimum']) & (params['maximum'] <= 0))
+        if improper.any():
+            raise ImproperBoundsError
+
+    @classmethod
+    def _fix_minimum(cls, params):
+        """Replace NaN ``minimum`` values with 0.
+
+        ``params`` is modified in place and also returned.
+        """
+        mask = isnan(params['minimum'])
+        params['minimum'][mask] = 0
+        return params
+
+    @classmethod
+    def random_variables(cls, params, size, seeded_random=None):
+        """Draw ``size`` random integers for each row of ``params``.
+
+        ``seeded_random`` is any object with a ``randint(low, high, size=...)``
+        method; ``numpy.random`` is used when it is not given. The per-row
+        draws are stacked vertically, one row of output per row of ``params``.
+        """
+        if seeded_random is None:
+            seeded_random = random
+        params = cls._fix_minimum(params)
+        # randint has different behaviour than e.g. uniform. We can't pass in
+        # arrays, but have to process them line by line.
+        return vstack([
+            seeded_random.randint(
+                params['minimum'][i],  # low (inclusive)
+                params['maximum'][i],  # high (exclusive)
+                size=size)
+            for i in range(params.shape[0])])
+
+    @classmethod
+    def cdf(cls, params, vector):
+        """Return the cumulative probability of each value in ``vector``.
+
+        Row ``i`` of ``vector`` is evaluated against row ``i`` of ``params``;
+        the result has the same shape as the checked ``vector``.
+        """
+        # check_2d_inputs is inherited and not visible here; presumably it
+        # returns ``vector`` as a 2-d array with one row per params row.
+        vector = cls.check_2d_inputs(params, vector)
+        params = cls._fix_minimum(params)
+        results = zeros(vector.shape)
+        for row in range(params.shape[0]):
+            # scipy.stats.randint takes (low, high) shape parameters, not the
+            # loc/scale pair used by the continuous uniform distribution.
+            results[row, :] = stats.randint.cdf(
+                vector[row, :],
+                params[row]['minimum'],
+                params[row]['maximum'])
+        return results
+
+    @classmethod
+    def ppf(cls, params, percentages):
+        """Return the integer quantiles for each value in ``percentages``.
+
+        Row ``i`` of ``percentages`` is evaluated against row ``i`` of
+        ``params`` with ``scipy.stats.randint.ppf``; the result has the same
+        shape as the checked ``percentages``.
+        """
+        # check_2d_inputs is inherited and not visible here; presumably it
+        # returns ``percentages`` as a 2-d array with one row per params row.
+        percentages = cls.check_2d_inputs(params, percentages)
+        params = cls._fix_minimum(params)
+        results = zeros(percentages.shape)
+        for row in range(params.shape[0]):
+            # Linear interpolation between the bounds would give continuous
+            # values; the discrete quantile function returns integers only.
+            results[row, :] = stats.randint.ppf(
+                percentages[row, :],
+                params[row]['minimum'],
+                params[row]['maximum'])
+        return results
+
+    @classmethod
+    @one_row_params_array
+    def statistics(cls, params):
+        """Return a dict with ``mean``, ``mode``, ``median``, ``lower`` and
+        ``upper`` for ``params``.
+
+        ``lower`` and ``upper`` are the (NaN-fixed) ``minimum`` and the
+        ``maximum``; the other three entries all hold the bounds' midpoint.
+        """
+        params = cls._fix_minimum(params)
+        # Divide by 2.0 so integer bounds are not floor-divided on Python 2.
+        # NOTE(review): with an exclusive maximum the exact mean would be
+        # (minimum + maximum - 1) / 2; the midpoint is kept so reported
+        # values do not change -- confirm which convention is intended.
+        mean = (params['maximum'] + params['minimum']) / 2.0
+        return {'mean': mean, 'mode': mean, 'median': mean,
+            'lower': params['minimum'], 'upper': params['maximum']}
+
+    @classmethod
+    @one_row_params_array
+    def pdf(cls, params, xs=None):
+        """Return ``(xs, ys)`` arrays giving the probability at each ``x``.
+
+        When ``xs`` is not given, the two bounds are used. Every ``x`` gets
+        the same probability, ``1 / (maximum - minimum)``.
+        """
+        params = cls._fix_minimum(params)
+        # ``xs == None`` is an elementwise comparison when xs is an array.
+        if xs is None:
+            xs = (params['minimum'], params['maximum'])
+        # 1.0 avoids integer division on Python 2 for integer bounds.
+        percentage = 1.0 / (params['maximum'] - params['minimum'])
+        # NOTE(review): the same value is returned for every x, including
+        # points outside the support or between integers -- confirm intent.
+        ys = array([float(percentage) for _ in xs])
+        return array([float(x) for x in xs]), ys

stats_toolkit/tests/distributions/discrete_uniform.py

+from __future__ import division
+from numpy import *
+from scipy import stats
+from stats_toolkit.distributions import DiscreteUniform
+from stats_toolkit.tests.uncertainty import UncertaintyTestCase
+from stats_toolkit.errors import ImproperBoundsError, \
+    UndefinedDistributionError, InvalidParamsError, UnreasonableBoundsError
+
+
+class DiscreteUniformTestCase(UncertaintyTestCase):
+    """Placeholder test case for ``DiscreteUniform``; it defines no tests yet.
+
+    The commented-out methods that follow exercise ``BetaUncertainty`` and
+    would need to be adapted before they can test ``DiscreteUniform``.
+    """
+    pass
+    # def test_random_variables_broadcasting(self):
+    #     params = self.make_params_array(length = 2)
+    #     params[:]['amount'] = 2
+    #     params[:]['sigma'] = 5
+    #     results = BetaUncertainty.random_variables(params, 1000)
+    #     self.assertEqual(results.shape, (2, 1000))
+    #     self.assertTrue(0.26 < average(results[0,:]) < 0.3)
+    #     self.assertTrue(0.26 < average(results[1,:]) < 0.3)
+
+    # def test_random_variables(self):
+    #     params = self.make_params_array()
+    #     params['amount'] = 2
+    #     params['sigma'] = 5
+    #     results = BetaUncertainty.random_variables(params, 1000)
+    #     self.assertEqual(results.shape, (1, 1000))
+    #     self.assertTrue(0.26 < average(results) < 0.3)
+
+    # def test_random_variables_scaling(self):
+    #     params = self.make_params_array()
+    #     params['amount'] = 2
+    #     params['sigma'] = 5
+    #     params['maximum'] = 5
+    #     results = BetaUncertainty.random_variables(params, 1000)
+    #     self.assertEqual(results.shape, (1, 1000))
+    #     self.assertTrue(0.26*5 < average(results) < 0.3*5)
+    #     params = self.make_params_array(length = 2)
+    #     params[:]['amount'] = 2
+    #     params[:]['sigma'] = 5
+    #     params[0]['maximum'] = 5
+    #     params[1]['maximum'] = 10
+    #     results = BetaUncertainty.random_variables(params, 1000)
+    #     self.assertEqual(results.shape, (2, 1000))
+    #     self.assertTrue(0.26*5 < average(results[0,:]) < 0.3*5)
+    #     self.assertTrue(0.26*10 < average(results[1,:]) < 0.3*10)
+
+    # def test_alpha_validation(self):
+    #     params = self.make_params_array()
+    #     params['amount'] = 0
+    #     params['sigma'] = 5
+    #     self.assertRaises(InvalidParamsError, 
+    #         BetaUncertainty.validate, params)
+
+    # def test_beta_validation(self):
+    #     params = self.make_params_array()
+    #     params['amount'] = 2
+    #     params['sigma'] = 0
+    #     self.assertRaises(InvalidParamsError, 
+    #         BetaUncertainty.validate, params)
+
+    # def test_scale_valdiation(self):
+    #     params = self.make_params_array()
+    #     params['amount'] = 2
+    #     params['sigma'] = 5
+    #     params['maximum'] = 0
+    #     self.assertRaises(InvalidParamsError, 
+    #         BetaUncertainty.validate, params)
+    #     params['maximum'] = -1
+    #     self.assertRaises(InvalidParamsError, 
+    #         BetaUncertainty.validate, params)
+
+    # def test_cdf(self):
+    #     params = self.make_params_array()
+    #     params['amount'] = 2
+    #     params['sigma'] = 5
+    #     xs = arange(0.1, 1, 0.1).reshape((1,-1))
+    #     reference = stats.beta.cdf(xs, 2, 5)
+    #     calculated = BetaUncertainty.cdf(params, xs)
+    #     self.assertTrue(allclose(reference, calculated))
+    #     self.assertEqual(reference.shape, calculated.shape)
+
+    # def test_cdf_scaling(self):
+    #     params = self.make_params_array()
+    #     params['amount'] = 2
+    #     params['sigma'] = 5
+    #     params['maximum'] = 2
+    #     xs = arange(0.2, 2, 0.2).reshape((1,-1))
+    #     reference = stats.beta.cdf(xs, 2, 5, scale=2)
+    #     calculated = BetaUncertainty.cdf(params, xs)
+    #     self.assertTrue(allclose(reference, calculated))
+    #     self.assertEqual(reference.shape, calculated.shape)
+
+    # def test_ppf(self):
+    #     params = self.make_params_array()
+    #     params['amount'] = 2
+    #     params['sigma'] = 5
+    #     xs = arange(0.1, 1, 0.1).reshape((1,-1))
+    #     reference = stats.beta.ppf(xs, 2, 5)
+    #     calculated = BetaUncertainty.ppf(params, xs)
+    #     self.assertTrue(allclose(reference, calculated))
+    #     self.assertEqual(reference.shape, calculated.shape)
+
+    # def test_ppf_scaling(self):
+    #     params = self.make_params_array()
+    #     params['amount'] = 2
+    #     params['sigma'] = 5
+    #     params['maximum'] = 2
+    #     xs = arange(0.1, 1, 0.1).reshape((1,-1))
+    #     reference = stats.beta.ppf(xs, 2, 5, scale=2)
+    #     calculated = BetaUncertainty.ppf(params, xs)
+    #     self.assertTrue(allclose(reference, calculated))
+    #     self.assertEqual(reference.shape, calculated.shape)
+
+    # def test_pdf(self):
+    #     params = self.make_params_array()
+    #     params['amount'] = 2
+    #     params['sigma'] = 5
+    #     xs = arange(0.1, 1, 0.1)
+    #     reference = stats.beta.pdf(xs, 2, 5)
+    #     calculated = BetaUncertainty.pdf(params, xs)
+    #     self.assertTrue(allclose(reference, calculated[1]))
+    #     self.assertEqual(reference.shape, calculated[1].shape)
+    #     self.assertTrue(allclose(xs, calculated[0]))
+    #     self.assertEqual(xs.shape, calculated[0].shape)
+    #     self.assertEqual(calculated[1].shape, calculated[0].shape)
+
+    # def test_pdf_no_xs(self):
+    #     params = self.make_params_array()
+    #     params['amount'] = 2
+    #     params['sigma'] = 5
+    #     xs = arange(0, 1, 1./200) # 200 is default number of points
+    #     reference = stats.beta.pdf(xs, 2, 5)
+    #     calculated = BetaUncertainty.pdf(params)
+    #     self.assertTrue(allclose(reference, calculated[1]))
+    #     self.assertEqual(reference.shape, calculated[1].shape)
+    #     self.assertTrue(allclose(xs, calculated[0]))
+    #     self.assertEqual(xs.shape, calculated[0].shape)
+    #     self.assertEqual(calculated[1].shape, calculated[0].shape)
+
+    # def test_pdf_scaling(self):
+    #     params = self.make_params_array()
+    #     params['amount'] = 2
+    #     params['sigma'] = 5
+    #     params['maximum'] = 2
+    #     xs = arange(0.2, 2, 0.2)
+    #     reference = stats.beta.pdf(xs, 2, 5, scale=2)
+    #     calculated = BetaUncertainty.pdf(params, xs)
+    #     self.assertTrue(allclose(reference, calculated[1]))
+    #     self.assertEqual(reference.shape, calculated[1].shape)
+    #     self.assertTrue(allclose(xs, calculated[0]))
+    #     self.assertEqual(xs.shape, calculated[0].shape)
+    #     self.assertEqual(calculated[1].shape, calculated[0].shape)
+
+    # def test_seeded_random(self):
+    #     sr = self.seeded_random()
+    #     params = self.make_params_array()
+    #     params['sigma'] = params['amount'] = 1
+    #     self.assertTrue(allclose(
+    #         BetaUncertainty.random_variables(params, 4, seeded_random=sr), 
+    #         array([0.59358266, 0.84368537, 0.01394206, 0.87557834])
+    #         ))
+

stats_toolkit/uncertainty_choices.py

-from errors import UndefinedDistributionError, \
-    InvalidParamsError, ImproperBoundsError
 from distributions import *
-from numpy import repeat, random, zeros, tile, exp, log, isnan
-from scipy import stats
 
-DEFAULT_DISTRIBUTIONS = (UndefinedUncertainty, NoUncertainty, 
-    LognormalUncertainty, NormalUncertainty, UniformUncertainty, 
-    TriangularUncertainty, BernoulliUncertainty, BetaUncertainty)
+DEFAULT_DISTRIBUTIONS = (UndefinedUncertainty, NoUncertainty,
+    LognormalUncertainty, NormalUncertainty, UniformUncertainty,
+    TriangularUncertainty, BernoulliUncertainty, BetaUncertainty,
+    DiscreteUniform)
 
-CUSTOM_DISTRIBUTIONS = () # Don't forget to add trailing comma if needed
+CUSTOM_DISTRIBUTIONS = ()  # Don't forget to add trailing comma if needed
 
 # All distribution classes: the defaults above followed by any custom ones.
 DISTRIBUTIONS = DEFAULT_DISTRIBUTIONS + CUSTOM_DISTRIBUTIONS
 
+
 class UncertaintyChoices(object):
     """An iterable of ``(id, description)`` pairs for uncertainty choices.

     ``id_dict`` maps each distribution's ``id`` to the distribution itself.
     """
     def __init__(self):
         """Index ``self.choices`` by id.

         Raises ``ValueError`` if two distributions share the same id.
         """
         # NOTE(review): ``self.choices`` is not assigned anywhere in the
         # code visible here -- confirm where it is defined.
         self.id_dict = {}
         for dist in self.choices:
             if dist.id in self.id_dict:
-                raise ValueError, "Uncertainty id %i is already in use by %s" %\
-                    (dist.id, self.id_dict[dist.id])
+                raise ValueError("Uncertainty id %i is already in use by %s" %\
+                    (dist.id, self.id_dict[dist.id]))
             self.id_dict[dist.id] = dist
 
     def __iter__(self):
         """Formatted for Django ChoiceField"""
         # __iter__ must return an iterator; returning the bare list makes
         # iter(self), for-loops and list(self) raise TypeError.
         return iter([(obj.id, obj.description) for obj in self.choices])
 
-uncertainty_choices = UncertaintyChoices()
+uncertainty_choices = UncertaintyChoices()