Source

container / statistics.py

#!/usr/bin/env python2
# -*- coding: utf-8 -*-

from decimal import Decimal


"""Module to make some statistical calculations."""

__author__ = 'Thorsten Weimann <weimann.th(at)gmail.com>'
__version__ = '0.1'
__license__ = 'MIT'


def to_decimal(v):
    """Converts a single value to a `Decimal`.

    :parameters:
        v : String, Integer, Float, Decimal
            Value to convert. `Decimal` instances are handed back unchanged.
            Values that are neither `Decimal`, integer nor float are
            converted via their string form, in which a comma is accepted
            as decimal separator.

    :returns: The converted value.
    :rtype: Decimal
    """
    if isinstance(v, Decimal):
        return v
    if isinstance(v, int):
        return Decimal(v)
    # `Decimal.from_float` may be unavailable; floats then take the string
    # route below.
    float_converter = getattr(Decimal, 'from_float', None)
    if float_converter is not None and isinstance(v, float):
        return float_converter(v)
    text = str(v).replace(',', '.')
    return Decimal(text)


class NumberOfValuesError(Exception):
    """Raised when a dataset holds too few values for a calculation."""


class Dataset(object):

    def __init__(self, values, sample=True):
        """Creates a `Dataset` and fills it with the given values.

        :parameters:
            values : List
                Values to analyse (string, integer, float, Decimal). They
                are handed to `Dataset.add`, which stores them as `Decimal`.
            sample : Boolean
                True, if the values are a random sample; False, if they
                represent the whole population.
        """
        self.values = []
        self.sample = sample
        self.add(*values)

    def __str__(self):
        try:
            return 'Dataset({:.4f}, {:2f}, {})'.format(
                self.standard_deviation, self.average, self.count)
        except NumberOfValuesError:
            return 'Dataset({} values)'.format(self.count)

    def __len__(self):
        """Counts the number of values in this dataset.

        :returns: Number of values.
        :rtype: Integer
        """
        return len(self.values)

    def add(self, *values):
        """Adds the given value(s) to the dataset.

        :parameters:
            values : String, Integer, Float, Decimal
                See values parameter in `Dataset.__init__`.
        """
        for v in values:
            self.values.append(to_decimal(v))

    def copy(self):
        """Creates a duplicate of this dataset.

        The duplicate is built from a new list of the current values and
        the same `sample` flag.

        :returns: New dataset with the same values and `sample` setting.
        :rtype: Dataset
        """
        return Dataset(list(self.values), self.sample)

    def check(self):
        """Checks the number of values in this dataset. Raises
        `NumberOfValuesError` on less than three values.

        :returns: True, if the dataset has more than two values.
        :rtype: Boolean
        :raises: NumberOfValuesError
        """
        if not self.values:
            raise NumberOfValuesError('Dataset contains no values.')
        elif self.count < 3:
            raise NumberOfValuesError('Dataset contains only {0} values. '
                'Calculation is possible with three or more values.'.format(
                    self.count))
        return True

    @property
    def max(self):
        """Calculates the maximum value of the dataset.

        :returns: Maximum value.
        :rtype: Decimal
        """
        return max(self.values)

    @property
    def min(self):
        """Calculates the minimum value of the dataset.

        :returns: Minimum value.
        :rtype: Decimal
        """
        return min(self.values)

    @property
    def range(self):
        """Calculates the statistical range (R) of the dataset.

        :returns: Range of the dataset.
        :rtype: Decimal
        """
        return self.max - self.min

    def sorted(self, reverse=False):
        """Sorts the values in the dataset from minimum to maximum or from
        maximum to minimum if `reverse` is true.

        :parameters:
            reverse : Boolean
                Set sortorder from max to min.

        :returns: Sorted list of values.
        :rtype: List
        """
        return sorted(self.values, reverse=reverse)

    # `count` and `n` are read-only properties that return `len(self.values)`.
    count = n = property(__len__)
    # `R` is a short alias for the `range` property defined above.
    R = range