# Source: aichallenge-py / test_mdp.py (full commit)
# -*- coding: utf-8 -*-
#
#  test_mdp.py
#  aichallenge-py
#
#  Created by Lars Yencken on 2011-11-13.
#  Copyright 2011 Lars Yencken. All rights reserved.
#

"""
Test cases for solving MDPs.
"""

import unittest

import numpy as np

import mdp

class MDPPlaneTest(unittest.TestCase):
    """Checks ``mdp.value_iteration`` on a 3x3 grid without the torus flag."""

    def test_square(self):
        # A single unit reward sits in the centre cell of the grid.
        reward = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]])
        # Reference values, written out to four decimal places.
        expected = np.array(
                [[0.3333, 0.6666, 0.3333],
                 [0.6666, 1.3333, 0.6666],
                 [0.3333, 0.6666, 0.3333]]
            )
        value = mdp.value_iteration(reward, gamma=0.5, eps=1e-3)
        # Use a TestCase assertion rather than a bare ``assert`` so the check
        # is not compiled away when Python runs with -O.  The metric (mean
        # absolute error) and the 1e-3 threshold are unchanged.
        mean_abs_error = np.abs(value - expected).mean()
        self.assertLess(mean_abs_error, 1e-3, str(value))

class MDPTorusTest(unittest.TestCase):
    """Checks ``mdp.value_iteration`` on a 3x3 grid with ``torus=True``."""

    def _assert_torus_values(self, reward, expected_thirds):
        """Run value iteration with ``torus=True`` and compare the result.

        Args:
            reward: nested list giving the reward grid passed to
                ``mdp.value_iteration`` (converted with ``np.array``).
            expected_thirds: nested list of the expected values multiplied
                by three; it is divided by 3.0 before the comparison.
        """
        expected = np.array(expected_thirds, dtype=np.float32) / 3.0
        value = mdp.value_iteration(np.array(reward), torus=True, gamma=0.5,
                eps=1e-5)
        # Use a TestCase assertion rather than a bare ``assert`` so the check
        # is not compiled away when Python runs with -O.  The metric (mean
        # absolute error) and the 1e-5 threshold are unchanged.
        mean_abs_error = np.abs(value - expected).mean()
        self.assertLess(mean_abs_error, 1e-5, str(value))

    def test_square(self):
        # Reward in the centre cell.
        self._assert_torus_values(
                [[0, 0, 0], [0, 1, 0], [0, 0, 0]],
                [[1, 2, 1], [2, 4, 2], [1, 2, 1]],
            )

    def test_wraparound(self):
        # Reward in the middle of the top row; the bottom row is expected to
        # hold the same values as the middle row.
        self._assert_torus_values(
                [[0, 1, 0], [0, 0, 0], [0, 0, 0]],
                [[2, 4, 2], [1, 2, 1], [1, 2, 1]],
            )

    def test_wraparound_2(self):
        # Reward in the top-left corner.
        self._assert_torus_values(
                [[1, 0, 0], [0, 0, 0], [0, 0, 0]],
                [[4, 2, 2], [2, 1, 1], [2, 1, 1]],
            )

def suite():
    """Build a suite holding every test case class defined in this module.

    Returns:
        unittest.TestSuite: the tests of ``MDPPlaneTest`` followed by those
        of ``MDPTorusTest``.
    """
    # ``unittest.makeSuite`` was deprecated in Python 3.11 and removed in
    # 3.13; ``TestLoader.loadTestsFromTestCase`` is its documented
    # replacement and is also available on older interpreters.
    loader = unittest.TestLoader()
    return unittest.TestSuite([
            loader.loadTestsFromTestCase(MDPPlaneTest),
            loader.loadTestsFromTestCase(MDPTorusTest),
        ])

if __name__ == '__main__':
    # Run the module's suite and report the outcome through the process exit
    # status, so that a failing run is visible to shells and CI jobs instead
    # of always exiting with 0.
    result = unittest.TextTestRunner(verbosity=1).run(suite())
    raise SystemExit(0 if result.wasSuccessful() else 1)