# aichallenge-py / test_mdp.py

 ``` 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48``` ```# -*- coding: utf-8 -*- # # test_mdp.py # aichallenge-py # # Created by Lars Yencken on 2011-11-13. # Copyright 2011 Lars Yencken. All rights reserved. # """ Test cases for solving MDPs. """ import unittest import numpy as np import mdp class MDPPlaneTest(unittest.TestCase): def setUp(self): pass #def test_single_row(self): #reward = np.array([0.0,0.0,0.0,8.0]) #reward.reshape((1, 4)) #value = mdp.value_iteration(reward, gamma=0.5, eps=1e-5) #expected = np.array([1.0, 2.0, 4.0, 8.0]) #expected.reshape((1, 4)) #assert abs(value - expected).mean() < 1e-5 def test_square(self): reward = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]]) expected = np.array( [[ 0.3333, 0.6666, 0.3333], [ 0.6666, 1.3333, 0.6666], [ 0.3333, 0.6666, 0.3333]] ) value = mdp.value_iteration(reward, gamma=0.5, eps=1e-3) assert abs(value - expected).mean() < 1e-3, str(value) class MDPTorusTest(unittest.TestCase): def test_square(self): reward = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]]) expected = np.array([[1, 2, 1], [2, 4, 2], [1, 2, 1]], dtype=np.float32) / 3.0 value = mdp.value_iteration(reward, torus=True, gamma=0.5, eps=1e-5) assert abs(value - expected).mean() < 1e-5, str(value) ```