# aichallenge-py / mdp.py

 ``` 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81``` ```# -*- coding: utf-8 -*- # # mdp.py # aichallenge-py # # Created by Lars Yencken on 2011-11-13. # Copyright 2011 Lars Yencken. All rights reserved. # """ Solve Markov Decision Processes using value iteration, where the agent is on a map and can only move in four directions. """ import numpy as np import settings def value_iteration(reward, torus=False, gamma=settings.MDP_GAMMA, eps=settings.MDP_EPS): "Solve the MDP problem with the value iteration method." value = reward if torus: propagate = _propagate_torus else: propagate = _propagate_plane last_value = value value = propagate(reward, reward, gamma) while abs(last_value - value).mean() > eps: last_value = value value = propagate(value, reward, gamma) return value def _propagate_plane(value, reward, gamma): rows, cols = value.shape shape = (4,) + value.shape actions = np.zeros(shape, dtype=np.float32) # s actions[0, 0:rows-1, :] = value[1:rows] # n actions[1, 1:rows, :] = value[0:rows-1] # e actions[2, :, 0:cols-1] = value[:, 1:cols] # w actions[3, :, 1:cols] = value[:, 0:cols-1] action = np.maximum(actions[0], actions[1]) action = np.maximum(action, actions[2]) action = np.maximum(action, actions[3]) return reward + gamma * action def _propagate_torus(value, reward, gamma=settings.MDP_GAMMA): rows, cols = value.shape # each action is an offset on our torus shape = (4,) + value.shape actions = np.zeros(shape, dtype=np.float32) # s: first row wraps around actions[0, 0:rows-1] = value[1:rows] actions[0, rows-1] = value[0] # n: last row wraps around actions[1, 1:rows] = value[0:rows-1] actions[1, 0] = value[rows-1] # e: first col wraps around actions[2, :, 0:cols-1] = value[:, 1:cols] actions[2, :, cols-1] = value[:, 0] # w: last col wraps around actions[3, :, 1:cols] = 
value[:, 0:cols-1] actions[3, :, 0] = value[:, cols-1] action = np.maximum(actions[0], actions[1]) action = np.maximum(action, actions[2]) action = np.maximum(action, actions[3]) return reward + gamma * action ```