Commits

Lars Yencken committed f9e6848

Use value iteration algorithm from mdp module.

  • Parent commits 8203481
  • Branches scent

Files changed (1)

 
 import settings
 from ants import Ants
+import mdp
 
 class GameTurn(object):
     "Game state for an individual move."
         self.turn_no += 1
         self.remember_hills(ants)
         self.remember_seen(ants)
-        self.value = self.value_iteration(ants)
+        self.frontier = self.build_frontier()
+        reward = self.get_reward_matrix(self.frontier, ants)
+        self.value = mdp.value_iteration(reward, torus=True)
 
         if settings.DEBUG:
             self._dump_value_map()
         self.destroyed_hills.update(self.enemy_hills.intersection(
             ants.my_ants()))
 
-    def value_iteration(self, ants):
-        "Combine old scents with new ones."
-        frontier = self.build_frontier()
-        reward = self.get_reward_matrix(frontier, ants)
-        value = reward
-        next_value = self._propagate(reward, reward)
-        while abs(value - next_value).mean() < 0.0001:
-            value = next_value
-            next_value = self._propagate(value, reward)
-
-        return value
-    
-    def _propagate(self, value, reward):
-        rows = self.rows
-        cols = self.cols
-
-        # each action is an offset on our torus
-        shape = (4,) + value.shape
-        actions = np.zeros(shape, dtype=np.float32)
-
-        # s: first row wraps around
-        actions[0, 0:rows-2] = value[1:rows-1]
-        actions[0, rows-2] = value[0]
-        # n: last row wraps around
-        actions[1, 1:rows-1] = value[0:rows-2]
-        actions[1, 0] = value[rows-1]
-        # e: first col wraps around
-        actions[2, :, 0:cols-2] = value[:, 1:cols-1]
-        actions[2, :, cols-2] = value[:, 0]
-        # w: last col wraps around
-        actions[3, :, 1:cols-1] = value[:, 0:cols-2]
-        actions[3, :, 0] = value[:, cols-1]
-
-        action = np.maximum(actions[0], actions[1])
-        action = np.maximum(action, actions[2])
-        action = np.maximum(action, actions[3])
-
-        return reward + settings.GAMMA * action
-
     def get_reward_matrix(self, frontier, ants):
         reward = np.ones((ants.rows, ants.cols), dtype=np.float32) * -1
 
         for hill in self.enemy_hills.difference(self.destroyed_hills):
-            reward[hill] = 100
+            reward[hill] = 30
 
         for food in ants.food():
-            reward[food] = 150
+            reward[food] = 15
 
         for loc in ants.enemy_ants():
-            reward[loc] = 3
+            reward[loc] = 5
+
+        for y, x in zip(*np.nonzero(frontier)):
+            reward[y, x] += 2
 
         reward += (self.plannable == 0) * -999999
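
For reference, the extracted routine itself is not part of this diff. Below is a minimal sketch of a value_iteration compatible with the call above, assuming the mdp module keeps the same four-neighbour torus update, GAMMA discount, and convergence threshold as the removed inline version; the gamma and tol defaults and the handling of torus=False are assumptions, not the actual module contents.

import numpy as np

GAMMA = 0.9  # assumed default; the bot itself reads settings.GAMMA

def value_iteration(reward, torus=True, gamma=GAMMA, tol=1e-4):
    """Value iteration on a 2D grid MDP with deterministic n/s/e/w moves.

    Each cell is a state and the Bellman update is
        V(s) = R(s) + gamma * max_a V(neighbour_a(s)),
    iterated until the mean absolute change falls below tol.
    """
    if not torus:
        raise NotImplementedError("only the wrap-around map is sketched here")

    value = reward.astype(np.float32)
    while True:
        # np.roll gives the four torus-wrapped neighbour value maps,
        # replacing the manual row/column slicing of the old _propagate().
        neighbours = np.stack([
            np.roll(value, -1, axis=0),  # south
            np.roll(value, 1, axis=0),   # north
            np.roll(value, -1, axis=1),  # east
            np.roll(value, 1, axis=1),   # west
        ])
        next_value = reward + gamma * neighbours.max(axis=0)
        if np.abs(next_value - value).mean() < tol:
            return next_value
        value = next_value

With something like this in place, GameTurn.value holds the converged value map, and food (15), enemy hills (30), enemy ants (5) and frontier cells (+2) act as the reward sources it propagates outward.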