Commits

Lars Yencken committed e1eb0ff

Move reward constants into settings.


Files changed (2)

         return self.enemy_hills.difference(self.destroyed_hills)
 
     def get_reward_matrix(self, frontier, ants):
-        reward = np.ones((ants.rows, ants.cols), dtype=np.float32) * -1
+        reward = np.ones((ants.rows, ants.cols), dtype=np.float32) * \
+                settings.BASE_REWARD
 
         for loc in self.active_enemy_hills:
-            reward[loc] = 30
+            reward[loc] = settings.ENEMY_HILL_REWARD
 
         for food in ants.food():
-            reward[food] = 15
+            reward[food] = settings.FOOD_REWARD
 
         for ant in ants.enemy_ants():
-            reward[ant.loc] = 5
+            reward[ant.loc] = settings.ENEMY_ANT_REWARD
 
         for y, x in zip(*np.nonzero(frontier)):
-            reward[y, x] += 2
+            reward[y, x] += settings.FRONTIER_REWARD
 
-        reward += (self.plannable == 0) * -999999
+        reward += (self.plannable == 0) * settings.BARRIER_REWARD
 
         return reward
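
For context, a reward matrix like the one returned above is normally consumed by a value-iteration planner: reward is diffused across the map with MDP_GAMMA, and iteration stops once updates fall below MDP_EPS. The sketch below is an illustrative assumption, not code from this commit; the plan_values name and the np.roll-based wrap-around neighbour propagation are invented for the example.

import numpy as np

import settings


def plan_values(reward):
    # Hypothetical helper: iterate V <- reward + MDP_GAMMA * max(neighbour V)
    # until the largest per-cell change drops below MDP_EPS.
    value = np.zeros_like(reward)
    while True:
        # Ants maps wrap around, so np.roll yields the four orthogonal
        # neighbours of every cell in one shot.
        best_nbr = np.maximum(
            np.maximum(np.roll(value, 1, axis=0), np.roll(value, -1, axis=0)),
            np.maximum(np.roll(value, 1, axis=1), np.roll(value, -1, axis=1)),
        )
        new_value = reward + settings.MDP_GAMMA * best_nbr
        if np.abs(new_value - value).max() < settings.MDP_EPS:
            return new_value
        value = new_value

An ant can then simply step towards the neighbouring square with the highest value.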
 
 
 DEBUG = False
 
-# learning rate
+# discount factor
+#   the extent to which we favour long-term reward over short-term reward
 MDP_GAMMA = 0.7
 
+# reward structure
+#   the extent to which we favour one objective over another
+ENEMY_HILL_REWARD = 30
+ENEMY_ANT_REWARD = 5
+FOOD_REWARD = 15
+FRONTIER_REWARD = 2
+BASE_REWARD = -1
+BARRIER_REWARD = -999999 # never choose this move
+
 # convergence criterion
 MDP_EPS = 1e-3
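
Taken together, MDP_GAMMA determines how these rewards trade off with distance: under standard value iteration, a reward d steps away contributes roughly MDP_GAMMA ** d of its face value to the current square. A quick back-of-the-envelope check, assuming the constants above:

import settings

# food three steps away:       15 * 0.7**3 ~= 5.1  (about one adjacent enemy ant)
# enemy hill five steps away:  30 * 0.7**5 ~= 5.0
food_at_3 = settings.FOOD_REWARD * settings.MDP_GAMMA ** 3
hill_at_5 = settings.ENEMY_HILL_REWARD * settings.MDP_GAMMA ** 5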