Commits

Lars Yencken committed 895523e

Use value iteration to lead to rewards.

Comments (0)

Files changed (3)

 	mkdir -p dist
 	rm -f dist/bundle.zip
 	zip dist/bundle.zip MyBot.py ants.py
+
# Animated debug GIFs assembled from the per-turn PNG dumps in game_logs/.
# (Fixed: the prerequisite list previously ended with a stray trailing
# backslash, continuing the line onto the following blank line.)
gifs: \
	game_logs/frontier.gif \
	game_logs/value.gif

# ImageMagick `convert`: stitch frames at a 5/100s delay into a looping GIF.
game_logs/frontier.gif: game_logs/*-frontier.png
	convert -delay 5 game_logs/*-frontier.png -loop 0 game_logs/frontier.gif

game_logs/value.gif: game_logs/*-value.png
	convert -delay 5 game_logs/*-value.png -loop 0 game_logs/value.gif
 Bot for Google's AI Challenge.
 """
 
+import os
 import sys
 import heapq
 from functools import partial
 from itertools import product
 import random
 import Image
+import math
 
 import numpy as np
 from scipy.signal import convolve
 
+import settings
 from ants import Ants
 
 class GameTurn(object):
                 if self.do_move_location(ant_loc, unseen_loc):
                     break
 
-    def build_frontier(self, unseen):
-        frontier = unseen.copy()
-        rows = self.ants.rows
-        cols = self.ants.cols
-        for i in xrange(rows):
-            for j in xrange(cols):
-                l = (i, j)
-                if ((i - 1) % rows, (j - 1) % cols) in unseen \
-                        and ((i - 1) % rows, (j + 1) % cols) in unseen \
-                        and ((i + 1) % rows, (j - 1) % cols) in unseen \
-                        and ((i + 1) % rows, (j + 1) % rows) in unseen:
-                    frontier.remove(l)
-
-        return frontier
-
     def print_frontier(self):
         self.game.print_map(self.frontier)
 
     def follow_scent(self):
         ants = self.ants
         directions = ['n', 's', 'e', 'w']
-        scent = self.game.scent
+        value = self.game.value
         for loc in ants.my_ants():
             options = []
             for d in directions:
                 dest = ants.destination(loc, d)
-                smell = scent[dest]
+                smell = value[dest]
                 if dest not in self.plans and self.ants.passable(dest):
                     options.append((smell, d))
             options.sort(reverse=True)
         self.cols = ants.cols
         self.seen = np.zeros((self.rows, self.cols), dtype=np.bool_)
         self.plannable = np.zeros((self.rows, self.cols), dtype=np.bool_)
-        self.scent = np.zeros((self.rows, self.cols), dtype=np.int32)
         self.destroyed_hills = set()
         self.enemy_hills = set()
+        self.turn_no = 0
 
     def do_turn(self, ants):
         "Give orders to all of our ants."
+        self.turn_no += 1
         self.remember_hills(ants)
         self.remember_seen(ants)
-        self.waft(ants)
+        self.value = self.value_iteration(ants)
+
+        if settings.DEBUG:
+            self._dump_value_map()
 
         turn = GameTurn(ants, self)
         turn.follow_scent()
 
    def build_frontier(self):
        """Return a boolean (rows, cols) array marking cells on the frontier:
        seen cells that border at least one unseen cell (on a toroidal map)."""
        rows = self.rows
        cols = self.cols
        # Centre-minus-neighbour-mean mask: a seen cell (1.0) with at least
        # one unseen orthogonal neighbour (0.0) yields a strictly positive
        # response; fully-surrounded seen cells and unseen cells do not.
        mask = np.array(
                [[0.0, -0.25, 0.0],
                [-0.25, 1.0, -0.25],
                [0.0, -0.25, 0.0]]
            )
        # Pad the seen map with one wrapped row/column on each side so the
        # convolution respects the torus.  The four corner cells stay zero;
        # the mask's corners are zero, so they are never read.
        frontier = np.zeros((rows + 2, cols + 2))
        frontier[1:rows+1,1:cols+1] = self.seen
        frontier[rows+1,1:cols+1] = self.seen[0]
        frontier[0,1:cols+1] = self.seen[rows-1]
        frontier[1:rows+1,0] = self.seen[:, 0]
        frontier[1:rows+1,cols+1] = self.seen[:, cols-1]

        # Full convolution of (rows+2, cols+2) with a 3x3 kernel yields
        # (rows+4, cols+4); this slice recovers the entries aligned with the
        # original unpadded grid.
        frontier = (convolve(frontier, mask) > 0)[2:rows+2,2:cols+2]

        if settings.DEBUG:
            self._dump_frontier(frontier)

        return frontier
+
     def remember_seen(self, ants):
         visible = ants.visible
         seen = self.seen
         self.destroyed_hills.update(self.enemy_hills.intersection(
             ants.my_ants()))
 
-    def waft(self, ants):
+    def value_iteration(self, ants):
         "Combine old scents with new ones."
-        rows = ants.rows
-        cols = ants.cols
-        base_scent = self.get_base_scent(ants)
-
-
-        kernel = np.array(
-                [[0, 0.05, 0],
-                [0.05, 0.8, 0.05],
-                [0, 0.05, 0]]
-            )
-        s = np.zeros((rows*3, cols*3))
-        p = np.zeros((rows*3, cols*3))
-        for i in xrange(3):
-            for j in xrange(3):
-                s[i*rows:(i+1)*rows, j*cols:(j+1)*cols] = base_scent
-                p[i*rows:(i+1)*rows, j*cols:(j+1)*cols] = self.plannable
-
-        for k in xrange(8):
-            s = convolve(s, kernel)[1:-1, 1:-1] * p
-
-        self.scent = s[rows:2*rows, cols:2*cols]
-
-    def get_base_scent(self, ants):
-        base_scent = np.zeros((self.rows, self.cols), dtype=np.float32)
-
-        # score hills
-        for hill in self.hills:
-            base_scent[hill] = 50.0
+        frontier = self.build_frontier()
+        reward = self.get_reward_matrix(frontier, ants)
+        value = reward
+        next_value = self._propagate(reward, reward)
+        while abs(value - next_value).mean() < 0.0001:
+            value = next_value
+            next_value = self._propagate(value, reward)
+
+        return value
+    
+    def _propagate(self, value, reward):
+        rows = self.rows
+        cols = self.cols
+
+        # each action is an offset on our torus
+        shape = (4,) + value.shape
+        actions = np.zeros(shape, dtype=np.float32)
+
+        # s: first row wraps around
+        actions[0, 0:rows-2] = value[1:rows-1]
+        actions[0, rows-2] = value[0]
+        # n: last row wraps around
+        actions[1, 1:rows-1] = value[0:rows-2]
+        actions[1, 0] = value[rows-1]
+        # e: first col wraps around
+        actions[2, :, 0:cols-2] = value[:, 1:cols-1]
+        actions[2, :, cols-2] = value[:, 0]
+        # w: last col wraps around
+        actions[3, :, 1:cols-1] = value[:, 0:cols-2]
+        actions[3, :, 0] = value[:, cols-1]
+
+        action = np.maximum(actions[0], actions[1])
+        action = np.maximum(action, actions[2])
+        action = np.maximum(action, actions[3])
+
+        return reward + settings.GAMMA * action
+
+    def get_reward_matrix(self, frontier, ants):
+        reward = np.ones((ants.rows, ants.cols), dtype=np.float32) * -1
+
+        for hill in self.enemy_hills.difference(self.destroyed_hills):
+            reward[hill] = 100
 
-        # score food
         for food in ants.food():
-            base_scent[food] = 1000.0
-
-        for loc in ants.my_ants():
-            base_scent[loc] = -10
+            reward[food] = 150
 
         for loc in ants.enemy_ants():
-            base_scent[loc] = 10
+            reward[loc] = 3
+
+        reward += (self.plannable == 0) * -999999
 
-        return base_scent
+        return reward
 
     def print_map(self, m):
         render = lambda x: u'O' if x in m else u'•'
             l = u''.join(render((i, j)) for j in xrange(self.cols))
             print >> sys.stderr, l
 
+    def _dump_value_map(self):
+        im = Image.new('L', (self.rows, self.cols))
+        value = self.value
+        value = 255 * (value > 0) * value / value.max()
+        for i, j in product(xrange(self.rows), xrange(self.cols)):
+            im.putpixel((i, j), int(math.ceil(value[i, j])))
+        im.save(os.path.join(settings.LOG_DIR, '%.03d-value.png'
+            % self.turn_no))
+
+    def _dump_frontier(self, frontier):
+        im = Image.new('RGB', (self.rows, self.cols))
+        seen = self.seen
+        for i, j in product(xrange(self.rows), xrange(self.cols)):
+            if frontier[i, j]:
+                im.putpixel((i, j), (50, 50, 255))
+            elif seen[i, j]:
+                im.putpixel((i, j), (255, 255, 255))
+        im.save(os.path.join(settings.LOG_DIR, '%.03d-frontier.png'
+            % self.turn_no))
+
 def dump_grayscale_image(m, filename):
     rows, cols = m.shape
     im = Image.new('1', (cols, rows))
# -*- coding: utf-8 -*-
#
#  settings.py
#  aichallenge-py
#
#  Created by Lars Yencken on 2011-11-13.
#  Copyright 2011 Lars Yencken. All rights reserved.
#

"""
Settings for the AI challenge bot.
"""

# When True, dump per-turn value/frontier debug images to LOG_DIR.
DEBUG = True

# discount factor for value iteration (note: a discount factor, not a
# learning rate — it weights the best neighbouring value each sweep)
GAMMA = 0.7

# Directory where per-turn debug images (and game logs) are written.
LOG_DIR = 'game_logs'
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.