# udacity373_code / unit4 / u4-hw5_stochastic.py

# --------------
# USER INSTRUCTIONS
#
# Write a function called stochastic_value that 
# takes no input and RETURNS two grids. The
# first grid, value, should contain the computed
# value of each cell as shown in the video. The
# second grid, policy, should contain the optimum
# policy for each cell.
#
# Stay tuned for a homework help video! This should
# be available by Thursday and will be visible
# in the course content tab.
#
# Good luck! Keep learning!
#
# --------------
# GRADING NOTES
#
# We will be calling your stochastic_value function
# with several different grids and different values
# of success_prob, collision_cost, and cost_step.
# In order to be marked correct, your function must
# RETURN (it does not have to print) two grids,
# value and policy.
#
# When grading your value grid, we will compare the
# value of each cell with the true value according
# to this model. If your answer for each cell
# is sufficiently close to the correct answer
# (within 0.001), you will be marked as correct.
#
# NOTE: Please do not modify the values of grid,
# success_prob, collision_cost, or cost_step inside
# your function. Doing so could result in your
# submission being inappropriately marked as incorrect.

# -------------
# GLOBAL VARIABLES
#
# You may modify these variables for testing
# purposes, but you should only modify them here.
# Do NOT modify them inside your stochastic_value
# function.

# Occupancy map: 0 is a free cell; any other entry is treated as an obstacle.
grid = [
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 1, 1, 0],
]

# Goal is in the top right corner, stored as [row, col].
goal = [0, len(grid[0]) - 1]

# [row offset, col offset] for each of the four moves.
delta = [
    [-1, 0],  # go up
    [0, -1],  # go left
    [1, 0],   # go down
    [0, 1],   # go right
]

# Use these when creating your policy grid; index-aligned with delta.
delta_name = ['^', '<', 'v', '>']

# Probability that a move goes in the commanded direction.
success_prob = 0.5
# Probability(stepping left) = prob(stepping right) = failure_prob
failure_prob = (1.0 - success_prob) / 2.0
# Value charged for ending up off the grid or on an obstacle.
collision_cost = 100
# Cost added for every move.
cost_step = 1
                     

############## INSERT/MODIFY YOUR CODE BELOW ##################
#
# You may modify the code below if you want, but remember that
# your function must...
#
# 1) ...be called stochastic_value().
# 2) ...NOT take any arguments.
# 3) ...return two grids: FIRST value and THEN policy.

def stochastic_value():
    """Compute the expected-cost value grid and the policy for stochastic motion.

    Reads the module-level ``grid``, ``goal``, ``delta``, ``delta_name``,
    ``success_prob``, ``failure_prob``, ``collision_cost`` and ``cost_step``
    and does not modify any of them.

    Motion model: a commanded move lands in the intended direction with
    probability ``success_prob`` and in each of the two neighbouring entries
    of ``delta`` (indices +1 and +3, modulo ``len(delta)``) with probability
    ``failure_prob``.  With the four moves listed in rotational order these
    are the two perpendicular directions.  Landing off the grid or on an
    obstacle (a non-zero ``grid`` cell) is valued at ``collision_cost``.

    The grid is swept in place repeatedly until a complete sweep lowers no
    value.  Both grids are printed row by row before returning.

    Returns:
        (value, policy).  ``value[r][c]`` is the expected cost of reaching
        the goal from that cell: 0 at the goal, and the starting value 1000
        for any cell that was never improved (obstacles included).
        ``policy[r][c]`` is the ``delta_name`` symbol of the cheapest action,
        ``'*'`` at the goal and ``' '`` where no action was chosen.
    """
    unreached = 1000  # starting value; values are only ever lowered from here
    n_rows = len(grid)
    n_cols = len(grid[0])
    n_actions = len(delta)
    goal_row, goal_col = goal[0], goal[1]

    value = [[unreached for _ in range(n_cols)] for _ in range(n_rows)]
    policy = [[' ' for _ in range(n_cols)] for _ in range(n_rows)]

    def landing_value(row, col, a_id):
        """Return the value of the cell reached from (row, col) via delta[a_id]."""
        r = row + delta[a_id][0]
        c = col + delta[a_id][1]
        if 0 <= r < n_rows and 0 <= c < n_cols and grid[r][c] == 0:
            return value[r][c]
        # Off the grid or into an obstacle.
        return collision_cost

    changed = True
    while changed:
        changed = False

        for row in range(n_rows):
            for col in range(n_cols):
                if row == goal_row and col == goal_col:
                    # The goal is terminal: set it once and never re-evaluate
                    # it as an ordinary cell.
                    if value[row][col] > 0:
                        value[row][col] = 0
                        policy[row][col] = '*'
                        # Force another sweep so the goal's value propagates
                        # even if no other cell improved in this sweep.
                        changed = True
                    continue

                if grid[row][col] != 0:
                    # Obstacles keep the starting value and a blank policy.
                    continue

                costs = []
                for a_id in range(n_actions):
                    cost = success_prob * landing_value(row, col, a_id) + cost_step
                    cost += failure_prob * (
                        landing_value(row, col, (a_id + 1) % n_actions)
                        + landing_value(row, col, (a_id + 3) % n_actions))
                    costs.append(cost)

                best = min(costs)
                if best < value[row][col]:
                    value[row][col] = best
                    # index() returns the first minimum, so ties go to the
                    # earliest entry in delta.
                    policy[row][col] = delta_name[costs.index(best)]
                    changed = True

    # print(x) with a single argument prints the same text on Python 2 and 3.
    for row in value:
        print(row)

    for row in policy:
        print(row)
    return value, policy

# Run the solver once at module level; the function prints the value and
# policy grids itself, so the returned pair is not used here.
stochastic_value()
# Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
# Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
# Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
# Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
# Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
# Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
# Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.