# jiffyclub/homework5-3.py

## homework5-3.py
"""
This script models the behavior of the agent in AI Class
Homework 5, Problem 3: http://youtu.be/212NkM6UCBc

The script prints the model grid and the total number of times a square was
visited by the agent over some number of simulations.

No actual TD-learning takes place in this script, the agent simply tries to
move from the start to the goal by moving to road squares on its preset path.

Change the make_map, set_roads, make_path, make_start, and make_goal functions
to test different scenarios. For the simulation to run, the agent requires a
map with road squares connecting the start and goal, a valid path that connects
the start and goal using only road squares, and a start and goal position.
See the help on the individual functions for more information.

This agent can get stuck if it finds itself in a square where none of the four
directly adjacent squares are road squares. This is because the agent has no
defined policy for this situation. Another limitation is that the agent cannot
navigate a path that requires taking a step that takes it further from the goal.
In that situation it will get stuck in an infinite loop. Really this is just
best for illustrating Homework 5.3.

How to Run
----------
This script requires Python 2.7 but works in Python 3 after 2to3.
Run at the command line with `python homework5-3.py number-of-sims`.
I recommend something like 1000 - 10000 simulations.

Author
------
@jiffyclub
http://git.io/jiffyclub

"""

import argparse
import math
import random

from collections import defaultdict


class AgentStuck(Exception):
    """
    Error raised when the agent has nowhere to go.

    Derives from ``Exception`` rather than ``StandardError``: the latter only
    exists on Python 2, while ``Exception`` is available on Python 2 and 3.

    """


class MapSquare(object):
    """
    Represent a single square of the Map.

    Two squares compare equal when they have the same row and column;
    ``is_road`` does not take part in the comparison. Hashing follows the
    same rule, so equal squares are interchangeable in sets and dict keys.

    Parameters
    ----------
    row : int
        Row number of square. Indexed from 0.

    col : int
        Column number of square. Indexed from 0.

    is_road : bool
        Whether or not the square is a road.

    """
    def __init__(self, row, col, is_road):
        self.row = row
        self.col = col
        self.is_road = is_road

    def dist(self, sq):
        """
        Calculate the straight-line (Euclidean) distance to another square.

        Parameters
        ----------
        sq : MapSquare instance
            An instance of MapSquare.

        Returns
        -------
        dist : float
            Distance to sq.

        """
        d_row = self.row - sq.row
        d_col = self.col - sq.col

        return math.sqrt(d_row**2 + d_col**2)

    def __eq__(self, other):
        # Anything exposing row/col can be compared. Objects that do not
        # (e.g. None) are reported as "not comparable" so Python falls back
        # to its default instead of raising AttributeError.
        try:
            return (self.row, self.col) == (other.row, other.col)
        except AttributeError:
            return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return NotImplemented
        return not equal

    def __hash__(self):
        # Must agree with __eq__: only the position identifies a square.
        return hash((self.row, self.col))

    def __str__(self):
        return str((self.row, self.col, self.is_road))

    def __repr__(self):
        return repr((self.row, self.col, self.is_road))


class Map(object):
    """
    Represents a grid of MapSquares. Use the set_square_to_road function to
    configure which squares are roads.

    Valid rows are ``0 .. rows-1`` and valid columns are ``0 .. cols-1``.

    Parameters
    ----------
    rows : int
        Number of rows in grid.

    cols : int
        Number of columns in grid.

    """
    def __init__(self, rows, cols):
        self.rows = rows
        self.cols = cols

        # (row, col) -> bool; squares never marked as road default to False
        self.square_is_road = defaultdict(bool)

    def set_square_to_road(self, row, col):
        """
        Set a given square to be a road.

        Parameters
        ----------
        row : int
            Row number of square to set as road.

        col : int
            Column number of square to set as road.

        """
        self.square_is_road[(row, col)] = True

    def get_square(self, row, col, outside=None):
        """
        Get a MapSquare with optional default value for squares outside
        of the grid.

        Parameters
        ----------
        row : int
            Row number of square desired.

        col : int
            Column number of square desired.

        outside : object, optional
            Object to be returned if desired square is outside the grid.

        Returns
        -------
        square : MapSquare instance
            MapSquare for desired row and column, or outside if desired square
            is outside the grid.

        """
        # Both bounds are exclusive at the top: row == rows (or col == cols)
        # is already one step past the last valid index.
        inside = 0 <= row < self.rows and 0 <= col < self.cols
        if not inside:
            return outside

        return MapSquare(row, col, self.square_is_road[(row, col)])

    def get_neighbors(self, square):
        """
        Returns the four directly adjacent neighbors for a MapSquare. If the
        square is at a wall, that neighbor will be the MapSquare itself.

        Note the meaning of the keys as implemented here: 'up' and 'down' are
        the squares at ``col + 1`` and ``col - 1`` in the same row, while
        'left' and 'right' are the squares at ``row - 1`` and ``row + 1`` in
        the same column.

        Parameters
        ----------
        square : MapSquare instance
            Square for which to return neighbors.

        Returns
        -------
        neighbors : dict of MapSquares
            Has keys 'up', 'down', 'left', and 'right' holding MapSquare
            instances of this square's neighbors.

        """
        neighbors = {}

        row = square.row
        col = square.col

        # passing `square` as the outside value makes walls "bounce back"
        neighbors['up'] = self.get_square(row, col + 1, square)
        neighbors['down'] = self.get_square(row, col - 1, square)
        neighbors['left'] = self.get_square(row - 1, col, square)
        neighbors['right'] = self.get_square(row + 1, col, square)

        return neighbors


class Agent(object):
    """
    An agent that walks a grid from a start square towards a goal square.

    Its policy is to step onto whichever adjacent road square of its preset
    path is closest to the goal. The path is required to distinguish between
    roads which may be both adjacent to the agent and equidistant from the
    goal. A step only lands on the chosen square with some probability;
    otherwise the agent ends up on one of two side squares (see ``move``).

    Parameters
    ----------
    map : Map instance
        The grid on which the agent is moving.

    start : MapSquare instance
        The starting square of the agent.

    goal : MapSquare instance
        The goal square of the agent.

    path : list of MapSquares
        The squares which constitute the path the agent should stay on.
        Should be entirely road squares.

    """
    def __init__(self, map, start, goal, path):
        self.map = map
        self.start = start
        self.goal = goal
        self.path = path

        # the agent begins on the start square ...
        self.pos = start

        # ... which therefore already counts as visited once. Keys are
        # (row, col) tuples, values are visit counts.
        self.visited = defaultdict(int)
        self.visited[(start.row, start.col)] = 1

    def pick_target(self):
        """
        Decide which square the agent wants to step onto next. This is where
        the agent's policies are programmed.

        Returns
        -------
        target : MapSquare instance
            The neighboring path/road square closest to the goal.

        left,right : MapSquare instances
            The two neighbors on the other axis, where the agent might
            accidentally end up if it is unlucky.

        Raises
        ------
        AgentStuck
            If no neighboring square is a road square that is on the path.

        """
        adjacent = self.map.get_neighbors(self.pos)

        # distance to the goal for every neighbor that is both a road and
        # part of the path, keyed by direction
        goal_dist = {}
        for direction, square in adjacent.items():
            if square.is_road and square in self.path:
                goal_dist[direction] = square.dist(self.goal)

        if not goal_dist:
            raise AgentStuck('Agent has no adjacent Path/Road squares.')

        # if several directions tie for the shortest distance, the one
        # encountered last wins
        shortest = min(goal_dist.values())
        for direction, distance in goal_dist.items():
            if distance == shortest:
                heading = direction

        target = adjacent[heading]

        # the "unlucky" squares are the two neighbors on the other axis
        if heading in ('left', 'right'):
            left, right = adjacent['up'], adjacent['down']
        elif heading in ('up', 'down'):
            left, right = adjacent['left'], adjacent['right']

        return target, left, right

    def move(self):
        """
        Take one step. With probability ``p_success`` the agent lands on the
        square chosen by ``pick_target``; the remaining probability is split
        evenly between the two side squares. The square the agent ends up on
        has its visit count incremented.

        """
        p_success = 0.8
        p_slip = 1. - p_success

        target, left, right = self.pick_target()

        # one uniform draw in [0, 1) decides the outcome:
        #   [0, p_slip/2)      -> right
        #   [p_slip/2, p_slip) -> left
        #   [p_slip, 1)        -> target
        roll = random.random()

        if roll < p_slip/2.:
            landed = right
        elif roll < p_slip:
            landed = left
        else:
            landed = target

        self.pos = landed
        self.visited[(landed.row, landed.col)] += 1

    def go_to_goal(self):
        """
        Keep calling ``move`` until the agent stands on the goal square.

        """
        while self.pos != self.goal:
            self.move()


def make_map():
    """
    Build the grid for the simulation. Modify this to make the grid larger
    or smaller.

    Returns
    -------
    map : Map instance
        A grid of 5 rows by 8 columns with no roads set yet.

    """
    # Homework 5.3 uses a grid of 5 rows by 8 columns
    return Map(5, 8)


def set_roads(map):
    """
    Set which map squares are roads. Modify this to change the distribution
    of roads squares on the map.

    The layout is the one from Homework 5.3: rows 0 and 3 are roads across
    columns 0-7, and columns 0 and 7 are roads across rows 0-3.

    Parameters
    ----------
    map : Map instance
        Map whose ``set_square_to_road`` method is called for every road
        square. The map is modified in place; nothing is returned.

    """
    # `range` instead of the Python 2-only `xrange`: these loops are tiny, so
    # building a list on Python 2 costs nothing and the code runs on Python 3.

    # horizontal roads along rows 0 and 3
    for col in range(8):
        map.set_square_to_road(0, col)
        map.set_square_to_road(3, col)

    # vertical roads along columns 0 and 7
    for row in range(4):
        map.set_square_to_road(row, 0)
        map.set_square_to_road(row, 7)


def make_path(map):
    """
    Make the list of squares that the agent should follow. Modify this to
    change the route the agent should take.

    The path consists of the squares of row 3, columns 0 through 7, in
    that order.

    Parameters
    ----------
    map : Map instance
        Map whose ``get_square`` method supplies the squares.

    Returns
    -------
    path : list of MapSquares
        The squares the agent will attempt to stay on.

    """
    # `range` instead of the Python 2-only `xrange` so this also runs on
    # Python 3; the range is only eight items long.
    return [map.get_square(3, col) for col in range(8)]


def make_start(map):
    """
    Build the square the agent starts from. Modify this to change where the
    agent starts.

    Parameters
    ----------
    map : Map instance
        Map whose ``get_square`` method supplies the square.

    Returns
    -------
    square : MapSquare instance
        Starting point of agent: row 3, column 0.

    """
    row, col = 3, 0

    return map.get_square(row, col)


def make_goal(map):
    """
    Build the square the agent is trying to reach. Modify this to change
    where the agent is trying to go.

    Parameters
    ----------
    map : Map instance
        Map whose ``get_square`` method supplies the square.

    Returns
    -------
    square : MapSquare instance
        Goal point of agent: row 3, column 7.

    """
    row, col = 3, 7

    return map.get_square(row, col)


def run_sims(map, path, start, goal, num):
    """
    Move agents from start to goal num times so we can get good statistics
    on where the agent goes and how often.

    A fresh Agent is created for every simulation. Only agents that reach
    the goal contribute to the visit counts; an agent that raises AgentStuck
    is counted in ``stuck`` and its partial walk is discarded.

    Parameters
    ----------
    map : Map instance
        The grid on which the agents will move.

    path : list of MapSquares
        The squares which constitute the path the agent should stay on.
        Should be entirely road squares.

    start : MapSquare instance
        The starting square of the agents.

    goal : MapSquare instance
        The goal square of the agents.

    num : int
        Number of times to run simulation.

    Returns
    -------
    visited : defaultdict
        Dictionary with keys (row,col) with the counts that each square was
        visited by all the agents.

    stuck : int
        The number of times an agent was stuck and could not move
        according to its policies.

    """
    visited = defaultdict(int)
    stuck = 0

    # `range` instead of the Python 2-only `xrange` so this runs on Python 3
    for _ in range(num):
        agent = Agent(map, start, goal, path)

        try:
            agent.go_to_goal()
        except AgentStuck:
            stuck += 1
            continue

        # fold this agent's per-square counts into the running totals
        for square, count in agent.visited.items():
            visited[square] += count

    return visited, stuck


def print_visited(map, visited, stuck):
    """
    Print a grid of the map showing how many times each square was visited by
    all the agents that were simulated.

    Rows are printed from the highest row index down to row 0, each cell
    centered in 8 characters, followed by a blank line and the stuck count.
    Output uses single-argument ``print(...)`` calls, which produce the same
    text under Python 2's print statement and Python 3's print function.

    Parameters
    ----------
    map : Map instance
        Map the agents moved on. Only its ``rows`` and ``cols`` are read.

    visited : dict
        Dictionary of (row, col) keys with the number of times each square
        was visited. Squares missing from the dictionary are shown as 0.

    stuck : int
        The number of times an agent was stuck and could not move
        according to its policies.

    """
    # the horizontal rule is the same for every row, so build it once
    border = map.cols * ('+' + 8 * '-') + '+'

    for i in range(map.rows - 1, -1, -1):
        print(border)

        # .get() lets a plain dict be passed without a KeyError on squares
        # that were never visited
        cells = ['{:^8d}|'.format(visited.get((i, j), 0))
                 for j in range(map.cols)]

        print('|' + ''.join(cells))

    print(border)

    print('')

    print('Agents got stuck {} times.'.format(stuck))


def parse_args():
    """
    Parse the command line arguments of the script.

    Returns
    -------
    args : argparse.Namespace
        Parsed arguments. ``args.num`` is a one-element list holding the
        number of simulations to run as an int.

    """
    num_help = 'Number of simulations to run. 1000-10000 recommended.'

    cli = argparse.ArgumentParser(description='Run Homework 5.3 sim.')
    cli.add_argument('num', nargs=1, type=int, help=num_help)

    return cli.parse_args()


def main():
    """
    Entry point of the script: read the number of simulations from the
    command line, build the map with its roads, create the start, goal and
    path, run the simulations and print the visit counts.

    """
    # `num` is parsed with nargs=1, so it arrives as a one-element list
    sim_count = parse_args().num[0]

    grid = make_map()
    set_roads(grid)

    origin = make_start(grid)
    destination = make_goal(grid)
    route = make_path(grid)

    counts, stuck_total = run_sims(grid, route, origin, destination,
                                   sim_count)

    print_visited(grid, counts, stuck_total)


# Only run the simulation when executed as a script, not when imported.
if __name__ == "__main__":
    # main() returns None, which SystemExit treats as a successful exit
    exit_status = main()
    raise SystemExit(exit_status)
	"""
	This script models the behavior of the agent in AI Class
	Homework 5, Problem 3: http://youtu.be/212NkM6UCBc

	The script prints the model grid and the total number of times a square was
	visited by the agent over some number of simulations.

	No actual TD-learning takes place in this script, the agent simply tries to
	move from the start to the goal by moving to road squares on its preset path.

	Change the make_map, set_roads, make_path, make_start, and make_goal functions
	to test different scenarios. For the simulation to run, the agent requires a
	map with road squares connecting the start and goal, a valid path that connects
	the start and goal using only road squares, and a start and goal position.
	See the help on the individual functions for more information.

	This agent can get stuck if it finds itself in a square where none of the four
	directly adjacent squares are road squares. This is because the agent has no
	defined policy for this situation. Another limitation is that the agent cannot
	navigate a path that requires taking a step that takes it further from the goal.
	In that situation it will get stuck in an infinite loop. Really this is just
	best for illustrating Homework 5.3.

	How to Run
	----------
	This script requires Python 2.7 but works in Python 3 after 2to3.
	Run at the command line with `python homework5-3.py number-of-sims`.
	I recommend something like 1000 - 10000 simulations.

	Author
	------
	@jiffyclub
	http://git.io/jiffyclub

	"""

	import argparse
	import math
	import random

	from collections import defaultdict


	class AgentStuck(Exception):
		"""
		Error raised when the agent has nowhere to go.

		Derives from ``Exception`` rather than ``StandardError``: the latter
		only exists on Python 2, while ``Exception`` is available on
		Python 2 and 3.

		"""


	class MapSquare(object):
		"""
		Represent a single square of the Map.

		Two squares compare equal when they have the same row and column;
		``is_road`` does not take part in the comparison. Hashing follows
		the same rule.

		Parameters
		----------
		row : int
		    Row number of square. Indexed from 0.

		col : int
		    Column number of square. Indexed from 0.

		is_road : bool
		    Whether or not the square is a road.

		"""
		def __init__(self, row, col, is_road):
			self.row = row
			self.col = col
			self.is_road = is_road

		def dist(self, sq):
			"""
			Calculate the straight-line (Euclidean) distance to another
			square.

			Parameters
			----------
			sq : MapSquare instance
			    An instance of MapSquare.

			Returns
			-------
			dist : float
			    Distance to sq.

			"""
			d_row = self.row - sq.row
			d_col = self.col - sq.col

			# squares of the row and column differences
			return math.sqrt(d_row**2 + d_col**2)

		def __eq__(self, other):
			# Objects without row/col (e.g. None) are reported as "not
			# comparable" instead of raising AttributeError.
			try:
				return (self.row, self.col) == (other.row, other.col)
			except AttributeError:
				return NotImplemented

		def __ne__(self, other):
			# Python 2 does not derive != from ==, so spell it out.
			equal = self.__eq__(other)
			if equal is NotImplemented:
				return NotImplemented
			return not equal

		def __hash__(self):
			# Must agree with __eq__: only the position identifies a square.
			return hash((self.row, self.col))

		def __str__(self):
			return str((self.row, self.col, self.is_road))

		def __repr__(self):
			return repr((self.row, self.col, self.is_road))


	class Map(object):
		"""
		Represents a grid of MapSquares. Use the set_square_to_road function
		to configure which squares are roads.

		Valid rows are ``0 .. rows-1`` and valid columns are ``0 .. cols-1``.

		Parameters
		----------
		rows : int
		    Number of rows in grid.

		cols : int
		    Number of columns in grid.

		"""
		def __init__(self, rows, cols):
			self.rows = rows
			self.cols = cols

			# (row, col) -> bool; squares never marked as road default to False
			self.square_is_road = defaultdict(bool)

		def set_square_to_road(self, row, col):
			"""
			Set a given square to be a road.

			Parameters
			----------
			row : int
			    Row number of square to set as road.

			col : int
			    Column number of square to set as road.

			"""
			self.square_is_road[(row, col)] = True

		def get_square(self, row, col, outside=None):
			"""
			Get a MapSquare with optional default value for squares outside
			of the grid.

			Parameters
			----------
			row : int
			    Row number of square desired.

			col : int
			    Column number of square desired.

			outside : object, optional
			    Object to be returned if desired square is outside the grid.

			Returns
			-------
			square : MapSquare instance
			    MapSquare for desired row and column, or outside if desired
			    square is outside the grid.

			"""
			# Both bounds are exclusive at the top: row == rows (or
			# col == cols) is already one step past the last valid index.
			inside = 0 <= row < self.rows and 0 <= col < self.cols
			if not inside:
				return outside

			return MapSquare(row, col, self.square_is_road[(row, col)])

		def get_neighbors(self, square):
			"""
			Returns the four directly adjacent neighbors for a MapSquare. If
			the square is at a wall, that neighbor will be the MapSquare
			itself.

			As implemented here, 'up' and 'down' are the squares at
			``col + 1`` and ``col - 1`` in the same row, while 'left' and
			'right' are the squares at ``row - 1`` and ``row + 1`` in the
			same column.

			Parameters
			----------
			square : MapSquare instance
			    Square for which to return neighbors.

			Returns
			-------
			neighbors : dict of MapSquares
			    Has keys 'up', 'down', 'left', and 'right' holding MapSquare
			    instances of this square's neighbors.

			"""
			neighbors = {}

			row = square.row
			col = square.col

			# passing `square` as the outside value makes walls "bounce back"
			neighbors['up'] = self.get_square(row, col + 1, square)
			neighbors['down'] = self.get_square(row, col - 1, square)
			neighbors['left'] = self.get_square(row - 1, col, square)
			neighbors['right'] = self.get_square(row + 1, col, square)

			return neighbors


	class Agent(object):
		"""
		An agent that walks a grid from a start square towards a goal square.

		Its policy is to step onto whichever adjacent road square of its
		preset path is closest to the goal. The path is required to
		distinguish between roads which may be both adjacent to the agent and
		equidistant from the goal. A step only lands on the chosen square
		with some probability; otherwise the agent ends up on one of two side
		squares (see ``move``).

		Parameters
		----------
		map : Map instance
		    The grid on which the agent is moving.

		start : MapSquare instance
		    The starting square of the agent.

		goal : MapSquare instance
		    The goal square of the agent.

		path : list of MapSquares
		    The squares which constitute the path the agent should stay on.
		    Should be entirely road squares.

		"""
		def __init__(self, map, start, goal, path):
			self.map = map
			self.start = start
			self.goal = goal
			self.path = path

			self.pos = start

			# track which squares have been visited and how many times;
			# the start square counts as visited once
			self.visited = defaultdict(int)
			self.visited[(start.row, start.col)] = 1

		def pick_target(self):
			"""
			Choose which square to move to next. This is where the agent's
			policies are programmed.

			Returns
			-------
			target : MapSquare instance
			    The neighboring path/road square closest to the goal.

			left,right : MapSquare instances
			    The two neighbors on the other axis, where the agent might
			    accidentally end up if it is unlucky.

			Raises
			------
			AgentStuck
			    If no neighboring square is a road square on the path.

			"""
			neighbors = self.map.get_neighbors(self.pos)

			# distance to the goal for every neighbor that is both a road
			# and part of the path, keyed by direction
			dists = {}
			for nk, square in neighbors.items():
				if square.is_road and square in self.path:
					dists[nk] = square.dist(self.goal)

			if not dists:
				raise AgentStuck('Agent has no adjacent Path/Road squares.')

			# if several directions tie for the shortest distance, the one
			# encountered last wins
			min_dist = min(dists.values())
			for dk, distance in dists.items():
				if distance == min_dist:
					targetk = dk

			target = neighbors[targetk]

			# the "unlucky" squares are the two neighbors on the other axis
			if targetk in ('up', 'down'):
				left = neighbors['left']
				right = neighbors['right']
			elif targetk in ('left', 'right'):
				left = neighbors['up']
				right = neighbors['down']

			return target, left, right

		def move(self):
			"""
			Take one step. With probability ``p_success`` the agent lands on
			the square chosen by ``pick_target``; the remaining probability
			is split evenly between the two side squares. The square the
			agent ends up on has its visit count incremented.

			"""
			p_success = 0.8

			target, left, right = self.pick_target()

			# roll the dice to see whether we make it to the target or
			# go off course
			p = random.random()

			if p >= (1. - p_success):
				self.pos = target
			elif p >= (1. - p_success)/2.:
				self.pos = left
			else:
				self.pos = right

			self.visited[(self.pos.row, self.pos.col)] += 1

		def go_to_goal(self):
			"""
			Call the move method until the agent is at the goal square.

			"""
			while self.pos != self.goal:
				self.move()


	def make_map():
		"""
		Make a Map instance. Modify this to make the grid larger or smaller.

		Returns
		-------
		map : Map instance
		    A grid of 5 rows by 8 columns with no roads set yet.

		"""
		# this is the grid size for Homework 5.3
		rows = 5
		cols = 8

		return Map(rows, cols)


	def set_roads(map):
		"""
		Set which map squares are roads. Modify this to change the
		distribution of roads squares on the map.

		The layout is the one from Homework 5.3: rows 0 and 3 are roads
		across columns 0-7, and columns 0 and 7 are roads across rows 0-3.

		Parameters
		----------
		map : Map instance
		    Map whose ``set_square_to_road`` method is called for every road
		    square. The map is modified in place; nothing is returned.

		"""
		# `range` instead of the Python 2-only `xrange`; the loops are tiny

		# horizontal roads along rows 0 and 3
		for col in range(8):
			map.set_square_to_road(0, col)
			map.set_square_to_road(3, col)

		# vertical roads along columns 0 and 7
		for row in range(4):
			map.set_square_to_road(row, 0)
			map.set_square_to_road(row, 7)


	def make_path(map):
		"""
		Make the list of squares that the agent should follow. Modify this
		to change the route the agent should take.

		The path consists of the squares of row 3, columns 0 through 7, in
		that order.

		Parameters
		----------
		map : Map instance
		    Map whose ``get_square`` method supplies the squares.

		Returns
		-------
		path : list of MapSquares
		    The squares the agent will attempt to stay on.

		"""
		# `range` instead of the Python 2-only `xrange`
		return [map.get_square(3, col) for col in range(8)]


	def make_start(map):
		"""
		Make the starting MapSquare. Modify this to change where the agent
		starts.

		Parameters
		----------
		map : Map instance
		    Map whose ``get_square`` method supplies the square.

		Returns
		-------
		square : MapSquare instance
		    Starting point of agent: row 3, column 0.

		"""
		# (row, column)
		start = (3, 0)

		return map.get_square(*start)


	def make_goal(map):
		"""
		Make the goal MapSquare. Modify this to change where the agent is
		trying to go.

		Parameters
		----------
		map : Map instance
		    Map whose ``get_square`` method supplies the square.

		Returns
		-------
		square : MapSquare instance
		    Goal point of agent: row 3, column 7.

		"""
		# (row, column)
		goal = (3, 7)

		return map.get_square(*goal)


	def run_sims(map, path, start, goal, num):
		"""
		Move agents from start to goal num times so we can get good
		statistics on where the agent goes and how often.

		A fresh Agent is created for every simulation. Only agents that reach
		the goal contribute to the visit counts; an agent that raises
		AgentStuck is counted in ``stuck`` and its partial walk is discarded.

		Parameters
		----------
		map : Map instance
		    The grid on which the agents will move.

		path : list of MapSquares
		    The squares which constitute the path the agent should stay on.
		    Should be entirely road squares.

		start : MapSquare instance
		    The starting square of the agents.

		goal : MapSquare instance
		    The goal square of the agents.

		num : int
		    Number of times to run simulation.

		Returns
		-------
		visited : defaultdict
		    Dictionary with keys (row,col) with the counts that each square
		    was visited by all the agents.

		stuck : int
		    The number of times an agent was stuck and could not move
		    according to its policies.

		"""
		visited = defaultdict(int)
		stuck = 0

		# `range` instead of the Python 2-only `xrange`
		for _ in range(num):
			agent = Agent(map, start, goal, path)

			try:
				agent.go_to_goal()
			except AgentStuck:
				stuck += 1
				continue

			# fold this agent's per-square counts into the running totals
			for square, count in agent.visited.items():
				visited[square] += count

		return visited, stuck


	def print_visited(map, visited, stuck):
		"""
		Print a grid of the map showing how many times each square was
		visited by all the agents that were simulated.

		Rows are printed from the highest row index down to row 0, each cell
		centered in 8 characters, followed by a blank line and the stuck
		count. Output uses single-argument ``print(...)`` calls, which
		produce the same text under Python 2's print statement and Python 3's
		print function.

		Parameters
		----------
		map : Map instance
		    Map the agents moved on. Only its ``rows`` and ``cols`` are read.

		visited : dict
		    Dictionary of (row, col) keys with the number of times each
		    square was visited. Squares missing from the dictionary are
		    shown as 0.

		stuck : int
		    The number of times an agent was stuck and could not move
		    according to its policies.

		"""
		# the horizontal rule is the same for every row, so build it once:
		# one '+--------' segment per column plus a closing '+'
		border = map.cols * ('+' + 8 * '-') + '+'

		for i in range(map.rows - 1, -1, -1):
			print(border)

			# .get() lets a plain dict be passed without a KeyError on
			# squares that were never visited
			cells = ['{:^8d}|'.format(visited.get((i, j), 0))
			         for j in range(map.cols)]

			print('|' + ''.join(cells))

		print(border)

		print('')

		print('Agents got stuck {} times.'.format(stuck))


	def parse_args():
		"""
		Parse the command line arguments of the script.

		Returns
		-------
		args : argparse.Namespace
		    Parsed arguments. ``args.num`` is a one-element list holding the
		    number of simulations to run as an int.

		"""
		parser = argparse.ArgumentParser(description='Run Homework 5.3 sim.')

		parser.add_argument('num', nargs=1, type=int,
		                    help='Number of simulations to run. '
		                         '1000-10000 recommended.')

		return parser.parse_args()


	def main():
		"""
		The main controlling function. Construct a map, specify road squares,
		make start, goal, and path connecting them, then run simulations and
		print the results.

		"""
		# `num` is parsed with nargs=1, so it arrives as a one-element list
		num = parse_args().num[0]

		map = make_map()

		set_roads(map)

		start = make_start(map)

		goal = make_goal(map)

		path = make_path(map)

		visited, stuck = run_sims(map, path, start, goal, num)

		print_visited(map, visited, stuck)


	# Only run the simulation when executed as a script, not when imported.
	if __name__ == '__main__':
		raise SystemExit(main())