AndrewWalker/brick_vi_mtime_sol.py

## brick_vi_mtime_sol.py
# This is a python port of the minimum time solution of the double integrator
# (with no damping) minimum time value iteration problem, based on the course notes of
# MIT OCWs Underactuated robotics.
#
# The original material that this work is derived from is:
#
# Prof. Russel Tedrake, Underactuated Robotics, Spring 2009. (Massachusetts Institute of # Technology: MIT OpenCouseWare), http://ocw.mit.edu (Accessed May 2012). License:
# Creative Commons BY-NC-SA

import sys
import math
import numpy as np
from numpy.matlib import repmat, reshape
import matplotlib.pyplot as plt

def dynamics(x, u):
    xdot = np.vstack([x[1,:], u[0]])
    ind  = np.sum(np.abs(x), axis = 0) == 0
    xdot[:,ind] = np.zeros((2, np.sum(ind)))
    return xdot

def normalize(s, q_bins, qdot_bins):
    s[0] = np.clip(s[0], q_bins[0], q_bins[-1])
    s[1] = np.clip(s[1], qdot_bins[0], qdot_bins[-1])
    return s

def cost(x, u):
    # The cost is zero at the goal, and 1 everywhere else
    C = (np.sum(np.abs(x),axis=0)>0).astype('float')
    return C

def vi_plot(J, PI, q_bins, qdot_bins):
    n1 = q_bins.shape[0]
    n2 = qdot_bins.shape[0]
    extents = [ q_bins[0], q_bins[-1], qdot_bins[0], qdot_bins[-1] ]

    plt.subplot(211)
    plt.imshow(np.fliplr(np.reshape(PI, (n2, n1))), extent = extents, interpolation='none')
    plt.gca().set_aspect('auto')

    plt.subplot(212)
    plt.imshow(np.fliplr(np.reshape(J.T, (n2, n1))), extent = extents, interpolation='none')
    plt.gca().set_aspect('auto')
    plt.draw()

def volumetric_interp(s, Sn, q_bins, qdot_bins):
    """Find the indices and parameters of interpolation

    Parameters
    ----------
    s : numpy.ndarray
        the original states
    Sn : numpy.ndarray
        the updated states (2d)
    q_bins : numpy.ndarray
        the 1d array of q portion of state
    qdot_bins : numpy.ndarray
        the 1d array of the qdot portion of state
    """
    # interesting things here
    #  the discretization doesn't have to be uniform
    ns = Sn.shape[1]
    Pi = np.zeros((4, ns), dtype = int)
    P  = np.zeros((4, ns), dtype = float)

    Sn = normalize(Sn, q_bins, qdot_bins)

    for i in xrange(ns):
        # the largest index of the q variable
        ind_q = np.max( np.where(q_bins <= Sn[0,i])[0] )
        # the largest index of the q_dot variable
        ind_qdot = np.max( np.where(qdot_bins <= Sn[1,i])[0] )

        # n-dimensional index offset vectors (automate?)
        offset = [[0,0],[1,0],[0,1],[1,1]]
        offset = np.array(offset)
        if ind_q == len(q_bins)-1:
            offset[:,0] *= -1.0
        if ind_qdot == len(qdot_bins)-1:
            offset[:,1] *= -1.0

        # box size and area
        dx = abs(q_bins[ind_q + offset[1,0]] - q_bins[ind_q])
        dy = abs(qdot_bins[ind_qdot + offset[2,1]] - qdot_bins[ind_qdot])
        totl_area = dx*dx

        for j in xrange(4):
            # convience cache the actual state of interest for this step
            ind_q_off    = ind_q    + offset[j,0]
            ind_qdot_off = ind_qdot + offset[j,1]

            # calculate the actual states for the indices
            state = [ q_bins[ind_q_off], qdot_bins[ind_qdot_off] ]
            state = np.array([state]).T
            # for origidx, orig in enumerate(s):
            #     if sum(abs(orig - state)) == 0:
            #         return origidx
            Pi[j, i] = np.where(np.sum(abs(s - repmat(state, 1, s.shape[1])), axis=0) == 0)[0][0]

            # serr_q and serr_qdot are scalars that post area normalisation are in the
            # range 0 - 1
            serr_q    = abs(Sn[0,i] - q_bins[ind_q_off])
            serr_qdot = abs(Sn[1,i] - qdot_bins[ind_qdot_off])
            P[3-j, i] = (serr_q * serr_qdot)/totl_area
    return Pi, P

# define the actions
a = [-1.0, 0.0, 1.0]
a = np.array([a])

# define the mesh points as row vectors
q_bins = np.linspace(-2., 2., 65)
qdot_bins = np.linspace(-2., 2., 65)

# create the mesh
q, qdot = np.meshgrid(q_bins, qdot_bins)

# state construction
s0 = reshape(q, (1, q.size))
s1 = reshape(qdot, (1, qdot.size))
s = np.vstack([s0, s1])

# size information
ns = s.shape[1]
na = a.shape[1]

## generate all state and action pairs
S = repmat(s, 1, na)
A = a.repeat(ns, axis = 1)

# compute the one step dynamics. lots of assumptions here
#  1. integration method is fixed timestep fwd Euler
#  2. assumes dt is sufficently small
dt = 1e-2
Sn = S + dynamics(S, A) * dt

Pi, P = volumetric_interp(s, Sn, q_bins, qdot_bins)
C = reshape(cost(S, A), (na, ns)).T
J = np.zeros((ns, 1))

gamma = 1.0
converged = 0.1
iter = 1
err = 1e6

plt.ion()
while err > converged:

    # discounted learning update
    #   cost = one-step-cost +
    #          (discount * the sum
    #               for all s
    #               for all a
    #                   of transitioning from s to f(s,a) === s' )
    Cprime = reshape(np.sum(P * J[Pi][:,:,0], axis=0),(na,ns)).T
    Cprime = C + (gamma * Cprime)

    # minimum over the arguments
    # matlab lets you do this in one call, python will too, but not
    # so sure about doing this in dimensions higher than 1. Neither
    # I or D.C. can work it out.
    #   PI   - Policy Iteration (best action to pick in this state)
    #   Jnew - the updated cost-to-go
    PI     = np.argmin(Cprime, axis = 1)
    Jnew   = reshape(np.min(Cprime, axis = 1), (ns,1))

    err    = np.max(np.abs(Jnew - J))
    print 'iteration=', iter, 'max err', err
    J      = Jnew

    if iter % 20 == 0:
        vi_plot(J, a.T[PI], q_bins, qdot_bins)
    iter  += 1
	# This is a python port of the minimum time solution of the double integrator
	# (with no damping) minimum time value iteration problem, based on the course notes of
	# MIT OCWs Underactuated robotics.
	#
	# The original material that this work is derived from is:
	#
	# Prof. Russel Tedrake, Underactuated Robotics, Spring 2009. (Massachusetts Institute of # Technology: MIT OpenCouseWare), http://ocw.mit.edu (Accessed May 2012). License:
	# Creative Commons BY-NC-SA

	import sys
	import math
	import numpy as np
	from numpy.matlib import repmat, reshape
	import matplotlib.pyplot as plt

	def dynamics(x, u):
	xdot = np.vstack([x[1,:], u[0]])
	ind = np.sum(np.abs(x), axis = 0) == 0
	xdot[:,ind] = np.zeros((2, np.sum(ind)))
	return xdot

	def normalize(s, q_bins, qdot_bins):
	s[0] = np.clip(s[0], q_bins[0], q_bins[-1])
	s[1] = np.clip(s[1], qdot_bins[0], qdot_bins[-1])
	return s

	def cost(x, u):
	# The cost is zero at the goal, and 1 everywhere else
	C = (np.sum(np.abs(x),axis=0)>0).astype('float')
	return C

	def vi_plot(J, PI, q_bins, qdot_bins):
	n1 = q_bins.shape[0]
	n2 = qdot_bins.shape[0]
	extents = [ q_bins[0], q_bins[-1], qdot_bins[0], qdot_bins[-1] ]

	plt.subplot(211)
	plt.imshow(np.fliplr(np.reshape(PI, (n2, n1))), extent = extents, interpolation='none')
	plt.gca().set_aspect('auto')

	plt.subplot(212)
	plt.imshow(np.fliplr(np.reshape(J.T, (n2, n1))), extent = extents, interpolation='none')
	plt.gca().set_aspect('auto')
	plt.draw()

	def volumetric_interp(s, Sn, q_bins, qdot_bins):
	"""Find the indices and parameters of interpolation

	Parameters
	----------
	s : numpy.ndarray
	the original states
	Sn : numpy.ndarray
	the updated states (2d)
	q_bins : numpy.ndarray
	the 1d array of q portion of state
	qdot_bins : numpy.ndarray
	the 1d array of the qdot portion of state
	"""
	# interesting things here
	# the discretization doesn't have to be uniform
	ns = Sn.shape[1]
	Pi = np.zeros((4, ns), dtype = int)
	P = np.zeros((4, ns), dtype = float)

	Sn = normalize(Sn, q_bins, qdot_bins)

	for i in xrange(ns):
	# the largest index of the q variable
	ind_q = np.max( np.where(q_bins <= Sn[0,i])[0] )
	# the largest index of the q_dot variable
	ind_qdot = np.max( np.where(qdot_bins <= Sn[1,i])[0] )

	# n-dimensional index offset vectors (automate?)
	offset = [[0,0],[1,0],[0,1],[1,1]]
	offset = np.array(offset)
	if ind_q == len(q_bins)-1:
	offset[:,0] *= -1.0
	if ind_qdot == len(qdot_bins)-1:
	offset[:,1] *= -1.0

	# box size and area
	dx = abs(q_bins[ind_q + offset[1,0]] - q_bins[ind_q])
	dy = abs(qdot_bins[ind_qdot + offset[2,1]] - qdot_bins[ind_qdot])
	totl_area = dx*dx

	for j in xrange(4):
	# convience cache the actual state of interest for this step
	ind_q_off = ind_q + offset[j,0]
	ind_qdot_off = ind_qdot + offset[j,1]

	# calculate the actual states for the indices
	state = [ q_bins[ind_q_off], qdot_bins[ind_qdot_off] ]
	state = np.array([state]).T
	# for origidx, orig in enumerate(s):
	# if sum(abs(orig - state)) == 0:
	# return origidx
	Pi[j, i] = np.where(np.sum(abs(s - repmat(state, 1, s.shape[1])), axis=0) == 0)[0][0]

	# serr_q and serr_qdot are scalars that post area normalisation are in the
	# range 0 - 1
	serr_q = abs(Sn[0,i] - q_bins[ind_q_off])
	serr_qdot = abs(Sn[1,i] - qdot_bins[ind_qdot_off])
	P[3-j, i] = (serr_q * serr_qdot)/totl_area
	return Pi, P

	# define the actions
	a = [-1.0, 0.0, 1.0]
	a = np.array([a])

	# define the mesh points as row vectors
	q_bins = np.linspace(-2., 2., 65)
	qdot_bins = np.linspace(-2., 2., 65)

	# create the mesh
	q, qdot = np.meshgrid(q_bins, qdot_bins)

	# state construction
	s0 = reshape(q, (1, q.size))
	s1 = reshape(qdot, (1, qdot.size))
	s = np.vstack([s0, s1])

	# size information
	ns = s.shape[1]
	na = a.shape[1]

	## generate all state and action pairs
	S = repmat(s, 1, na)
	A = a.repeat(ns, axis = 1)

	# compute the one step dynamics. lots of assumptions here
	# 1. integration method is fixed timestep fwd Euler
	# 2. assumes dt is sufficently small
	dt = 1e-2
	Sn = S + dynamics(S, A) * dt

	Pi, P = volumetric_interp(s, Sn, q_bins, qdot_bins)
	C = reshape(cost(S, A), (na, ns)).T
	J = np.zeros((ns, 1))

	gamma = 1.0
	converged = 0.1
	iter = 1
	err = 1e6

	plt.ion()
	while err > converged:

	# discounted learning update
	# cost = one-step-cost +
	# (discount * the sum
	# for all s
	# for all a
	# of transitioning from s to f(s,a) === s' )
	Cprime = reshape(np.sum(P * J[Pi][:,:,0], axis=0),(na,ns)).T
	Cprime = C + (gamma * Cprime)

	# minimum over the arguments
	# matlab lets you do this in one call, python will too, but not
	# so sure about doing this in dimensions higher than 1. Neither
	# I or D.C. can work it out.
	# PI - Policy Iteration (best action to pick in this state)
	# Jnew - the updated cost-to-go
	PI = np.argmin(Cprime, axis = 1)
	Jnew = reshape(np.min(Cprime, axis = 1), (ns,1))

	err = np.max(np.abs(Jnew - J))
	print 'iteration=', iter, 'max err', err
	J = Jnew

	if iter % 20 == 0:
	vi_plot(J, a.T[PI], q_bins, qdot_bins)
	iter += 1