Create a gist now

Instantly share code, notes, and snippets.

Double integrator minimum time value iteration
# This is a python port of the minimum time solution of the double integrator
# (with no damping) minimum time value iteration problem, based on the course notes of
# MIT OCWs Underactuated robotics.
#
# The original material that this work is derived from is:
#
# Prof. Russel Tedrake, Underactuated Robotics, Spring 2009. (Massachusetts Institute of # Technology: MIT OpenCouseWare), http://ocw.mit.edu (Accessed May 2012). License:
# Creative Commons BY-NC-SA
import sys
import math
import numpy as np
from numpy.matlib import repmat, reshape
import matplotlib.pyplot as plt
def dynamics(x, u):
xdot = np.vstack([x[1,:], u[0]])
ind = np.sum(np.abs(x), axis = 0) == 0
xdot[:,ind] = np.zeros((2, np.sum(ind)))
return xdot
def normalize(s, q_bins, qdot_bins):
s[0] = np.clip(s[0], q_bins[0], q_bins[-1])
s[1] = np.clip(s[1], qdot_bins[0], qdot_bins[-1])
return s
def cost(x, u):
# The cost is zero at the goal, and 1 everywhere else
C = (np.sum(np.abs(x),axis=0)>0).astype('float')
return C
def vi_plot(J, PI, q_bins, qdot_bins):
n1 = q_bins.shape[0]
n2 = qdot_bins.shape[0]
extents = [ q_bins[0], q_bins[-1], qdot_bins[0], qdot_bins[-1] ]
plt.subplot(211)
plt.imshow(np.fliplr(np.reshape(PI, (n2, n1))), extent = extents, interpolation='none')
plt.gca().set_aspect('auto')
plt.subplot(212)
plt.imshow(np.fliplr(np.reshape(J.T, (n2, n1))), extent = extents, interpolation='none')
plt.gca().set_aspect('auto')
plt.draw()
def volumetric_interp(s, Sn, q_bins, qdot_bins):
"""Find the indices and parameters of interpolation
Parameters
----------
s : numpy.ndarray
the original states
Sn : numpy.ndarray
the updated states (2d)
q_bins : numpy.ndarray
the 1d array of q portion of state
qdot_bins : numpy.ndarray
the 1d array of the qdot portion of state
"""
# interesting things here
# the discretization doesn't have to be uniform
ns = Sn.shape[1]
Pi = np.zeros((4, ns), dtype = int)
P = np.zeros((4, ns), dtype = float)
Sn = normalize(Sn, q_bins, qdot_bins)
for i in xrange(ns):
# the largest index of the q variable
ind_q = np.max( np.where(q_bins <= Sn[0,i])[0] )
# the largest index of the q_dot variable
ind_qdot = np.max( np.where(qdot_bins <= Sn[1,i])[0] )
# n-dimensional index offset vectors (automate?)
offset = [[0,0],[1,0],[0,1],[1,1]]
offset = np.array(offset)
if ind_q == len(q_bins)-1:
offset[:,0] *= -1.0
if ind_qdot == len(qdot_bins)-1:
offset[:,1] *= -1.0
# box size and area
dx = abs(q_bins[ind_q + offset[1,0]] - q_bins[ind_q])
dy = abs(qdot_bins[ind_qdot + offset[2,1]] - qdot_bins[ind_qdot])
totl_area = dx*dx
for j in xrange(4):
# convience cache the actual state of interest for this step
ind_q_off = ind_q + offset[j,0]
ind_qdot_off = ind_qdot + offset[j,1]
# calculate the actual states for the indices
state = [ q_bins[ind_q_off], qdot_bins[ind_qdot_off] ]
state = np.array([state]).T
# for origidx, orig in enumerate(s):
# if sum(abs(orig - state)) == 0:
# return origidx
Pi[j, i] = np.where(np.sum(abs(s - repmat(state, 1, s.shape[1])), axis=0) == 0)[0][0]
# serr_q and serr_qdot are scalars that post area normalisation are in the
# range 0 - 1
serr_q = abs(Sn[0,i] - q_bins[ind_q_off])
serr_qdot = abs(Sn[1,i] - qdot_bins[ind_qdot_off])
P[3-j, i] = (serr_q * serr_qdot)/totl_area
return Pi, P
# define the actions
a = [-1.0, 0.0, 1.0]
a = np.array([a])
# define the mesh points as row vectors
q_bins = np.linspace(-2., 2., 65)
qdot_bins = np.linspace(-2., 2., 65)
# create the mesh
q, qdot = np.meshgrid(q_bins, qdot_bins)
# state construction
s0 = reshape(q, (1, q.size))
s1 = reshape(qdot, (1, qdot.size))
s = np.vstack([s0, s1])
# size information
ns = s.shape[1]
na = a.shape[1]
## generate all state and action pairs
S = repmat(s, 1, na)
A = a.repeat(ns, axis = 1)
# compute the one step dynamics. lots of assumptions here
# 1. integration method is fixed timestep fwd Euler
# 2. assumes dt is sufficently small
dt = 1e-2
Sn = S + dynamics(S, A) * dt
Pi, P = volumetric_interp(s, Sn, q_bins, qdot_bins)
C = reshape(cost(S, A), (na, ns)).T
J = np.zeros((ns, 1))
gamma = 1.0
converged = 0.1
iter = 1
err = 1e6
plt.ion()
while err > converged:
# discounted learning update
# cost = one-step-cost +
# (discount * the sum
# for all s
# for all a
# of transitioning from s to f(s,a) === s' )
Cprime = reshape(np.sum(P * J[Pi][:,:,0], axis=0),(na,ns)).T
Cprime = C + (gamma * Cprime)
# minimum over the arguments
# matlab lets you do this in one call, python will too, but not
# so sure about doing this in dimensions higher than 1. Neither
# I or D.C. can work it out.
# PI - Policy Iteration (best action to pick in this state)
# Jnew - the updated cost-to-go
PI = np.argmin(Cprime, axis = 1)
Jnew = reshape(np.min(Cprime, axis = 1), (ns,1))
err = np.max(np.abs(Jnew - J))
print 'iteration=', iter, 'max err', err
J = Jnew
if iter % 20 == 0:
vi_plot(J, a.T[PI], q_bins, qdot_bins)
iter += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment