Skip to content

Instantly share code, notes, and snippets.

@jaeandersson
Created August 6, 2014 02:20
Show Gist options
  • Save jaeandersson/e37e796e094b3c6cad9e to your computer and use it in GitHub Desktop.
Save jaeandersson/e37e796e094b3c6cad9e to your computer and use it in GitHub Desktop.
Incomplete implementation of dynamic programming for a 2-state system
from pylab import *
# End time of the horizon (continuous time)
T = 10.
# Number of control intervals (control is piecewise constant on each)
N = 20
# Number of Runge-Kutta 4 steps per control interval and resulting step size
NK = 20
DT = T/(N*NK)
# Number of discrete control values (uniform grid on [-1, 1], see U below)
NU = 101
# Number of discrete state values per state dimension (uniform grid on [-1, 1])
NX = 101
def f(x1, x2, u):
    """Continuous-time system dynamics and stage cost integrand.

    Works elementwise, so it can be called with scalars or with whole
    NumPy arrays/matrices of states at once.

    Returns the tuple (x1_dot, x2_dot, q_dot) where q_dot is the
    integrand of the quadratic cost x1^2 + x2^2 + u^2.
    """
    dx1 = (1 - x2*x2)*x1 - x2 + u
    dx2 = x1
    dq = x1*x1 + x2*x2 + u*u
    return (dx1, dx2, dq)
# Enumerate the admissible control values on a uniform grid
U = linspace(-1, 1, NU)
# Enumerate the state space: one uniform grid per state dimension,
# expanded to 2-D coordinate arrays for vectorized integration
x1, x2 = linspace(-1, 1, NX), linspace(-1, 1, NX)
X1, X2 = meshgrid(x1, x2)
# For each control action and each grid state, precalculate the successor
# state (as grid indices) and the stage cost accumulated over one interval.
stage_J = []   # stage_J[iu][i2, i1]: cost of applying U[iu] for one interval
next_x1 = []   # next_x1[iu][i2, i1]: x1 grid index of the successor state
next_x2 = []   # next_x2[iu][i2, i1]: x2 grid index of the successor state
for u in U:
    # Integrate every grid state forward over one control interval
    X1_k = copy(X1)
    X2_k = copy(X2)
    Q_k = zeros(X1.shape)
    for k in range(NK):
        # Classical RK4 step for x1, x2 and the cost integral q
        k1_x1, k1_x2, k1_q = f(X1_k, X2_k, u)
        k2_x1, k2_x2, k2_q = f(X1_k + DT/2 * k1_x1, X2_k + DT/2 * k1_x2, u)
        k3_x1, k3_x2, k3_q = f(X1_k + DT/2 * k2_x1, X2_k + DT/2 * k2_x2, u)
        k4_x1, k4_x2, k4_q = f(X1_k + DT * k3_x1, X2_k + DT * k3_x2, u)
        X1_k += DT/6*(k1_x1 + 2*k2_x1 + 2*k3_x1 + k4_x1)
        X2_k += DT/6*(k1_x2 + 2*k2_x2 + 2*k3_x2 + k4_x2)
        Q_k += DT/6*(k1_q + 2*k2_q + 2*k3_q + k4_q)
    # Map the continuous successor state to the nearest grid index.
    # FIX: was matrix.round(...), an unbound-method call that fails on plain
    # ndarrays; around() is the correct free-function form.
    X1_k = around((X1_k+1)/2*(NX-1)).astype(int)
    X2_k = around((X2_k+1)/2*(NX-1)).astype(int)
    # Infinite cost if the state leaves the grid; clamp the index to 0 so it
    # remains a valid (if meaningless) array index for the DP lookups.
    I = X1_k < 0;   Q_k[I] = inf; X1_k[I] = 0
    I = X2_k < 0;   Q_k[I] = inf; X2_k[I] = 0
    I = X1_k >= NX; Q_k[I] = inf; X1_k[I] = 0
    I = X2_k >= NX; Q_k[I] = inf; X2_k[I] = 0
    # Save the stage cost and successor indices for this control value
    next_x1.append(X1_k)
    next_x2.append(X2_k)
    stage_J.append(Q_k)
# Calculate the cost-to-go (no terminal cost) and the optimal control by
# backward dynamic programming over the N control intervals.
J = zeros(X1.shape)   # cost-to-go at the end of the horizon is zero
U_opt = []            # U_opt[k][i2, i1]: optimal control index at stage k
for k in reversed(range(N)):
    # Cost-to-go and optimal control index for the preceding stage
    J_prev = inf*ones(X1.shape)
    u_prev = -ones(X1.shape, dtype=int)   # -1 marks "no feasible control"
    # Bellman recursion (fills in the placeholder from the original draft):
    # for each control, the candidate cost is the precomputed stage cost plus
    # the cost-to-go at the successor grid state; keep, for every state, the
    # control index that minimizes it. Out-of-bounds transitions carry an
    # infinite stage cost, so they are never selected.
    for iu in range(NU):
        J_iu = stage_J[iu] + J[next_x2[iu], next_x1[iu]]
        better = J_iu < J_prev
        J_prev[better] = J_iu[better]
        u_prev[better] = iu
    # Step backward in time and record the optimal control map for this stage
    J = J_prev
    U_opt.append(u_prev)
# U_opt was filled backward in time; reorder it by stage
U_opt.reverse()
# Simulate the optimal closed-loop trajectory starting at x1=0, x2=1
# FIX: // keeps the index an int under Python 3 as well (NX/2 is a float there)
i1 = NX//2   # grid index of x1 = 0
i2 = NX-1    # grid index of x2 = 1
u_opt = []
x1_opt = [x1[i1]]
x2_opt = [x2[i2]]
cost = 0
for k in range(N):
    # Look up the optimal control for the current state, accumulate its
    # stage cost and move to the precomputed successor state
    u_ind = U_opt[k][i2, i1]
    cost += stage_J[u_ind][i2, i1]
    i1, i2 = next_x1[u_ind][i2, i1], next_x2[u_ind][i2, i1]
    # Save the trajectories
    u_opt.append(U[u_ind])
    x1_opt.append(x1[i1])
    x2_opt.append(x2[i2])
# Optimal cost (function-call form runs on both Python 2 and Python 3)
print("Minimal cost: %s" % cost)
# Consistency check: the simulated cost must equal the DP table entry
assert abs(cost - J[NX-1, NX//2]) < 1e-8
# Plot the results.
# FIX: the original mixed bare pylab calls with plt.title/plt.xlabel/
# plt.legend, but `plt` is never bound by `from pylab import *` (NameError);
# use the bare state-machine calls consistently.
figure(1)
clf()
# Optimal cost-to-go over the state grid
subplot(121)
contourf(X1, X2, J)
colorbar()
xlabel('x1')
ylabel('x2')
title('Cost-to-go')
# Optimal state trajectories and (piecewise-constant) control
subplot(122)
plot(linspace(0, T, N+1), x1_opt, '--')
plot(linspace(0, T, N+1), x2_opt, '-.')
step(linspace(0, T, N), u_opt, '-')
title("Dynamic programming solution")
xlabel('time')
legend(['x1 trajectory', 'x2 trajectory', 'u trajectory'])
grid(True)
show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment