Skip to content

Instantly share code, notes, and snippets.

@danielsuo
Last active July 22, 2020 19:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danielsuo/ae5a4fc250df792dbab7270d3ddf09eb to your computer and use it in GitHub Desktop.
Save danielsuo/ae5a4fc250df792dbab7270d3ddf09eb to your computer and use it in GitHub Desktop.
diff --git a/wilhelm/control/eastman.py b/wilhelm/control/eastman.py
index a3d1ade..1defdbb 100644
--- a/wilhelm/control/eastman.py
+++ b/wilhelm/control/eastman.py
@@ -27,13 +27,23 @@ grad_loss = jax.grad(counterfact_loss, (0, 1))
def rolls_by_eastman(env_true, env_sim, U_old, k, K, X_old, D, F, alpha, w_is):
- H, M, lr, C = 3, 3, 0.001, 1.
+ H, M, lr, C = 3, 3, 0.1, 1.
X, U, U_delta = [None for _ in range(env_sim.H+1)], [None for _ in range(env_sim.H)], [None for _ in range(env_sim.H)]
W, E, off = np.zeros((H+M, env_sim.state_dim)), np.zeros((H, env_sim.action_dim, env_sim.state_dim)), np.zeros(env_sim.action_dim)
X[0], cost = env_true.reset(), 0
+ norm_off = 0
+ norm_other = 0
+ norm_U_delta = 0
+ norm_U_vals = 0
+ norm_U_old = 0
for h in range(env_true.H):
U_delta[h] = np.tensordot(E, W[-M:], axes = ([0, 2], [0, 1])) + off
U[h] = U_old[h] + alpha * k[h] + K[h] @ (X[h] - X_old[h]) + C * U_delta[h]
+ norm_other+= np.linalg.norm(np.tensordot(E, W[-M:], axes = ([0, 2], [0, 1])))**2
+ norm_off+= np.linalg.norm(off)**2
+ norm_U_delta+= np.linalg.norm(U_delta[h])**2
+ norm_U_vals+= np.linalg.norm(U[h])**2
+ norm_U_old+= np.linalg.norm(U_old[h])**2
X[h+1], instant_cost = env_true.step(U[h])
cost += instant_cost
@@ -51,11 +61,12 @@ def rolls_by_eastman(env_true, env_sim, U_old, k, K, X_old, D, F, alpha, w_is):
delta_E, delta_off = grad_loss(E, off, W, H, M, X[h-H], h-H, env_sim, U_old, k, K, X_old, D, F, w_is, alpha, C)
E -= lr * delta_E
off -= lr * delta_off
+ print(norm_U_old, norm_other, norm_off, norm_U_delta, norm_U_vals)
return X, U, cost
def Eastman_closed(env_true, env_sim, U, T, k=None, K=None, X=None, w_is='dede'):
- alpha, r = 1., 1
+ alpha, r = (1.1)**(-9.), 1
X, U, c = rollout(env_true, U, k, K, X)
assert w_is in ['de', 'dede', 'delin']
for t in range(T):
@@ -69,5 +80,5 @@ def Eastman_closed(env_true, env_sim, U, T, k=None, K=None, X=None, w_is='dede')
print(f"Eastman (closed+{w_is}): t = {t}, r = {r}, c = {c}, alpha = {alpha}")
break
else:
- return X, U, k, K, C
+ return X, U, k, K, c
return X, U, k, K, c
diff --git a/wilhelm/sim/quadrotor.py b/wilhelm/sim/quadrotor.py
index d7341fb..1884480 100644
--- a/wilhelm/sim/quadrotor.py
+++ b/wilhelm/sim/quadrotor.py
@@ -10,8 +10,8 @@ class PlanarQuadrotor:
metadata = {'render.modes' : ['human', 'rgb_array'],
'video.frames_per_second' : 30}
- def __init__(self, wind=0.):
- self.m, self.l, self.g, self.dt, self.H, self.wind = 0.1, 0.2, 9.81, 0.05, 100, wind
+ def __init__(self, wind=0., wind_mode='DISSIPATIVE'):
+ self.m, self.l, self.g, self.dt, self.H, self.wind, self.wind_mode = 0.1, 0.2, 9.81, 0.05, 100, wind, wind_mode
self.initial_state, self.goal_state, self.goal_action = np.array([1.0, 1.0, 0.0, 0.0, 0.0, 0.0]), np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), np.array([self.m*self.g/2., self.m*self.g/2.])
self.viewer = None
@@ -19,7 +19,19 @@ class PlanarQuadrotor:
@jax.jit
def wind_field(x, y):
- return [self.wind * x, self.wind * y]
+ if self.wind_mode == 'DISSIPATIVE':
+ return [self.wind * x, self.wind * y]
+ elif self.wind_mode == 'TORNADOES':
+ pos1 = [1., 0.]
+ pos2 = [0.5, 0.5]
+ pos3 = [0., 1.]
+ return [self.wind*(pos1[0]+pos2[0]+pos3[0]-3*x), self.wind*(pos1[0]+pos2[0]+pos3[0]-3*y)]
+ elif self.wind_mode == 'CONSTANT':
+ theta = np.pi/4.
+ return [self.wind*np.cos(theta), self.wind*np.sin(theta)]
+ else:
+ print("WIND MODE NOT RECOGNIZED")
+ exit()
@jax.jit
def f(x, u):
diff --git a/wilhelm/utils/benchmark.py b/wilhelm/utils/benchmark.py
index dffb6f3..b19d053 100644
--- a/wilhelm/utils/benchmark.py
+++ b/wilhelm/utils/benchmark.py
@@ -8,24 +8,24 @@ from wilhelm.control.ilc import iLC_open, iLC_closed
from wilhelm.control.eastman import Eastman_closed
-def benchmark(wind, T):
+def benchmark(wind, T, wind_mode='DISSIPATIVE'):
perf = {}
- env_true, env_sim = PlanarQuadrotor(wind), PlanarQuadrotor()
+ env_true, env_sim = PlanarQuadrotor(wind, wind_mode=wind_mode), PlanarQuadrotor()
U0 = np.tile(env_sim.goal_action, (env_sim.H, 1))
print(f"\n=== Warming Up ===")
- X, U, k, K, _ = iLQR(env_sim, U0, T)
+ X, U, k, K, _ = iLQR(env_sim, U0, 20)
run = {
'Eastman_closed_de': (Eastman_closed, (env_true, env_sim, U, T, k, K, X, 'de')),
- 'Eastman_closed_dede': (Eastman_closed, (env_true, env_sim, U, T, k, K, X, 'dede')),
- 'Eastman_closed_delin': (Eastman_closed, (env_true, env_sim, U, T, k, K, X, 'delin')),
- 'iLQR_open': (iLQR_open, (env_true, env_sim, U0, T)),
- 'iLQR_closed': (iLQR_closed, (env_true, env_sim, U0, T)),
- 'iLQR_oracle': (iLQR_oracle, (env_true, env_sim, U0, T)),
- 'iLC_open_g': (iLC_open, (env_true, env_sim, U, T, 'g')),
- 'iLC_open_lin': (iLC_open, (env_true, env_sim, U, T, 'lin')),
- 'iLC_open_non': (iLC_open, (env_true, env_sim, U, T, 'non')),
- 'iLC_closed': (iLC_closed, (env_true, env_sim, U, T, k, K, X)),
+ #'Eastman_closed_dede': (Eastman_closed, (env_true, env_sim, U, T, k, K, X, 'dede')),
+ #'Eastman_closed_delin': (Eastman_closed, (env_true, env_sim, U, T, k, K, X, 'delin')),
+ #'iLQR_open': (iLQR_open, (env_true, env_sim, U0, T)),
+ #'iLQR_closed': (iLQR_closed, (env_true, env_sim, U0, T)),
+ #'iLQR_oracle': (iLQR_oracle, (env_true, env_sim, U0, T)),
+ #'iLC_open_g': (iLC_open, (env_true, env_sim, U, T, 'g')),
+ #'iLC_open_lin': (iLC_open, (env_true, env_sim, U, T, 'lin')),
+ #'iLC_open_non': (iLC_open, (env_true, env_sim, U, T, 'non')),
+ #'iLC_closed': (iLC_closed, (env_true, env_sim, U, T, k, K, X)),
}
for s, (f, arg) in run.items():
@@ -37,8 +37,9 @@ def benchmark(wind, T):
return perf
-wind, T = 0.4, 20
-perf = benchmark(wind, T).items()
+wind, T, wind_mode = 0.4, 50, 'TORNADOES'
+perf = benchmark(wind, T, wind_mode=wind_mode).items()
print('\n')
-print(f"Wind = {wind}, Iter = {T}")
-print(tabulate(perf))
\ No newline at end of file
+print(f"Wind = {wind}, Iter = {T}, Mode = {wind_mode}")
+print(tabulate(perf))
+
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment