Last active
July 22, 2020 19:39
-
-
Save danielsuo/ae5a4fc250df792dbab7270d3ddf09eb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/wilhelm/control/eastman.py b/wilhelm/control/eastman.py | |
index a3d1ade..1defdbb 100644 | |
--- a/wilhelm/control/eastman.py | |
+++ b/wilhelm/control/eastman.py | |
@@ -27,13 +27,23 @@ grad_loss = jax.grad(counterfact_loss, (0, 1)) | |
def rolls_by_eastman(env_true, env_sim, U_old, k, K, X_old, D, F, alpha, w_is): | |
- H, M, lr, C = 3, 3, 0.001, 1. | |
+ H, M, lr, C = 3, 3, 0.1, 1. | |
X, U, U_delta = [None for _ in range(env_sim.H+1)], [None for _ in range(env_sim.H)], [None for _ in range(env_sim.H)] | |
W, E, off = np.zeros((H+M, env_sim.state_dim)), np.zeros((H, env_sim.action_dim, env_sim.state_dim)), np.zeros(env_sim.action_dim) | |
X[0], cost = env_true.reset(), 0 | |
+ norm_off = 0 | |
+ norm_other = 0 | |
+ norm_U_delta = 0 | |
+ norm_U_vals = 0 | |
+ norm_U_old = 0 | |
for h in range(env_true.H): | |
U_delta[h] = np.tensordot(E, W[-M:], axes = ([0, 2], [0, 1])) + off | |
U[h] = U_old[h] + alpha * k[h] + K[h] @ (X[h] - X_old[h]) + C * U_delta[h] | |
+ norm_other+= np.linalg.norm(np.tensordot(E, W[-M:], axes = ([0, 2], [0, 1])))**2 | |
+ norm_off+= np.linalg.norm(off)**2 | |
+ norm_U_delta+= np.linalg.norm(U_delta[h])**2 | |
+ norm_U_vals+= np.linalg.norm(U[h])**2 | |
+ norm_U_old+= np.linalg.norm(U_old[h])**2 | |
X[h+1], instant_cost = env_true.step(U[h]) | |
cost += instant_cost | |
@@ -51,11 +61,12 @@ def rolls_by_eastman(env_true, env_sim, U_old, k, K, X_old, D, F, alpha, w_is): | |
delta_E, delta_off = grad_loss(E, off, W, H, M, X[h-H], h-H, env_sim, U_old, k, K, X_old, D, F, w_is, alpha, C) | |
E -= lr * delta_E | |
off -= lr * delta_off | |
+ print(norm_U_old, norm_other, norm_off, norm_U_delta, norm_U_vals) | |
return X, U, cost | |
def Eastman_closed(env_true, env_sim, U, T, k=None, K=None, X=None, w_is='dede'): | |
- alpha, r = 1., 1 | |
+ alpha, r = (1.1)**(-9.), 1 | |
X, U, c = rollout(env_true, U, k, K, X) | |
assert w_is in ['de', 'dede', 'delin'] | |
for t in range(T): | |
@@ -69,5 +80,5 @@ def Eastman_closed(env_true, env_sim, U, T, k=None, K=None, X=None, w_is='dede') | |
print(f"Eastman (closed+{w_is}): t = {t}, r = {r}, c = {c}, alpha = {alpha}") | |
break | |
else: | |
- return X, U, k, K, C | |
+ return X, U, k, K, c | |
return X, U, k, K, c | |
diff --git a/wilhelm/sim/quadrotor.py b/wilhelm/sim/quadrotor.py | |
index d7341fb..1884480 100644 | |
--- a/wilhelm/sim/quadrotor.py | |
+++ b/wilhelm/sim/quadrotor.py | |
@@ -10,8 +10,8 @@ class PlanarQuadrotor: | |
metadata = {'render.modes' : ['human', 'rgb_array'], | |
'video.frames_per_second' : 30} | |
- def __init__(self, wind=0.): | |
- self.m, self.l, self.g, self.dt, self.H, self.wind = 0.1, 0.2, 9.81, 0.05, 100, wind | |
+ def __init__(self, wind=0., wind_mode='DISSIPATIVE'): | |
+ self.m, self.l, self.g, self.dt, self.H, self.wind, self.wind_mode = 0.1, 0.2, 9.81, 0.05, 100, wind, wind_mode | |
self.initial_state, self.goal_state, self.goal_action = np.array([1.0, 1.0, 0.0, 0.0, 0.0, 0.0]), np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), np.array([self.m*self.g/2., self.m*self.g/2.]) | |
self.viewer = None | |
@@ -19,7 +19,19 @@ class PlanarQuadrotor: | |
@jax.jit | |
def wind_field(x, y): | |
- return [self.wind * x, self.wind * y] | |
+ if self.wind_mode == 'DISSIPATIVE': | |
+ return [self.wind * x, self.wind * y] | |
+ elif self.wind_mode == 'TORNADOES': | |
+ pos1 = [1., 0.] | |
+ pos2 = [0.5, 0.5] | |
+ pos3 = [0., 1.] | |
+ return [self.wind*(pos1[0]+pos2[0]+pos3[0]-3*x), self.wind*(pos1[0]+pos2[0]+pos3[0]-3*y)] | |
+ elif self.wind_mode == 'CONSTANT': | |
+ theta = np.pi/4. | |
+ return [self.wind*np.cos(theta), self.wind*np.sin(theta)] | |
+ else: | |
+ print("WIND MODE NOT RECOGNIZED") | |
+ exit() | |
@jax.jit | |
def f(x, u): | |
diff --git a/wilhelm/utils/benchmark.py b/wilhelm/utils/benchmark.py | |
index dffb6f3..b19d053 100644 | |
--- a/wilhelm/utils/benchmark.py | |
+++ b/wilhelm/utils/benchmark.py | |
@@ -8,24 +8,24 @@ from wilhelm.control.ilc import iLC_open, iLC_closed | |
from wilhelm.control.eastman import Eastman_closed | |
-def benchmark(wind, T): | |
+def benchmark(wind, T, wind_mode='DISSIPATIVE'): | |
perf = {} | |
- env_true, env_sim = PlanarQuadrotor(wind), PlanarQuadrotor() | |
+ env_true, env_sim = PlanarQuadrotor(wind, wind_mode=wind_mode), PlanarQuadrotor() | |
U0 = np.tile(env_sim.goal_action, (env_sim.H, 1)) | |
print(f"\n=== Warming Up ===") | |
- X, U, k, K, _ = iLQR(env_sim, U0, T) | |
+ X, U, k, K, _ = iLQR(env_sim, U0, 20) | |
run = { | |
'Eastman_closed_de': (Eastman_closed, (env_true, env_sim, U, T, k, K, X, 'de')), | |
- 'Eastman_closed_dede': (Eastman_closed, (env_true, env_sim, U, T, k, K, X, 'dede')), | |
- 'Eastman_closed_delin': (Eastman_closed, (env_true, env_sim, U, T, k, K, X, 'delin')), | |
- 'iLQR_open': (iLQR_open, (env_true, env_sim, U0, T)), | |
- 'iLQR_closed': (iLQR_closed, (env_true, env_sim, U0, T)), | |
- 'iLQR_oracle': (iLQR_oracle, (env_true, env_sim, U0, T)), | |
- 'iLC_open_g': (iLC_open, (env_true, env_sim, U, T, 'g')), | |
- 'iLC_open_lin': (iLC_open, (env_true, env_sim, U, T, 'lin')), | |
- 'iLC_open_non': (iLC_open, (env_true, env_sim, U, T, 'non')), | |
- 'iLC_closed': (iLC_closed, (env_true, env_sim, U, T, k, K, X)), | |
+ #'Eastman_closed_dede': (Eastman_closed, (env_true, env_sim, U, T, k, K, X, 'dede')), | |
+ #'Eastman_closed_delin': (Eastman_closed, (env_true, env_sim, U, T, k, K, X, 'delin')), | |
+ #'iLQR_open': (iLQR_open, (env_true, env_sim, U0, T)), | |
+ #'iLQR_closed': (iLQR_closed, (env_true, env_sim, U0, T)), | |
+ #'iLQR_oracle': (iLQR_oracle, (env_true, env_sim, U0, T)), | |
+ #'iLC_open_g': (iLC_open, (env_true, env_sim, U, T, 'g')), | |
+ #'iLC_open_lin': (iLC_open, (env_true, env_sim, U, T, 'lin')), | |
+ #'iLC_open_non': (iLC_open, (env_true, env_sim, U, T, 'non')), | |
+ #'iLC_closed': (iLC_closed, (env_true, env_sim, U, T, k, K, X)), | |
} | |
for s, (f, arg) in run.items(): | |
@@ -37,8 +37,9 @@ def benchmark(wind, T): | |
return perf | |
-wind, T = 0.4, 20 | |
-perf = benchmark(wind, T).items() | |
+wind, T, wind_mode = 0.4, 50, 'TORNADOES' | |
+perf = benchmark(wind, T, wind_mode=wind_mode).items() | |
print('\n') | |
-print(f"Wind = {wind}, Iter = {T}") | |
-print(tabulate(perf)) | |
\ No newline at end of file | |
+print(f"Wind = {wind}, Iter = {T}, Mode = {wind_mode}") | |
+print(tabulate(perf)) | |
+ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment