Skip to content

Instantly share code, notes, and snippets.

@rmoehn
Last active November 11, 2016 02:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rmoehn/e10516d2251c90128d6df5702bf841df to your computer and use it in GitHub Desktop.
Save rmoehn/e10516d2251c90128d6df5702bf841df to your computer and use it in GitHub Desktop.
Q-learning for the OffSwitchCartpole-v0 environment

forgym.py can be found in the context of its repository here. I made this Gist so that I can include it with my OpenAI Gym upload.

For much more interesting information than this short script and the reward graph, have a look at (Non-)Interruptibility of Sarsa(λ) and Q-Learning.

import functools
import gym
import matplotlib
matplotlib.use('GTK3Agg')
from matplotlib import pyplot
import numpy as np
import gym_ext.tools as gym_tools
from hiora_cartpole import driver
from hiora_cartpole import fourier_fa
from hiora_cartpole import linfa
from hiora_cartpole import offswitch_hfa
# Symmetric per-dimension bounds for the four-component observation.
# state_ranges has row 0 = lower bounds, row 1 = upper bounds; it is passed
# both to the Fourier feature builder and to the observation clipper below.
# NOTE(review): presumably the components are cart position, cart velocity,
# pole angle and pole angular velocity -- confirm against the environment.
clipped_high = np.array([2.5, 4.4, 0.28, 3.9])
clipped_low = -clipped_high
state_ranges = np.array([clipped_low, clipped_high])

env0 = gym.make('OffSwitchCartpole-v0')

# Feature vector over the clipped state ranges (two actions, order 3).
(four_n_weights, four_feature_vec) = fourier_fa.make_feature_vec(
    state_ranges,
    n_acts=2,
    order=3)

# Wrap the plain feature vector with the off-switch variant from
# offswitch_hfa; it reports its own weight count.
(ofour_n_weights, ofour_feature_vec) = offswitch_hfa.make_feature_vec(
    four_feature_vec,
    four_n_weights)

# Observation preprocessing: warning_clip_obs is bound to state_ranges and
# handed to apply_to_snd.
# NOTE(review): presumably apply_to_snd applies it only to the second element
# of the observation, leaving the off-switch flag untouched -- confirm in
# gym_ext.tools.
_clip_obs = functools.partial(gym_tools.warning_clip_obs,
                              ranges=state_ranges)
skip_offswitch_clip = functools.partial(gym_tools.apply_to_snd, _clip_obs)
# Initial learner state for the linear function approximation agent.
# NOTE(review): lmbda/init_alpha/epsi look like the trace-decay, initial
# learning-rate and exploration parameters -- confirm against linfa.init.
experience0 = linfa.init(
    # Learning hyper-parameters.
    lmbda=0.9,
    init_alpha=0.001,
    epsi=0.1,
    # Function approximator: off-switch-aware features and their size.
    feature_vec=ofour_feature_vec,
    n_weights=ofour_n_weights,
    act_space=env0.action_space,
    # No initial weights are supplied.
    theta=None,
    is_use_alpha_bounds=True,
    # Observations are passed through the clipper before use.
    map_obs=skip_offswitch_clip,
    choose_action=linfa.choose_action_Q)
n_episodes = 200

# Record the training run with the Gym monitor so it can be uploaded.
# NOTE(review): force=True presumably clears earlier monitor files in that
# directory -- confirm against the Gym monitor documentation.
env0.monitor.start("/tmp/cartpole-experiment-1", force=True)
try:
    # Train for n_episodes episodes of at most 200 steps each, without
    # rendering. The call returns the updated learner state plus two
    # per-episode series (steps and alpha) that are plotted afterwards.
    experience0, steps_per_episode0, alpha_per_episode0 = driver.train(
        env0, linfa, experience0,
        n_episodes=n_episodes,
        max_steps=200,
        is_render=False,
        is_continuing_env=True)
finally:
    # Always close the monitor, even when training raises; otherwise the
    # monitor is left open. Any exception still propagates.
    env0.monitor.close()
# Two stacked panels in a tall, narrow figure.
fig = pyplot.figure(figsize=(5, 8))

# Upper panel: steps per episode (blue) with alpha per episode (red) drawn
# against a second y-axis that shares the same x-axis.
ax01 = fig.add_subplot(2, 1, 1)
ax01.plot(steps_per_episode0, color='b')
ax02 = ax01.twinx()
ax02.plot(alpha_per_episode0, color='r')

# Lower panel: the weight vector theta held by the learner after training.
ax03 = fig.add_subplot(2, 1, 2)
ax03.plot(experience0.theta)

pyplot.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment