rmoehn/README.md

## README.md

      
    Raw
  

              README.md
            
          
    forgym.py can be found in the context of its repo here. I made this Gist so that I can include it with my OpenAI Gym upload.
For much more interesting information than this short script and the reward graph, have a look at (Non-)Interruptibility of Sarsa(λ) and Q-Learning.

  
## forgym.py
import functools

import gym
import matplotlib
matplotlib.use('GTK3Agg')
from matplotlib import pyplot
import numpy as np

import gym_ext.tools as gym_tools
from hiora_cartpole import driver
from hiora_cartpole import fourier_fa
from hiora_cartpole import linfa
from hiora_cartpole import offswitch_hfa

# Symmetric clipping bounds for the four observation components: row 0 of
# state_ranges holds the lower limits, row 1 the upper limits.
# NOTE(review): presumably cart position, cart velocity, pole angle and pole
# angular velocity -- confirm against the environment.
clipped_high = np.array([2.5, 4.4, 0.28, 3.9])
clipped_low = -clipped_high
state_ranges = np.array([clipped_low, clipped_high])

env0 = gym.make('OffSwitchCartpole-v0')

# Feature vector built by fourier_fa over the clipped ranges (order 3, two
# actions); it returns the number of weights together with the feature function.
four_n_weights, four_feature_vec = fourier_fa.make_feature_vec(
    state_ranges,
    n_acts=2,
    order=3)

# Wrap the Fourier features with the off-switch variant from offswitch_hfa.
ofour_n_weights, ofour_feature_vec = offswitch_hfa.make_feature_vec(
    four_feature_vec, four_n_weights)

# Observation mapper: apply_to_snd partially applied with warning_clip_obs
# bound to state_ranges.
# NOTE(review): presumably this clips only the second element of the
# observation pair and leaves the off-switch part alone -- confirm in
# gym_ext.tools.
skip_offswitch_clip = functools.partial(
    gym_tools.apply_to_snd,
    functools.partial(gym_tools.warning_clip_obs, ranges=state_ranges))

# Initial learner state handed to driver.train below.
# NOTE(review): theta=None presumably lets linfa.init create the weights --
# TODO confirm.
experience0 = linfa.init(lmbda=0.9,
                         init_alpha=0.001,
                         epsi=0.1,
                         feature_vec=ofour_feature_vec,
                         n_weights=ofour_n_weights,
                         act_space=env0.action_space,
                         theta=None,
                         is_use_alpha_bounds=True,
                         map_obs=skip_offswitch_clip,
                         choose_action=linfa.choose_action_Q)

n_episodes = 200

# Record the run with the environment's monitor. The try/finally guarantees
# the monitor is closed even when training raises, so a failed run does not
# leave the recording open.
env0.monitor.start("/tmp/cartpole-experiment-1", force=True)
try:
    experience0, steps_per_episode0, alpha_per_episode0 = driver.train(
        env0, linfa, experience0, n_episodes=n_episodes,
        max_steps=200, is_render=False, is_continuing_env=True)
finally:
    env0.monitor.close()

# Top subplot: steps per episode (blue) and, on a twin y-axis, alpha per
# episode (red). Bottom subplot: the learned weights experience0.theta.
fig = pyplot.figure(figsize=(5, 8))
ax01 = fig.add_subplot(211)
ax01.plot(steps_per_episode0, color='b')
ax02 = ax01.twinx()
ax02.plot(alpha_per_episode0, color='r')
ax03 = fig.add_subplot(212)
ax03.plot(experience0.theta)
pyplot.show()
	import functools

	import gym
	import matplotlib
	matplotlib.use('GTK3Agg')
	from matplotlib import pyplot
	import numpy as np

	import gym_ext.tools as gym_tools
	from hiora_cartpole import driver
	from hiora_cartpole import fourier_fa
	from hiora_cartpole import linfa
	from hiora_cartpole import offswitch_hfa

	# Symmetric clipping bounds for the four observation components: row 0 of
	# state_ranges holds the lower limits, row 1 the upper limits.
	# NOTE(review): presumably cart position, cart velocity, pole angle and pole
	# angular velocity -- confirm against the environment.
	clipped_high = np.array([2.5, 4.4, 0.28, 3.9])
	clipped_low = -clipped_high
	state_ranges = np.array([clipped_low, clipped_high])

	env0 = gym.make('OffSwitchCartpole-v0')

	# Feature vector built by fourier_fa over the clipped ranges (order 3, two
	# actions); it returns the number of weights together with the feature function.
	four_n_weights, four_feature_vec = fourier_fa.make_feature_vec(
		state_ranges,
		n_acts=2,
		order=3)

	# Wrap the Fourier features with the off-switch variant from offswitch_hfa.
	ofour_n_weights, ofour_feature_vec = offswitch_hfa.make_feature_vec(
		four_feature_vec, four_n_weights)

	# Observation mapper: apply_to_snd partially applied with warning_clip_obs
	# bound to state_ranges.
	# NOTE(review): presumably this clips only the second element of the
	# observation pair and leaves the off-switch part alone -- confirm in
	# gym_ext.tools.
	skip_offswitch_clip = functools.partial(
		gym_tools.apply_to_snd,
		functools.partial(gym_tools.warning_clip_obs, ranges=state_ranges))

	# Initial learner state handed to driver.train below.
	# NOTE(review): theta=None presumably lets linfa.init create the weights --
	# TODO confirm.
	experience0 = linfa.init(
		lmbda=0.9,
		init_alpha=0.001,
		epsi=0.1,
		feature_vec=ofour_feature_vec,
		n_weights=ofour_n_weights,
		act_space=env0.action_space,
		theta=None,
		is_use_alpha_bounds=True,
		map_obs=skip_offswitch_clip,
		choose_action=linfa.choose_action_Q)

	n_episodes = 200

	# Record the run with the environment's monitor. The try/finally guarantees
	# the monitor is closed even when training raises, so a failed run does not
	# leave the recording open.
	env0.monitor.start("/tmp/cartpole-experiment-1", force=True)
	try:
		experience0, steps_per_episode0, alpha_per_episode0 = driver.train(
			env0, linfa, experience0, n_episodes=n_episodes,
			max_steps=200, is_render=False, is_continuing_env=True)
	finally:
		env0.monitor.close()

	# Top subplot: steps per episode (blue) and, on a twin y-axis, alpha per
	# episode (red). Bottom subplot: the learned weights experience0.theta.
	fig = pyplot.figure(figsize=(5, 8))
	ax01 = fig.add_subplot(211)
	ax01.plot(steps_per_episode0, color='b')
	ax02 = ax01.twinx()
	ax02.plot(alpha_per_episode0, color='r')
	ax03 = fig.add_subplot(212)
	ax03.plot(experience0.theta)
	pyplot.show()