Skip to content

Instantly share code, notes, and snippets.

@tomykaira
Created January 31, 2019 04:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomykaira/58e1271c0118462e71e17ca26c679f80 to your computer and use it in GitHub Desktop.
Save tomykaira/58e1271c0118462e71e17ca26c679f80 to your computer and use it in GitHub Desktop.
# import gym
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import gym
import random
# Switch plotly's offline helpers into notebook mode before anything is drawn.
init_notebook_mode(connected=True)

# The CartPole environment the agent interacts with.
env = gym.make('CartPole-v1')

# Empty list; nothing in the visible part of this file appends to it.
data = []

# Weights of the linear Q-function: theta[0..3] multiply the four state
# components and theta[4] multiplies the action (see q()). They start as
# five independent uniform draws from [-1, 1).
theta = np.random.uniform(-1, 1, 5)

# Factor applied to the temporal-difference error in update_qtable().
alpha = 0.1

# Discount applied to the next state-action value in update_qtable().
gamma = 0.98
def q(state, action):
    """Return the linear Q-value estimate for a state-action pair.

    The estimate is the weighted sum
    ``theta[0]*state[0] + ... + theta[3]*state[3] + theta[4]*action``,
    using the module-level weight vector ``theta``.

    Args:
        state: Indexable with at least four numeric components.
        action: Numeric action encoding (the training loop passes -1 or 1).

    Returns:
        The scalar weighted sum described above.
    """
    # Accumulate left to right, in the same order as the written-out sum.
    value = theta[0] * state[0]
    for idx in range(1, 4):
        value = value + theta[idx] * state[idx]
    return value + theta[4] * action
def update_qtable(state, action, reward, next_state, next_action):
    """Adjust the module-level weights ``theta`` after one transition.

    The scaled temporal-difference error is
    ``alpha * (reward + gamma * q(next_state, next_action) - q(state, action))``.
    Each weight is then moved by exactly +1, -1 or 0: the sign of that error
    multiplied by the sign of the matching feature (``state[0..3]`` for
    weights 0-3, ``action`` for weight 4).

    Only the *sign* of the error is used, so neither its magnitude nor the
    magnitude of ``alpha`` influences the step size. This is a very rough
    heuristic (as the original author noted), not a gradient step.

    Args:
        state: State before the action; indexable with four numeric entries.
        action: Action taken in ``state`` (-1 or 1 in the training loop).
        reward: Reward received for the transition.
        next_state: State reached after the action.
        next_action: Action chosen for ``next_state``.

    Returns:
        None. ``theta`` is modified in place.
    """
    delta_q = alpha * (reward + gamma * q(next_state, next_action) - q(state, action))

    # The former expression `delta_q * x / (abs(delta_q) * abs(x))` is just
    # sign(delta_q) * sign(x), but it evaluated 0/0 -> NaN whenever the error
    # or a feature was exactly zero, permanently poisoning theta. np.sign
    # yields the same +/-1 steps and a harmless 0 step in that case.
    direction = np.sign(delta_q)
    features = [state[0], state[1], state[2], state[3], action]
    for i, feature in enumerate(features):
        theta[i] += direction * np.sign(feature)
def _greedy_action(observation):
    """Return 1 if q(observation, 1) is strictly larger than q(observation, -1), else -1."""
    return 1 if q(observation, -1) < q(observation, 1) else -1


# Run 10000 episodes, always acting greedily with respect to q() and
# updating the weights after every environment step.
for episode in range(10000):
    obs = env.reset()
    turn = 0
    action = _greedy_action(obs)
    done = False
    while not done:
        # Actions are -1/1 on the agent side; the environment is given 0/1.
        bin_action = 0 if action == -1 else 1
        next_obs, reward, done, _ = env.step(bin_action)
        turn += 1
        if done and turn < 500:
            # The episode ended before step 500: replace the reward with a
            # large penalty.
            reward = -500
        next_action = _greedy_action(next_obs)
        update_qtable(obs, action, reward, next_obs, next_action)
        obs, action = next_obs, next_action
    # Report the current weights and the episode length.
    print(theta, turn)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment