CartPole-v0 (OpenAI Gym)
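Two tflearn scripts for the CartPole-v0 environment. The first, below, loads a trained model and evaluates it over 100 episodes, recording the run with gym's Monitor wrapper; the second, further down, trains the model by collecting random-action episodes and fitting on those whose total reward reached at least 50. Run the training script first so that ./training_data/result.tfl exists. (The evaluation graph omits the dropout layers used during training; dropout adds no trainable variables, so the saved weights should still load.)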
import gym
import tflearn
from gym import wrappers
import numpy as np
# build a neural network
input_layer = tflearn.input_data(shape=[None, 4], name='input')
hidden_layer = tflearn.fully_connected(input_layer, 16, activation='relu')
hidden_layer2 = tflearn.fully_connected(hidden_layer, 32, activation='relu')
hidden_layer3 = tflearn.fully_connected(hidden_layer2, 16, activation='relu')
output_layer = tflearn.fully_connected(hidden_layer3, 2, activation='softmax')
model = tflearn.DNN(output_layer)
env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, '/tmp/cartpole-experiment-1', force=True) # record
model.load('./training_data/result.tfl') # load a model
Rs = []
for _ in range(100):
    obs = env.reset()
    R = 0
    for _ in range(200):
        # render
        env.render()
        # get action from DNN
        action = np.argmax(model.predict([obs])[0])
        obs, reward, done, info = env.step(action)
        R += reward
        if done:
            break
    Rs.append(R)
# print result
print("Average Reward : {0}".format(sum(Rs)/len(Rs)))
import gym
import tflearn
import numpy as np
# build a neural network
input_layer = tflearn.input_data(shape=[None, 4], name='input')
hidden_layer = tflearn.fully_connected(input_layer, 16, activation='relu')
hidden_layer = tflearn.dropout(hidden_layer, 0.5)
hidden_layer2 = tflearn.fully_connected(hidden_layer, 32, activation='relu')
hidden_layer2 = tflearn.dropout(hidden_layer2, 0.5)
hidden_layer3 = tflearn.fully_connected(hidden_layer2, 16, activation='relu')
hidden_layer3 = tflearn.dropout(hidden_layer3, 0.5)
output_layer = tflearn.fully_connected(hidden_layer3, 2, activation='softmax')
regression = tflearn.regression(output_layer, optimizer='adam', learning_rate=0.001, name='targets')
model = tflearn.DNN(regression)
env = gym.make('CartPole-v0')
for _ in range(4000):
    observation = env.reset()
    # gather data to train the model
    actions = []
    observations = []
    # total reward
    R = 0
    for _ in range(200):
        action = env.action_space.sample()
        # save the observation and the one-hot encoded action
        observations.append(observation)
        actions.append([action ^ 0b1, action & 0b1])
        # and take the action
        observation, reward, done, info = env.step(action)
        # sum of rewards
        R += reward
        if done:
            # train only on episodes that survived long enough
            if R >= 50:
                model.fit({'input': observations}, {'targets': actions})
            break
model.save('./training_data/result.tfl')
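A note on the actions.append line in the training loop: the bit operations build a one-hot encoding of the binary action, matching the 2-unit softmax output layer. A standalone check (plain Python, purely illustrative):

# action ^ 0b1 flips the low bit; action & 0b1 keeps it,
# so action 0 maps to [1, 0] and action 1 maps to [0, 1]
for action in (0, 1):
    print(action, "->", [action ^ 0b1, action & 0b1])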