Last active
August 18, 2017 12:28
-
-
Save jeongukjae/123d547b6777a447d2f1a1bf4c4092df to your computer and use it in GitHub Desktop.
CartPole-v0 (Open AI Gym)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym
import tflearn
from gym import wrappers
import numpy as np

# Build the neural network. The architecture must match the one used during
# training (minus the dropout layers, which are train-time only) so the saved
# weights can be restored into it.
input_layer = tflearn.input_data(shape=[None, 4], name='input')
hidden_layer = tflearn.fully_connected(input_layer, 16, activation='relu')
hidden_layer2 = tflearn.fully_connected(hidden_layer, 32, activation='relu')
hidden_layer3 = tflearn.fully_connected(hidden_layer2, 16, activation='relu')
output_layer = tflearn.fully_connected(hidden_layer3, 2, activation='softmax')
model = tflearn.DNN(output_layer)

env = gym.make('CartPole-v0')
# Wrap the env so the evaluation run is recorded to disk.
env = wrappers.Monitor(env, '/tmp/cartpole-experiment-1', force=True)
model.load('./training_data/result.tfl')  # restore the trained weights

# Evaluate the learned policy over 100 episodes.
# CartPole-v0 caps an episode at 200 steps, hence the inner bound.
Rs = []
for _ in range(100):
    obs = env.reset()
    R = 0  # cumulative reward for this episode
    for _ in range(200):
        env.render()
        # Greedy policy: take the action with the larger softmax output.
        action = np.argmax(model.predict([obs])[0])
        obs, r, d, i = env.step(action)
        R += r
        if d:
            break
    Rs.append(R)

# Print the mean episode reward across the 100 evaluation episodes.
print("Average Reward : {0}".format(sum(Rs) / len(Rs)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym
import tflearn
import numpy as np

# Build a neural network: 4 observation inputs -> three hidden ReLU layers
# (with dropout for regularization) -> softmax over the 2 discrete actions.
input_layer = tflearn.input_data(shape=[None, 4], name='input')
hidden_layer = tflearn.fully_connected(input_layer, 16, activation='relu')
hidden_layer = tflearn.dropout(hidden_layer, 0.5)
hidden_layer2 = tflearn.fully_connected(hidden_layer, 32, activation='relu')
hidden_layer2 = tflearn.dropout(hidden_layer2, 0.5)
hidden_layer3 = tflearn.fully_connected(hidden_layer2, 16, activation='relu')
hidden_layer3 = tflearn.dropout(hidden_layer3, 0.5)
output_layer = tflearn.fully_connected(hidden_layer3, 2, activation='softmax')
regression = tflearn.regression(output_layer, optimizer='adam',
                                learning_rate=0.001, name='targets')
model = tflearn.DNN(regression)

env = gym.make('CartPole-v0')

# Run up to 4000 random-policy episodes; imitate only the episodes that
# reached a total reward of at least 50 (i.e. the "good" random rollouts).
for _ in range(4000):
    observation = env.reset()
    # Per-episode training data gathered before we know if the episode is good.
    actions = []
    observations = []
    R = 0  # total episode reward
    for _ in range(200):
        action = env.action_space.sample()  # random exploration policy
        # Record the observation and the action as a one-hot pair:
        # action 0 -> [1, 0], action 1 -> [0, 1].
        observations.append(observation)
        actions.append([action ^ 0b1, action & 0b1])
        # Take the action in the environment.
        observation, reward, done, info = env.step(action)
        R += reward
        if done:
            # Only fit on episodes that performed well enough.
            if R >= 50:
                model.fit({'input': observations}, {'targets': actions})
            break

model.save('./training_data/result.tfl')
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.