Last active
August 18, 2017 12:28
-
-
Save jeongukjae/123d547b6777a447d2f1a1bf4c4092df to your computer and use it in GitHub Desktop.
CartPole-v0 (Open AI Gym)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym
import tflearn
from gym import wrappers
import numpy as np

# Build the neural network. The architecture must match the one used during
# training (minus the dropout layers, which are train-time only) so the saved
# weights can be restored into it.
input_layer = tflearn.input_data(shape=[None, 4], name='input')
hidden_layer = tflearn.fully_connected(input_layer, 16, activation='relu')
hidden_layer2 = tflearn.fully_connected(hidden_layer, 32, activation='relu')
hidden_layer3 = tflearn.fully_connected(hidden_layer2, 16, activation='relu')
output_layer = tflearn.fully_connected(hidden_layer3, 2, activation='softmax')
model = tflearn.DNN(output_layer)

env = gym.make('CartPole-v0')
# Wrap the env so the evaluation run is recorded to disk.
env = wrappers.Monitor(env, '/tmp/cartpole-experiment-1', force=True)
model.load('./training_data/result.tfl')  # restore the trained weights

# Evaluate the learned policy over 100 episodes.
# CartPole-v0 caps an episode at 200 steps, hence the inner bound.
Rs = []
for _ in range(100):
    obs = env.reset()
    R = 0  # cumulative reward for this episode
    for _ in range(200):
        env.render()
        # Greedy policy: take the action with the larger softmax output.
        action = np.argmax(model.predict([obs])[0])
        obs, r, d, i = env.step(action)
        R += r
        if d:
            break
    Rs.append(R)

# Print the mean episode reward across the 100 evaluation episodes.
print("Average Reward : {0}".format(sum(Rs) / len(Rs)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym
import tflearn
import numpy as np

# Build a neural network: 4 observation inputs -> three hidden ReLU layers
# (with dropout for regularization) -> softmax over the 2 discrete actions.
input_layer = tflearn.input_data(shape=[None, 4], name='input')
hidden_layer = tflearn.fully_connected(input_layer, 16, activation='relu')
hidden_layer = tflearn.dropout(hidden_layer, 0.5)
hidden_layer2 = tflearn.fully_connected(hidden_layer, 32, activation='relu')
hidden_layer2 = tflearn.dropout(hidden_layer2, 0.5)
hidden_layer3 = tflearn.fully_connected(hidden_layer2, 16, activation='relu')
hidden_layer3 = tflearn.dropout(hidden_layer3, 0.5)
output_layer = tflearn.fully_connected(hidden_layer3, 2, activation='softmax')
regression = tflearn.regression(output_layer, optimizer='adam',
                                learning_rate=0.001, name='targets')
model = tflearn.DNN(regression)

env = gym.make('CartPole-v0')

# Run up to 4000 random-policy episodes; imitate only the episodes that
# reached a total reward of at least 50 (i.e. the "good" random rollouts).
for _ in range(4000):
    observation = env.reset()
    # Per-episode training data gathered before we know if the episode is good.
    actions = []
    observations = []
    R = 0  # total episode reward
    for _ in range(200):
        action = env.action_space.sample()  # random exploration policy
        # Record the observation and the action as a one-hot pair:
        # action 0 -> [1, 0], action 1 -> [0, 1].
        observations.append(observation)
        actions.append([action ^ 0b1, action & 0b1])
        # Take the action in the environment.
        observation, reward, done, info = env.step(action)
        R += reward
        if done:
            # Only fit on episodes that performed well enough.
            if R >= 50:
                model.fit({'input': observations}, {'targets': actions})
            break

model.save('./training_data/result.tfl')
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.