isseu/CartPole-v0-Keras.py

## CartPole-v0-Keras.py
from __future__ import division
import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import sgd
import os
import random
from os.path import isfile
from collections import deque

NUM_ACTIONS = 2
NUM_STATES = 4
MAX_REPLAY_STATES = 100
BATCH_SIZE = 20
NUM_GAMES_TRAIN = 500
JUMP_FPS = 2
WEIGHT_FILE = 'weights.h5'

def create_model(n_inputs, n_outputs):
  model = Sequential([
    Dense(8, batch_input_shape = (None, n_inputs)),
    Activation('relu'),
    Dense(16),
    Activation('relu'),
    Dense(n_outputs),
    Activation('linear')
  ])
  model.compile('adam', loss = 'mse')
  if isfile(WEIGHT_FILE):
    print "[+] Loaded weights from file"
    model.load_weights(WEIGHT_FILE)
  return model

env = gym.make('CartPole-v0')
env.monitor.start('training', force = True)
model = create_model(NUM_STATES, NUM_ACTIONS)

replay = []

gamma = 0.99
epsilon = 1
for number_game in range(NUM_GAMES_TRAIN):
  new_state = env.reset()
  reward_game = 0
  done = False
  loss = 0
  index_train_per_game = 0
  print '[+] Starting Game ' + str(number_game)
  while not done:
    env.render()
    index_train_per_game += 1
    if random.random() < epsilon:
      action = np.random.randint(NUM_ACTIONS)
    else:
      q = model.predict(new_state.reshape(1, NUM_STATES))[0]
      action = np.argmax(q)
    old_state = new_state
    new_state, reward, done, info = env.step(action)
    reward_game += reward
    replay.append([new_state, reward, action, done, old_state])
    if len(replay) > MAX_REPLAY_STATES: replay.pop(np.random.randint(MAX_REPLAY_STATES) + 1)
    if JUMP_FPS != 1 and index_train_per_game % JUMP_FPS == 0: # We skip this train, but already add data
      continue
    len_mini_batch = min(len(replay), BATCH_SIZE)
    mini_batch = random.sample(replay, len_mini_batch)
    X_train = np.zeros((len_mini_batch, NUM_STATES))
    Y_train = np.zeros((len_mini_batch, NUM_ACTIONS))
    for index_rep in range(len_mini_batch):
      new_rep_state, reward_rep, action_rep, done_rep, old_rep_state = mini_batch[index_rep]
      old_q = model.predict(old_rep_state.reshape(1, NUM_STATES))[0]
      new_q = model.predict(new_rep_state.reshape(1, NUM_STATES))[0]
      update_target = np.copy(old_q)
      if done_rep:
        update_target[action_rep] = -1
      else:
        update_target[action_rep] = reward_rep + (gamma * np.max(new_q))
      X_train[index_rep] = old_rep_state
      Y_train[index_rep] = update_target
    loss += model.train_on_batch(X_train, Y_train)
    if reward_game > 200:
      break
  print "[+] End Game {} | Reward {} | Epsilon {:.4f} | TrainPerGame {} | Loss {:.4f} ".format(number_game, reward_game, epsilon, index_train_per_game, loss / index_train_per_game * JUMP_FPS)
  if epsilon >= 0.1:
    epsilon -= (1 / (NUM_GAMES_TRAIN))
  if isfile(WEIGHT_FILE):
    os.remove(WEIGHT_FILE)
  model.save_weights(WEIGHT_FILE)
env.monitor.close()
gym.upload( 'training', api_key = '<-LOL->' )
	from __future__ import division
	import gym
	import numpy as np
	from keras.models import Sequential
	from keras.layers import Dense, Activation
	from keras.optimizers import sgd
	import os
	import random
	from os.path import isfile
	from collections import deque

	NUM_ACTIONS = 2
	NUM_STATES = 4
	MAX_REPLAY_STATES = 100
	BATCH_SIZE = 20
	NUM_GAMES_TRAIN = 500
	JUMP_FPS = 2
	WEIGHT_FILE = 'weights.h5'

	def create_model(n_inputs, n_outputs):
	model = Sequential([
	Dense(8, batch_input_shape = (None, n_inputs)),
	Activation('relu'),
	Dense(16),
	Activation('relu'),
	Dense(n_outputs),
	Activation('linear')
	])
	model.compile('adam', loss = 'mse')
	if isfile(WEIGHT_FILE):
	print "[+] Loaded weights from file"
	model.load_weights(WEIGHT_FILE)
	return model

	env = gym.make('CartPole-v0')
	env.monitor.start('training', force = True)
	model = create_model(NUM_STATES, NUM_ACTIONS)

	replay = []

	gamma = 0.99
	epsilon = 1
	for number_game in range(NUM_GAMES_TRAIN):
	new_state = env.reset()
	reward_game = 0
	done = False
	loss = 0
	index_train_per_game = 0
	print '[+] Starting Game ' + str(number_game)
	while not done:
	env.render()
	index_train_per_game += 1
	if random.random() < epsilon:
	action = np.random.randint(NUM_ACTIONS)
	else:
	q = model.predict(new_state.reshape(1, NUM_STATES))[0]
	action = np.argmax(q)
	old_state = new_state
	new_state, reward, done, info = env.step(action)
	reward_game += reward
	replay.append([new_state, reward, action, done, old_state])
	if len(replay) > MAX_REPLAY_STATES: replay.pop(np.random.randint(MAX_REPLAY_STATES) + 1)
	if JUMP_FPS != 1 and index_train_per_game % JUMP_FPS == 0: # We skip this train, but already add data
	continue
	len_mini_batch = min(len(replay), BATCH_SIZE)
	mini_batch = random.sample(replay, len_mini_batch)
	X_train = np.zeros((len_mini_batch, NUM_STATES))
	Y_train = np.zeros((len_mini_batch, NUM_ACTIONS))
	for index_rep in range(len_mini_batch):
	new_rep_state, reward_rep, action_rep, done_rep, old_rep_state = mini_batch[index_rep]
	old_q = model.predict(old_rep_state.reshape(1, NUM_STATES))[0]
	new_q = model.predict(new_rep_state.reshape(1, NUM_STATES))[0]
	update_target = np.copy(old_q)
	if done_rep:
	update_target[action_rep] = -1
	else:
	update_target[action_rep] = reward_rep + (gamma * np.max(new_q))
	X_train[index_rep] = old_rep_state
	Y_train[index_rep] = update_target
	loss += model.train_on_batch(X_train, Y_train)
	if reward_game > 200:
	break
	print "[+] End Game {} \| Reward {} \| Epsilon {:.4f} \| TrainPerGame {} \| Loss {:.4f} ".format(number_game, reward_game, epsilon, index_train_per_game, loss / index_train_per_game * JUMP_FPS)
	if epsilon >= 0.1:
	epsilon -= (1 / (NUM_GAMES_TRAIN))
	if isfile(WEIGHT_FILE):
	os.remove(WEIGHT_FILE)
	model.save_weights(WEIGHT_FILE)
	env.monitor.close()
	gym.upload( 'training', api_key = '<-LOL->' )