# train.py (@ChintanTrivedi, created May 28, 2018)
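# ---------------------------------------------------------------------------
# NOTE: ExperienceReplay is defined elsewhere in the accompanying tutorial
# code. The class below is a minimal, hypothetical sketch that matches the
# interface used in train() (remember() and get_batch()) and a standard DQN
# Bellman update with discount factor gamma; the author's actual
# implementation may differ. It relies on numpy, imported just below.
# ---------------------------------------------------------------------------
class ExperienceReplay:
    def __init__(self, max_memory=1000, discount=0.9):
        self.max_memory = max_memory  # cap on stored experiences
        self.discount = discount      # gamma in the Bellman update
        self.memory = []

    def remember(self, experience, game_over):
        # experience is [s, a, r, s']; game_over flags terminal states
        self.memory.append([experience, game_over])
        if len(self.memory) > self.max_memory:
            del self.memory[0]

    def get_batch(self, model, batch_size=1):
        len_memory = len(self.memory)
        num_actions = model.output_shape[-1]
        # assume each stored state is a (1, env_dim) feature vector
        env_dim = self.memory[0][0][0].shape[1]
        batch = min(len_memory, batch_size)
        inputs = np.zeros((batch, env_dim))
        targets = np.zeros((batch, num_actions))
        for i, idx in enumerate(np.random.randint(0, len_memory, size=batch)):
            state_t, action_t, reward_t, state_tp1 = self.memory[idx][0]
            game_over = self.memory[idx][1]
            inputs[i] = state_t
            # start from the model's current estimates so that only the
            # target for the action actually taken gets updated
            targets[i] = model.predict(state_t)[0]
            if game_over:
                # terminal state: no future reward to propagate
                targets[i, action_t] = reward_t
            else:
                # Bellman update: r + gamma * max_a' Q(s', a')
                q_sa = np.max(model.predict(state_tp1)[0])
                targets[i, action_t] = reward_t + self.discount * q_sa
        return inputs, targets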
import numpy as np

# parameters
max_memory = 1000  # Maximum number of experiences we are storing
batch_size = 1  # Number of experiences we use for training per batch
exp_replay = ExperienceReplay(max_memory=max_memory)
# Train a model on the given game
def train(game, model, epochs, verbose=1):
    num_actions = len(game.key_to_press)  # 4 actions [shoot_low, shoot_high, left_arrow, right_arrow]
    # Resetting the win counter
    win_cnt = 0
    # We want to keep track of the progress of the AI over time, so we save its
    # win count history, indicated by the number of goals scored
    win_hist = []
    # epochs is the number of games we play
    for e in range(epochs):
        loss = 0.
        # epsilon for exploration decays with the inverse square root of the
        # training epoch, e.g. 4.0 at epoch 1 (pure exploration) and 0.4 at epoch 100
        epsilon = 4 / ((e + 1) ** (1 / 2))
        game_over = False
        # get current state s by observing our game environment
        input_t = game.observe()
        while not game_over:
            # The learner acts on the last observed game screen;
            # input_t is a vector representing the game screen
            input_tm1 = input_t
            # We choose our action from either exploration (random) or exploitation (model).
            if np.random.rand() <= epsilon:
                # Explore with a random action
                action = int(np.random.randint(0, num_actions, size=1))
            else:
                # Choose the action from the model's prediction;
                # q contains the expected rewards for the actions
                q = model.predict(input_tm1)
                # We pick the action with the highest expected reward
                action = np.argmax(q[0])
            # apply action, get reward r and new state s'
            input_t, reward, game_over = game.act(action)
            # If we managed to score a goal, we add 1 to our win counter
            if reward == 1:
                win_cnt += 1
"""
The experiences < s, a, r, s’ > we make during gameplay are our training data.
Here we first save the last experience, and then load a batch of experiences to train our model
"""
# store experience
exp_replay.remember([input_tm1, action, reward, input_t], game_over)
# Load batch of experiences
inputs, targets = exp_replay.get_batch(model, batch_size=batch_size)
# train model on experiences
batch_loss = model.train_on_batch(inputs, targets)
loss += batch_loss
        if verbose > 0:
            print("Epoch {:03d}/{:03d} | Loss {:.4f} | Win count {}".format(e + 1, epochs, loss, win_cnt))
        # Track win history to later check if our model is improving at the game over time.
        win_hist.append(win_cnt)
    return win_hist
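

# ---------------------------------------------------------------------------
# Hypothetical usage sketch. The tutorial constructs the game wrapper and the
# Keras model elsewhere, so the FIFA class, the network architecture and the
# hyperparameters below are illustrative assumptions, not the author's exact
# code; they only show how train() is meant to be called.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from keras.models import Sequential
    from keras.layers import Dense

    game = FIFA()  # hypothetical wrapper exposing observe(), act() and key_to_press
    num_actions = len(game.key_to_press)
    state_dim = game.observe().shape[1]  # assumes observe() returns a (1, state_dim) vector

    # simple fully connected network mapping a game state to one Q-value per action
    model = Sequential()
    model.add(Dense(128, input_dim=state_dim, activation="relu"))
    model.add(Dense(num_actions))  # linear outputs = expected rewards per action
    model.compile(optimizer="sgd", loss="mse")

    # win_hist can be plotted afterwards to check that goals accumulate faster over time
    win_hist = train(game, model, epochs=100, verbose=1)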