This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
import numpy as np | |
# Module-level configuration constants.
NUM_STATES = 10  # length of the array that hot_one_state allocates
NUM_ACTIONS = 2
GAMMA = 0.5  # NOTE(review): presumably the reward discount factor; its use is not visible here
def hot_one_state(index): | |
array = np.zeros(NUM_STATES) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """ | |
import numpy as np | |
import cPickle as pickle | |
import gym | |
# hyperparameters
H = 200  # number of hidden layer neurons
batch_size = 10  # every how many episodes to do a param update?
learning_rate = 1e-4  # NOTE(review): presumably the optimizer step size; its use is not visible here
gamma = 0.99  # discount factor for reward
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# select a random move | |
move = random.choice(moves) | |
result, next_move = monte_carlo_sample(apply_move(board_state, move, side), -side) | |
return result, move |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def monte_carlo_tree_search(board_state, side, number_of_samples):
    """Sample playouts from ``board_state`` and select the move with the best average result.

    Args:
        board_state: position passed unchanged to ``monte_carlo_sample``.
        side: side to move, passed unchanged to ``monte_carlo_sample``.
        number_of_samples: how many times ``monte_carlo_sample`` is called.

    Raises:
        ValueError: raised by ``max`` when no samples are drawn, because
            there is then no candidate move to choose from.
    """
    # Maps move -> [sum of sampled results, number of samples for that move].
    results_per_move = collections.defaultdict(lambda: [0, 0])

    for _ in range(number_of_samples):
        result, move = monte_carlo_sample(board_state, side)
        stats = results_per_move[move]
        stats[0] += result
        stats[1] += 1

    # Rank every candidate by ITS OWN average result. Dividing by the sample
    # count of whichever move happened to be drawn last (the loop variable
    # ``move``) would scale all candidates by the same constant and so rank
    # them by summed result instead of by average.
    move = max(results_per_move,
               key=lambda x: results_per_move[x][0] / results_per_move[x][1])
    # NOTE(review): the visible code stops after selecting ``move`` and returns
    # nothing -- confirm what callers expect from this function.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def upper_confidence_bounds(payout, samples_for_this_machine, log_total_samples):
    """Return the upper-confidence-bound score of one option.

    The score is the average payout plus an exploration bonus of
    ``sqrt(2 * log_total_samples / samples_for_this_machine)``.

    Args:
        payout: summed payout observed for this option.
        samples_for_this_machine: number of times this option was sampled.
        log_total_samples: logarithm of the total sample count over all
            options (the caller computes the logarithm).

    Returns:
        The score as a float; ``float("inf")`` when the option has never
        been sampled, so that an untried option is always preferred.
    """
    if samples_for_this_machine == 0:
        # No data yet: the average is undefined and the bound is unbounded.
        return float("inf")

    average_payout = payout / samples_for_this_machine
    exploration_bonus = math.sqrt((2 * log_total_samples) / samples_for_this_machine)
    return average_payout + exploration_bonus
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def monte_carlo_tree_search_uct(board_state, side, number_of_rollouts): | |
state_results = collections.defaultdict(float) | |
state_samples = collections.defaultdict(float) | |
for _ in range(number_of_rollouts): | |
current_side = side | |
current_board_state = board_state | |
first_unvisited_node = True | |
rollout_path = [] | |
result = 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
while result == 0: | |
move_states = {move: apply_move(current_board_state, move, current_side) | |
for move in available_moves(current_board_state)} | |
if not move_states: | |
result = 0 | |
break |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if all((state in state_samples) for _, state in move_states): | |
log_total_samples = math.log(sum(state_samples[s] for s in move_states.values())) | |
move, state = max(move_states, | |
key=lambda _, s:upper_confidence_bounds(state_results[s],state_samples[s], log_total_samples)) | |
else: | |
move = random.choice(list(move_states.keys())) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
current_board_state = move_states[move] | |
if first_unvisited_node: | |
rollout_path.append((current_board_state, current_side)) | |
if current_board_state not in state_samples: | |
first_unvisited_node = False | |
state_values[current_board_state] = value_func(current_board_state) |