Created
June 2, 2020 13:06
-
-
Save cashlo/62d27b0350f967c3bab0826bd952e10b to your computer and use it in GitHub Desktop.
Monte Carlo Tree Search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import random | |
class Node: | |
def __init__(self, parent=None, simulation_limit=1, exploration_constant=1): | |
self.parent=parent | |
self.reward = 0 | |
self.visit_count = 0 | |
self.possible_move_list = None | |
self.expanded_children = {} | |
self.simulation_limit = simulation_limit | |
self.exploration_constant = exploration_constant | |
def search(self): | |
simulation_count = 0 | |
while simulation_count < self.simulation_limit: | |
next_node = self.pick_next_node(self.exploration_constant) | |
reward = next_node.rollout() | |
next_node.backup(reward) | |
simulation_count += 1 | |
return self.best_UCB_child(0) | |
def expand_random_move(self): | |
move = self.possible_move_list.pop() | |
new_child = self.create_from_move(move) | |
self.expanded_children[move] = new_child | |
return new_child | |
def best_UCB_child(self, exploration_constant): | |
def ucb(n): | |
return n.reward/n.visit_count + exploration_constant*math.sqrt(2*math.log(self.visit_count)/n.visit_count) | |
return max(self.expanded_children.values(), key=ucb) | |
def pick_next_node(self, exploration_constant): | |
if self.is_terminal(): | |
return self | |
if self.possible_move_list is None: | |
self.possible_move_list = self.get_all_possible_moves() | |
random.shuffle(self.possible_move_list) | |
if self.possible_move_list: | |
return self.expand_random_move() | |
return self.best_UCB_child(exploration_constant).pick_next_node(exploration_constant) | |
def backup(self, reward): | |
self.visit_count += 1 | |
self.reward += reward | |
if self.parent is not None: | |
self.parent.backup(-reward) | |
def create_from_move(self, move): | |
pass | |
def rollout(self): | |
pass | |
def get_all_possible_moves(self): | |
pass | |
def is_terminal(self): | |
pass |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment