Skip to content

Instantly share code, notes, and snippets.

@cashlo
Created June 2, 2020 13:06
Show Gist options
  • Save cashlo/62d27b0350f967c3bab0826bd952e10b to your computer and use it in GitHub Desktop.
Save cashlo/62d27b0350f967c3bab0826bd952e10b to your computer and use it in GitHub Desktop.
Monte Carlo Tree Search
import math
import random
class Node:
def __init__(self, parent=None, simulation_limit=1, exploration_constant=1):
self.parent=parent
self.reward = 0
self.visit_count = 0
self.possible_move_list = None
self.expanded_children = {}
self.simulation_limit = simulation_limit
self.exploration_constant = exploration_constant
def search(self):
simulation_count = 0
while simulation_count < self.simulation_limit:
next_node = self.pick_next_node(self.exploration_constant)
reward = next_node.rollout()
next_node.backup(reward)
simulation_count += 1
return self.best_UCB_child(0)
def expand_random_move(self):
move = self.possible_move_list.pop()
new_child = self.create_from_move(move)
self.expanded_children[move] = new_child
return new_child
def best_UCB_child(self, exploration_constant):
def ucb(n):
return n.reward/n.visit_count + exploration_constant*math.sqrt(2*math.log(self.visit_count)/n.visit_count)
return max(self.expanded_children.values(), key=ucb)
def pick_next_node(self, exploration_constant):
if self.is_terminal():
return self
if self.possible_move_list is None:
self.possible_move_list = self.get_all_possible_moves()
random.shuffle(self.possible_move_list)
if self.possible_move_list:
return self.expand_random_move()
return self.best_UCB_child(exploration_constant).pick_next_node(exploration_constant)
def backup(self, reward):
self.visit_count += 1
self.reward += reward
if self.parent is not None:
self.parent.backup(-reward)
def create_from_move(self, move):
pass
def rollout(self):
pass
def get_all_possible_moves(self):
pass
def is_terminal(self):
pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment