Last active
August 13, 2020 14:45
-
-
Save rubenhorn/6f187fb130b0e755f0b2088a8493cf09 to your computer and use it in GitHub Desktop.
Simple reinforcement-learning Tic-Tac-Toe game written in Python (to play against the AI, change the value of player_1 or player_2 on lines 190 and 191 to human_player)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os, re, random, copy, sys, atexit | |
def clear():
    """Clear the terminal using the platform's clear-screen command.

    Runs ``cls`` when ``os.name`` is ``'nt'`` and ``clear`` otherwise.

    Returns:
        Whatever ``os.system`` returns for that command.
    """
    command = 'cls' if os.name == 'nt' else 'clear'
    return os.system(command)
def print_header():
    """Print the ASCII-art game banner followed by one blank line.

    The banner rows are raw strings: they contain backslashes that are not
    valid escape sequences (``\\/`` and ``\\_``), which a normal string
    literal only tolerates with a warning. The printed text is unchanged.
    """
    # NOTE(review): the runs of spaces inside the artwork look collapsed in
    # this copy of the file; the text is kept exactly as found - confirm
    # against the intended banner.
    banner_rows = (
        r' _____ _ _____ _____ ',
        r' |_ _(_)_|_ _|_ _ _|_ _|__ ___ ',
        r' | | | / _|| |/ _` / _|| |/ _ \/ -_)',
        r' |_| |_\__||_|\__,_\__||_|\___/\___|',
    )
    for banner_row in banner_rows:
        print(banner_row)
    print()
def get_player_move(player):
    """Prompt for a move and parse it into a zero-based ``(row, col)`` tuple.

    The expected input looks like ``(x, y)`` with one-based coordinates.
    Whitespace around the input and around the numbers is ignored.

    Args:
        player: Identifier shown in the prompt.

    Returns:
        ``(y - 1, x - 1)`` as a tuple of ints, or ``None`` when the input
        does not start with a well-formed ``(x, y)`` pair. No range check is
        done here; the caller validates the move against the board.
    """
    print('player {} move (x, y):'.format(player), end='')
    entered = input().strip()
    parsed = re.match(r'\(\s*(\d+)\s*,\s*(\d+)\s*\)', entered)
    # An explicit check replaces the former catch-all ``except Exception``,
    # which also hid unrelated errors.
    if parsed is None:
        return None
    x, y = (int(group) - 1 for group in parsed.groups())
    # Input order is (x, y) but the board is indexed [row][col], i.e. (y, x).
    return (y, x)
def get_available_moves(state):
    """Return the positions of all empty cells on the board.

    Args:
        state: Board as a list of rows; a cell equal to ``0`` is empty.

    Returns:
        A list of ``(row_index, col_index)`` tuples in row-major order.
        Empty when no cell equals ``0``.
    """
    return [
        (row_index, col_index)
        for row_index, row in enumerate(state)
        for col_index, cell in enumerate(row)
        if cell == 0
    ]
def has_player_won(state, player):
    """Report whether ``player`` owns a complete row, column or diagonal.

    Args:
        state: Square board as a list of rows.
        player: Cell value identifying the player (the game uses 1 and 2).

    Returns:
        ``True`` if every cell of at least one row, one column, the main
        diagonal or the anti-diagonal equals ``player``; ``False`` otherwise,
        including for an empty board.
    """
    if not state:
        # Without this guard the empty diagonals below would count as wins.
        return False
    size = len(state)
    lines = list(state)
    lines.extend(zip(*state))  # columns
    lines.append([state[i][i] for i in range(size)])
    lines.append([state[i][size - 1 - i] for i in range(size)])
    return any(all(cell == player for cell in line) for line in lines)
def print_game(state):
    """Print the board with a column header and one-based row labels.

    Cells equal to ``1`` are drawn as ``x``, cells equal to ``2`` as ``o``
    and anything else as a blank.

    Args:
        state: Board as a list of rows.
    """
    symbols = {1: 'x', 2: 'o'}
    print(' x: 1 2 3\n\ry:')
    # Row labels come from enumerate rather than popping from a fixed
    # three-item list, so extra rows no longer raise IndexError.
    for row_number, row in enumerate(state, start=1):
        print('{} |'.format(row_number), end='')
        for cell in row:
            print(symbols.get(cell, ' ') + '|', end='')
        print()
def bot_random(state):
    """Return a randomly chosen move among the board's available moves.

    Args:
        state: Board as a list of rows.

    Returns:
        One of the tuples produced by ``get_available_moves(state)``,
        selected with ``random.choice``.
    """
    candidates = get_available_moves(state)
    return random.choice(candidates)
def bot_qlearning(state):
    """Choose a move with tabular Q-learning and update the table.

    Persistent data lives in the module globals ``qtable``,
    ``learning_rate`` and ``exploration_rate``, created on first use. The
    table has ``2**18`` rows of 9 action values. On first use it is loaded
    from ``<script path>.qtable`` if that file exists, and it is written
    back to that file at interpreter exit.

    A board is encoded as 18 bits: nine bits marking the cells held by the
    player to move, followed by nine bits marking the opponent's cells.

    Rewards: 100 for a winning move, 0 for a move that fills the board,
    -1 for any other legal move and -100 for picking an occupied cell.

    Args:
        state: 3x3 board as a list of rows. It is not modified.

    Returns:
        A legal ``(row, col)`` move.

    Raises:
        ValueError: If the board has no empty cell.
    """
    global qtable, learning_rate, exploration_rate
    learning_rate_decay = 0.99
    exploration_rate_decay = 0.9
    discount = 0.99
    illegal_move_reward = -100

    if 'learning_rate' not in globals():
        learning_rate = 0.1
    if 'exploration_rate' not in globals():
        exploration_rate = 0.3
    qtable_filename = os.path.abspath(sys.argv[0]) + '.qtable'

    def save_qtable():
        """Write the Q-table to disk, one comma-separated row per line."""
        clear()
        print_header()
        print('saving q-table to "{}"... '.format(qtable_filename), end='')
        sys.stdout.flush()
        with open(qtable_filename, 'w') as file:
            file.write('\n'.join(
                ','.join(str(value) for value in row) for row in qtable))
        print('done!')

    if 'qtable' not in globals():
        if os.path.isfile(qtable_filename):
            clear()
            print_header()
            print('loading q-table from "{}"... '.format(qtable_filename), end='')
            sys.stdout.flush()
            with open(qtable_filename, 'r') as file:
                # Blank lines (e.g. a trailing newline) are skipped.
                qtable = [
                    [float(col) for col in line.split(',')]
                    for line in file
                    if line.strip()
                ]
            print('done!')
        else:
            qtable = [[0] * 9 for _ in range(2 ** 18)]
        # Registered only when the table is first created, so the table is
        # saved exactly once at exit.
        atexit.register(save_qtable)

    player = get_player_current_turn(state)

    def state_to_number(board):
        """Encode ``board`` from the perspective of the player to move."""
        cells = [cell for row in board for cell in row]
        player_1_bits = ''.join('1' if cell == 1 else '0' for cell in cells)
        player_2_bits = ''.join('1' if cell == 2 else '0' for cell in cells)
        if player == 1:
            return int(player_1_bits + player_2_bits, 2)
        return int(player_2_bits + player_1_bits, 2)

    legal_moves = get_available_moves(state)
    if not legal_moves:
        raise ValueError('no moves available')

    q_row = qtable[state_to_number(state)]

    # Epsilon-greedy choice over all nine cells. (This used to be forced to
    # "always random" by a leftover ``True or``.)
    exploring = random.random() < exploration_rate
    if exploring:
        move_number = random.randrange(9)
    else:
        move_number = q_row.index(max(q_row))
    move = divmod(move_number, 3)

    if move not in legal_moves:
        # Penalise the illegal pick exactly once, then fall back to a legal
        # move. The old retry-by-recursion could recurse without bound once
        # the learning rate had decayed towards zero.
        q_row[move_number] += learning_rate * (
            illegal_move_reward - q_row[move_number])
        if exploring:
            move = random.choice(legal_moves)
        else:
            move = max(legal_moves, key=lambda m: q_row[3 * m[0] + m[1]])
        move_number = 3 * move[0] + move[1]

    next_state = update_game(state, move, player, create_copy=True)
    if has_player_won(next_state, player):
        reward, is_final_state = 100, True
    elif not get_available_moves(next_state):
        reward, is_final_state = 0, True
    else:
        reward, is_final_state = -1, False

    # Q-learning target: bootstrap from the successor only when the game
    # goes on (the old condition was inverted).
    # NOTE(review): the successor is the board right after this move, before
    # the opponent replies - confirm that this is the intended next state.
    future_value = 0 if is_final_state else max(
        qtable[state_to_number(next_state)])
    q_row[move_number] += learning_rate * (
        reward + discount * future_value - q_row[move_number])

    learning_rate *= learning_rate_decay
    exploration_rate *= exploration_rate_decay
    return move
def get_player_current_turn(state):
    """Infer from the board which player moves next.

    The player with fewer marks on the board is the one to move; when both
    have the same number of marks it is player 1's turn.

    Args:
        state: Board as a list of rows; cells equal to ``1`` or ``2`` are
            marks of player 1 and player 2.

    Returns:
        ``1`` or ``2``.
    """
    cells = [cell for row in state for cell in row]
    # ``<=`` rather than ``==``: a board where player 2 is ahead (player 2
    # opened the game) must still report player 1 as next.
    return 1 if cells.count(1) <= cells.count(2) else 2
def human_player(state):
    """Ask the human for a move until a legal one is entered.

    Each attempt redraws the screen (banner and board). After a rejected
    attempt an ``invalid move!`` notice is shown above the prompt.

    Args:
        state: Board as a list of rows.

    Returns:
        The first entered ``(row, col)`` move that is among
        ``get_available_moves(state)``.
    """
    rejected = False
    while True:
        clear()
        print_header()
        print_game(state)
        print('\n\rinvalid move!' if rejected else '\n')
        choice = get_player_move(get_player_current_turn(state))
        if choice is not None and choice in get_available_moves(state):
            return choice
        rejected = True
def game_over(state, message):
    """Announce the result of a game and reset the board in place.

    When either ``player_1`` or ``player_2`` is ``human_player`` the final
    board is drawn first and the function waits for Enter before clearing
    the screen. Otherwise only ``message`` is printed.

    Args:
        state: Board as a list of rows; its first three rows are replaced
            with fresh empty rows.
        message: Text to print as the game result.
    """
    global player_1, player_2
    human_involved = player_1 == human_player or player_2 == human_player
    if human_involved:
        clear()
        print_header()
        print_game(state)
        print('\n')
    print(message)
    if human_involved:
        input()
        # NOTE(review): indentation was lost in this copy of the file; the
        # clear() is assumed to belong to the human-only branch - confirm.
        clear()
    # Reset
    for row_index in range(3):
        state[row_index] = [0, 0, 0]
def update_game(state, move, player, create_copy=False):
    """Place ``player``'s mark at ``move`` and return the resulting board.

    Args:
        state: Board as a list of rows.
        move: Indexable pair; ``move[0]`` is the row, ``move[1]`` the column.
        player: Value to store in the chosen cell.
        create_copy: When true, the mark is placed on a deep copy and
            ``state`` is left untouched; otherwise ``state`` itself is
            modified.

    Returns:
        The board that received the mark (the copy or ``state`` itself).
    """
    board = state
    if create_copy:
        board = copy.deepcopy(state)
    row_index, col_index = move[0], move[1]
    board[row_index][col_index] = player
    return board
def check_winner(state, last_move_by):
    """End the game if the last move won it or filled the board.

    Calls ``game_over`` with a win message when ``last_move_by`` has won,
    or with a draw message when no moves remain. Does nothing otherwise.

    Args:
        state: Board as a list of rows.
        last_move_by: Identifier of the player who just moved.
    """
    if has_player_won(state, last_move_by):
        game_over(state, 'player {} wins!'.format(last_move_by))
        return
    if not get_available_moves(state):
        game_over(state, 'draw!')
# Main loop: plays games back to back until interrupted with Ctrl+C.
try:
    player_1 = bot_qlearning  # human_player
    player_2 = bot_random
    clear()
    state = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
    while True:
        # One pass of this for-loop is one round: player 1, then player 2.
        for mark, choose_move in ((1, player_1), (2, player_2)):
            move = choose_move(state)
            update_game(state, move, mark)
            # Decide this before check_winner(), which resets the board
            # when the game has ended.
            game_finished = (has_player_won(state, mark)
                             or not get_available_moves(state))
            check_winner(state, mark)
            if game_finished:
                # Start the next game with player 1 again. Previously player
                # 2 moved first on the fresh board, which broke turn
                # inference from mark counts.
                break
except KeyboardInterrupt:
    sys.exit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment