Skip to content

Instantly share code, notes, and snippets.

@rubenhorn
Last active August 13, 2020 14:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rubenhorn/6f187fb130b0e755f0b2088a8493cf09 to your computer and use it in GitHub Desktop.
Save rubenhorn/6f187fb130b0e755f0b2088a8493cf09 to your computer and use it in GitHub Desktop.
Simple reinforcement-learning (RL) Tic-Tac-Toe game written in Python (to play against the AI, change the value assigned to player_1 or player_2 in lines 190 and 191 to human_player)
#!/usr/bin/env python3
import os, re, random, copy, sys, atexit
def clear():
    """Clear the terminal by running the platform's clear-screen command.

    Runs ``cls`` when ``os.name`` is ``'nt'`` and ``clear`` otherwise, and
    returns whatever ``os.system`` returns for that command.
    """
    command = 'cls' if os.name == 'nt' else 'clear'
    return os.system(command)
def print_header():
    """Print the ASCII-art game banner followed by one empty line."""
    # Raw strings: the art contains backslashes that do not form valid escape
    # sequences, which non-raw literals report as a SyntaxWarning on current
    # Python versions. The printed text is unchanged.
    banner = (
        r' _____ _ _____ _____ ',
        r' |_ _(_)_|_ _|_ _ _|_ _|__ ___ ',
        r' | | | / _|| |/ _` / _|| |/ _ \/ -_)',
        r' |_| |_\__||_|\__,_\__||_|\___/\___|',
    )
    for line in banner:
        print(line)
    print()
def get_player_move(player):
    """Prompt ``player`` for a move on stdin and parse it.

    The input is expected to start with ``(x, y)``, where both numbers are
    1-based; whitespace around the numbers is allowed.

    Args:
        player: Player identifier shown in the prompt.

    Returns:
        The zero-based move as ``(y - 1, x - 1)`` (the typed pair is reversed
        so that the result indexes the board as ``[row][column]``), or
        ``None`` when the input does not start with such a pair.
    """
    print('player {} move (x, y):'.format(player), end='')
    text = input()
    match = re.match(r'\(\s*(\d+)\s*,\s*(\d+)\s*\)', text)
    # A failed match is the only expected failure, so test for it explicitly
    # instead of catching every exception.
    if match is None:
        return None
    x, y = (int(group) - 1 for group in match.groups())
    return (y, x)
def get_available_moves(state):
    """Return the ``(row, column)`` index of every cell of ``state`` equal to 0.

    Cells are listed row by row, left to right. The number of columns
    examined in each row is the length of the first row.
    """
    return [
        (row_index, col_index)
        for row_index, row in enumerate(state)
        for col_index in range(len(state[0]))
        if row[col_index] == 0
    ]
def has_player_won(state, player):
    """Return True when ``player`` owns a full row, column or diagonal.

    A row or column counts as won when it holds three cells equal to
    ``player``; the two diagonals are checked on the fixed 3x3 positions.
    """
    def owned(row_index, col_index):
        return state[row_index][col_index] == player

    rows = range(len(state))
    cols = range(len(state[0]))

    if any(sum(1 for c in cols if owned(r, c)) == 3 for r in rows):
        return True
    if any(sum(1 for r in rows if owned(r, c)) == 3 for c in cols):
        return True

    if owned(0, 0) and owned(1, 1) and owned(2, 2):
        return True
    return bool(owned(0, 2) and owned(1, 1) and owned(2, 0))
def print_game(state):
    """Print the board with a column header and a numbered label per row.

    Cells equal to 1 are drawn as ``x``, cells equal to 2 as ``o`` and any
    other value as a blank. Only three row labels exist.
    """
    symbols = {1: 'x', 2: 'o'}
    row_labels = '123'
    print(' x: 1 2 3\n\ry:')
    for row_index, row in enumerate(state):
        label = row_labels[row_index]
        cells = ''.join(symbols.get(cell, ' ') + '|' for cell in row)
        print(label + ' |' + cells)
def bot_random(state):
    """Return one of the free cells of ``state``, chosen at random.

    The candidates come from ``get_available_moves``; the result is a
    ``(row, column)`` tuple.
    """
    candidates = get_available_moves(state)
    return random.choice(candidates)
def bot_qlearning(state):
    """Choose a move with tabular Q-learning, updating the table on the way.

    Module-level state (created on first use):
        qtable: ``2**18`` rows of 9 action values. A row is addressed by an
            18-bit number: 9 bits for the cells of the player to move,
            followed by 9 bits for the opponent's cells.
        learning_rate / exploration_rate: start at 0.1 / 0.3 and are
            multiplied by their decay factor after every attempted move.

    On first use the table is loaded from ``<script path>.qtable`` when that
    file exists, and an ``atexit`` hook is registered that writes it back.

    Move selection is epsilon-greedy over all nine cells. An attempt that
    hits an occupied cell is penalised and retried; retries are always random
    so that a vanishing learning rate cannot trap the loop on one cell.

    Args:
        state: 3x3 board of 0 (free), 1 and 2.

    Returns:
        The chosen legal move as a ``(row, column)`` tuple.

    Raises:
        ValueError: If ``state`` has no free cell.
    """
    learning_rate_decay = 0.99
    exploration_rate_decay = 0.9
    discount = 0.99
    illegal_move_reward = -100
    win_reward = 100
    draw_reward = 0
    step_reward = -1

    global qtable, learning_rate, exploration_rate
    if 'learning_rate' not in globals():
        learning_rate = 0.1
    if 'exploration_rate' not in globals():
        exploration_rate = 0.3
    qtable_filename = os.path.abspath(sys.argv[0]) + '.qtable'

    def save_qtable():
        # Runs at interpreter exit: one comma-separated line per table row.
        clear()
        print_header()
        print('saving q-table to "{}"... '.format(qtable_filename), end='')
        sys.stdout.flush()
        text = '\n'.join(
            ','.join(str(value).replace(',', '.') for value in row)
            for row in qtable
        )
        with open(qtable_filename, 'w') as file:
            file.write(text)
        print('done!')

    if 'qtable' not in globals():
        if os.path.isfile(qtable_filename):
            clear()
            print_header()
            print('loading q-table from "{}"... '.format(qtable_filename), end='')
            sys.stdout.flush()
            with open(qtable_filename, 'r') as file:
                # Blank lines (e.g. a trailing newline) are skipped instead
                # of crashing float().
                initial_qtable = [
                    [float(col) for col in line.split(',')]
                    for line in file
                    if line.strip()
                ]
            print('done!')
        else:
            initial_qtable = [[0] * 9 for _ in range(2 ** 18)]
        qtable = initial_qtable
        atexit.register(save_qtable)

    available_moves = get_available_moves(state)
    if not available_moves:
        # Without this guard the retry loop below could never terminate.
        raise ValueError('no moves available')

    player = get_player_current_turn(state)
    opponent = 2 if player == 1 else 1

    def state_to_number(board):
        # Own cells form the high 9 bits, the opponent's cells the low 9 bits.
        cells = [cell for row in board for cell in row]
        own = ''.join('1' if cell == player else '0' for cell in cells)
        other = ''.join('1' if cell == opponent else '0' for cell in cells)
        return int(own + other, 2)

    action_values = qtable[state_to_number(state)]

    def learn(move_number, reward, future_value):
        # Q(s, a) += lr * (reward + discount * future_value - Q(s, a))
        old_value = action_values[move_number]
        action_values[move_number] = old_value + learning_rate * (
            reward + discount * future_value - old_value)

    retry_randomly = False
    while True:
        if retry_randomly or random.random() < exploration_rate:
            move_number = random.randrange(9)
        else:
            # Greedy choice: first action holding the highest value.
            move_number = action_values.index(max(action_values))
        move = divmod(move_number, 3)
        is_move_possible = move in available_moves

        # Exactly one update per attempt; terminal outcomes (illegal move,
        # win, draw) do not bootstrap from a successor state.
        if not is_move_possible:
            learn(move_number, illegal_move_reward, 0)
            retry_randomly = True
        else:
            next_state = update_game(state, move, player, create_copy=True)
            if has_player_won(next_state, player):
                learn(move_number, win_reward, 0)
            elif not get_available_moves(next_state):
                learn(move_number, draw_reward, 0)
            else:
                learn(move_number, step_reward,
                      max(qtable[state_to_number(next_state)]))

        learning_rate *= learning_rate_decay
        exploration_rate *= exploration_rate_decay
        if is_move_possible:
            return move
def get_player_current_turn(state):
    """Return which player moves next on ``state``.

    The result is 1 when the board holds as many 1-cells as 2-cells, and 2
    otherwise.
    """
    cells = [cell for row in state for cell in row]
    if cells.count(1) == cells.count(2):
        return 1
    return 2
def human_player(state):
    """Ask the user for moves until a legal one is entered, then return it.

    Before every prompt the screen is cleared and the banner and board are
    redrawn. After a rejected input an "invalid move!" notice is shown above
    the prompt. A move is accepted when it parses and names a free cell.
    """
    show_error = False
    while True:
        clear()
        print_header()
        print_game(state)
        print('\n\rinvalid move!' if show_error else '\n')
        current_player = get_player_current_turn(state)
        choice = get_player_move(current_player)
        if choice is not None and choice in get_available_moves(state):
            return choice
        show_error = True
def game_over(state, message):
    """Announce the result of a finished game and reset the board in place.

    Reads the module-level ``player_1`` and ``player_2``. When either is
    ``human_player`` the final board is redrawn before the message and the
    function waits for Enter afterwards; otherwise only the message is
    printed. Finally the three rows of ``state`` are replaced by empty rows.
    """
    watched_by_human = (player_1 == human_player) or (player_2 == human_player)

    if watched_by_human:
        clear()
        print_header()
        print_game(state)
        print('\n')

    print(message)

    if watched_by_human:
        input()
        clear()

    # Reset: the caller keeps using the same list object.
    for row_index in range(3):
        state[row_index] = [0, 0, 0]
def update_game(state, move, player, create_copy=False):
    """Place ``player`` at ``move`` and return the resulting board.

    With ``create_copy`` set, a deep copy of ``state`` is modified and
    returned and ``state`` itself is left untouched; otherwise ``state`` is
    modified in place and returned. ``move`` is indexed as (row, column).
    """
    if create_copy:
        board = copy.deepcopy(state)
    else:
        board = state
    row_index = move[0]
    col_index = move[1]
    board[row_index][col_index] = player
    return board
def check_winner(state, last_move_by):
    """End the game when the last move decided it.

    Calls ``game_over`` with a win message when ``last_move_by`` has won, or
    with a draw message when no free cell is left; otherwise does nothing.
    """
    if has_player_won(state, last_move_by):
        outcome = 'player {} wins!'.format(last_move_by)
    elif not get_available_moves(state):
        outcome = 'draw!'
    else:
        return
    game_over(state, outcome)
# Game loop: the two players alternate on a shared board until interrupted
# with Ctrl-C. player_1 always places 1 and player_2 always places 2.
try:
    player_1 = bot_qlearning  # human_player
    player_2 = bot_random
    clear()
    state = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
    while True:
        move = player_1(state)
        update_game(state, move, 1)
        # check_winner resets the board when the game is decided, so record
        # the outcome first. If player 1's move ended the game, start the
        # next game with player 1 again: get_player_current_turn assumes that
        # player 1 opens every game.
        game_finished = has_player_won(state, 1) or not get_available_moves(state)
        check_winner(state, 1)
        if game_finished:
            continue
        move = player_2(state)
        update_game(state, move, 2)
        check_winner(state, 2)
except KeyboardInterrupt:
    # sys.exit rather than the site-provided exit(), which is meant for the
    # interactive interpreter and is not always available.
    sys.exit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment