Skip to content

Instantly share code, notes, and snippets.

import sys, os, shutil, pdb, random
from tqdm import tqdm
def q(text=''):
    '''
    Print `text` wrapped in `>` and `<` markers, then stop the program.

    text: any value that can be formatted into an f-string; defaults to ''.

    Does not return: sys.exit() raises SystemExit (with no exit code given).
    '''
    print(f'>{text}<')
    sys.exit()
from environment import TicTacToe
from agent import QLearningAgent, Hoooman
import config as cfg
from config import display_board
import sys, os, shutil, pdb, random
from tqdm import tqdm
def q(text=''):
    '''
    a function that exits the code after printing a message. used for debugging purposes

    text: any value that can be formatted into an f-string; it is printed between `>` and `<`.

    Does not return: sys.exit() raises SystemExit (with no exit code given).
    '''
    print(f'>{text}<')  # f-strings require Python 3.6 or newer
    sys.exit()
# name of the summary directory (presumably where run summaries are written -- confirm against the caller)
summary_dir = 'summary'
# number of episodes
num_episodes = 500_000
# whether to print the Tic-Tac-Toe board on the terminal;
# keeping this False is suggested while training
display = False
# exploration-exploitation trade-off factor; must be a real number in the open interval (0, 1)
epsilon = 0.4
# learning rate; must be a real number in the open interval (0, 1)
alpha = 0.3
def display_board(board, action, playerID, player1, player2, reward, done, possible_actions, training=True, episode_reward_player1=None, episode_reward_player2=None):
    '''
    prints out the Tic-Tac-Toe board in the terminal as three rows of three cells.

    board: flat sequence of strings; entries 0-8 are printed row by row,
           with the cells of a row separated by single spaces.

    A blank separator is printed before the board.

    NOTE(review): the code visible here uses only `board`. The remaining
    parameters (action, playerID, player1, player2, reward, done,
    possible_actions, training, episode_reward_player1, episode_reward_player2)
    are accepted but unused in this view; an earlier docstring mentioned
    printing actions, rewards, game status and possible next actions --
    confirm whether that logic exists elsewhere.
    '''
    print('\n')  # emits two newlines: the literal one plus print's own line ending
    for row_start in range(0, 9, 3):
        print(' '.join(board[row_start:row_start + 3]))
import random, pickle
import config as cfg
class QLearningAgent:
    '''
    A named agent that stores its hyper-parameters and an initially empty Q-table.
    '''

    def __init__(self, name, epsilon=cfg.epsilon, alpha=cfg.alpha, gamma=cfg.gamma):
        '''
        name: identifier stored on the agent
        epsilon, alpha, gamma: hyper-parameters; their defaults are read from
        the config module once, when this class is defined.

        NOTE(review): confirm that the config module defines `gamma` alongside
        `epsilon` and `alpha`, otherwise defining this class raises AttributeError.
        '''
        self.name = name
        self.epsilon = epsilon  # exploration-exploitation trade-off factor
        self.alpha = alpha  # learning-rate
        self.gamma = gamma  # discount-factor
        self.Q = {}  # Q-Table, starts empty
class TicTacToe:
    def __init__(self):
        '''
        the environment starts with 9 empty spaces representing a board of Tic-Tac-Toe

        Each instance gets its own fresh list of nine '_' cells, and `done`
        starts as False.
        '''
        self.board = ['_'] * 9  # the initial blank board
        self.done = False  # done = True means the game has ended
def reset(self):
'''
import sys
# A function for exiting the script after printing a message
def q(text=''):
    '''
    Print `text` between `>` and `<`, then exit the script.

    text: any value that can be formatted into an f-string; defaults to ''.

    Does not return: sys.exit() raises SystemExit (with no exit code given).
    '''
    print(f'>{text}<')
    sys.exit()
import argparse, os
# Desktop path