Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save lahdjirayhan/65c0c6468e35539f7b111f8a28611a4d to your computer and use it in GitHub Desktop.
Save lahdjirayhan/65c0c6468e35539f7b111f8a28611a4d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class Game:\n",
"    \"\"\"Tic-tac-toe match between two players on a flat board of 9 ints.\n",
"\n",
"    Empty squares hold 0; every move appends str(board) to `history`.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, PlayerA, PlayerB):\n",
"        self.PlayerA = PlayerA\n",
"        self.PlayerB = PlayerB\n",
"        self.board = np.zeros(9, int)\n",
"        self.history = []\n",
"\n",
"    def reset(self):\n",
"        \"\"\"Clear the board and the move history for a new game.\"\"\"\n",
"        self.board = np.zeros(9, int)\n",
"        self.history = []\n",
"\n",
"    def draw(self, mark, where):\n",
"        \"\"\"Write `mark` at flat index `where` and record the new board.\"\"\"\n",
"        self.board[where] = mark\n",
"        self.history.append(str(self.board))\n",
"\n",
"    def is_game_done(self):\n",
"        \"\"\"Return True once a player has won or no empty square is left.\"\"\"\n",
"        # Asking PlayerA is enough because PlayerB's victory is PlayerA's defeat.\n",
"        # A win on the very last square scores 0 but is caught by the\n",
"        # full-board check below.\n",
"        if self.PlayerA.evaluate_board(self.board) != 0:\n",
"            return True\n",
"        return not np.any(self.board == 0)\n",
"\n",
"    def print_board(self):\n",
"        \"\"\"Print the board as a 3x3 grid.\"\"\"\n",
"        print(np.reshape(self.board, (3, 3)))\n",
"\n",
"\n",
"class Player:\n",
"    \"\"\"Tic-tac-toe agent that learns a value for every board it has seen.\n",
"\n",
"    `history_book` maps str(board) to a learned score and\n",
"    `performance_record` keeps the final score of every finished game.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, mark, learning_rate, exploration_rate):\n",
"        # Available marks: -1 and 1\n",
"        self.mark = mark\n",
"        # Passing None for a rate draws it from np.random.uniform().\n",
"        self.learning_rate = np.random.uniform() if learning_rate is None else learning_rate\n",
"        self.exploration_rate = np.random.uniform() if exploration_rate is None else exploration_rate\n",
"        self.history_book = {}\n",
"        self.performance_record = []\n",
"\n",
"    def evaluate_board(self, board):\n",
"        \"\"\"Score a flat 9-square board from this player's point of view.\n",
"\n",
"        Returns 10 * (number of empty squares) when this player owns a full\n",
"        row, column or diagonal, the negative of that when the opponent\n",
"        does, and 0 otherwise.\n",
"        \"\"\"\n",
"        grid = np.reshape(board, (3, 3))\n",
"        line_sums = [\n",
"            *np.sum(grid, axis=0),\n",
"            *np.sum(grid, axis=1),\n",
"            np.trace(grid),\n",
"            np.trace(np.fliplr(grid)),  # anti-diagonal\n",
"        ]\n",
"        # NOTE: a win that fills the last square leaves no empty squares and\n",
"        # therefore scores 0, the same as a draw.\n",
"        empty_count = np.count_nonzero(grid == 0)\n",
"        if self.mark * 3 in line_sums:\n",
"            return 10 * empty_count\n",
"        if self.mark * -3 in line_sums:\n",
"            return -10 * empty_count\n",
"        return 0\n",
"\n",
"    def update_history_book(self, new_history_entries):\n",
"        \"\"\"Blend the outcome of one finished game into the learned values.\n",
"\n",
"        `new_history_entries` holds the str(board) snapshots of the game in\n",
"        move order. Every snapshot is credited with the final score: unseen\n",
"        boards store it directly, known boards move toward it by\n",
"        `learning_rate`. The score is also appended to `performance_record`.\n",
"        \"\"\"\n",
"        # Entries are strings such as '[1 0 -1 ...]'; strip the brackets and\n",
"        # parse the last one back into an int array to score the final board.\n",
"        final_board = np.array([int(tok) for tok in new_history_entries[-1][1:-1].split()])\n",
"        score = self.evaluate_board(final_board)\n",
"\n",
"        # dict.fromkeys visits each distinct board once, in first-seen order.\n",
"        for state in dict.fromkeys(new_history_entries):\n",
"            known = self.history_book.get(state)\n",
"            if known is None:\n",
"                self.history_book[state] = score\n",
"            else:\n",
"                self.history_book[state] = known + self.learning_rate * (score - known)\n",
"\n",
"        self.performance_record.append(score)\n",
"\n",
"    def move_randomly(self, board):\n",
"        \"\"\"Return the index of a randomly chosen empty square.\"\"\"\n",
"        choices = [idx[0] for idx in np.argwhere(board == 0)]\n",
"        return np.random.choice(choices)\n",
"\n",
"    def move_using_knowledge(self, board):\n",
"        \"\"\"Return the empty square whose resulting board has the best known value.\n",
"\n",
"        Each empty square is tried on a copy of the board and the result is\n",
"        looked up in `history_book` under str(board), the key format used by\n",
"        the recorded game history. An untried square is picked at random\n",
"        when nothing is known yet, or when the best known value is negative\n",
"        and untried squares remain.\n",
"        \"\"\"\n",
"        best_action = None\n",
"        best_value = None\n",
"        unknown_tiles = []\n",
"        for tile in (int(idx[0]) for idx in np.argwhere(board == 0)):\n",
"            candidate = board.copy()\n",
"            candidate[tile] = self.mark\n",
"            value = self.history_book.get(str(candidate))\n",
"            if value is None:\n",
"                unknown_tiles.append(tile)\n",
"            elif best_value is None or value > best_value:\n",
"                best_value = value\n",
"                best_action = tile\n",
"\n",
"        if best_action is None or (best_value < 0 and unknown_tiles):\n",
"            return int(np.random.choice(unknown_tiles))\n",
"        return best_action\n",
"\n",
"    def move(self, board):\n",
"        \"\"\"Move randomly with probability `exploration_rate`, else use knowledge.\"\"\"\n",
"        if np.random.uniform() > self.exploration_rate:\n",
"            return self.move_using_knowledge(board)\n",
"        return self.move_randomly(board)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Four agents with different learning / exploration settings.\n",
"Player1 = Player(mark=1, learning_rate=0.2, exploration_rate=0.02)  # a play-it-safe agent\n",
"Player2 = Player(mark=-1, learning_rate=0.01, exploration_rate=0.95)  # a very random and ruthless agent\n",
"Player3 = Player(mark=1, learning_rate=0.5, exploration_rate=0.4)  # an impatient learner\n",
"Player4 = Player(mark=-1, learning_rate=0.1, exploration_rate=0.3)  # a maybe normal agent\n",
"\n",
"# One game object per pairing of a mark-1 agent with a mark--1 agent.\n",
"Game12 = Game(Player1, Player2)\n",
"Game14 = Game(Player1, Player4)\n",
"Game32 = Game(Player3, Player2)\n",
"Game34 = Game(Player3, Player4)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Iteration: 10000\n",
"Player 1:\n",
" Mean: 4.844\n",
" Stdv: 18.33836590320959\n",
"Player 2:\n",
" Mean: -4.844\n",
" Stdv: 18.33836590320959\n",
"--------------------------------------\n",
"Iteration: 20000\n",
"Player 1:\n",
" Mean: 4.579\n",
" Stdv: 18.516553648019926\n",
"Player 2:\n",
" Mean: -4.579\n",
" Stdv: 18.516553648019926\n",
"--------------------------------------\n",
"Iteration: 30000\n",
"Player 1:\n",
" Mean: 4.338\n",
" Stdv: 18.299228289739435\n",
"Player 2:\n",
" Mean: -4.338\n",
" Stdv: 18.299228289739435\n",
"--------------------------------------\n",
"Iteration: 40000\n",
"Player 1:\n",
" Mean: 4.521\n",
" Stdv: 18.333863722630863\n",
"Player 2:\n",
" Mean: -4.521\n",
" Stdv: 18.333863722630863\n",
"--------------------------------------\n",
"Iteration: 50000\n",
"Player 1:\n",
" Mean: 4.413\n",
" Stdv: 18.280465831044896\n",
"Player 2:\n",
" Mean: -4.413\n",
" Stdv: 18.280465831044896\n",
"--------------------------------------\n",
"Iteration: 60000\n",
"Player 1:\n",
" Mean: 4.202\n",
" Stdv: 18.27630148580396\n",
"Player 2:\n",
" Mean: -4.202\n",
" Stdv: 18.27630148580396\n",
"--------------------------------------\n",
"Iteration: 70000\n",
"Player 1:\n",
" Mean: 4.254\n",
" Stdv: 18.402268447123575\n",
"Player 2:\n",
" Mean: -4.254\n",
" Stdv: 18.402268447123575\n",
"--------------------------------------\n",
"Iteration: 80000\n",
"Player 1:\n",
" Mean: 4.286\n",
" Stdv: 18.430143895260287\n",
"Player 2:\n",
" Mean: -4.286\n",
" Stdv: 18.430143895260287\n",
"--------------------------------------\n",
"Iteration: 90000\n",
"Player 1:\n",
" Mean: 4.553\n",
" Stdv: 18.21099093953978\n",
"Player 2:\n",
" Mean: -4.553\n",
" Stdv: 18.21099093953978\n",
"--------------------------------------\n",
"Iteration: 100000\n",
"Player 1:\n",
" Mean: 4.266\n",
" Stdv: 18.204429241258843\n",
"Player 2:\n",
" Mean: -4.266\n",
" Stdv: 18.204429241258843\n",
"--------------------------------------\n",
"Iteration: 110000\n",
"Player 1:\n",
" Mean: 4.123\n",
" Stdv: 18.27596429740439\n",
"Player 2:\n",
" Mean: -4.123\n",
" Stdv: 18.27596429740439\n",
"--------------------------------------\n",
"Iteration: 120000\n",
"Player 1:\n",
" Mean: 4.314\n",
" Stdv: 18.481596359622188\n",
"Player 2:\n",
" Mean: -4.314\n",
" Stdv: 18.481596359622188\n",
"--------------------------------------\n",
"Iteration: 130000\n",
"Player 1:\n",
" Mean: 4.508\n",
" Stdv: 18.385808005089146\n",
"Player 2:\n",
" Mean: -4.508\n",
" Stdv: 18.385808005089146\n",
"--------------------------------------\n",
"Iteration: 140000\n",
"Player 1:\n",
" Mean: 4.371\n",
" Stdv: 18.31950760801174\n",
"Player 2:\n",
" Mean: -4.371\n",
" Stdv: 18.31950760801174\n",
"--------------------------------------\n",
"Iteration: 150000\n",
"Player 1:\n",
" Mean: 4.419\n",
" Stdv: 18.296514394823948\n",
"Player 2:\n",
" Mean: -4.419\n",
" Stdv: 18.296514394823948\n",
"--------------------------------------\n",
"Iteration: 160000\n",
"Player 1:\n",
" Mean: 4.727\n",
" Stdv: 18.401235583514495\n",
"Player 2:\n",
" Mean: -4.727\n",
" Stdv: 18.401235583514495\n",
"--------------------------------------\n",
"Iteration: 170000\n",
"Player 1:\n",
" Mean: 4.506\n",
" Stdv: 18.409670393573048\n",
"Player 2:\n",
" Mean: -4.506\n",
" Stdv: 18.409670393573048\n",
"--------------------------------------\n",
"Iteration: 180000\n",
"Player 1:\n",
" Mean: 4.422\n",
" Stdv: 18.318458341246952\n",
"Player 2:\n",
" Mean: -4.422\n",
" Stdv: 18.318458341246952\n",
"--------------------------------------\n",
"Iteration: 190000\n",
"Player 1:\n",
" Mean: 4.523\n",
" Stdv: 18.357354684158608\n",
"Player 2:\n",
" Mean: -4.523\n",
" Stdv: 18.357354684158608\n",
"--------------------------------------\n",
"Iteration: 200000\n",
"Player 1:\n",
" Mean: 4.557\n",
" Stdv: 18.259894605391345\n",
"Player 2:\n",
" Mean: -4.557\n",
" Stdv: 18.259894605391345\n",
"--------------------------------------\n",
"Iteration: 210000\n",
"Player 1:\n",
" Mean: 4.099\n",
" Stdv: 18.54799716950593\n",
"Player 2:\n",
" Mean: -4.099\n",
" Stdv: 18.54799716950593\n",
"--------------------------------------\n",
"Iteration: 220000\n",
"Player 1:\n",
" Mean: 4.421\n",
" Stdv: 18.286737243149744\n",
"Player 2:\n",
" Mean: -4.421\n",
" Stdv: 18.286737243149744\n",
"--------------------------------------\n",
"Iteration: 230000\n",
"Player 1:\n",
" Mean: 4.345\n",
" Stdv: 18.35840338918393\n",
"Player 2:\n",
" Mean: -4.345\n",
" Stdv: 18.35840338918393\n",
"--------------------------------------\n",
"Iteration: 240000\n",
"Player 1:\n",
" Mean: 4.599\n",
" Stdv: 18.40758536582134\n",
"Player 2:\n",
" Mean: -4.599\n",
" Stdv: 18.40758536582134\n",
"--------------------------------------\n",
"Iteration: 250000\n",
"Player 1:\n",
" Mean: 4.473\n",
" Stdv: 18.29705634794843\n",
"Player 2:\n",
" Mean: -4.473\n",
" Stdv: 18.29705634794843\n",
"--------------------------------------\n",
"Iteration: 260000\n",
"Player 1:\n",
" Mean: 4.376\n",
" Stdv: 18.519466083016543\n",
"Player 2:\n",
" Mean: -4.376\n",
" Stdv: 18.519466083016543\n",
"--------------------------------------\n",
"Iteration: 270000\n",
"Player 1:\n",
" Mean: 4.368\n",
" Stdv: 18.386967558572564\n",
"Player 2:\n",
" Mean: -4.368\n",
" Stdv: 18.386967558572564\n",
"--------------------------------------\n",
"Iteration: 280000\n",
"Player 1:\n",
" Mean: 4.467\n",
" Stdv: 18.581601411073265\n",
"Player 2:\n",
" Mean: -4.467\n",
" Stdv: 18.581601411073265\n",
"--------------------------------------\n",
"Iteration: 290000\n",
"Player 1:\n",
" Mean: 4.574\n",
" Stdv: 18.41951476016673\n",
"Player 2:\n",
" Mean: -4.574\n",
" Stdv: 18.41951476016673\n",
"--------------------------------------\n",
"Iteration: 300000\n",
"Player 1:\n",
" Mean: 4.476\n",
" Stdv: 18.152284263970746\n",
"Player 2:\n",
" Mean: -4.476\n",
" Stdv: 18.152284263970746\n",
"--------------------------------------\n"
]
}
],
"source": [
"TIMES = 300000\n",
"DISPLAY_DIV = 10000\n",
"\n",
"for times in range(TIMES):\n",
"    # Player1 opens every game; the players then alternate until it ends.\n",
"    mover, waiting = Player1, Player2\n",
"    while True:\n",
"        Game12.draw(mover.mark, int(mover.move(Game12.board)))\n",
"        if Game12.is_game_done():\n",
"            # Both players learn from the same finished game.\n",
"            Player1.update_history_book(Game12.history)\n",
"            Player2.update_history_book(Game12.history)\n",
"            Game12.reset()\n",
"            break\n",
"        mover, waiting = waiting, mover\n",
"    if (times+1) % DISPLAY_DIV == 0:\n",
"        # Report statistics over the most recent DISPLAY_DIV games.\n",
"        recent_p1 = Player1.performance_record[-DISPLAY_DIV:]\n",
"        recent_p2 = Player2.performance_record[-DISPLAY_DIV:]\n",
"        print(\"Iteration: \", times+1)\n",
"        print(\"Player 1:\")\n",
"        print(\" Mean: \", np.mean(recent_p1))\n",
"        print(\" Stdv: \", np.std(recent_p1))\n",
"        print(\"Player 2:\")\n",
"        print(\" Mean: \", np.mean(recent_p2))\n",
"        print(\" Stdv: \", np.std(recent_p2))\n",
"        print(\"--------------------------------------\")\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# One x position per block of DISPLAY_DIV consecutive games.\n",
"block_ids = list(range(TIMES // DISPLAY_DIV))\n",
"\n",
"# Mean Player1 score inside each block: block b covers games\n",
"# b * DISPLAY_DIV up to (but not including) (b + 1) * DISPLAY_DIV.\n",
"p1perf = [\n",
"    np.mean(Player1.performance_record[b * DISPLAY_DIV:(b + 1) * DISPLAY_DIV])\n",
"    for b in block_ids\n",
"]\n",
"plt.plot(block_ids, p1perf)\n",
"plt.xlabel('Block iteration (x' + str(DISPLAY_DIV) +')')\n",
"plt.ylabel(\"Moving average of score\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Persist Player1's learned board values as JSON.\n",
"with open(\"Player1_knowledge.json\", \"w\") as knowledge_file:\n",
"    json.dump(Player1.history_book, knowledge_file)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment