# Created
# April 11, 2024 23:40
# Save jesuscmadrigal/1ffbd6e1042941834aa6200d671f2034 to your computer and use it in GitHub Desktop.
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""FinalCode_RL_chess.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/116XsQ7P0d22cQlLEwtmNMaIu3h_jCmlT

**Installing libraries**
"""
import numpy as np #for linear algebra | |
import numpy.matlib #for plots | |
import matplotlib.pyplot as plt #for plots | |
"""**Generate chess board**"""
def Board():
    """Build the fixed starting position used by the rest of the script.

    Returns:
        tuple: ``(visualboard, bking_state, brook_state, bknight_state,
        wking_state, size_board)``. Each ``*_state`` is a ``[row, col]``
        list, ``size_board`` is ``(8, 8)`` and ``visualboard`` is an 8x8
        integer array holding 1 on the black king's square, 2 on the black
        rook's, 3 on the black knight's, 4 on the white king's and 0
        everywhere else.
    """
    size_board = (8, 8)

    # Starting squares, written as [row, col].
    bking_state = [2, 3]    # black king
    brook_state = [1, 3]    # black rook
    bknight_state = [4, 3]  # black knight
    wking_state = [1, 0]    # white (enemy) king

    # Piece codes 1..4 are assigned in this exact order.
    visualboard = np.zeros([8, 8], dtype=int)
    placements = (bking_state, brook_state, bknight_state, wking_state)
    for piece_code, (row, col) in enumerate(placements, start=1):
        visualboard[row, col] = piece_code

    return (
        visualboard,
        bking_state,
        brook_state,
        bknight_state,
        wking_state,
        size_board,
    )
visualboard, bking_state, brook_state, bknight_state, wking_state, size_board = Board()
visualboard  # notebook cell output; has no effect when run as a script

"""**Defining actions spaces of each piece**"""

# White king: the eight neighbouring squares, ordered
# up, down, left, right, up-left, up-right, down-left, down-right.
# Squares are NOT clipped to the board here.
wking_action_space = np.array(
    [
        (wking_state[0] + d_row, wking_state[1] + d_col)
        for d_row, d_col in (
            (-1, 0), (1, 0), (0, -1), (0, 1),
            (-1, -1), (-1, 1), (1, -1), (1, 1),
        )
    ]
)
possible_wking_actions = 8
All_possible_white_actions = possible_wking_actions

# Black king: same eight offsets and ordering as the white king.
bking_action_space = np.array(
    [
        (bking_state[0] + d_row, bking_state[1] + d_col)
        for d_row, d_col in (
            (-1, 0), (1, 0), (0, -1), (0, 1),
            (-1, -1), (-1, 1), (1, -1), (1, 1),
        )
    ]
)
possible_bking_actions = 8

# Black rook: for every travel distance 1..7 the four targets are appended
# in the order down, up, right, left (so the rows are grouped by distance).
brook_action_space = []
for amplitude in range(1, 8):
    brook_action_space += [
        (brook_state[0] + amplitude, brook_state[1]),  # down
        (brook_state[0] - amplitude, brook_state[1]),  # up
        (brook_state[0], brook_state[1] + amplitude),  # right
        (brook_state[0], brook_state[1] - amplitude),  # left
    ]
brook_action_space = np.array(brook_action_space)
possible_brook_actions = 4 * 7

# Black knight: the eight L-shaped jumps, clockwise starting at up-up-right.
bknight_action_space = np.array(
    [
        (bknight_state[0] + d_row, bknight_state[1] + d_col)
        for d_row, d_col in (
            (-2, 1), (-1, 2), (1, 2), (2, 1),
            (2, -1), (1, -2), (-1, -2), (-2, -1),
        )
    ]
)
possible_bknight_actions = 8

All_possible_black_actions = (
    possible_bking_actions + possible_bknight_actions + possible_brook_actions
)
brook_action_space  # notebook cell output; has no effect when run as a script
"""**Defining allowed actions for each piece per move**
1. **Black king**
"""
def degree_freedom_black_king(wking_state, bking_state, bknight_state, brook_state):
    """Compute which one-step moves are allowed for the black king.

    A neighbouring square is allowed when it lies on the board, is not
    occupied by the black knight or the black rook, and is not one of the
    eight squares adjacent to the white king.

    The earlier implementation tested the "not on an own piece" condition
    correctly only for the "up" move; the other seven directions chained
    the four inequalities with ``or``, which is always true for two
    distinct pieces, so the king could step onto its own knight or rook.
    Every direction now uses the same occupancy test.

    Args:
        wking_state: ``[row, col]`` of the white king.
        bking_state: ``[row, col]`` of the black king.
        bknight_state: ``[row, col]`` of the black knight.
        brook_state: ``[row, col]`` of the black rook.

    Returns:
        tuple: ``(dfblack_king, a_black_king, dfbking_black_pieces)``.

        * ``dfblack_king`` -- board-sized int array, 1 on every allowed
          destination square.
        * ``a_black_king`` -- ``(8, 1)`` int array, 1 for each allowed
          action, ordered up, down, left, right, up-left, up-right,
          down-left, down-right.
        * ``dfbking_black_pieces`` -- board-sized int array, 1 on the
          knight and rook squares and on every on-board neighbour of the
          king, whether or not the white king covers it.
    """
    s = size_board[0]  # board side length, read from the module-level size_board

    # (row step, col step) in action-index order.
    king_steps = (
        (-1, 0), (1, 0), (0, -1), (0, 1),
        (-1, -1), (-1, 1), (1, -1), (1, 1),
    )

    dfblack_king = np.zeros([s, s], dtype=int)
    dfbking_black_pieces = np.zeros([s, s], dtype=int)
    dfbking_black_pieces[bknight_state[0], bknight_state[1]] = 1
    dfbking_black_pieces[brook_state[0], brook_state[1]] = 1
    a_black_king = np.zeros([8, 1], dtype=int)

    own_pieces = (
        (bknight_state[0], bknight_state[1]),
        (brook_state[0], brook_state[1]),
    )
    # Squares the white king reaches in one step. They are not clipped to
    # the board: an off-board square can never equal an on-board target.
    wking_reach = [
        (wking_state[0] + d_row, wking_state[1] + d_col)
        for d_row, d_col in king_steps
    ]

    for action, (d_row, d_col) in enumerate(king_steps):
        row = bking_state[0] + d_row
        col = bking_state[1] + d_col
        if not (0 <= row < s and 0 <= col < s):
            continue  # would leave the board
        if (row, col) in own_pieces:
            continue  # occupied by the black knight or rook
        dfbking_black_pieces[row, col] = 1
        if (row, col) in wking_reach:
            continue  # adjacent to the white king
        dfblack_king[row, col] = 1
        a_black_king[action] = 1

    return dfblack_king, a_black_king, dfbking_black_pieces
"""
2. **Black Rook**
"""
def degree_freedom_black_rook(wking_state, bking_state, bknight_state, brook_state):
    """Compute which sliding moves are allowed for the black rook.

    Each of the four rays (down, up, right, left) is walked one square at
    a time. A ray is closed for movement as soon as it meets the black
    king, the black knight or the white king, and stays closed for every
    square further out. An open square is allowed when it is not adjacent
    to the white king, or when it is adjacent to the white king but also
    adjacent to the black king.

    Args:
        wking_state: ``[row, col]`` of the white king.
        bking_state: ``[row, col]`` of the black king.
        bknight_state: ``[row, col]`` of the black knight.
        brook_state: ``[row, col]`` of the black rook.

    Returns:
        tuple: ``(dfblack_rook, a_black_rook, dfbrook_black_pieces)``.

        * ``dfblack_rook`` -- board-sized int array, 1 on every allowed
          destination square.
        * ``a_black_rook`` -- ``(4 * (s - 1), 1)`` int array; the entry at
          ``7 * ray + (distance - 1)`` is 1 for an allowed move, with rays
          numbered down=0, up=1, right=2, left=3.
        * ``dfbrook_black_pieces`` -- board-sized int array, 1 on the
          rook's own square and on every ray square reached before the
          black king or black knight; the white king does NOT stop this
          marking.

    NOTE(review): only on the downward ray are the black king / knight
    squares written as -1 in ``dfbrook_black_pieces``; on the other three
    rays they stay 0. Kept as found -- confirm whether that is intended.
    NOTE(review): ``a_black_rook`` is grouped by ray, whereas the
    module-level ``brook_action_space`` is grouped by distance -- verify
    that callers index the two consistently.
    """
    s = size_board[0]  # board side length, read from the module-level size_board

    king_steps = (
        (-1, 0), (1, 0), (0, -1), (0, 1),
        (-1, -1), (-1, 1), (1, -1), (1, 1),
    )
    # One-step reach of each king (not clipped to the board).
    wking_reach = [
        (wking_state[0] + d_row, wking_state[1] + d_col)
        for d_row, d_col in king_steps
    ]
    bking_reach = [
        (bking_state[0] + d_row, bking_state[1] + d_col)
        for d_row, d_col in king_steps
    ]

    dfblack_rook = np.zeros([s, s], dtype=int)
    dfblack_rook[brook_state[0], brook_state[1]] = 1
    dfbrook_black_pieces = np.zeros([s, s], dtype=int)
    dfbrook_black_pieces[brook_state[0], brook_state[1]] = 1
    a_black_rook = np.zeros([4 * (s - 1), 1], dtype=int)

    # (row step, col step) per ray, in action-block order.
    rays = ((1, 0), (-1, 0), (0, 1), (0, -1))
    for ray, (d_row, d_col) in enumerate(rays):
        ray_closed = False       # met the black king, black knight or white king
        met_bking = False        # met the black king
        met_bknight = False      # met the black knight
        for step in range(s):
            row = brook_state[0] + d_row * (step + 1)
            col = brook_state[1] + d_col * (step + 1)
            if not (0 <= row <= 7 and 0 <= col <= 7):
                break  # ran off the board; further squares are off as well

            on_bking = row == bking_state[0] and col == bking_state[1]
            on_bknight = row == bknight_state[0] and col == bknight_state[1]
            on_wking = row == wking_state[0] and col == wking_state[1]

            if on_bking or on_bknight or on_wking:
                ray_closed = True
            if on_bking:
                met_bking = True
                if ray == 0:
                    dfbrook_black_pieces[row, col] = -1
            if on_bknight:
                met_bknight = True
                if ray == 0:
                    dfbrook_black_pieces[row, col] = -1

            if not ray_closed:
                square = (row, col)
                # Allowed unless the white king covers the square and the
                # black king does not cover it as well.
                if square not in wking_reach or square in bking_reach:
                    dfblack_rook[row, col] = 1
                    a_black_rook[7 * ray + step] = 1

            if not (met_bking or met_bknight):
                dfbrook_black_pieces[row, col] = 1

    # Occupied squares are never destinations.
    dfblack_rook[brook_state[0], brook_state[1]] = 0
    dfblack_rook[bking_state[0], bking_state[1]] = 0
    dfblack_rook[bknight_state[0], bknight_state[1]] = 0
    if wking_state[0] != np.inf:
        dfblack_rook[wking_state[0], wking_state[1]] = 0

    return dfblack_rook, a_black_rook, dfbrook_black_pieces
"""
3. **Black Knight**
"""
def degree_freedom_black_knight(wking_state, bking_state, bknight_state, brook_state):
    """Compute which jumps are allowed for the black knight.

    A target square is allowed when it lies on the board, is not occupied
    by the black king or the black rook, and is either not adjacent to the
    white king or is adjacent to the white king but also adjacent to the
    black king.

    The earlier implementation chained the four "not on an own piece"
    inequalities with ``or``, which is always true for two distinct
    pieces, so the knight could land on its own king or rook. The test is
    now "not the king's square and not the rook's square".

    Args:
        wking_state: ``[row, col]`` of the white king.
        bking_state: ``[row, col]`` of the black king.
        bknight_state: ``[row, col]`` of the black knight.
        brook_state: ``[row, col]`` of the black rook.

    Returns:
        tuple: ``(dfblack_knight, a_black_knight, dfbknight_black_pieces)``.

        * ``dfblack_knight`` -- board-sized int array, 1 on every allowed
          destination square.
        * ``a_black_knight`` -- ``(8, 1)`` int array, 1 for each allowed
          action, ordered up-up-right, up-right-right, down-right-right,
          down-down-right, down-down-left, down-left-left, up-left-left,
          up-up-left.
        * ``dfbknight_black_pieces`` -- board-sized int array, 1 on the
          knight's own square and on every on-board target that is not
          occupied by the black king or rook, whether or not the white
          king covers it.
    """
    s = size_board[0]  # board side length, read from the module-level size_board

    king_steps = (
        (-1, 0), (1, 0), (0, -1), (0, 1),
        (-1, -1), (-1, 1), (1, -1), (1, 1),
    )
    # (row step, col step) per jump, in action-index order.
    knight_jumps = (
        (-2, 1), (-1, 2), (1, 2), (2, 1),
        (2, -1), (1, -2), (-1, -2), (-2, -1),
    )

    dfblack_knight = np.zeros([s, s], dtype=int)
    dfbknight_black_pieces = np.zeros([s, s], dtype=int)
    dfbknight_black_pieces[bknight_state[0], bknight_state[1]] = 1
    a_black_knight = np.zeros([8, 1], dtype=int)

    own_pieces = (
        (bking_state[0], bking_state[1]),
        (brook_state[0], brook_state[1]),
    )
    # One-step reach of each king (not clipped to the board).
    wking_reach = [
        (wking_state[0] + d_row, wking_state[1] + d_col)
        for d_row, d_col in king_steps
    ]
    bking_reach = [
        (bking_state[0] + d_row, bking_state[1] + d_col)
        for d_row, d_col in king_steps
    ]

    for action, (d_row, d_col) in enumerate(knight_jumps):
        row = bknight_state[0] + d_row
        col = bknight_state[1] + d_col
        if not (0 <= row < s and 0 <= col < s):
            continue  # would leave the board
        square = (row, col)
        if square in own_pieces:
            continue  # occupied by the black king or rook
        dfbknight_black_pieces[row, col] = 1
        if square in wking_reach and square not in bking_reach:
            continue  # covered by the white king and not by the black king
        dfblack_knight[row, col] = 1
        a_black_knight[action] = 1

    return dfblack_knight, a_black_knight, dfbknight_black_pieces
"""
4. **White King**
"""
def degree_freedom_white_king(wking_state, bknight_state, brook_state, bking_state):
    """Compute the white king's legal one-square moves and its check status.

    A step is accepted when the target square
      * lies on the board,
      * is not occupied by a black piece (captures are not modelled), and
      * is not covered by black: the eight squares around the black king,
        the eight knight jumps, or any square on the rook's row/column.
        Rook rays are not shortened by pieces standing in the way.

    Args:
        wking_state: [row, col] of the white king.
        bknight_state: [row, col] of the black knight.
        brook_state: [row, col] of the black rook.
        bking_state: [row, col] of the black king.

    Returns:
        dfwhite_king: (s, s) int matrix holding 1 on every square the king
            may step to.
        a_white_king: (8, 1) int column flagging the allowed moves, ordered
            up, down, left, right, up-left, up-right, down-left, down-right.
        check: 1 when the king's current square is flagged (== 1) in the
            module-level ``dfbrook_black_pieces`` or
            ``dfbknight_black_pieces`` table, otherwise -1.

    Note:
        Reads the module globals ``size_board``, ``dfbrook_black_pieces`` and
        ``dfbknight_black_pieces``; the latter two must be up to date for the
        current position before this function is called.
    """
    s = size_board[0]  # board edge length
    dfwhite_king = np.zeros([s, s], dtype=int)  # squares the king may move to
    a_white_king = np.zeros([8, 1], dtype=int)  # one flag per king direction

    # (row, col) offsets; the index of each entry is the action number.
    king_steps = (
        (-1, 0),   # up
        (1, 0),    # down
        (0, -1),   # left
        (0, 1),    # right
        (-1, -1),  # up-left
        (-1, 1),   # up-right
        (1, -1),   # down-left
        (1, 1),    # down-right
    )
    knight_jumps = (
        (-2, 1), (-1, 2), (1, 2), (2, 1),
        (2, -1), (1, -2), (-1, -2), (-2, -1),
    )

    king_row, king_col = int(wking_state[0]), int(wking_state[1])
    knight_sq = (int(bknight_state[0]), int(bknight_state[1]))
    rook_sq = (int(brook_state[0]), int(brook_state[1]))
    bking_sq = (int(bking_state[0]), int(bking_state[1]))

    # Squares the king may not enter because a black piece stands there.
    # The black king's own square is included: a king can never step onto
    # the opposing king.
    occupied = {knight_sq, rook_sq, bking_sq}

    # Squares covered by black. Off-board coordinates may end up in the set;
    # they are harmless because only on-board targets are ever looked up.
    attacked = {(bking_sq[0] + d_row, bking_sq[1] + d_col) for d_row, d_col in king_steps}
    attacked.update(
        (knight_sq[0] + d_row, knight_sq[1] + d_col) for d_row, d_col in knight_jumps
    )
    for amplitude in range(1, s):
        attacked.update((
            (rook_sq[0] + amplitude, rook_sq[1]),  # down
            (rook_sq[0] - amplitude, rook_sq[1]),  # up
            (rook_sq[0], rook_sq[1] + amplitude),  # right
            (rook_sq[0], rook_sq[1] - amplitude),  # left
        ))

    for index, (d_row, d_col) in enumerate(king_steps):
        target = (king_row + d_row, king_col + d_col)
        on_board = 0 <= target[0] < s and 0 <= target[1] < s
        if on_board and target not in occupied and target not in attacked:
            dfwhite_king[target] = 1
            a_white_king[index] = 1

    # The king is in check when its own square is flagged in the rook or
    # knight table kept at module level.
    check = -1
    if (dfbrook_black_pieces[wking_state[0], wking_state[1]] == 1
            or dfbknight_black_pieces[wking_state[0], wking_state[1]] == 1):
        check = 1
    return dfwhite_king, a_white_king, check
# Legal-move tables for the starting position. The rook and knight tables are
# built before the white-king call because degree_freedom_white_king() looks
# up dfbrook_black_pieces and dfbknight_black_pieces as module globals when it
# decides whether the king is in check.
(dfblack_king,
 a_black_king,
 dfbking_black_pieces) = degree_freedom_black_king(
    wking_state, bking_state, bknight_state, brook_state)
(dfblack_rook,
 a_black_rook,
 dfbrook_black_pieces) = degree_freedom_black_rook(
    wking_state, bking_state, bknight_state, brook_state)
(dfblack_knight,
 a_black_knight,
 dfbknight_black_pieces) = degree_freedom_black_knight(
    wking_state, bking_state, bknight_state, brook_state)
dfwhite_king, a_white_king, check = degree_freedom_white_king(
    wking_state=wking_state,
    bknight_state=bknight_state,
    brook_state=brook_state,
    bking_state=bking_state,
)
# Notebook-style inspection of two tables (no effect when run as a script).
dfbrook_black_pieces
visualboard
def states(bknight_state, brook_state, bking_state, wking_state, dfwhite_king, visualboard, check):
    """Encode the current position as the network's input vector.

    The vector is the concatenation of four flattened one-hot planes of the
    board (black king = 1, black rook = 2, black knight = 3, white king = 4,
    in that order) followed by the check flag and the number of squares the
    white king can move to.  For an 8x8 board that is 4 * 64 + 2 = 258 entries.

    Args:
        bknight_state, brook_state, bking_state, wking_state: piece positions;
            accepted for call compatibility but not read - the planes are
            derived from ``visualboard``.
        dfwhite_king: matrix holding 1 on every square the white king may
            move to.
        visualboard: board matrix with the piece codes listed above.
        check: check flag appended as-is to the vector.

    Returns:
        1-D integer array: the four planes, then ``check``, then the white
        king's degrees of freedom.
    """
    board = np.asarray(visualboard)

    # Degrees of freedom of the enemy (white) king.
    wking_dof = int(np.count_nonzero(np.asarray(dfwhite_king) == 1))

    # One flattened 0/1 plane per piece code, in the order 1, 2, 3, 4.
    planes = [(board == piece_code).astype(int).reshape(-1) for piece_code in (1, 2, 3, 4)]

    return np.concatenate(planes + [[check], [wking_dof]])
# Feature vector of the starting position.
x = states(
    bknight_state=bknight_state,
    brook_state=brook_state,
    bking_state=bking_state,
    wking_state=wking_state,
    dfwhite_king=dfwhite_king,
    visualboard=visualboard,
    check=check,
)
# Network layout: one hidden layer between the state vector and one output
# (Q-value) per black action.
n_input_layer = 258   # 4 one-hot 8x8 planes + check flag + white-king mobility
n_hidden_layer = 200  # number of neurons of the hidden layer
n_output_layer = 44   # 8 knight + 28 rook + 8 king actions

# Weights are drawn uniformly from [0, 1) and every row is then scaled so it
# sums to 1. Dividing by the (rows, 1) column of row sums broadcasts across
# the columns, so no tiled copy of the sums is needed.
W1 = np.random.uniform(0, 1, (n_hidden_layer, n_input_layer))
W1 = W1 / W1.sum(axis=1, keepdims=True)
W2 = np.random.uniform(0, 1, (n_output_layer, n_hidden_layer))
W2 = W2 / W2.sum(axis=1, keepdims=True)

# Biases start at zero.
bias_W1 = np.zeros((n_hidden_layer,))
bias_W2 = np.zeros((n_output_layer,))

eta = 0.0035    # learning rate
Alpha = 0.0001  # "exponential" constant; appears unused by the code below
beta = 0.00005  # per-episode decay applied to epsilon
"""**Parameters**""" | |
epsilon_0 = 0.2      # starting epsilon of the e-greedy policy
gamma = 0.85         # discount factor of the SARSA update
total_episodes = 10  # games to play; a game ends on checkmate or draw
"""**Mapping directions for black pieces**"""
# (row, col) offsets; the row index of each table is the direction number.
# King: up, down, left, right, up-left, up-right, down-left, down-right.
mapking = np.array([
    (-1, 0), (1, 0), (0, -1), (0, 1),
    (-1, -1), (-1, 1), (1, -1), (1, 1),
])
# Rook: down, up, right, left (scaled by the number of squares travelled).
maprook = np.array([(1, 0), (-1, 0), (0, 1), (0, -1)])
# Knight: the eight jumps, clockwise starting at up-up-right.
mapknight = np.array([
    (-2, 1), (-1, 2), (1, 2), (2, 1),
    (2, -1), (1, -2), (-1, -2), (-2, -1),
])
# Per-episode logs used for plotting and analysis.
Saved_rewards = np.zeros(total_episodes)
avg_saved_rewards = np.zeros(total_episodes)
Saved_moves = np.zeros(total_episodes)
avg_saved_moves = np.zeros(total_episodes)
Saved_Q = np.zeros(total_episodes)
Saved_actionspolicies = np.zeros(total_episodes)
"""**SARSA Algorithm implementation**""" | |
def _refresh_legal_moves():
    """Recompute the legal-move tables of all four pieces for the current position.

    The results are published as module globals because
    degree_freedom_white_king() reads dfbrook_black_pieces and
    dfbknight_black_pieces from module scope; the rook and knight tables are
    therefore rebuilt before the white king is evaluated.
    """
    global dfblack_king, a_black_king, dfbking_black_pieces
    global dfblack_rook, a_black_rook, dfbrook_black_pieces
    global dfblack_knight, a_black_knight, dfbknight_black_pieces
    global dfwhite_king, a_white_king, check
    dfblack_king, a_black_king, dfbking_black_pieces = degree_freedom_black_king(
        wking_state, bking_state, bknight_state, brook_state)
    dfblack_rook, a_black_rook, dfbrook_black_pieces = degree_freedom_black_rook(
        wking_state, bking_state, bknight_state, brook_state)
    dfblack_knight, a_black_knight, dfbknight_black_pieces = degree_freedom_black_knight(
        wking_state, bking_state, bknight_state, brook_state)
    dfwhite_king, a_white_king, check = degree_freedom_white_king(
        wking_state, bknight_state, brook_state, bking_state)


def _allowed_black_actions():
    """Return the indices of black's legal actions (0-7 knight, 8-35 rook, 36-43 king)."""
    flags = np.concatenate(
        [np.array(a_black_knight), np.array(a_black_rook), np.array(a_black_king)])
    return np.where(flags > 0)[0]


def _forward(features):
    """Return (hidden activations, Q-values) of the two-layer ReLU network."""
    hidden = np.maximum(0, W1.dot(features) + bias_W1)
    return hidden, np.maximum(0, W2.dot(hidden) + bias_W2)


def _epsilon_greedy(q_values, allowed, epsilon):
    """Pick the best allowed action with probability 1 - epsilon, else a random allowed one."""
    if np.random.rand() > epsilon:
        return allowed[np.argmax(q_values[allowed])]
    return np.random.choice(allowed)


def _td_update(features, hidden, q_values, action, target):
    """Take one gradient step moving Q(features, action) towards ``target``.

    Returns the TD error (target - Q), gated by the output ReLU derivative.
    """
    global W1, bias_W1
    # No gradient flows through an output that the ReLU clipped to zero.
    td_error = (target - q_values[action]) * np.heaviside(q_values[action], 0)
    # Back-propagate through the output weights as they were in the forward pass.
    hidden_delta = W2[action] * td_error * np.heaviside(hidden, 0)
    # td_error is (target - Q), so the weights move WITH it to shrink the error.
    W2[action] = W2[action] + eta * td_error * hidden
    bias_W2[action] = bias_W2[action] + eta * td_error
    W1 = W1 + eta * np.outer(hidden_delta, features)
    bias_W1 = bias_W1 + eta * hidden_delta
    return td_error


def _move_piece(board, piece_state, offset, piece_code):
    """Move one piece by ``offset`` on ``board`` and update ``piece_state`` in place."""
    board[piece_state[0], piece_state[1]] = 0
    piece_state[0] = piece_state[0] + offset[0]
    piece_state[1] = piece_state[1] + offset[1]
    board[piece_state[0], piece_state[1]] = piece_code


def _move_black_piece(action):
    """Play black's ``action`` (index into the 44 network outputs) on the board."""
    if action <= 7:  # knight: one entry per jump
        _move_piece(visualboard, bknight_state, mapknight[action], 3)
    elif action <= 35:  # rook: 4 directions x 7 distances
        direction, extra_steps = divmod(action - 8, 7)
        _move_piece(visualboard, brook_state, maprook[direction] * (extra_steps + 1), 2)
    else:  # king: one entry per direction
        _move_piece(visualboard, bking_state, mapking[action - 36], 1)


#Starting
for episode in range(total_episodes):
    print(f"episode: {episode}")
    # Epsilon decays with the episode number, so exploration becomes rarer.
    epsilon_f = epsilon_0 / (1 + beta * episode)
    checkmate = 0  # 0 = not a checkmate, 1 = checkmate - flag
    draw = 0       # 0 = not a draw, 1 = draw - flag
    # Generate a fresh board and the move tables that belong to it.
    visualboard, bking_state, brook_state, bknight_state, wking_state, size_board = Board()
    _refresh_legal_moves()
    m = 0          # plies played in this game (black and white moves)
    R = 0          # reward of the latest step
    action = None  # SARSA: the action picked at the end of a step is played next

    while checkmate == 0 and draw == 0 and check == -1:
        R = 0
        x = states(bknight_state, brook_state, bking_state, wking_state,
                   dfwhite_king, visualboard, check)
        out1, Q = _forward(x)  # activations under the current weights
        if action is None:
            # First step of the game: nothing was carried over yet.
            allowed_a = _allowed_black_actions()
            if allowed_a.size == 0:
                draw = 1  # black has no legal move at all
                break
            action = _epsilon_greedy(Q, allowed_a, epsilon_f)

        # Black (player 1) plays the selected action.
        _move_black_piece(action)
        m += 1
        _refresh_legal_moves()
        print(visualboard)

        king_stuck = np.sum(dfwhite_king) == 0
        rook_mark = dfbrook_black_pieces[wking_state[0], wking_state[1]]
        knight_mark = dfbknight_black_pieces[wking_state[0], wking_state[1]]

        if king_stuck and (rook_mark == 1 or knight_mark == 1):
            # White king cannot move and is attacked: checkmate.
            checkmate = 1
            R = 1  # reward of winning
            _td_update(x, out1, Q, action, R)  # terminal: nothing to bootstrap
        elif king_stuck and rook_mark == 0 and knight_mark == 0:
            # White king cannot move but is not attacked: draw.
            draw = 1
            R = 0.01  # reward of drawing
            _td_update(x, out1, Q, action, R)
        else:
            # White (player 2) answers with a uniformly random legal king move.
            allowed_enemy_a = np.where(a_white_king > 0)[0]
            a_enemy = np.random.choice(allowed_enemy_a)
            _move_piece(visualboard, wking_state, mapking[a_enemy], 4)
            m += 1
            _refresh_legal_moves()

            # Evaluate the position black now faces and pick black's next
            # action there; SARSA bootstraps from that very action.
            x_next = states(bknight_state, brook_state, bking_state, wking_state,
                            dfwhite_king, visualboard, check)
            _, Q_next = _forward(x_next)
            allowed_a = _allowed_black_actions()
            if allowed_a.size == 0:
                # No follow-up action exists, so there is no value to bootstrap from.
                draw = 1
                next_action = None
                target = R
            else:
                next_action = _epsilon_greedy(Q_next, allowed_a, epsilon_f)
                target = R + gamma * Q_next[next_action]

            sarsaQ = _td_update(x, out1, Q, action, target)
            Saved_Q[episode] = sarsaQ  # TD error of the latest non-terminal step
            action = next_action

    # Per-episode statistics; running averages include the current episode.
    Saved_rewards[episode] = R
    avg_saved_rewards[episode] = np.mean(Saved_rewards[:episode + 1])
    Saved_moves[episode] = m
    avg_saved_moves[episode] = np.mean(Saved_moves[:episode + 1])
    # Final position of the game.
    print(visualboard)
fontSize = 18
print("Results for Chess Game using SARSA:")
# One figure per logged series: console heading, data to draw, y-axis caption.
for heading, series, y_caption in (
    ("Average of the number of moves per game:", avg_saved_moves, 'Average Moves Per Game'),
    ("Reward per game:", Saved_rewards, 'Reward Per Game'),
    ("Q(s,a) per game:", Saved_Q, 'Q Per Game'),
):
    print(heading)
    plt.plot(series)
    # Axis labels share the x caption; only the y caption changes per figure.
    plt.xlabel('Number of episodes', fontsize=fontSize)
    plt.ylabel(y_caption, fontsize=fontSize)
    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment