Skip to content

Instantly share code, notes, and snippets.

@jesuscmadrigal
Created April 11, 2024 23:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jesuscmadrigal/1ffbd6e1042941834aa6200d671f2034 to your computer and use it in GitHub Desktop.
Save jesuscmadrigal/1ffbd6e1042941834aa6200d671f2034 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""FinalCode_RL_chess.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/116XsQ7P0d22cQlLEwtmNMaIu3h_jCmlT
**Installing libraries**
"""
import numpy as np #for linear algebra
import numpy.matlib #for plots
import matplotlib.pyplot as plt #for plots
"""**Generate chess board**"""
def Board():
    """Build the starting position of the endgame.

    The position is a game in progress that is intended to finish in two
    moves: black has a king, a rook and a knight, white only has its king.

    Returns:
        tuple: ``(visualboard, bking_state, brook_state, bknight_state,
        wking_state, size_board)``. Every ``*_state`` is a ``[row, col]``
        list, ``size_board`` is ``(8, 8)`` and ``visualboard`` is an integer
        array of that shape holding 0 for an empty square, 1 for the black
        king, 2 for the black rook, 3 for the black knight and 4 for the
        white king.
    """
    wking_state = [1, 0]    # white (enemy) king
    bknight_state = [4, 3]  # black knight
    brook_state = [1, 3]    # black rook (the original comment mentions 7,1 as an earlier square)
    bking_state = [2, 3]    # black king
    size_board = (8, 8)

    # Take the array shape from size_board so the two can never disagree.
    visualboard = np.zeros(size_board, dtype=int)
    visualboard[bking_state[0], bking_state[1]] = 1
    visualboard[brook_state[0], brook_state[1]] = 2
    visualboard[bknight_state[0], bknight_state[1]] = 3
    visualboard[wking_state[0], wking_state[1]] = 4
    return visualboard, bking_state, brook_state, bknight_state, wking_state, size_board
# Build the starting position once; every action space below is derived
# from the piece squares it returns.
(visualboard, bking_state, brook_state,
 bknight_state, wking_state, size_board) = Board()
visualboard
"""**Defining actions spaces of each piece**"""
# (row, col) steps. "up" means a smaller row index, "left" a smaller column.
# King order: up, down, left, right, up-left, up-right, down-left, down-right.
_KING_STEPS = (
    (-1, 0), (1, 0), (0, -1), (0, 1),
    (-1, -1), (-1, 1), (1, -1), (1, 1),
)
# Knight order: up-up-right, up-right-right, down-right-right, down-down-right,
# down-down-left, down-left-left, up-left-left, up-up-left.
_KNIGHT_STEPS = (
    (-2, 1), (-1, 2), (1, 2), (2, 1),
    (2, -1), (1, -2), (-1, -2), (-2, -1),
)

# White king: the eight neighbouring squares (not clipped to the board).
wking_action_space = np.array(
    [(wking_state[0] + d_row, wking_state[1] + d_col) for d_row, d_col in _KING_STEPS]
)
possible_wking_actions = len(_KING_STEPS)
All_possible_white_actions = possible_wking_actions

# Black king: same eight steps, taken from the black king's square.
bking_action_space = np.array(
    [(bking_state[0] + d_row, bking_state[1] + d_col) for d_row, d_col in _KING_STEPS]
)
possible_bking_actions = len(_KING_STEPS)

# Black rook: for each slide length 1..7 the rows are appended in the order
# down, up, right, left, so the four directions are interleaved per amplitude.
brook_action_space = []
for amplitude in range(1, 8):
    brook_action_space += [
        (brook_state[0] + amplitude, brook_state[1]),  # down
        (brook_state[0] - amplitude, brook_state[1]),  # up
        (brook_state[0], brook_state[1] + amplitude),  # right
        (brook_state[0], brook_state[1] - amplitude),  # left
    ]
brook_action_space = np.array(brook_action_space)
possible_brook_actions = 7 * 4

# Black knight: the eight L-shaped jumps (not clipped to the board).
bknight_action_space = np.array(
    [(bknight_state[0] + d_row, bknight_state[1] + d_col) for d_row, d_col in _KNIGHT_STEPS]
)
possible_bknight_actions = len(_KNIGHT_STEPS)

All_possible_black_actions = (
    possible_bking_actions + possible_bknight_actions + possible_brook_actions
)
brook_action_space
"""**Defining allowed actions for each piece per move**
1. **Black king**
"""
def degree_freedom_black_king(wking_state, bking_state, bknight_state, brook_state):
    """Compute where the black king may move from the given position.

    A neighbouring square is allowed when it is on the board, is not occupied
    by the black knight or the black rook, and is not one of the eight
    squares next to the white king.

    The original code only tested the "not occupied by an own piece"
    condition correctly for the "up" move; the other seven directions used a
    chain of ``or`` that was always true, so the king could be sent onto its
    own knight or rook. All eight directions now share one correct check.

    Args:
        wking_state: ``[row, col]`` of the white king.
        bking_state: ``[row, col]`` of the black king.
        bknight_state: ``[row, col]`` of the black knight.
        brook_state: ``[row, col]`` of the black rook.

    Returns:
        tuple: ``(dfblack_king, a_black_king, dfbking_black_pieces)``.

        * ``dfblack_king`` -- ``s x s`` int array, 1 on every allowed
          destination square (the king's current square is 0).
        * ``a_black_king`` -- ``(8, 1)`` int array, 1 for every allowed
          action, in the order up, down, left, right, up-left, up-right,
          down-left, down-right ("up" is a smaller row index).
        * ``dfbking_black_pieces`` -- ``s x s`` int array, 1 on the knight
          and rook squares and on every on-board neighbour of the king that
          is not occupied by them, whether or not the white king reaches it.
    """
    s = size_board[0]  # board side length, read from the module-level size_board
    king_steps = (
        (-1, 0), (1, 0), (0, -1), (0, 1),
        (-1, -1), (-1, 1), (1, -1), (1, 1),
    )

    dfblack_king = np.zeros([s, s], dtype=int)
    dfbking_black_pieces = np.zeros([s, s], dtype=int)
    dfbking_black_pieces[bknight_state[0], bknight_state[1]] = 1
    dfbking_black_pieces[brook_state[0], brook_state[1]] = 1
    a_black_king = np.zeros([8, 1], dtype=int)

    own_pieces = (bknight_state, brook_state)
    # Squares next to the white king; they may lie off the board, which is
    # harmless because they are only ever compared against on-board targets.
    wking_reach = [
        (wking_state[0] + d_row, wking_state[1] + d_col) for d_row, d_col in king_steps
    ]

    for action, (d_row, d_col) in enumerate(king_steps):
        row = bking_state[0] + d_row
        col = bking_state[1] + d_col
        if not (0 <= row < s and 0 <= col < s):
            continue  # would leave the board
        if any(row == piece[0] and col == piece[1] for piece in own_pieces):
            continue  # square already holds the black knight or rook
        dfbking_black_pieces[row, col] = 1
        if any(row == w_row and col == w_col for w_row, w_col in wking_reach):
            continue  # kings may not stand next to each other
        dfblack_king[row, col] = 1
        a_black_king[action] = 1

    return dfblack_king, a_black_king, dfbking_black_pieces
"""
2. **Black Rook**
"""
def degree_freedom_black_rook(wking_state, bking_state, bknight_state, brook_state):
    """Compute where the black rook may move from the given position.

    The rook slides down, up, right and left. A ray is closed for movement
    from the first square that holds the black king, the black knight or the
    white king. An open square next to the white king is only allowed when
    it is also next to the black king.

    Compared with the original, the four copy-pasted direction branches are
    one loop and every bound/offset is derived from the board size ``s``
    instead of the literals 7, 14 and 21, so boards other than 8x8 no longer
    index out of range. Results on an 8x8 board are unchanged.

    Args:
        wking_state: ``[row, col]`` of the white king. The first entry may be
            ``np.inf``; in that case the white king's square is not cleared
            in the result.
        bking_state: ``[row, col]`` of the black king.
        bknight_state: ``[row, col]`` of the black knight.
        brook_state: ``[row, col]`` of the black rook.

    Returns:
        tuple: ``(dfblack_rook, a_black_rook, dfbrook_black_pieces)``.

        * ``dfblack_rook`` -- ``s x s`` int array, 1 on every allowed
          destination square.
        * ``a_black_rook`` -- ``(4 * (s - 1), 1)`` int array made of four
          consecutive groups of ``s - 1`` slots (down, up, right, left);
          slot ``j`` of a group is a slide of ``j + 1`` squares.
        * ``dfbrook_black_pieces`` -- ``s x s`` int array, 1 on the rook's
          square and on every ray square up to (not including) the first
          black piece; the white king does not stop this marking.
    """
    s = size_board[0]  # board side length, read from the module-level size_board
    reach = s - 1      # longest slide, and number of action slots per direction
    king_steps = (
        (-1, 0), (1, 0), (0, -1), (0, 1),
        (-1, -1), (-1, 1), (1, -1), (1, 1),
    )
    # Order fixes the layout of a_black_rook: down, up, right, left.
    # NOTE(review): the module-level brook_action_space interleaves the four
    # directions per amplitude instead -- confirm callers map indices correctly.
    rook_steps = ((1, 0), (-1, 0), (0, 1), (0, -1))

    dfblack_rook = np.zeros([s, s], dtype=int)
    dfbrook_black_pieces = np.zeros([s, s], dtype=int)
    dfbrook_black_pieces[brook_state[0], brook_state[1]] = 1
    a_black_rook = np.zeros([4 * reach, 1], dtype=int)

    # Neighbour squares of both kings; off-board entries are harmless because
    # they are only compared against on-board targets.
    wking_reach = [
        (wking_state[0] + d_row, wking_state[1] + d_col) for d_row, d_col in king_steps
    ]
    bking_reach = [
        (bking_state[0] + d_row, bking_state[1] + d_col) for d_row, d_col in king_steps
    ]

    def is_square(row, col, square):
        """True when (row, col) is the given [row, col] square."""
        return row == square[0] and col == square[1]

    def is_among(row, col, squares):
        """True when (row, col) equals any square of the list."""
        return any(row == s_row and col == s_col for s_row, s_col in squares)

    for direction, (d_row, d_col) in enumerate(rook_steps):
        blocked = False          # a piece (either colour) was met on this ray
        blocked_by_own = False   # the black king or knight was met on this ray
        for j in range(reach):
            row = brook_state[0] + d_row * (j + 1)
            col = brook_state[1] + d_col * (j + 1)
            if not (0 <= row < s and 0 <= col < s):
                break  # the ray has left the board

            hits_own = is_square(row, col, bking_state) or is_square(row, col, bknight_state)
            if hits_own or is_square(row, col, wking_state):
                blocked = True
            if hits_own:
                blocked_by_own = True
                if direction == 0:
                    # NOTE(review): only the "down" ray writes -1 on an own
                    # piece; the other rays leave it at 0. Kept as in the
                    # original -- confirm whether the asymmetry is intended.
                    dfbrook_black_pieces[row, col] = -1

            if not blocked:
                attacked = is_among(row, col, wking_reach)
                if not attacked or is_among(row, col, bking_reach):
                    dfblack_rook[row, col] = 1
                    a_black_rook[direction * reach + j] = 1

            if not blocked_by_own:
                dfbrook_black_pieces[row, col] = 1

    # Occupied squares are never destinations (kept from the original).
    dfblack_rook[brook_state[0], brook_state[1]] = 0
    dfblack_rook[bking_state[0], bking_state[1]] = 0
    dfblack_rook[bknight_state[0], bknight_state[1]] = 0
    if wking_state[0] != np.inf:
        dfblack_rook[wking_state[0], wking_state[1]] = 0
    return dfblack_rook, a_black_rook, dfbrook_black_pieces
"""
3. **Black Knight**
"""
def degree_freedom_black_knight(wking_state, bking_state, bknight_state, brook_state):
    """Compute where the black knight may move from the given position.

    A jump is allowed when the target is on the board, is not occupied by
    the black king or the black rook, and is either outside the eight
    squares next to the white king or next to the black king.

    The original guarded "not occupied by an own piece" with a chain of
    ``or`` that was always true, so the knight could be sent onto its own
    king or rook. That check is now effective for the move outputs. The
    third return value is left exactly as the original computed it.

    Args:
        wking_state: ``[row, col]`` of the white king.
        bking_state: ``[row, col]`` of the black king.
        bknight_state: ``[row, col]`` of the black knight.
        brook_state: ``[row, col]`` of the black rook.

    Returns:
        tuple: ``(dfblack_knight, a_black_knight, dfbknight_black_pieces)``.

        * ``dfblack_knight`` -- ``s x s`` int array, 1 on every allowed
          destination square (the knight's current square is 0).
        * ``a_black_knight`` -- ``(8, 1)`` int array, 1 for every allowed
          action, in the order up-up-right, up-right-right,
          down-right-right, down-down-right, down-down-left, down-left-left,
          up-left-left, up-up-left ("up" is a smaller row index).
        * ``dfbknight_black_pieces`` -- ``s x s`` int array, 1 on the
          knight's square and on every on-board jump target, including
          targets that hold the black king or rook.
    """
    s = size_board[0]  # board side length, read from the module-level size_board
    king_steps = (
        (-1, 0), (1, 0), (0, -1), (0, 1),
        (-1, -1), (-1, 1), (1, -1), (1, 1),
    )
    knight_steps = (
        (-2, 1), (-1, 2), (1, 2), (2, 1),
        (2, -1), (1, -2), (-1, -2), (-2, -1),
    )

    dfblack_knight = np.zeros([s, s], dtype=int)
    dfbknight_black_pieces = np.zeros([s, s], dtype=int)
    dfbknight_black_pieces[bknight_state[0], bknight_state[1]] = 1
    a_black_knight = np.zeros([8, 1], dtype=int)

    own_pieces = (bking_state, brook_state)
    # Neighbour squares of both kings; off-board entries are harmless because
    # they are only compared against on-board targets.
    wking_reach = [
        (wking_state[0] + d_row, wking_state[1] + d_col) for d_row, d_col in king_steps
    ]
    bking_reach = [
        (bking_state[0] + d_row, bking_state[1] + d_col) for d_row, d_col in king_steps
    ]

    def is_among(row, col, squares):
        """True when (row, col) equals any square of the sequence."""
        return any(row == sq[0] and col == sq[1] for sq in squares)

    for action, (d_row, d_col) in enumerate(knight_steps):
        row = bknight_state[0] + d_row
        col = bknight_state[1] + d_col
        if not (0 <= row < s and 0 <= col < s):
            continue  # would leave the board

        # Marked for every on-board target, exactly as the original did.
        dfbknight_black_pieces[row, col] = 1

        if is_among(row, col, own_pieces):
            continue  # square already holds the black king or rook

        # Next to the white king is only acceptable when the black king
        # is also next to that square.
        if not is_among(row, col, wking_reach) or is_among(row, col, bking_reach):
            dfblack_knight[row, col] = 1
            a_black_knight[action] = 1

    return dfblack_knight, a_black_knight, dfbknight_black_pieces
"""
4. **White King**
"""
def degree_freedom_white_king(wking_state, bknight_state, brook_state, bking_state,
                              rook_check_map=None, knight_check_map=None):
    """Compute the legal moves of the white (enemy) king and its check status.

    A one-square king step is legal when the target square
      * lies on the board,
      * is not the square occupied by the black knight or the black rook, and
      * is not attacked by the black king, the black knight or the black rook.
    Rook attacks are traced along the whole rank and file without taking
    blocking pieces into account (same rule as the previous implementation).

    Args:
        wking_state: ``[row, col]`` of the white king.
        bknight_state: ``[row, col]`` of the black knight.
        brook_state: ``[row, col]`` of the black rook.
        bking_state: ``[row, col]`` of the black king.
        rook_check_map: optional board-sized matrix; the king is reported in
            check when it holds 1 at the king's square.  Defaults to the
            module-level ``dfbrook_black_pieces`` (what this function has
            always consulted), so existing four-argument calls are unchanged.
        knight_check_map: same as ``rook_check_map`` for the knight; defaults
            to the module-level ``dfbknight_black_pieces``.

    Returns:
        ``(dfwhite_king, a_white_king, check)`` where ``dfwhite_king`` is an
        ``s x s`` int matrix with 1 on every square the king may move to,
        ``a_white_king`` is an ``(8, 1)`` int column flagging the allowed
        directions in the order up, down, left, right, up-left, up-right,
        down-left, down-right, and ``check`` is 1 if the king is in check,
        otherwise -1.

    Raises:
        NameError: if a check map is omitted and the corresponding
            module-level matrix has not been computed yet.
    """
    s = size_board[0]  # board edge length (8 for the board built by Board())

    # Keep the historical behaviour for callers that pass only the positions:
    # the check flag is then read from the module-level matrices.
    if rook_check_map is None:
        rook_check_map = dfbrook_black_pieces
    if knight_check_map is None:
        knight_check_map = dfbknight_black_pieces

    king_row, king_col = int(wking_state[0]), int(wking_state[1])
    knight_row, knight_col = int(bknight_state[0]), int(bknight_state[1])
    rook_row, rook_col = int(brook_state[0]), int(brook_state[1])
    bking_row, bking_col = int(bking_state[0]), int(bking_state[1])

    # Step offsets; the order defines the meaning of each a_white_king row.
    king_steps = ((-1, 0), (1, 0), (0, -1), (0, 1),
                  (-1, -1), (-1, 1), (1, -1), (1, 1))
    knight_jumps = ((-2, 1), (-1, 2), (1, 2), (2, 1),
                    (2, -1), (1, -2), (-1, -2), (-2, -1))

    # Squares the white king may not step on: the knight's and rook's own
    # squares plus everything attacked by a black piece.  Off-board
    # coordinates may end up in the set; they are harmless because targets
    # are bounds-checked before the lookup.
    forbidden = {(knight_row, knight_col), (rook_row, rook_col)}
    forbidden.update((bking_row + dr, bking_col + dc) for dr, dc in king_steps)
    forbidden.update((knight_row + dr, knight_col + dc) for dr, dc in knight_jumps)
    for amplitude in range(1, s):
        forbidden.add((rook_row + amplitude, rook_col))  # down
        forbidden.add((rook_row - amplitude, rook_col))  # up
        forbidden.add((rook_row, rook_col + amplitude))  # right
        forbidden.add((rook_row, rook_col - amplitude))  # left

    dfwhite_king = np.zeros([s, s], dtype=int)   # reachable squares
    a_white_king = np.zeros([8, 1], dtype=int)   # allowed directions
    for index, (dr, dc) in enumerate(king_steps):
        row, col = king_row + dr, king_col + dc
        if 0 <= row < s and 0 <= col < s and (row, col) not in forbidden:
            dfwhite_king[row, col] = 1
            a_white_king[index] = 1

    in_check = (rook_check_map[king_row, king_col] == 1
                or knight_check_map[king_row, king_col] == 1)
    check = 1 if in_check else -1
    return dfwhite_king, a_white_king, check
# Move tables for the starting position.  The white king is evaluated last
# because degree_freedom_white_king consults the module-level
# dfbrook_black_pieces / dfbknight_black_pieces when it decides `check`.
(dfblack_king, a_black_king, dfbking_black_pieces) = degree_freedom_black_king(
    wking_state, bking_state, bknight_state, brook_state
)
(dfblack_rook, a_black_rook, dfbrook_black_pieces) = degree_freedom_black_rook(
    wking_state, bking_state, bknight_state, brook_state
)
(dfblack_knight, a_black_knight, dfbknight_black_pieces) = degree_freedom_black_knight(
    wking_state, bking_state, bknight_state, brook_state
)
(dfwhite_king, a_white_king, check) = degree_freedom_white_king(
    wking_state, bknight_state, brook_state, bking_state
)

# Notebook-style inspection of intermediate values (no effect in a script).
dfbrook_black_pieces
visualboard
def states(bknight_state, brook_state, bking_state, wking_state, dfwhite_king, visualboard, check):
    """Encode the current position as the flat input vector of the network.

    The vector is the concatenation of four one-hot planes of the board
    (flattened row by row), followed by the check flag and the number of
    squares the white king can move to.  For an 8x8 board this gives
    4 * 64 + 2 = 258 entries.

    Args:
        bknight_state, brook_state, bking_state, wking_state: piece positions.
            They are accepted for interface compatibility but not read; the
            positions are taken from ``visualboard``.
        dfwhite_king: matrix with 1 on every square the white king may reach.
        visualboard: board matrix using the codes 1 = black king,
            2 = black rook, 3 = black knight, 4 = white king.
        check: check flag of the white king, stored as-is in the vector.

    Returns:
        1-D array ``[king plane, rook plane, knight plane, white-king plane,
        check, white-king mobility]``.
    """
    board = np.asarray(visualboard)
    # One flattened 0/1 plane per piece code, in the order the network expects.
    planes = [(board == code).astype(int).reshape(-1) for code in (1, 2, 3, 4)]
    # Degrees of freedom of the enemy king.
    wking_dof = int(np.count_nonzero(np.asarray(dfwhite_king) == 1))
    return np.concatenate(planes + [[check], [wking_dof]])
# Feature vector of the starting position; the training loop recomputes `x`
# from the live board before every use.
x = states(bknight_state, brook_state, bking_state, wking_state, dfwhite_king, visualboard, check)
# Network dimensions.
n_input_layer = 258   # 4 one-hot 8x8 piece planes (256) + check flag + white-king mobility
n_hidden_layer = 200  # Number of neurons of the hidden layer
n_output_layer = 44   # 8 knight + 28 rook (4 directions x 7 distances) + 8 king actions

# Random positive weights, each row normalised to sum to 1.  Plain broadcasting
# replaces the former np.matlib.repmat tiling and yields the same values.
W1 = np.random.uniform(0, 1, (n_hidden_layer, n_input_layer))
W1 = W1 / W1.sum(axis=1, keepdims=True)
W2 = np.random.uniform(0, 1, (n_output_layer, n_hidden_layer))
W2 = W2 / W2.sum(axis=1, keepdims=True)
bias_W1 = np.zeros((n_hidden_layer,))  # biases start at zero
bias_W2 = np.zeros((n_output_layer,))

eta = 0.0035    # learning rate
Alpha = 0.0001  # exponential factor (not referenced by the training loop below)
beta = 0.00005  # decay rate of the exploration epsilon: eps = epsilon_0 / (1 + beta * episode)
"""**Parameters**"""
epsilon_0 = 0.2      # starting epsilon of the e-greedy policy
gamma = 0.85         # discount factor of the temporal-difference target
total_episodes = 10  # number of games; a game ends with a checkmate or a draw
"""**Mapping directions for black pieces**"""
# Row/column displacement of every king direction (also used for the white king):
# up, down, left, right, up-left, up-right, down-left, down-right.
mapking = np.array([
    [-1, 0],
    [1, 0],
    [0, -1],
    [0, 1],
    [-1, -1],
    [-1, 1],
    [1, -1],
    [1, 1],
])
# Unit displacement of the rook directions: down, up, right, left.
maprook = np.array([
    [1, 0],
    [-1, 0],
    [0, 1],
    [0, -1],
])
# Knight jumps, clockwise starting from up-up-right.
mapknight = np.array([
    [-2, 1],
    [-1, 2],
    [1, 2],
    [2, 1],
    [2, -1],
    [1, -2],
    [-1, -2],
    [-2, -1],
])
# Per-episode statistics collected for plotting and analysis.
Saved_rewards = np.zeros([total_episodes])
avg_saved_rewards = np.zeros([total_episodes])
Saved_moves = np.zeros([total_episodes])
avg_saved_moves = np.zeros([total_episodes])
Saved_Q = np.zeros([total_episodes])
Saved_actionspolicies = np.zeros([total_episodes])
"""**SARSA Algorithm implementation**"""
#Starting
# Output indices of the network: 0-7 knight jumps, 8-35 rook moves
# (4 directions x 7 distances), 36-43 black-king steps.
#
# Corrections made in this revision of the loop:
#   * Q_next is computed from x_next (it used the previous hidden activations).
#   * the next action is drawn from the new allowed set using Q_next.
#   * the update trains the action that was actually played and bootstraps on
#     Q_next[next_action] (SARSA); the next action is then carried over and
#     executed on the following turn.
#   * weights move towards the target (w += eta * delta * input); the former
#     "w -= ..." pushed Q away from it.
#   * running averages include the current episode (no empty-slice NaN).
#   * the move counter advances once per piece actually moved.


def _refresh_moves():
    """Recompute the move tables of all four pieces for the current position."""
    global dfblack_king, a_black_king, dfbking_black_pieces
    global dfblack_rook, a_black_rook, dfbrook_black_pieces
    global dfblack_knight, a_black_knight, dfbknight_black_pieces
    global dfwhite_king, a_white_king, check
    dfblack_king, a_black_king, dfbking_black_pieces = degree_freedom_black_king(
        wking_state, bking_state, bknight_state, brook_state)
    dfblack_rook, a_black_rook, dfbrook_black_pieces = degree_freedom_black_rook(
        wking_state, bking_state, bknight_state, brook_state)
    dfblack_knight, a_black_knight, dfbknight_black_pieces = degree_freedom_black_knight(
        wking_state, bking_state, bknight_state, brook_state)
    # Must come last: it reads the rook/knight matrices refreshed just above.
    dfwhite_king, a_white_king, check = degree_freedom_white_king(
        wking_state, bknight_state, brook_state, bking_state)


def _black_allowed_actions():
    """Return the output indices of the moves black may currently play."""
    flags = np.concatenate([np.array(a_black_knight),
                            np.array(a_black_rook),
                            np.array(a_black_king)])
    return np.where(flags > 0)[0]


def _forward(features):
    """Run the two-layer ReLU network; return (hidden activations, Q-values)."""
    hidden = np.maximum(0, W1.dot(features) + bias_W1)
    return hidden, np.maximum(0, W2.dot(hidden) + bias_W2)


def _select_action(q_values, allowed, epsilon):
    """Epsilon-greedy choice among the `allowed` output indices."""
    if np.random.rand() > epsilon:
        return allowed[int(np.argmax(q_values[allowed]))]  # best allowed Q
    return np.random.choice(allowed)                       # explore


def _td_update(features, hidden, q_values, action, target):
    """Take one gradient step moving Q(features, action) towards `target`.

    Returns the temporal-difference error, gated by the ReLU derivative of
    the output unit.
    """
    global W1, bias_W1
    delta = (target - q_values[action]) * np.heaviside(q_values[action], 0)
    # Back-propagate through the output row *before* that row is modified.
    hidden_delta = W2[action] * delta * np.heaviside(hidden, 0)
    W2[action] = W2[action] + eta * delta * hidden
    bias_W2[action] = bias_W2[action] + eta * delta
    W1 = W1 + eta * np.outer(hidden_delta, features)
    bias_W1 = bias_W1 + eta * hidden_delta
    return delta


def _decode_action(action):
    """Translate an output index into (piece position list, board code, displacement)."""
    if 0 <= action <= 7:       # knight
        return bknight_state, 3, mapknight[action]
    if 36 <= action <= 43:     # king
        return bking_state, 1, mapking[action - 36]
    # rook: 8-14, 15-21, 22-28, 29-35 -> directions 0..3, distances 1..7
    direction, extra = divmod(action - 8, 7)
    return brook_state, 2, maprook[direction] * (extra + 1)


def _move_piece(position, displacement, code):
    """Move a piece on `visualboard` and update its position list in place."""
    visualboard[position[0], position[1]] = 0
    position[0] = position[0] + displacement[0]
    position[1] = position[1] + displacement[1]
    visualboard[position[0], position[1]] = code


for episode in range(total_episodes):
    print(f"episode: {episode}")
    # Exploration probability shrinks slowly with the episode number.
    epsilon_f = epsilon_0 / (1 + beta * episode)
    checkmate = 0  # 0 = not a checkmate, 1 = checkmate - flag
    draw = 0       # 0 = not a draw, 1 = draw - flag
    # generating board game
    visualboard, bking_state, brook_state, bknight_state, wking_state, size_board = Board()
    _refresh_moves()
    m = 0          # pieces moved in this game (black and white)
    action = None  # picked on the first turn, afterwards carried over (SARSA)

    while checkmate == 0 and draw == 0 and check == -1:
        R = 0  # Reward
        x = states(bknight_state, brook_state, bking_state, wking_state, dfwhite_king, visualboard, check)
        out1, Q = _forward(x)
        if action is None:
            allowed_a = _black_allowed_actions()
            action = _select_action(Q, allowed_a, epsilon_f)

        # Player 1: move the black piece selected by the policy.
        piece_state, piece_code, mov = _decode_action(action)
        _move_piece(piece_state, mov, piece_code)
        m += 1
        _refresh_moves()
        print(visualboard)

        king_stuck = np.sum(dfwhite_king) == 0
        king_attacked = (dfbrook_black_pieces[wking_state[0], wking_state[1]] == 1
                         or dfbknight_black_pieces[wking_state[0], wking_state[1]] == 1)
        next_action = None

        if king_stuck and king_attacked:
            # White king has no move and is attacked by a black piece.
            checkmate = 1
            R = 1     # reward of winning
            target = R
        elif king_stuck:
            # White king has no move but is not attacked.
            draw = 1
            R = 0.01  # reward of drawing
            target = R
        else:
            # Player 2: the white king makes a uniformly random legal move.
            allowed_enemy_a = np.where(a_white_king > 0)[0]
            a_enemy = np.random.choice(allowed_enemy_a)
            _move_piece(wking_state, mapking[a_enemy], 4)
            m += 1
            _refresh_moves()

            # On-policy bootstrap: evaluate the new position and pick the
            # action black will actually play next.
            x_next = states(bknight_state, brook_state, bking_state, wking_state, dfwhite_king, visualboard, check)
            _, Q_next = _forward(x_next)
            allowed_a = _black_allowed_actions()
            next_action = _select_action(Q_next, allowed_a, epsilon_f)
            target = R + gamma * Q_next[next_action]

        sarsaQ = _td_update(x, out1, Q, action, target)

        #Updating data for plotting
        Saved_rewards[episode] = R                                          # reward per game
        avg_saved_rewards[episode] = np.mean(Saved_rewards[:episode + 1])   # running average reward
        Saved_moves[episode] = m                                            # moves per game
        avg_saved_moves[episode] = np.mean(Saved_moves[:episode + 1])       # running average moves
        if next_action is not None:
            Saved_Q[episode] = sarsaQ  # last non-terminal TD error of the game
            action = next_action

    # Final position of the game.
    print(visualboard)
fontSize = 18
print("Results for Chess Game using SARSA:")

# One figure per collected statistic: (console heading, data series, y-axis label).
for heading, series, y_label in (
    ("Average of the number of moves per game:", avg_saved_moves, 'Average Moves Per Game'),
    ("Reward per game:", Saved_rewards, 'Reward Per Game'),
    ("Q(s,a) per game:", Saved_Q, 'Q Per Game'),
):
    print(heading)
    plt.plot(series)
    # set axis labels
    plt.xlabel('Number of episodes', fontsize=fontSize)
    plt.ylabel(y_label, fontsize=fontSize)
    plt.show()  # plot
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment