jesuscmadrigal/RLCHESS.py

## RLCHESS.py
# -*- coding: utf-8 -*-
"""FinalCode_RL_chess.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/116XsQ7P0d22cQlLEwtmNMaIu3h_jCmlT

**Installing libraries**
"""

import numpy as np #for linear algebra
import numpy.matlib #for plots
import matplotlib.pyplot as plt #for plots

"""**Generate chess board**"""

def Board():
    """Build the fixed starting position used by the rest of the script.

    The position holds one white king and three black pieces (king, rook,
    knight) on an 8x8 board.  Coordinates are ``[row, col]`` lists.

    Returns:
        A tuple ``(visualboard, bking_state, brook_state, bknight_state,
        wking_state, size_board)`` where ``visualboard`` is an 8x8 int
        matrix that is 0 on empty squares and carries a piece code on
        occupied ones: 1 = black king, 2 = black rook, 3 = black knight,
        4 = white king.
    """
    # Enemy (white) king.
    wking_state = [1, 0]

    # Black pieces.
    bknight_state = [4, 3]
    brook_state = [1, 3]
    bking_state = [2, 3]

    size_board = (8, 8)
    visualboard = np.zeros([8, 8], dtype=int)

    # Piece codes follow the order of this tuple, starting at 1.
    pieces_in_code_order = (bking_state, brook_state, bknight_state, wking_state)
    for code, square in enumerate(pieces_in_code_order, start=1):
        visualboard[square[0], square[1]] = code

    return visualboard, bking_state, brook_state, bknight_state, wking_state, size_board

# Build the starting position once.  The piece coordinates and size_board
# become module-level globals: the action-space tables below read the
# *_state lists, and the degree_freedom_* functions read size_board.
visualboard, bking_state, brook_state, bknight_state, wking_state, size_board = Board()
# Bare expression left over from the notebook export (it displayed the board
# as cell output); it has no effect when the file runs as a plain script.
visualboard

"""**Defining action spaces of each piece**"""

# Candidate destination squares for every piece, computed from the starting
# position.  No bounds or occupancy filtering happens here, so the tables
# may contain off-board coordinates; legality is decided by the
# degree_freedom_* functions.

# White king: the eight neighbouring squares, ordered
# up, down, left, right, up-left, up-right, down-left, down-right.
wking_action_space = np.array([
    (wking_state[0] + d_row, wking_state[1] + d_col)
    for d_row, d_col in ((-1, 0), (1, 0), (0, -1), (0, 1),
                         (-1, -1), (-1, 1), (1, -1), (1, 1))
])
possible_wking_actions = 8
All_possible_white_actions = possible_wking_actions

# Black king: same eight directions in the same order.
bking_action_space = np.array([
    (bking_state[0] + d_row, bking_state[1] + d_col)
    for d_row, d_col in ((-1, 0), (1, 0), (0, -1), (0, 1),
                         (-1, -1), (-1, 1), (1, -1), (1, 1))
])
possible_bking_actions = 8

# Black rook: slides of 1..7 squares.  Rows are grouped by amplitude, and
# within each amplitude ordered down, up, right, left.
brook_action_space = []
for amplitude in range(1, 8):
    brook_action_space += [
        (brook_state[0] + amplitude, brook_state[1]),  # down
        (brook_state[0] - amplitude, brook_state[1]),  # up
        (brook_state[0], brook_state[1] + amplitude),  # right
        (brook_state[0], brook_state[1] - amplitude),  # left
    ]
brook_action_space = np.array(brook_action_space)
possible_brook_actions = 4 * 7

# Black knight: the eight L-shaped jumps, clockwise starting from
# up-up-right and ending at up-up-left.
bknight_action_space = np.array([
    (bknight_state[0] + d_row, bknight_state[1] + d_col)
    for d_row, d_col in ((-2, 1), (-1, 2), (1, 2), (2, 1),
                         (2, -1), (1, -2), (-1, -2), (-2, -1))
])
possible_bknight_actions = 8

All_possible_black_actions = (possible_bking_actions
                              + possible_bknight_actions
                              + possible_brook_actions)

# Notebook display expression; no effect when run as a script.
brook_action_space

"""**Defining allowed actions for each piece per move**

1.   **Black king**
"""

def degree_freedom_black_king(wking_state, bking_state, bknight_state, brook_state,
                              board_size=None):
    """Compute the squares the black king may step to.

    A one-square step is accepted when the target square is on the board,
    is not occupied by the black knight or the black rook, and is not one
    of the eight squares adjacent to the white king.

    Args:
        wking_state: ``(row, col)`` of the white king.
        bking_state: ``(row, col)`` of the black king.
        bknight_state: ``(row, col)`` of the black knight.
        brook_state: ``(row, col)`` of the black rook.
        board_size: side length of the square board.  When omitted the
            module-level ``size_board[0]`` is used, which is what this
            function always read before the parameter existed.

    Returns:
        A tuple ``(dfblack_king, a_black_king, dfbking_black_pieces)``:

        * ``dfblack_king`` - int matrix, 1 on every square the king may
          move to, 0 elsewhere (including the king's current square).
        * ``a_black_king`` - ``(8, 1)`` int mask of allowed actions in the
          order up, down, left, right, up-left, up-right, down-left,
          down-right.
        * ``dfbking_black_pieces`` - int matrix, 1 on the knight and rook
          squares and on every on-board neighbour of the king that they do
          not occupy, whether or not the white king reaches that neighbour.
    """
    s = size_board[0] if board_size is None else board_size
    king_row, king_col = bking_state[0], bking_state[1]

    dfblack_king = np.zeros([s, s], dtype=int)

    dfbking_black_pieces = np.zeros([s, s], dtype=int)
    dfbking_black_pieces[bknight_state[0], bknight_state[1]] = 1
    dfbking_black_pieces[brook_state[0], brook_state[1]] = 1

    a_black_king = np.zeros([8, 1], dtype=int)

    # Order matters: the position in this tuple is the action index.
    steps = ((-1, 0), (1, 0), (0, -1), (0, 1),
             (-1, -1), (-1, 1), (1, -1), (1, 1))

    for action, (d_row, d_col) in enumerate(steps):
        row, col = king_row + d_row, king_col + d_col
        if not (0 <= row < s and 0 <= col < s):
            continue

        # The target must differ from the knight's square AND from the
        # rook's square.  Seven of the eight original branches chained the
        # four inequalities with `or`, which is always true for two pieces
        # on different squares, so the king could step onto its own pieces.
        on_knight = row == bknight_state[0] and col == bknight_state[1]
        on_rook = row == brook_state[0] and col == brook_state[1]
        if on_knight or on_rook:
            continue

        dfbking_black_pieces[row, col] = 1

        # A Chebyshev distance of exactly 1 means the target is one of the
        # white king's eight neighbours.  The white king's own square is at
        # distance 0 and, as before, is not treated as attacked.
        attacked = max(abs(row - wking_state[0]), abs(col - wking_state[1])) == 1
        if not attacked:
            dfblack_king[row, col] = 1
            a_black_king[action] = 1

    return dfblack_king, a_black_king, dfbking_black_pieces

"""

2.   **Black Rook**

"""

def degree_freedom_black_rook(wking_state, bking_state, bknight_state, brook_state,
                              board_size=None):
    """Compute the squares the black rook may slide to.

    The rook slides down, up, right and left.  A slide stops being legal at
    the first square holding the black king, the black knight or the white
    king (that square included).  A free square next to the white king is
    only accepted when it is also next to the black king.

    Args:
        wking_state: ``(row, col)`` of the white king.  ``np.inf``
            coordinates are tolerated; such a king blocks and attacks
            nothing.
        bking_state: ``(row, col)`` of the black king.
        bknight_state: ``(row, col)`` of the black knight.
        brook_state: ``(row, col)`` of the black rook.
        board_size: side length of the square board.  When omitted the
            module-level ``size_board[0]`` is used, which is what this
            function always read before the parameter existed.

    Returns:
        A tuple ``(dfblack_rook, a_black_rook, dfbrook_black_pieces)``:

        * ``dfblack_rook`` - int matrix, 1 on every legal destination.
        * ``a_black_rook`` - ``(4 * (board_size - 1), 1)`` int mask.  Index
          ``d * (board_size - 1) + (distance - 1)`` with ``d`` = 0 down,
          1 up, 2 right, 3 left.
        * ``dfbrook_black_pieces`` - int matrix, 1 on the rook's square and
          on every square along each line up to (not including) the first
          black piece; the white king does not interrupt this line.

    NOTE(review): the mask groups entries by direction, while the
    module-level ``brook_action_space`` table groups them by amplitude, so
    index k here is not row k there - confirm how callers map the two.
    """
    s = size_board[0] if board_size is None else board_size
    reach = s - 1  # longest possible slide in a single direction
    rook_row, rook_col = brook_state[0], brook_state[1]

    dfblack_rook = np.zeros([s, s], dtype=int)
    dfblack_rook[rook_row, rook_col] = 1

    dfbrook_black_pieces = np.zeros([s, s], dtype=int)
    dfbrook_black_pieces[rook_row, rook_col] = 1

    a_black_rook = np.zeros([4 * reach, 1], dtype=int)

    def occupies(piece, row, col):
        """Return True when `piece` stands on (row, col)."""
        return row == piece[0] and col == piece[1]

    def adjacent(king, row, col):
        """Return True when (row, col) is one of the king's eight neighbours."""
        return max(abs(row - king[0]), abs(col - king[1])) == 1

    # Order matters: the position in this tuple selects the mask section.
    directions = ((1, 0), (-1, 0), (0, 1), (0, -1))  # down, up, right, left

    for direction, (d_row, d_col) in enumerate(directions):
        move_blocked = False   # a piece of either colour has been met
        cover_blocked = False  # a black piece has been met

        for distance in range(1, s):
            row = rook_row + d_row * distance
            col = rook_col + d_col * distance
            if not (0 <= row < s and 0 <= col < s):
                break

            on_black_piece = (occupies(bking_state, row, col)
                              or occupies(bknight_state, row, col))
            if on_black_piece or occupies(wking_state, row, col):
                move_blocked = True

            if on_black_piece:
                cover_blocked = True
                if direction == 0:
                    # NOTE(review): only the "down" direction tags the
                    # blocking black piece with -1; the other three leave
                    # it at 0.  Kept as found - confirm which is intended.
                    dfbrook_black_pieces[row, col] = -1

            if not move_blocked:
                # Safe square, or attacked by the white king but guarded
                # by the black king.
                if not adjacent(wking_state, row, col) or adjacent(bking_state, row, col):
                    dfblack_rook[row, col] = 1
                    a_black_rook[direction * reach + distance - 1] = 1

            if not cover_blocked:
                dfbrook_black_pieces[row, col] = 1

    # Occupied squares are never destinations.
    dfblack_rook[rook_row, rook_col] = 0
    dfblack_rook[bking_state[0], bking_state[1]] = 0
    dfblack_rook[bknight_state[0], bknight_state[1]] = 0

    # np.inf coordinates cannot be used as an index, hence the guard.
    if wking_state[0] != np.inf:
        dfblack_rook[wking_state[0], wking_state[1]] = 0

    return dfblack_rook, a_black_rook, dfbrook_black_pieces

"""

3.   **Black Knight**

"""

def degree_freedom_black_knight(wking_state, bking_state, bknight_state, brook_state,
                                board_size=None):
    """Compute the squares the black knight may jump to.

    A jump is accepted when the target is on the board, is not occupied by
    the black king or the black rook, and is either not adjacent to the
    white king or is adjacent to the black king (i.e. guarded by it).

    Args:
        wking_state: ``(row, col)`` of the white king.
        bking_state: ``(row, col)`` of the black king.
        bknight_state: ``(row, col)`` of the black knight.
        brook_state: ``(row, col)`` of the black rook.
        board_size: side length of the square board.  When omitted the
            module-level ``size_board[0]`` is used, which is what this
            function always read before the parameter existed.

    Returns:
        A tuple ``(dfblack_knight, a_black_knight, dfbknight_black_pieces)``:

        * ``dfblack_knight`` - int matrix, 1 on every legal destination.
        * ``a_black_knight`` - ``(8, 1)`` int mask of allowed actions in the
          order up-up-right, up-right-right, down-right-right,
          down-down-right, down-down-left, down-left-left, up-left-left,
          up-up-left.
        * ``dfbknight_black_pieces`` - int matrix, 1 on the knight's square
          and on every on-board jump target.
    """
    s = size_board[0] if board_size is None else board_size
    knight_row, knight_col = bknight_state[0], bknight_state[1]

    dfblack_knight = np.zeros([s, s], dtype=int)

    dfbknight_black_pieces = np.zeros([s, s], dtype=int)
    dfbknight_black_pieces[knight_row, knight_col] = 1

    a_black_knight = np.zeros([8, 1], dtype=int)

    # Order matters: the position in this tuple is the action index.
    jumps = ((-2, 1), (-1, 2), (1, 2), (2, 1),
             (2, -1), (1, -2), (-1, -2), (-2, -1))

    for action, (d_row, d_col) in enumerate(jumps):
        row, col = knight_row + d_row, knight_col + d_col
        if not (0 <= row < s and 0 <= col < s):
            continue

        # NOTE(review): the original marked every on-board target here,
        # including squares held by the black king or rook, because its
        # occupancy test was always true.  That output is kept unchanged;
        # confirm whether own-piece squares should count as covered.
        dfbknight_black_pieces[row, col] = 1

        # The target must differ from the king's square AND from the rook's
        # square.  The original chained the four inequalities with `or`,
        # which is always true for two pieces on different squares, so the
        # knight could land on its own pieces.
        on_king = row == bking_state[0] and col == bking_state[1]
        on_rook = row == brook_state[0] and col == brook_state[1]
        if on_king or on_rook:
            continue

        # A Chebyshev distance of exactly 1 means "one of that king's eight
        # neighbours".
        attacked = max(abs(row - wking_state[0]), abs(col - wking_state[1])) == 1
        guarded = max(abs(row - bking_state[0]), abs(col - bking_state[1])) == 1
        if not attacked or guarded:
            dfblack_knight[row, col] = 1
            a_black_knight[action] = 1

    return dfblack_knight, a_black_knight, dfbknight_black_pieces

"""

4.   **White King**

"""

def degree_freedom_white_king(wking_state, bknight_state, brook_state, bking_state):
    """Compute the White King's allowed moves and its check status.

    A neighbouring square is allowed when it lies on the board, is not the
    square of the Black Knight or the Black Rook (captures are not modelled),
    and is not covered by the Black King, the Black Knight or the Black Rook.

    NOTE: the Rook is treated as covering its whole row and column; pieces
    standing in between are not treated as blockers by this function.

    Args:
        wking_state: [row, col] of the White King.
        bknight_state: [row, col] of the Black Knight.
        brook_state: [row, col] of the Black Rook.
        bking_state: [row, col] of the Black King.

    Returns:
        Tuple ``(dfwhite_king, a_white_king, check)``:
        dfwhite_king -- (s, s) int matrix, 1 on every allowed destination.
        a_white_king -- (8, 1) int array, 1 for every allowed move, ordered
            up, down, left, right, up-left, up-right, down-left, down-right.
        check -- 1 if the king's current square is marked in the module-level
            ``dfbrook_black_pieces`` or ``dfbknight_black_pieces`` matrices,
            otherwise -1.

    The board size comes from the module-level ``size_board``; the two
    matrices named above must already be up to date when this is called.
    """
    s = size_board[0]  # board side length (8)

    king_r, king_c = int(wking_state[0]), int(wking_state[1])
    knight_sq = (int(bknight_state[0]), int(bknight_state[1]))
    rook_sq = (int(brook_state[0]), int(brook_state[1]))
    bking_sq = (int(bking_state[0]), int(bking_state[1]))

    # King steps, listed in the same order as the rows of a_white_king.
    king_steps = ((-1, 0), (1, 0), (0, -1), (0, 1),
                  (-1, -1), (-1, 1), (1, -1), (1, 1))
    knight_jumps = ((-2, 1), (-1, 2), (1, 2), (2, 1),
                    (2, -1), (1, -2), (-1, -2), (-2, -1))

    # Every square covered by a black piece; off-board entries are harmless
    # because candidate squares are bounds-checked before the lookup.
    covered = {(bking_sq[0] + dr, bking_sq[1] + dc) for dr, dc in king_steps}
    covered.update((knight_sq[0] + dr, knight_sq[1] + dc) for dr, dc in knight_jumps)
    for amplitude in range(1, s):
        covered.update((
            (rook_sq[0] + amplitude, rook_sq[1]),  # down
            (rook_sq[0] - amplitude, rook_sq[1]),  # up
            (rook_sq[0], rook_sq[1] + amplitude),  # right
            (rook_sq[0], rook_sq[1] - amplitude),  # left
        ))

    dfwhite_king = np.zeros([s, s], dtype=int)  # allowed destination squares
    a_white_king = np.zeros([8, 1], dtype=int)  # allowed action flags

    for move_idx, (dr, dc) in enumerate(king_steps):
        target = (king_r + dr, king_c + dc)
        if not (0 <= target[0] < s and 0 <= target[1] < s):
            continue  # would leave the board
        if target == knight_sq or target == rook_sq:
            continue  # occupied by a black piece; capturing is not modelled
        if target in covered:
            continue  # the king may not step onto a covered square
        dfwhite_king[target] = 1
        a_white_king[move_idx] = 1

    # Check status relies on the attack matrices computed at module level by
    # the black rook / black knight degree-of-freedom functions.
    check = -1
    if dfbrook_black_pieces[king_r, king_c] == 1 or dfbknight_black_pieces[king_r, king_c] == 1:
        check = 1

    return dfwhite_king, a_white_king, check

# Evaluate the starting position once at module level.
# NOTE: the order matters. degree_freedom_white_king reads the module-level
# dfbrook_black_pieces and dfbknight_black_pieces matrices, so the rook and
# knight calls have to run before the white-king call.
dfblack_king, a_black_king, dfbking_black_pieces=degree_freedom_black_king(wking_state,bking_state,bknight_state,brook_state)
dfblack_rook, a_black_rook, dfbrook_black_pieces=degree_freedom_black_rook(wking_state,bking_state,bknight_state,brook_state)
dfblack_knight, a_black_knight, dfbknight_black_pieces=degree_freedom_black_knight(wking_state,bking_state,bknight_state,brook_state)
dfwhite_king, a_white_king, check = degree_freedom_white_king(wking_state, bknight_state, brook_state, bking_state)

# Bare expression left over from the notebook (displays the matrix in a cell;
# has no effect when the file runs as a script).
dfbrook_black_pieces

# Bare expression left over from the notebook (displays the board in a cell).
visualboard

def states(bknight_state, brook_state, bking_state, wking_state, dfwhite_king, visualboard, check):
    """Encode the current position as the flat input vector of the Q-network.

    The vector is the concatenation of four flattened 0/1 planes of the board
    (one per piece code: 1 Black King, 2 Black Rook, 3 Black Knight,
    4 White King), followed by ``check`` and by the number of squares marked
    1 in ``dfwhite_king`` (the White King's degrees of freedom).

    Args:
        bknight_state, brook_state, bking_state, wking_state: piece positions;
            not read here (the board already encodes them) and kept only so
            existing callers keep working.
        dfwhite_king: matrix with 1 on every square the White King may reach.
        visualboard: board matrix holding the piece codes listed above.
        check: check flag appended verbatim to the vector.

    Returns:
        1-D integer array of length ``4 * board_size + 2`` (258 for 8x8).
    """
    board = np.asarray(visualboard)

    # One flattened indicator plane per piece code, in the order 1, 2, 3, 4.
    planes = [(board == code).astype(int).reshape(-1) for code in (1, 2, 3, 4)]

    # Degrees of freedom of the enemy king
    wking_dof = int(np.count_nonzero(np.asarray(dfwhite_king) == 1))

    return np.concatenate(planes + [[check], [wking_dof]])

# Feature vector of the starting position. Its length (4 board planes of 64
# entries plus 2 scalars) is what n_input_layer has to match.
x = states(bknight_state, brook_state, bking_state, wking_state, dfwhite_king, visualboard, check)
# Network dimensions
n_input_layer = 258   # input neurons: 4 board planes of 64 squares + check flag + king mobility
n_hidden_layer = 200  # hidden neurons
n_output_layer = 44   # output neurons: 8 knight + 28 rook + 8 king actions

# Uniform random weights; every row is rescaled to sum to 1. The row sums keep
# their axis (keepdims) so the division broadcasts across the columns.
W1 = np.random.uniform(0, 1, (n_hidden_layer, n_input_layer))
W1 = W1 / W1.sum(axis=1, keepdims=True)
W2 = np.random.uniform(0, 1, (n_output_layer, n_hidden_layer))
W2 = W2 / W2.sum(axis=1, keepdims=True)

# Biases start at zero
bias_W1 = np.zeros((n_hidden_layer,))
bias_W2 = np.zeros((n_output_layer,))

eta = 0.0035     # learning rate
Alpha = 0.0001   # exponential
beta = 0.00005   # decay factor applied to epsilon per episode

"""**Parameters**"""

# Exploration and discounting
epsilon_0 = 0.2  # starting epsilon of the e-greedy policy
gamma = 0.85     # discount factor of the SARSA target

# Number of games; a game ends with a checkmate or a draw
total_episodes = 10

"""**Mapping directions for black pieces**"""

# (row, col) offset of each king action, indexed by action number
mapking = np.array([
    [-1, 0], [1, 0], [0, -1], [0, 1],
    [-1, -1], [-1, 1], [1, -1], [1, 1],
])
# Unit (row, col) offset of each rook direction; scaled by the step count
maprook = np.array([
    [1, 0], [-1, 0], [0, 1], [0, -1],
])
# (row, col) offset of each knight action, indexed by action number
mapknight = np.array([
    [-2, 1], [-1, 2], [1, 2], [2, 1],
    [2, -1], [1, -2], [-1, -2], [-2, -1],
])

# Per-episode logs used later for plotting and analysis; each name gets its
# own zero-filled array with one slot per episode.
(Saved_rewards,
 avg_saved_rewards,
 Saved_moves,
 avg_saved_moves,
 Saved_Q,
 Saved_actionspolicies) = (np.zeros(total_episodes) for _ in range(6))

"""**SARSA Algorithm implementation**"""

def _epsilon_greedy(q_values, allowed, epsilon):
    """Pick one of the ``allowed`` action indices with an epsilon-greedy rule.

    With probability ``1 - epsilon`` the allowed action with the largest
    Q-value is returned (the first one on ties); otherwise a uniformly random
    allowed action is returned.

    Args:
        q_values: 1-D array of Q-values, one per action.
        allowed: 1-D integer array with the indices of the allowed actions.
        epsilon: exploration probability.

    Raises:
        ValueError: if ``allowed`` is empty.
    """
    if len(allowed) == 0:
        raise ValueError("no allowed action to choose from")
    if np.random.rand() > epsilon:
        return allowed[int(np.argmax(q_values[allowed]))]
    return np.random.choice(allowed)


def _apply_move(board, piece_state, offset, piece_code):
    """Move one piece by ``offset``, updating ``board`` and ``piece_state`` in place."""
    board[piece_state[0], piece_state[1]] = 0
    piece_state[0] = piece_state[0] + offset[0]
    piece_state[1] = piece_state[1] + offset[1]
    board[piece_state[0], piece_state[1]] = piece_code


# Training: one episode is one game, played until checkmate or draw.
for episode in range(total_episodes):
    print(f"episode: {episode}")
    # Epsilon shrinks with the episode index, so exploration becomes rarer.
    epsilon_f = epsilon_0 / (1 + beta * episode)
    checkmate = 0  # 0 = not a checkmate, 1 = checkmate - flag
    draw = 0  # 0 = not a draw, 1 = draw - flag

    # Fresh board for this game
    visualboard, bking_state, brook_state, bknight_state, wking_state, size_board = Board()

    # Allowed actions of every piece. The white-king call must come last: it
    # reads the module-level dfbrook_black_pieces / dfbknight_black_pieces.
    dfblack_king, a_black_king, dfbking_black_pieces = degree_freedom_black_king(wking_state, bking_state, bknight_state, brook_state)
    dfblack_rook, a_black_rook, dfbrook_black_pieces = degree_freedom_black_rook(wking_state, bking_state, bknight_state, brook_state)
    dfblack_knight, a_black_knight, dfbknight_black_pieces = degree_freedom_black_knight(wking_state, bking_state, bknight_state, brook_state)
    dfwhite_king, a_white_king, check = degree_freedom_white_king(wking_state, bknight_state, brook_state, bking_state)

    m = 0  # number of piece moves played in this game (black and white)
    action = None  # SARSA carries the action chosen for the next state over

    while checkmate == 0 and draw == 0 and check == -1:

        R = 0  # Reward

        # Forward pass for the current state (ReLU hidden and output layers)
        x = states(bknight_state, brook_state, bking_state, wking_state, dfwhite_king, visualboard, check)
        out1 = np.maximum(0, W1.dot(x) + bias_W1)
        Q = np.maximum(0, W2.dot(out1) + bias_W2)

        if action is None:
            # First move of the game: nothing was carried over yet.
            a = np.concatenate([np.array(a_black_knight), np.array(a_black_rook), np.array(a_black_king)])  # 0-7: knight, 8-35: rook, 36-43: king
            allowed_a = np.where(a > 0)[0]
            action = _epsilon_greedy(Q, allowed_a, epsilon_f)

        # Move the black piece selected by the action index
        if 36 <= action <= 43:  # king
            _apply_move(visualboard, bking_state, mapking[action - 36], 1)
        elif 0 <= action <= 7:  # knight
            _apply_move(visualboard, bknight_state, mapknight[action], 3)
        else:  # rook: 4 directions x 7 step lengths, stored as actions 8-35
            direction, extra_steps = divmod(action - 8, 7)
            _apply_move(visualboard, brook_state, maprook[direction] * (extra_steps + 1), 2)

        m += 1

        # A piece moved, so every set of allowed actions has to be recomputed.
        dfblack_king, a_black_king, dfbking_black_pieces = degree_freedom_black_king(wking_state, bking_state, bknight_state, brook_state)
        dfblack_rook, a_black_rook, dfbrook_black_pieces = degree_freedom_black_rook(wking_state, bking_state, bknight_state, brook_state)
        dfblack_knight, a_black_knight, dfbknight_black_pieces = degree_freedom_black_knight(wking_state, bking_state, bknight_state, brook_state)
        dfwhite_king, a_white_king, check = degree_freedom_white_king(wking_state, bknight_state, brook_state, bking_state)

        print(visualboard)

        white_in_check = (dfbrook_black_pieces[wking_state[0], wking_state[1]] == 1
                          or dfbknight_black_pieces[wking_state[0], wking_state[1]] == 1)

        if np.sum(dfwhite_king) == 0:
            # The white king cannot move: checkmate if it is attacked,
            # otherwise a draw. No bootstrapping from a terminal state.
            if white_in_check:
                checkmate = 1
                R = 1  # reward of winning
            else:
                draw = 1
                R = 0.01  # reward of drawing
            next_action = None
            target = R
        else:
            # Player 2 is not learning: the white king takes a random allowed move.
            allowed_enemy_a = np.where(a_white_king > 0)[0]
            a_enemy = np.random.choice(allowed_enemy_a)
            _apply_move(visualboard, wking_state, mapking[a_enemy], 4)
            m += 1

            dfblack_king, a_black_king, dfbking_black_pieces = degree_freedom_black_king(wking_state, bking_state, bknight_state, brook_state)
            dfblack_rook, a_black_rook, dfbrook_black_pieces = degree_freedom_black_rook(wking_state, bking_state, bknight_state, brook_state)
            dfblack_knight, a_black_knight, dfbknight_black_pieces = degree_freedom_black_knight(wking_state, bking_state, bknight_state, brook_state)
            dfwhite_king, a_white_king, check = degree_freedom_white_king(wking_state, bknight_state, brook_state, bking_state)

            # Q-values of the successor state, computed from its own features
            x_next = states(bknight_state, brook_state, bking_state, wking_state, dfwhite_king, visualboard, check)
            out1_next = np.maximum(0, W1.dot(x_next) + bias_W1)
            Q_next = np.maximum(0, W2.dot(out1_next) + bias_W2)

            # The next action is chosen among the moves allowed in the new position
            new_a = np.concatenate([np.array(a_black_knight), np.array(a_black_rook), np.array(a_black_king)])
            allowed_a = np.where(new_a > 0)[0]
            next_action = _epsilon_greedy(Q_next, allowed_a, epsilon_f)

            # SARSA target: bootstrap from the action that will really be played
            target = R + gamma * Q_next[next_action]

        # Gradient-descent step on 0.5 * (target - Q[action])**2. The heaviside
        # factors are the ReLU derivatives; the hidden delta is taken before
        # the W2 row changes.
        sarsaQ = (target - Q[action]) * np.heaviside(Q[action], 0)
        out1delta = W2[action] * sarsaQ * np.heaviside(out1, 0)
        W2[action] += eta * sarsaQ * out1
        bias_W2[action] += eta * sarsaQ
        W1 += eta * np.outer(out1delta, x)
        bias_W1 += eta * out1delta

        # Logs for plotting; running averages include the current episode
        Saved_rewards[episode] = R  # reward per game
        avg_saved_rewards[episode] = np.mean(Saved_rewards[:episode + 1])
        Saved_moves[episode] = m  # moves per game
        avg_saved_moves[episode] = np.mean(Saved_moves[:episode + 1])

        if checkmate or draw:
            break

        Saved_Q[episode] = sarsaQ  # TD error of the latest non-terminal step
        print(visualboard)

        action = next_action

fontSize = 18

print("Results for Chess Game using SARSA:")

# One figure per logged series: (console heading, data, y-axis label),
# shown in this order.
for heading, series, y_label in (
    ("Average of the number of moves per game:", avg_saved_moves, 'Average Moves Per Game'),
    ("Reward per game:", Saved_rewards, 'Reward Per Game'),
    ("Q(s,a) per game:", Saved_Q, 'Q Per Game'),
):
    print(heading)
    plt.plot(series)
    # axis labels
    plt.xlabel('Number of episodes', fontsize=fontSize)
    plt.ylabel(y_label, fontsize=fontSize)
    plt.show()