Skip to content

Instantly share code, notes, and snippets.

@cpatdowling
Last active April 27, 2021 00:50
Show Gist options
  • Save cpatdowling/1ff5583904e51ae86cfe2288107a81c0 to your computer and use it in GitHub Desktop.
A linear transition kernel akin to continuous voting for a multi-agent MDP
import numpy as np
def return_linear_pdf(n):
    """Return the PDF of an n-dimensional linear surface on [0, 1]^n.

    n is the number of dimensions/players in the joint action. The
    returned density P(x) = (2/n) * sum(x) integrates to 1 over the
    unit hypercube [0, 1]^n.
    """
    # Fix: the scraped source had the function body flush-left (SyntaxError);
    # structure restored here.
    P = lambda x: (2.0 / float(n)) * np.sum(x)
    return P
def return_linear_cdf(n):
    """Return the cumulative distribution function for the linear PDF.

    C(X <= z) = (1/n) * sum(z_i^2) is the probability of a multivariate
    linear random variable X being elementwise less than or equal to z,
    for z in [0, 1]^n.
    """
    # Fix: the scraped source had the function body flush-left (SyntaxError);
    # structure restored here.
    C = lambda x: (1.0 / float(n)) * np.sum(np.power(x, 2))
    return C
def scale_action(action, max_action=1.0, min_action=-1.0):
    """Rescale a joint n x 1 action vector onto the [0, 1] interval.

    Actions are assumed to lie in [min_action, max_action] (defaults
    -1 to 1); rescaling keeps the CDF and PDF above simple.
    """
    # Fix: restored indentation lost in the scrape, and the "srescale" typo
    # in the original comment.
    return (action - min_action) / (max_action - min_action)
# example
# We treat a random variable X as linearly distributed; this is like
# continuous voting, where all 0's implies a 0 probability of X being true
# and all 1's implies a probability of 1 that X is true.
# We use this in an MDP context: if X is true, the MDP transitions out
# of the current state with uniform probability across the other states;
# if X is false, the MDP does not transition.
n = 5  # number of agents
# these are lambda functions on action vectors in [0,1] of size n
PDF = return_linear_pdf(n)  # might want this to plot
CDF = return_linear_cdf(n)  # gives probability of X <= z for some fixed z
# actions are defined over the -1,1 interval, but are normalized to 0,1
# for this transition kernel
# suppose the learned policies choose the following joint action, for example
action = np.array([1.0, -0.25, 0.1, -0.89, 0.03])  # actions here defined from -1 to 1
scaled_action = scale_action(action)  # default min and max are -1 and 1
print(scaled_action)
cdf = CDF(scaled_action)
# cdf gives value of a biased coin flip with probability of the event being true;
# can simulate with a uniform draw
# let current state s=1, and M total states
# Fix: M, states, and current_state were referenced but never defined in the
# original; defined here per the comment above.
M = 4
states = list(range(M))
current_state = 1
unif_sample = np.random.uniform(0, 1)
if unif_sample <= cdf:
    # transition to a new state
    transition = True
    # transition uniformly amongst the other states
    # Fix: original read `if s not current_state`, a SyntaxError.
    new_state = np.random.choice([s for s in states if s != current_state])
else:
    # remain at the current state
    transition = False
    new_state = current_state
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment