Skip to content

Instantly share code, notes, and snippets.

@cpatdowling
Last active April 27, 2021 00:50
Show Gist options
  • Save cpatdowling/1ff5583904e51ae86cfe2288107a81c0 to your computer and use it in GitHub Desktop.
A linear transition kernel akin to continuous voting for a multi-agent MDP
import numpy as np
def return_linear_pdf(n):
    """Return the PDF of an n-dimensional linear surface on [0, 1]^n.

    n is the number of dimensions/players in the joint action. The
    returned density P(x) = (2/n) * sum(x) integrates to 1 over the
    unit hypercube [0, 1]^n.
    """
    # Fix: the scraped source had the function body flush-left (SyntaxError);
    # structure restored here.
    P = lambda x: (2.0 / float(n)) * np.sum(x)
    return P
def return_linear_cdf(n):
    """Return the cumulative distribution function for the linear PDF.

    C(X <= z) = (1/n) * sum(z_i^2) is the probability of a multivariate
    linear random variable X being elementwise less than or equal to z,
    for z in [0, 1]^n.
    """
    # Fix: the scraped source had the function body flush-left (SyntaxError);
    # structure restored here.
    C = lambda x: (1.0 / float(n)) * np.sum(np.power(x, 2))
    return C
def scale_action(action, max_action=1.0, min_action=-1.0):
    """Rescale a joint n x 1 action vector onto the [0, 1] interval.

    Actions are assumed to lie in [min_action, max_action] (defaults
    -1 to 1); rescaling keeps the CDF and PDF above simple.
    """
    # Fix: restored indentation lost in the scrape, and the "srescale" typo
    # in the original comment.
    return (action - min_action) / (max_action - min_action)
# example
# We treat a random variable X as linearly distributed; this is like
# continuous voting, where all 0's implies a 0 probability of X being true
# and all 1's implies a probability of 1 that X is true.
# We use this in an MDP context: if X is true, the MDP transitions out
# of the current state with uniform probability across the other states;
# if X is false, the MDP does not transition.
n = 5  # number of agents
# these are lambda functions on action vectors in [0,1] of size n
PDF = return_linear_pdf(n)  # might want this to plot
CDF = return_linear_cdf(n)  # gives probability of X <= z for some fixed z
# actions are defined over the -1,1 interval, but are normalized to 0,1
# for this transition kernel
# suppose the learned policies choose the following joint action, for example
action = np.array([1.0, -0.25, 0.1, -0.89, 0.03])  # actions here defined from -1 to 1
scaled_action = scale_action(action)  # default min and max are -1 and 1
print(scaled_action)
cdf = CDF(scaled_action)
# cdf gives value of a biased coin flip with probability of the event being true;
# can simulate with a uniform draw
# let current state s=1, and M total states
# Fix: M, states, and current_state were referenced but never defined in the
# original; defined here per the comment above.
M = 4
states = list(range(M))
current_state = 1
unif_sample = np.random.uniform(0, 1)
if unif_sample <= cdf:
    # transition to a new state
    transition = True
    # transition uniformly amongst the other states
    # Fix: original read `if s not current_state`, a SyntaxError.
    new_state = np.random.choice([s for s in states if s != current_state])
else:
    # remain at the current state
    transition = False
    new_state = current_state
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment