SAC example configuration (DeepCoord DRL agent)
# copied from our private repo: https://github.com/RealVNF/rl-coordination/blob/master/res/config/agent/sac/sac_obs1_combi_64hid_099gam_00001tau_001alp_001ent_005exp.yaml
# for DeepCoord DRL agent:
# https://github.com/RealVNF/DeepCoord
# module for configuring the RL agent
# configuration parameters are loaded and used both when using the agent via the CLI and via the interface
# all parameters are required, defaults are in comments
# observation_space = ['ingress_traffic', 'node_load']
observation_space:
- ingress_traffic
# Agent type: SAC or DDPG
agent_type: 'SAC'
# shuffle the order of nodes in state and action. slower but should be more effective. default = False
shuffle_nodes: False
# Number of steps per episode
episode_steps: 200
# NN Config for actor and critic
hidden_layers: [64]
# Delay config for normalization of reward
# Reward weights
flow_reward_weight: 2
delay_reward_weight: 1
gamma: 0.99
learning_rate: 0.01
buffer_size: 10000 # Same as mem_limit?
learning_starts: 0
train_freq: 1
batch_size: 64
tau: 0.0001
ent_coef: 0.01
target_update_interval: 1
gradient_steps: 1
target_entropy: 'auto'
action_noise: None
random_exploration: 0.05
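
The hyperparameter names above (gamma, learning_rate, buffer_size, learning_starts, train_freq, batch_size, tau, ent_coef, target_update_interval, gradient_steps, target_entropy, random_exploration) line up one-to-one with the constructor arguments of the SAC implementation in stable-baselines (v2), so the config presumably gets forwarded there. As an illustration only, not the DeepCoord code itself, here is a minimal sketch of loading the file with PyYAML and passing the matching values to stable_baselines.SAC; the gym environment is a placeholder, and the filename is the one from the repo path above.

```python
# Minimal sketch, NOT the DeepCoord implementation: load the YAML config above
# and forward the matching hyperparameters to stable-baselines' (v2) SAC.
import yaml
import gym
from stable_baselines import SAC
from stable_baselines.sac.policies import MlpPolicy

with open("sac_obs1_combi_64hid_099gam_00001tau_001alp_001ent_005exp.yaml") as f:
    config = yaml.safe_load(f)

# Keys whose names match stable-baselines' SAC constructor arguments.
# action_noise is skipped: YAML parses the value 'None' as a string, so it is
# left at the constructor default (None) here.
sac_keys = [
    "gamma", "learning_rate", "buffer_size", "learning_starts", "train_freq",
    "batch_size", "tau", "ent_coef", "target_update_interval",
    "gradient_steps", "target_entropy", "random_exploration",
]
sac_kwargs = {key: config[key] for key in sac_keys}

# Placeholder continuous-control env; DeepCoord uses its own coordination env.
env = gym.make("Pendulum-v0")

model = SAC(
    MlpPolicy,
    env,
    policy_kwargs=dict(layers=config["hidden_layers"]),  # [64]: one hidden layer
    **sac_kwargs,
)
model.learn(total_timesteps=10 * config["episode_steps"])
```

The remaining keys (agent_type, observation_space, shuffle_nodes, episode_steps, and the reward weights) are agent- and environment-level options rather than SAC constructor arguments, so they would be consumed elsewhere in the agent.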