# cvigoe/RL_plot.py

## RL_plot.py
# Pseudocode for producing an RL learning curve with error bars.
# The `_` values are placeholders to fill in; the helper functions called
# below are not defined in this file.
NUM_ALGO_TRIALS = _  # independent repetitions of the full training run
NUM_TRAINING_TIMESTEPS = _  # training steps per repetition
NUM_TESTING_TIMESTEPS = _  # test steps run after every training step

set_seed()  # called once, before any trial starts
for algo_iteration in range(NUM_ALGO_TRIALS):
	# For episodic tasks, keep resetting the env.
	for training_timesteps in range(NUM_TRAINING_TIMESTEPS):
		deploy_policy_with_exploration()
		update_policy_with_new_data()
		# Test phase, run after each policy update; for episodic tasks,
		# keep resetting the env.
		for testing_timesteps in range(NUM_TESTING_TIMESTEPS):
			deploy_policy_without_exploration()
		compute_mean_return_from_test_data()
		# Key the log entry by trial and by the current training step. The
		# loop variable is `training_timesteps`; the previously passed name
		# `training_episode` was never defined.
		add_mean_return_to_log(algo_iteration, training_timesteps)

# NOTE(review): `log` is not assigned anywhere in this file; presumably
# add_mean_return_to_log maintains it -- confirm.
# For each point on the x-axis, aggregate over the NUM_ALGO_TRIALS "samples":
plot_return_mean(log)  # mean of the per-trial mean returns (av. of averages)
plot_return_SD(log)  # SD of the per-trial mean returns (SD of averages)
	# NOTE(review): this section repeats the procedure listed earlier in the
	# file and had lost its nesting; it looks like a paste/extraction
	# duplicate -- consider keeping only one copy.
	# Pseudocode: the `_` values are placeholders to fill in, and the helper
	# functions called below are not defined in this file.
	NUM_ALGO_TRIALS = _  # independent repetitions of the full training run
	NUM_TRAINING_TIMESTEPS = _  # training steps per repetition
	NUM_TESTING_TIMESTEPS = _  # test steps run after every training step

	set_seed()  # called once, before any trial starts
	for algo_iteration in range(NUM_ALGO_TRIALS):
		# For episodic tasks, keep resetting the env.
		for training_timesteps in range(NUM_TRAINING_TIMESTEPS):
			deploy_policy_with_exploration()
			update_policy_with_new_data()
			# Test phase, run after each policy update; for episodic tasks,
			# keep resetting the env.
			for testing_timesteps in range(NUM_TESTING_TIMESTEPS):
				deploy_policy_without_exploration()
			compute_mean_return_from_test_data()
			# Key the log entry by trial and by the current training step.
			# The loop variable is `training_timesteps`; the previously
			# passed name `training_episode` was never defined.
			add_mean_return_to_log(algo_iteration, training_timesteps)

	# For each point on the x-axis, aggregate over the NUM_ALGO_TRIALS "samples":
	plot_return_mean(log)  # mean of the per-trial mean returns (av. of averages)
	plot_return_SD(log)  # SD of the per-trial mean returns (SD of averages)