@Paulescu
Created March 4, 2022 10:16
import gym
import mlflow
import numpy as np
import optuna

# Project-specific helpers assumed to be importable from the accompanying repo:
# QAgent, train, evaluate, get_agent_id, sample_hyper_parameters, set_seed,
# TENSORBOARD_LOG_DIR, SAVED_AGENTS_DIR


def objective(
    trial: optuna.trial.Trial,
    force_linear_model: bool = False,
    n_episodes_to_train: int = 200,
) -> float:
    """
    Samples hyperparameters, trains, and evaluates the RL agent.
    Returns the average reward over 1,000 evaluation episodes.
    """
    env_name = 'CartPole-v1'
    env = gym.make(env_name)

    with mlflow.start_run():

        # generate a unique agent_id for this trial
        agent_id = get_agent_id(env_name)
        mlflow.log_param('agent_id', agent_id)

        # sample hyperparameters for this trial and log them to MLflow
        args = sample_hyper_parameters(trial,
                                       force_linear_model=force_linear_model)
        mlflow.log_params(trial.params)

        # fix seeds to ensure reproducible runs
        set_seed(env, args['seed'])

        # create the agent with the sampled hyperparameters
        agent = QAgent(
            env,
            learning_rate=args['learning_rate'],
            discount_factor=args['discount_factor'],
            batch_size=args['batch_size'],
            memory_size=args['memory_size'],
            freq_steps_train=args['freq_steps_train'],
            freq_steps_update_target=args['freq_steps_update_target'],
            n_steps_warm_up_memory=args['n_steps_warm_up_memory'],
            n_gradient_steps=args['n_gradient_steps'],
            nn_hidden_layers=args['nn_hidden_layers'],
            max_grad_norm=args['max_grad_norm'],
            normalize_state=args['normalize_state'],
            epsilon_start=args['epsilon_start'],
            epsilon_end=args['epsilon_end'],
            steps_epsilon_decay=args['steps_epsilon_decay'],
            log_dir=TENSORBOARD_LOG_DIR / env_name / agent_id,
        )

        # train the agent and save it to disk
        train(agent,
              env,
              n_episodes=n_episodes_to_train,
              log_dir=TENSORBOARD_LOG_DIR / env_name / agent_id)
        agent.save_to_disk(SAVED_AGENTS_DIR / env_name / agent_id)

        # evaluate the trained agent with a fully greedy policy (epsilon=0)
        rewards, steps = evaluate(agent, env, n_episodes=1000, epsilon=0.00)
        mean_reward = np.mean(rewards)
        std_reward = np.std(rewards)
        mlflow.log_metric('mean_reward', mean_reward)
        mlflow.log_metric('std_reward', std_reward)

        return mean_reward
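
The objective relies on sample_hyper_parameters to draw one hyperparameter configuration per trial. The real implementation lives in the author's repo; the sketch below is only an illustration of how Optuna's trial.suggest_* API is typically used for a few of the keys the agent expects (the names are taken from the objective above, but the ranges and the force_linear_model behavior are assumptions).

def sample_hyper_parameters(trial: optuna.trial.Trial,
                            force_linear_model: bool = False) -> dict:
    # Illustrative sketch only: ranges below are assumptions, not the repo's values.
    return {
        'seed': trial.suggest_int('seed', 0, 2 ** 16),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True),
        'discount_factor': trial.suggest_float('discount_factor', 0.90, 0.99),
        'batch_size': trial.suggest_categorical('batch_size', [32, 64, 128]),
        # ... the remaining keys used by QAgent (memory_size, freq_steps_train,
        # nn_hidden_layers, epsilon schedule, etc.) would be sampled the same way;
        # force_linear_model would presumably pin nn_hidden_layers to None
        # so the Q-function stays linear.
    }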
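
For context, here is a minimal sketch of how this objective can be plugged into an Optuna study. The study name and trial budget are illustrative assumptions, not part of the original gist.

# hypothetical driver script: maximize mean_reward over a number of trials
study = optuna.create_study(
    study_name='cartpole_q_agent',   # assumed name
    direction='maximize',            # higher mean reward is better
)
study.optimize(
    lambda trial: objective(trial, n_episodes_to_train=200),
    n_trials=50,                     # assumed trial budget
)

print('Best hyperparameters:', study.best_trial.params)
print('Best mean reward:', study.best_value)

Because the objective logs every trial to MLflow (agent_id, sampled hyperparameters, mean and std reward), the Optuna search history and the MLflow runs can be cross-referenced after the study finishes.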