Skip to content

Instantly share code, notes, and snippets.

@d13g0
Last active September 9, 2023 16:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save d13g0/76f3d092313d261308528297da21bbba to your computer and use it in GitHub Desktop.
Save d13g0/76f3d092313d261308528297da21bbba to your computer and use it in GitHub Desktop.
Monitoring HPO in ClearML
##
# project: ClearML Tutorial
# date: Sep 2nd 2023
import os
import logging
import time
from datetime import datetime
import hydra
from omegaconf import DictConfig
from clearml import Task
from clearml.automation import DiscreteParameterRange
from clearml.automation import GridSearch, HyperParameterOptimizer
from utils import get_semantic_id
@hydra.main(version_base=None, config_path='conf', config_name='config-lesson-3')
def run_optimization(cfg: DictConfig):
    """Create and monitor a ClearML hyper-parameter optimization (HPO) run.

    Clones the base experiment once per hyper-parameter combination (grid
    search over the batch size), dispatches the clones to the configured
    execution queue, and prints monitoring information every 10 seconds
    until the optimizer's time budget is exhausted.

    Args:
        cfg: Hydra configuration; only ``cfg.project_name`` is read here.

    Raises:
        ValueError: if ``base_task_id`` or ``queue`` has not been filled in.
    """
    # Human-readable experiment name, e.g. HPO-task-valerie-eelgrass
    task_name = f'HPO-task-{get_semantic_id(2)}'
    print(f'Experiment Name: {task_name}')

    # The already-run experiment to optimize and the agent queue that will
    # execute the cloned tasks. Fail fast with a clear message instead of
    # letting the optimizer error out later with an obscure one.
    base_task_id = ''  # add task id here
    queue = ''  # select queue here
    if not base_task_id:
        raise ValueError('base_task_id is empty: set it to the ID of the experiment to optimize')
    if not queue:
        raise ValueError('queue is empty: set it to the ClearML execution queue name')

    # Register this controller script itself as a ClearML task so the HPO
    # session shows up in the project (side effect only; handle not needed).
    Task.init(project_name=cfg.project_name,
              task_name=task_name,
              tags=['HPO', 'Hydra'])

    # Grid search over the batch size. The objective is the 'loss'/'test'
    # scalar, minimized. Time limits are in minutes -- NOTE(review): confirm
    # units against the ClearML HyperParameterOptimizer documentation.
    optimizer = HyperParameterOptimizer(
        base_task_id=base_task_id,
        hyper_parameters=[
            DiscreteParameterRange('Hydra/trainer.batch_size', values=[8, 16, 32]),
        ],
        objective_metric_title='loss',
        objective_metric_series='test',
        objective_metric_sign='min',
        max_number_of_concurrent_tasks=5,
        optimizer_class=GridSearch,
        execution_queue=queue,
        optimization_time_limit=10,
        time_limit_per_job=3,
        pool_period_min=0.5)

    # Launch the optimization (use optimizer.start_locally() instead to run
    # the controller loop inside this process).
    optimizer.start()

    # Poll every 10 seconds until the optimizer's time budget runs out,
    # printing a small status report each cycle.
    while not optimizer.reached_time_limit():
        # Best experiments found so far (was previously computed but never shown).
        top_experiments = optimizer.get_top_experiments(top_k=2)
        now = time.time()
        print(f'Active experiments: {optimizer.get_num_active_experiments()}')
        print(f'Top experiments: {[t.id for t in top_experiments]}')
        print('')
        print(f'Start time:{datetime.fromtimestamp(optimizer.optimization_start_time)}')
        print(f'Time limit: {optimizer.get_time_limit()}')
        print('')
        print(f'Current time: {datetime.fromtimestamp(now)}')
        print(f'Reached time limit: {optimizer.reached_time_limit()}')
        print(f'Elapsed (min): {optimizer.elapsed()}')
        print('---')
        time.sleep(10)

    # NOTE(review): wait() appears to block until the time limit rather than
    # until all jobs complete -- confirm against ClearML docs if completion
    # (not just timeout) matters.
    optimizer.wait()
    # Abort any cloned tasks that are still running.
    optimizer.stop()
if __name__ == '__main__':
    # Ask Hydra for complete stack traces instead of its abbreviated summary.
    os.environ['HYDRA_FULL_ERROR'] = '1'
    run_optimization()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment