Skip to content

Instantly share code, notes, and snippets.

@PetrochukM
Last active April 11, 2023 06:39
Show Gist options
  • Star 20 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save PetrochukM/2c5fae9daf0529ed589018c6353c9f7b to your computer and use it in GitHub Desktop.
Save PetrochukM/2c5fae9daf0529ed589018c6353c9f7b to your computer and use it in GitHub Desktop.
Here we implement adaptations of Hyperband and Successive Halving. We found that the original Hyperband implementation was messy and untested. We also wanted to adapt it to support model reuse.
"""
We implement additional hyperparameter optimization methods not present in
https://scikit-optimize.github.io/.
Gist: https://gist.github.com/Deepblue129/2c5fae9daf0529ed589018c6353c9f7b
"""
import math
import logging
import random
from tqdm import tqdm
logger = logging.getLogger(__name__)
def _random_points(dimensions, n_points, random_seed=None):
    """ Generate a random sample of points from dimensions.

    Args:
        dimensions (iterable): Objects exposing a `name` attribute and an
            `rvs(n_samples=..., random_state=...)` method (e.g. skopt dimensions).
        n_points (int): Number of points to sample.
        random_seed (optional): Seed used to derive one seed per dimension; `None` seeds from
            a non-deterministic source.

    Returns:
        (list of dict): `n_points` dictionaries mapping every dimension name to a sampled value.
    """
    # NOTE: Every dimension receives its own integer seed as `random_state`; otherwise,
    # dimensions with the same distribution would receive the same sequence of random numbers.
    # A private generator keeps the derived seeds deterministic for a given `random_seed`
    # without reseeding the caller's module-level `random` state.
    seed_generator = random.Random(random_seed)
    # NOTE: `randint` is inclusive on both ends; the upper bound is `2**32 - 1` because that is
    # the largest seed NumPy-based `random_state` consumers accept.
    columns = {
        d.name: d.rvs(n_samples=n_points, random_state=seed_generator.randint(0, 2**32 - 1))
        for d in dimensions
    }
    # Transpose the per-dimension columns into one dictionary per sampled point.
    return [{name: column[i] for name, column in columns.items()} for i in range(n_points)]
def successive_halving(
        objective,
        dimensions,
        max_resources_per_model=81,
        downsample=3,  # TODO: Test random downsamples work and check boundaries
        initial_resources=3,
        n_models=45,
        random_seed=None,
        progress_bar=True):
    """
    Adaptation of the Successive Halving algorithm.

    tl;dr every round, keep only the best `1 / downsample` of the models and train the survivors
    for longer.

    Adaptation: Instead of running N / 2 models for 2T, we run N / downsample models until each
    has consumed T * downsample resources in total. This adaptation is the same adaptation of
    Successive Halving in hyperband.

    Reference: http://proceedings.mlr.press/v51/jamieson16.pdf
    Reference: http://www.argmin.net/2016/06/23/hyperband/

    TODO: Splitting in half is a fairly random number. We could possibly look for a better split
    point. For example, we could use the large margin to split points. Or predict the performance
    of hyperparameters would do well in the future.

    Args:
        objective (callable): objective function to minimize. It is called with keyword
            arguments only:
                resources (number): number of additional resources (e.g. epochs) to train with
                checkpoint (any): checkpoint returned by the previous call for the same
                    hyperparameters; `None` on the first call
                **hyperparameters (any): hyperparameters to run
            and must return a tuple `(score, checkpoint)`:
                score (float): score to minimize
                checkpoint (any): saved data from run
        dimensions (list of skopt.Dimensions): list of dimensions to minimize under
        max_resources_per_model: Max number of resources (e.g. epochs) to use per model
        downsample: Downsampling of models (e.g. halving is a downsampling of 2)
        initial_resources: Number of resources (e.g. epochs) to use initially to evaluate first
            round. The first round always uses exactly this amount.
        n_models (number): Number of models to evaluate; rounded, with a minimum of one.
        random_seed (int, optional): Random seed for generating hyperparameters
        progress_bar (boolean or tqdm): Iff to use or update a progress bar.

    Returns:
        scores (list of floats): Best score seen for each model that survived to the last round
        hyperparameters (list of dict): Hyperparameters with a one to one correspondence
            to scores.

    Raises:
        ValueError: If `downsample <= 1` or `initial_resources <= 0`.
    """
    if downsample <= 1:
        raise ValueError('Downsample must be > 1; otherwise, the number of resources allocated '
                         'does not grow')
    if initial_resources <= 0:
        # Without this check the loop below never advances `total_resources_per_model`.
        raise ValueError('Initial resources must be > 0; otherwise, no resources are ever '
                         'allocated')

    def round_n_models(n):
        # `n_models` is tracked as a float; at least one model is always kept.
        return max(round(n), 1)

    n_initial_models = round_n_models(n_models)
    total_resources_per_model = 0
    hyperparameters = _random_points(dimensions, n_initial_models, random_seed)
    checkpoints = [None] * n_initial_models
    scores = [math.inf] * n_initial_models

    # Create a new progress bar
    # NOTE: The `isinstance` check must come first; `bool(tqdm())` raises for a bar created
    # without an iterable or a total.
    remember_to_close = False
    if not isinstance(progress_bar, tqdm) and progress_bar:
        remember_to_close = True
        # TODO: Compute the tqdm total
        progress_bar = tqdm()
    has_progress_bar = isinstance(progress_bar, tqdm)
    if has_progress_bar and not hasattr(progress_bar, 'stats'):
        # Keep tabs on a set of stats; also covers a caller supplied bar without `stats`.
        progress_bar.stats = {'min_score': math.inf, 'models_evaluated': 0}

    try:
        while total_resources_per_model < max_resources_per_model:
            # Compute number of resources to continue running each model with
            if total_resources_per_model == 0:
                update_n_resources = initial_resources
            else:
                # Grow the per-model total by a factor of `downsample`, capped at the maximum.
                update_n_resources = min(
                    total_resources_per_model * downsample - total_resources_per_model,
                    max_resources_per_model - total_resources_per_model)

            results = []
            for score, checkpoint, params in zip(scores, checkpoints, hyperparameters):
                new_score, new_checkpoint = objective(
                    resources=update_n_resources, checkpoint=checkpoint, **params)
                # A model is ranked by the best score it has achieved so far.
                new_score = min(score, new_score)
                results.append((new_score, new_checkpoint))
                if has_progress_bar:
                    progress_bar.update(update_n_resources)
                    if progress_bar.stats['min_score'] > new_score:
                        progress_bar.stats['min_score'] = new_score
                        progress_bar.set_postfix(progress_bar.stats)

            total_resources_per_model += update_n_resources

            is_last_iteration = total_resources_per_model >= max_resources_per_model
            if not is_last_iteration:
                # Sort by minimum score and keep the best `1 / downsample` of the models
                ranked = sorted(zip(results, hyperparameters), key=lambda pair: pair[0][0])
                survivors = ranked[:round_n_models(n_models / downsample)]
                # Models dropped this round are finished and count as evaluated.
                models_evaluated = len(ranked) - len(survivors)
                # Update `hyperparameters` lists
                results, hyperparameters = zip(*survivors)
                n_models = n_models / downsample
            else:
                models_evaluated = len(results)

            # Update `scores` and `checkpoints` lists
            scores, checkpoints = zip(*results)

            if has_progress_bar:
                progress_bar.stats['models_evaluated'] += models_evaluated
                progress_bar.set_postfix(progress_bar.stats)
    finally:
        # Only close a bar created here; a caller supplied bar stays open for further use.
        if remember_to_close:
            progress_bar.close()

    return list(scores), list(hyperparameters)
def hyperband(objective,
              dimensions,
              max_resources_per_model=81,
              downsample=3,
              total_resources=None,
              random_seed=None,
              progress_bar=True):
    """
    Adaptation of the Hyperband algorithm

    tl;dr search over the space of successive halving hyperparameters

    Adaptation: Originally Hyperband was implemented with the assumption that we cannot reuse
    models. We redid the math allowing for reusing models. This is particularly helpful in speeding
    up 1 GPU hyperparameter optimization. Just to clarify, by reusing models, we mean that
    given hyperparameters `x` and epochs `y`, we can use one model to evaluate all `y` integers
    with hyperparameters `x`.

    Reference: https://arxiv.org/pdf/1603.06560.pdf
    Reference: http://www.argmin.net/2016/06/23/hyperband/

    TODO: Implement extension to hyperband purporting an increase of x4:
    https://arxiv.org/pdf/1705.10823.pdf
    http://www.ijcai.org/Proceedings/15/Papers/487.pdf

    Args:
        objective (callable): objective function to minimize. It is called with keyword
            arguments only:
                resources (number): number of additional resources (e.g. epochs) to train with
                checkpoint (any): checkpoint returned by the previous call for the same
                    hyperparameters; `None` on the first call
                **hyperparameters (any): hyperparameters to run
            and must return a tuple `(score, checkpoint)`:
                score (float): score to minimize
                checkpoint (any): saved data from run
        dimensions (list of skopt.Dimensions): list of dimensions to minimize under
        max_resources_per_model (float): Max number of resources (e.g. epochs) to use per model
        downsample (int): Downsampling of models (e.g. halving is a downsampling of 2)
        total_resources (optional): Max number of resources hyperband is allowed to use over the
            entirety of the algorithm.
        random_seed (int, optional): Random seed for generating hyperparameters. Each round
            receives its own seed derived deterministically from this one.
        progress_bar (boolean, optional): Boolean for displaying tqdm

    Returns:
        scores (list of floats): Scores of the top objective executions
        hyperparameters (list of dict): Hyperparameters with a one to one correspondence
            to scores.

    Raises:
        ValueError: If `downsample <= 1` or `max_resources_per_model < 1`.
    """
    if downsample <= 1:
        raise ValueError('Downsample must be > 1; otherwise, the number of resources allocated '
                         'does not grow')
    if max_resources_per_model < 1:
        # Fewer than one resource yields zero rounds and divisions by zero further down.
        raise ValueError('Max resources per model must be >= 1')

    all_scores = []
    all_hyperparameters = []

    # Number of times to run hyperband
    # Ex. `max_resources_per_model = 81 and downsample = 3`
    #     Then => initial_resources = [1, 3, 9, 27, 81]
    #     And => `hyperband_rounds = 5`
    #     And => `successive_halving_rounds = [5, 4, 3, 2, 1]`
    n_hyperband_rounds = math.floor(math.log(max_resources_per_model, downsample)) + 1
    # NOTE: `math.log` is inexact (e.g. `math.log(243, 3)` is slightly below 5), so nudge the
    # estimate until it is the number of powers of `downsample` not exceeding the maximum.
    while downsample**n_hyperband_rounds <= max_resources_per_model:
        n_hyperband_rounds += 1
    while (n_hyperband_rounds > 1 and
           downsample**(n_hyperband_rounds - 1) > max_resources_per_model):
        n_hyperband_rounds -= 1

    if total_resources is None:
        # TODO: Multiply by the number of dimensions so it scales the number of models
        # given the large space
        total_resources_per_round = max_resources_per_model * n_hyperband_rounds
    else:
        total_resources_per_round = total_resources / n_hyperband_rounds
    total_resources_used = total_resources_per_round * n_hyperband_rounds

    # NOTE: Passing the same seed to every round would make each round sample the same points
    # again; draw one seed per round from a generator seeded with `random_seed` instead.
    seed_generator = None if random_seed is None else random.Random(random_seed)

    total_models_evaluated = 0
    if progress_bar:
        progress_bar = tqdm(total=total_resources_used)
        setattr(progress_bar, 'stats', {'min_score': math.inf, 'models_evaluated': 0})

    try:
        for i in reversed(range(n_hyperband_rounds)):
            # NOTE: Attained by running the below code on https://sandbox.open.wolframcloud.com:
            #   Reduce[Power[d, j - 1] * (x / Power[d, j]) +
            #   Sum[(Power[d, i] - Power[d, i - 1]) * (x / Power[d, i]), {i, j, k}] == e
            #   && k >=j>=1 && k>=1 && d>=1, {x}]
            # `e` is `total_resources_per_round`
            # `x` is `n_models`
            # `k - j` is `i`
            # `d` is downsample
            # The summation is similar to the successive halving rounds loop. It computes the
            # number of resources with reuse run in total. This is different from hyperband that
            # assumes no reuse.
            n_models = downsample * total_resources_per_round
            n_models /= downsample * (1 + i) - i
            n_models /= downsample**(-i + n_hyperband_rounds - 1)
            # `successive_halving` samples a rounded number of models, at least one.
            total_models_evaluated += max(round(n_models), 1)

            round_seed = None if seed_generator is None else seed_generator.randrange(2**32)
            scores, hyperparameters = successive_halving(
                objective=objective,
                dimensions=dimensions,
                max_resources_per_model=max_resources_per_model,
                downsample=downsample,
                initial_resources=max_resources_per_model / downsample**i,
                n_models=n_models,
                random_seed=round_seed,
                progress_bar=progress_bar)
            logger.info('Finished hyperband round: %d of %d', n_hyperband_rounds - i - 1,
                        n_hyperband_rounds - 1)
            all_scores.extend(scores)
            all_hyperparameters.extend(hyperparameters)
    finally:
        # Release the bar even when the objective raises.
        if isinstance(progress_bar, tqdm):
            progress_bar.close()

    logger.info('Total models evaluated: %f', total_models_evaluated)
    logger.info('Total resources used: %f', total_resources_used)
    logger.info('Total resources used per model on average: %f',
                total_resources_used / total_models_evaluated)

    return all_scores, all_hyperparameters
### TEST ###
import unittest
import random
from skopt.space import Real, Integer
from lib.utils import config_logging
# NOTE(review): `config_logging` comes from the project-local `lib.utils`, which is not shown
# here; presumably it sets up logging for the test run — confirm against that module.
config_logging()
# Search space shared by the tests below: a single integer dimension named 'integer' with bounds
# 1 and 100. The name matches the `integer` keyword argument of the `mock` objective.
mock_dimensions = [Integer(1, 100, name='integer')]
def mock(resources, integer=0, checkpoint=None):
    """ Fake objective whose score never changes after the first call.

    Args:
        resources: Accepted to satisfy the objective interface; not used.
        integer: Hyperparameter that becomes the score when there is no checkpoint.
        checkpoint: Value returned by an earlier call; when given it is echoed back.

    Returns:
        (tuple): `(score, checkpoint)` holding the same value twice.
    """
    # `integer` only matters on the first batch; afterwards the checkpoint carries the value.
    value = integer if checkpoint is None else checkpoint
    return value, value
class TestHyperparameterOptimization(unittest.TestCase):
    """ Smoke tests for `hyperband` and `successive_halving` using the `mock` objective. """

    def _assert_scores_match(self, scores, hyperparameters):
        # The mock objective scores a model with its own `integer` hyperparameter, so every
        # returned score must equal the `integer` of the hyperparameters paired with it.
        for score, hyperparameter in zip(scores, hyperparameters):
            self.assertEqual(score, hyperparameter['integer'])

    def test_hyperband_simple(self):
        # Hyperband with default arguments
        result = hyperband(objective=mock, dimensions=mock_dimensions)
        self._assert_scores_match(*result)

    def test_successive_halving_simple(self):
        # Successive halving with default arguments
        result = successive_halving(objective=mock, dimensions=mock_dimensions)
        self._assert_scores_match(*result)

    def test_hyperband_no_progress_bar(self):
        # Hyperband with the progress bar disabled
        result = hyperband(objective=mock, dimensions=mock_dimensions, progress_bar=False)
        self._assert_scores_match(*result)

    def test_successive_halving_no_progress_bar(self):
        # Successive halving with the progress bar disabled
        result = successive_halving(
            objective=mock, dimensions=mock_dimensions, progress_bar=False)
        self._assert_scores_match(*result)

    def test_successive_halving_downsample(self):
        # A downsample of 1 is rejected
        arguments = dict(
            objective=mock,
            dimensions=mock_dimensions,
            progress_bar=False,
            downsample=1,
            n_models=45)
        with self.assertRaises(ValueError):
            successive_halving(**arguments)
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
@PetrochukM
Copy link
Author

PetrochukM commented Feb 1, 2018

Ran this with an LSTM model that I tuned with 10 hyperparameters over the course of an hour. The results were reproducible and better than those from hand tuning!

Each model takes around 3.5 minutes to run. In an hour, Hyperband evaluated 65 models, averaging about 1 minute per model thanks to its early stopping mechanism.

As well, the TQDM time estimate was on point. It estimated an hour and 10 minutes 3% of the way into the task. This was an accurate estimate.

@Diyago
Copy link

Diyago commented Mar 30, 2019

Some usage example would be great

@jjerphan
Copy link

jjerphan commented Apr 15, 2019

@Diyago : Here is a simple example with scikit-learn:

from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from skopt.space import Integer, Real

from algo import hyperband


# Example: tune two RandomForestClassifier hyperparameters on the iris dataset with `hyperband`.
if __name__ == "__main__":

    iris = datasets.load_iris()
    # NOTE(review): `X` and `y` are assigned but not used below; the split reads `iris.data`
    # and `iris.target` directly.
    X = iris.data
    y = iris.target

    # Hold out 20% of the samples; the objective scores on this held-out part.
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.20,
                                                        random_state=1337)

    # Search space; each dimension name is passed to the objective as a keyword argument and
    # forwarded to RandomForestClassifier.
    mock_dimensions = [
        Real(0.0, 1.0, name='min_samples_split'),
        Integer(2, 10, name='max_depth')
    ]

    # Objective handed to `hyperband`.
    # NOTE(review): `resources` and `checkpoint` are accepted but not used in this body; a new
    # classifier is created and fitted on every call.
    def fit_and_score(resources, checkpoint, **hyperparameters):
        import warnings
        warnings.simplefilter(action='ignore', category=FutureWarning)

        clf = RandomForestClassifier(**hyperparameters)

        clf.fit(X_train, y_train)

        accuracy = clf.score(X_test, y_test)

        # Maximisation problem
        # The accuracy is negated so that a higher accuracy gives a lower returned score; the
        # second element of the returned pair is the hyperparameters dictionary.
        return - accuracy, hyperparameters


    # Run the search and print each returned score next to its hyperparameters.
    accuracies, hps = hyperband(objective=fit_and_score, dimensions=mock_dimensions)
    for acc, hp in zip(accuracies, hps):
        print(acc, hp)

@Rayn2402
Copy link

I'd like to use your code, but I'm not sure I understand the format of the 'checkpoint' argument of the objective function. What should it be? Thank you!

@PetrochukM
Copy link
Author

PetrochukM commented Nov 26, 2019

Hi Guys!

This is an example from my usage: https://github.com/PetrochukM/Simple-QA-EMNLP-2018/blob/d9234df6f9fe4316e540d885e58eb007c992b13f/notebooks/Simple%20QA%20Models/Relation%20Classifier%20RNN%20Model.ipynb

The objective function takes a set of hyperparameters, resources, and a checkpoint. It should use those to train a model (from the checkpoint) with those particular hyperparameters using the given resources. Finally, it should return the current score (i.e. loss) and an updated checkpoint, in that order.

Sorry for not providing an example, initially!

@Rayn2402
Copy link

I was also wondering how I can set an Integer() skopt space with values chosen by myself?

Example : Integer([1,2,10,15,20], "dummyVar")

Can I do this with Categorical instead?

Thanks!

@maxmatical
Copy link

I am trying to use hyperband for a RL problem, however, when I run hyperband, I get the following error

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/Desktop/Python_Projects/RL_Trader/hyperband_optimize.py in ()
     77     return -1.*val_returns, [max_trading_session, lookback, trade_frac, reward_func, ta, gamma, lr]
     78 
---> 79 returns, hps = hyperband(objective=fit_and_score, dimensions=space)
     80 
     81 for acc, hp in zip(returns, hps):

~/Desktop/Python_Projects/RL_Trader/hyperband.py in hyperband(objective, dimensions, max_resources_per_model, downsample, total_resources, random_seed, progress_bar)
    243             n_models=n_models,
    244             random_seed=random_seed,
--> 245             progress_bar=progress_bar)
    246         logger.info('Finished hyperband round: %d of %d', n_hyperband_rounds - i - 1,
    247                     n_hyperband_rounds - 1)

~/Desktop/Python_Projects/RL_Trader/hyperband.py in successive_halving(objective, dimensions, max_resources_per_model, downsample, initial_resources, n_models, random_seed, progress_bar)
    108         for score, checkpoint, params in zip(scores, checkpoints, hyperparameters):
    109             new_score, new_checkpoint = objective(
--> 110                 resources=update_n_resources, checkpoint=checkpoint, **params)
    111             new_score = min(score, new_score)
    112             results.append(tuple([new_score, new_checkpoint]))

~/Desktop/Python_Projects/RL_Trader/hyperband_optimize.py in fit_and_score(resources, checkpoint, max_trading_session, lookback, trade_frac, reward_func, ta, gamma, lr)
     39     #                 prioritized_replay_beta0 = 0.4,
     40     #                 prioritized_replay_eps= 1e-06)
---> 41     model = DQN(DQN_MlpPolicy, train_env)
     42     print('Begin Training')
     43     model.learn(total_timesteps=NEPOCHS)

~/anaconda3/lib/python3.6/site-packages/stable_baselines/deepq/dqn.py in __init__(self, policy, env, gamma, learning_rate, buffer_size, exploration_fraction, exploration_final_eps, exploration_initial_eps, train_freq, batch_size, double_q, learning_starts, target_network_update_freq, prioritized_replay, prioritized_replay_alpha, prioritized_replay_beta0, prioritized_replay_beta_iters, prioritized_replay_eps, param_noise, n_cpu_tf_sess, verbose, tensorboard_log, _init_setup_model, policy_kwargs, full_tensorboard_log, seed)
    103 
    104         if _init_setup_model:
--> 105             self.setup_model()
    106 
    107     def _get_pretrain_placeholders(self):

~/anaconda3/lib/python3.6/site-packages/stable_baselines/deepq/dqn.py in setup_model(self)
    141                     sess=self.sess,
    142                     full_tensorboard_log=self.full_tensorboard_log,
--> 143                     double_q=self.double_q
    144                 )
    145                 self.proba_step = self.step_model.proba_step

~/anaconda3/lib/python3.6/site-packages/stable_baselines/deepq/build_graph.py in build_train(q_func, ob_space, ac_space, optimizer, sess, grad_norm_clipping, gamma, double_q, scope, reuse, param_noise, param_noise_filter_func, full_tensorboard_log)
    365                                                         param_noise_filter_func=param_noise_filter_func)
    366         else:
--> 367             act_f, obs_phs = build_act(q_func, ob_space, ac_space, stochastic_ph, update_eps_ph, sess)
    368 
    369         # q network evaluation

~/anaconda3/lib/python3.6/site-packages/stable_baselines/deepq/build_graph.py in build_act(q_func, ob_space, ac_space, stochastic_ph, update_eps_ph, sess)
    139     eps = tf.get_variable("eps", (), initializer=tf.constant_initializer(0))
    140 
--> 141     policy = q_func(sess, ob_space, ac_space, 1, 1, None)
    142     obs_phs = (policy.obs_ph, policy.processed_obs)
    143     deterministic_actions = tf.argmax(policy.q_values, axis=1)

~/anaconda3/lib/python3.6/site-packages/stable_baselines/deepq/policies.py in __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse, obs_phs, dueling, **_kwargs)
    222         super(MlpPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse,
    223                                         feature_extraction="mlp", obs_phs=obs_phs, dueling=dueling,
--> 224                                         layer_norm=False, **_kwargs)
    225 
    226 

~/anaconda3/lib/python3.6/site-packages/stable_baselines/deepq/policies.py in __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse, layers, cnn_extractor, feature_extraction, obs_phs, layer_norm, dueling, act_fun, **kwargs)
    115                         action_out = act_fun(action_out)
    116 
--> 117                 action_scores = tf_layers.fully_connected(action_out, num_outputs=self.n_actions, activation_fn=None)
    118 
    119             if self.dueling:

~/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py in func_with_args(*args, **kwargs)
    180       current_args = current_scope[key_func].copy()
    181       current_args.update(kwargs)
--> 182     return func(*args, **current_args)
    183 
    184   _add_op(func)

~/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/layers.py in fully_connected(inputs, num_outputs, activation_fn, normalizer_fn, normalizer_params, weights_initializer, weights_regularizer, biases_initializer, biases_regularizer, reuse, variables_collections, outputs_collections, trainable, scope)
   1826   if not isinstance(num_outputs, six.integer_types):
   1827     raise ValueError('num_outputs type should be one of %s, got %s.' % (
-> 1828         list(six.integer_types), type(num_outputs)))
   1829 
   1830   layer_variable_getter = _build_variable_getter({

ValueError: num_outputs type should be one of [], got .

And this issue doesn't seem to arise when I'm doing supervised learning. Is there anything I can do to fix this error?

@PetrochukM
Copy link
Author

@maxmatical Can you provide a test case to replicate the problem?

@maxmatical
Copy link

I re-ran some tests, and it seems like it was an error only with custom gym environments. It seems like hyperband doesn't like when the number of possible outputs for an action is Integer. I changed possible number of outputs to Categorical and it seemed to be working fine.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment