Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benoitdescamps/26c79a02beebf461df3e2290ea8deb24 to your computer and use it in GitHub Desktop.
Save benoitdescamps/26c79a02beebf461df3e2290ea8deb24 to your computer and use it in GitHub Desktop.
code snippet for Tuning Hyperparameters (part I): SuccessiveHalving
class SuccessiveHalving(object):
    """Apply successive halving to tune a model over n configurations with
    at most r resources.

    The algorithm repeatedly evaluates all surviving hyperparameter
    configurations with a growing resource budget, discarding the worst
    half after each round until a single configuration remains.

    Args:
        estimator: object instance with subclass SHBaseEstimator:
            estimator wrapper.
        n: integer:
            number of hyperparameter configurations to explore.
        r: integer:
            maximum number of resources.
        param_grid: dict:
            Dictionary where the keys are parameters and values are
            distributions from which a parameter is to be sampled.
            Distributions either have to provide a ``rvs`` function to
            sample from them, or can be given as a list of values, where
            a uniform distribution is assumed. Must be of the form::

                {
                    'param_1': distribution_1,
                    ...
                    'param_n': distribution_n
                }

        ressource_name: str:
            Name of the resource parameter,
            e.g. for XGBClassifier this is 'n_estimators'.
        ressource_unit: int:
            minimal step of the resource,
            e.g. for xgboost this could be n_estimators = 10.
        scoring: scoring callable or name, passed through to the estimator.
        n_jobs: int: number of parallel jobs.
        cv: cross-validation splitter (stored for downstream use).
        seed: integer: random seed for configuration sampling.
    """

    def __init__(self, estimator, n, r, param_grid,
                 ressource_name='n_estimators',
                 ressource_unit=10,
                 scoring=None, n_jobs=1, cv=None, seed=0):
        self.estimator = estimator
        self.n = n
        self.r = r
        self.param_grid = param_grid
        self.ressource_name = ressource_name
        self.ressource_unit = ressource_unit
        self.seed = seed
        self.scoring = scoring
        self.n_jobs = n_jobs
        # BUGFIX: the original accepted `cv` but never stored it,
        # silently dropping the caller's cross-validation setting.
        self.cv = cv
        self.history = list()

    def apply(self,
              Xtrain, ytrain, Xval, yval
              ):
        """Apply Successive Halving:

        1. evaluate the performance of all configurations
        2. throw out the worst half
        3. return to 1. until one configuration remains.

        Args:
            Xtrain: array:
                training data.
            ytrain: array:
                training target.
            Xval: array:
                validation data.
            yval: array:
                validation target.

        Returns:
            best configuration.
        """
        T = self._get_hyperparameter_configurations(self.n)
        first_fit = True
        # Number of halving rounds is floor(log2(len(T))).
        # BUGFIX: guard against len(T) < 2, where floor(log2(.)) is 0
        # (or log is undefined) and the original raised ZeroDivisionError
        # before the loop guard could take effect.
        n_halvings = math.floor(np.log(len(T)) / np.log(2.)) if len(T) > 1 else 1
        # Growth factor so that the budget reaches r after all rounds.
        eta = np.exp(np.log(self.r / float(self.ressource_unit)) / n_halvings)
        n_iterations = self.ressource_unit
        while len(T) > 1:
            T = self._run_and_score_models(T, ri=int(n_iterations),
                                           Xtrain=Xtrain, ytrain=ytrain,
                                           Xval=Xval, yval=yval,
                                           first_fit=first_fit)
            # Keep the best-scoring half (rounded up) of the survivors.
            T = self._get_top_k(T, k=math.ceil(len(T) / 2))
            n_iterations *= eta
            # Only the first round fits from scratch; later rounds warm-start.
            first_fit = False
        return T
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment