Skip to content

Instantly share code, notes, and snippets.

@Arturus
Created October 15, 2018 14:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Save Arturus/c32b186f9b964a7007a786daec6ca310 to your computer and use it in GitHub Desktop.
SMAC parameter tuning
from trainer import train
# Import ConfigSpace and different types of parameters
from smac.configspace import ConfigurationSpace, Configuration
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
UniformFloatHyperparameter, UniformIntegerHyperparameter
from ConfigSpace.conditions import InCondition
# Import SMAC-utilities
from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC
from smac.runhistory.runhistory import RunHistory
from timeit import default_timer as timer
import pickle
import numpy as np
import os.path
import logging
import math
from hparams import default_hparams, apply_configuration, SMAC_configuration
from functools import partial
import datetime
import time
import argparse
import json
import glob
import re
def _str2bool(value):
    """Parse a boolean command-line value.

    argparse's ``type=bool`` is a trap: ``bool('False')`` is True because any
    non-empty string is truthy, so ``--tqdm False`` would enable tqdm.  This
    converter accepts the usual spellings explicitly and keeps the original
    ``--flag True/False`` CLI shape backward compatible.
    """
    if isinstance(value, bool):
        return value
    if value.lower() in ('true', '1', 'yes', 'y'):
        return True
    if value.lower() in ('false', '0', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got %r' % value)


parser = argparse.ArgumentParser(description='Runs SMAC3 hyperparameter search')
parser.add_argument('run_dir', help='Output directory')
parser.add_argument('instance_id', help='Instance id', type=int, default=0)
parser.add_argument('--max_evals', help='Max # of evaluations', type=int, default=100)
parser.add_argument('--max_time', help='Time limit (hours)', type=float, default=math.inf)
parser.add_argument('--max_same_runs', help='Max # of same configuration runs', type=int, default=5)
parser.add_argument('--max_epoch', help='Epoch limit', type=int, default=100)
parser.add_argument('--train_sampling', help='Sampling pct. for training', type=float, default=1)
parser.add_argument('--eval_sampling', help='Sampling pct. for evaluation', type=float, default=1)
parser.add_argument('--patience', help='Early stopping: max # of epochs without improvement', type=int, default=5)
parser.add_argument('--mem', help='GPU memory, GB', type=float, default=5)
parser.add_argument('--gpu', help='Index of GPU device', type=int, default=-1)
# type=_str2bool (not type=bool): see converter above.
parser.add_argument('--gpu_allow_growth', help="Don't use all GPU mem", type=_str2bool, default=False)
parser.add_argument('--tqdm', help="Use tqdm bar", type=_str2bool, default=False)
args = parser.parse_args()
def get_work_dir(base_dir=None):
    """Return a fresh ``run_<N>`` output directory path under *base_dir*.

    Scans existing ``base_dir/run_<N>`` entries and returns the path with
    the next unused index (max existing N + 1, or 0 when none exist).
    The directory is not created, only the path is computed.

    Args:
        base_dir: directory to scan; defaults to ``args.run_dir``, keeping
            the original zero-argument call backward compatible.
    """
    if base_dir is None:
        base_dir = args.run_dir
    # Raw string: '\d' in a plain literal is an invalid escape sequence
    # (a DeprecationWarning, and a SyntaxWarning on modern Pythons).
    run_re = re.compile(r'/run_(\d+)$')
    matches = (run_re.search(path) for path in glob.glob(base_dir + "/run_*"))
    run_numbers = [int(m.group(1)) for m in matches if m]
    current_run = max(run_numbers, default=-1) + 1
    return base_dir + "/run_" + str(current_run)
# Logging: INFO globally, DEBUG for the SMAC optimizer itself.
logging.basicConfig(level=logging.INFO)
logging.getLogger('smac').setLevel(logging.DEBUG)
# Pin this process to one GPU: an explicit --gpu wins, otherwise map
# instance_id -> GPU index (one GPU per parallel SMAC instance).
n_gpu = args.gpu if args.gpu >=0 else args.instance_id
os.environ['CUDA_VISIBLE_DEVICES'] = str(n_gpu)
# Silence TensorFlow C++ INFO/WARNING console spam.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Output layout under run_dir:
#   tf/   - TensorFlow logs/summaries written by train()
#   smac/ - SMAC run history output (shared across parallel instances)
#   conf/ - one JSON per evaluated configuration
log_dir = args.run_dir
os.makedirs(log_dir, exist_ok=True)
tf_log_dir = log_dir + "/tf"
run_dir = log_dir + "/smac"
os.makedirs(tf_log_dir, exist_ok=True)
conf_dir = log_dir + "/conf"
os.makedirs(conf_dir, exist_ok=True)
# Build the hyperparameter search space (defined in hparams.py).
cs = SMAC_configuration()
# SMAC scenario: optimize solution quality (not runtime); pSMAC mode shares
# run histories between the parallel instances via input_psmac_dirs.
scenario = Scenario({"run_obj": "quality", # we optimize quality (alternatively runtime)
                     "runcount-limit": args.max_evals, # maximum function evaluations
                     "cs": cs, # configuration space
                     "deterministic": False,  # repeated runs with different seeds
                     'maxR':args.max_same_runs,  # max repeats of one configuration
                     'shared_model': True,  # pSMAC: share the model across instances
                     "input_psmac_dirs": run_dir + '_run*',  # sibling instances' histories
                     "output_dir": run_dir,
                     'wallclock_limit':args.max_time * 3600,  # hours -> seconds
                     }, run_id=args.instance_id)
# Monotonically increasing evaluation counter; combined with instance_id it
# gives every SMAC evaluation a unique name for logs and config dumps.
iter_count = 0


def target_func(cfg: Configuration, seed):
    """Target algorithm for SMAC: train a model with the given configuration.

    Args:
        cfg: configuration sampled from the search space.
        seed: RNG seed supplied by SMAC (varies between repeated runs of
            the same configuration since the scenario is non-deterministic).

    Returns:
        The loss returned by train() — the quantity SMAC minimizes.
    """
    global iter_count
    iter_count += 1
    params = apply_configuration(cfg)
    # Persist the configuration *before* the (long) training run, so the
    # evaluated point is on disk even if training crashes or is killed.
    with open(conf_dir + f"/{args.instance_id}_{iter_count}.json", 'w') as f:
        d = cfg.get_dictionary().copy()
        d['seed'] = int(seed)  # numpy ints are not JSON-serializable
        json.dump(d, f, sort_keys=True, indent=4)
    loss = train('smac_%d_%d' % (args.instance_id, iter_count), params, logdir=tf_log_dir, max_epoch=args.max_epoch,
                 n_models=1, seed=seed,
                 train_sampling=args.train_sampling, eval_sampling=args.eval_sampling,
                 patience=args.patience, eval_memsize=args.mem,
                 has_forward_data=True,
                 write_summaries=True,
                 gpu=args.gpu if args.gpu >= 0 else 0, gpu_allow_growth=args.gpu_allow_growth, tqdm=args.tqdm)
    return loss
# NOTE(review): use_pynisher=False disables SMAC's fork-based resource
# limiting around the target function — presumably because train() manages
# the GPU itself; confirm the rationale.
runner = ExecuteTAFuncDict(target_func, use_pynisher=False)
# Seed each parallel instance differently so their searches diverge.
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42 + args.instance_id), tae_runner=runner)
incumbent = smac.optimize()
print('incumbent', incumbent)
import tensorflow.contrib.training as training
from smac.configspace import ConfigurationSpace, Configuration
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
UniformFloatHyperparameter, UniformIntegerHyperparameter, OrdinalHyperparameter
from ConfigSpace.conditions import InCondition, EqualsCondition, LessThanCondition, OrConjunction, AndConjunction, GreaterThanCondition
import re
# Hand-tuned baseline hyperparameters (run "s32").  This is the set the
# module actually uses: def_params points here, and the SMAC search space
# takes its defaults from it.
def_params_s32 = {
    'batch_size': 256,
    # 'train_window': 380,
    'train_window': 283,
    'train_skip_first': 0,
    'rnn_depth': 267,
    'use_attn': False,
    'attention_depth': 64,
    'attention_heads': 1,
    'encoder_readout_dropout': 0.4768781146510798,
    'encoder_rnn_layers': 1,
    'decoder_rnn_layers': 1,
    # List-valued entries hold one value per index; SMAC overrides them
    # via 'name:idx' hyperparameters (see apply_configuration).
    'decoder_input_dropout': [1.0, 1.0, 1.0],
    'decoder_output_dropout': [0.975, 1.0, 1.0],   # min 0.95
    'decoder_state_dropout': [0.99, 0.995, 0.995],  # min 0.95
    'decoder_variational_dropout': [False, False, False],
    'decoder_candidate_l2': 0.0,
    'decoder_gates_l2': 0.0,
    'fingerprint_fc_dropout': 0.8232342370695286,
    'gate_dropout': 0.9967589439360334,  # 0.9786,
    'gate_activation': 'none',
    'encoder_dropout': 0.030490422531402273,
    'encoder_stability_loss': 0.0,        # max 100
    'encoder_activation_loss': 1e-06,     # max 0.001
    'decoder_stability_loss': 0.0,        # max 100
    'decoder_activation_loss': 5e-06,     # max 0.001
}
# Default incumbent from the last SMAC3 search (kept for reference; the
# active set is def_params_s32).
def_params_definc = {
    'batch_size': 256,
    'train_window': 100,
    'train_skip_first': 0,
    'rnn_depth': 128,
    'use_attn': True,
    'attention_depth': 64,
    'attention_heads': 1,
    'encoder_readout_dropout': 0.4768781146510798,
    'encoder_rnn_layers': 1,
    'decoder_rnn_layers': 1,
    'decoder_input_dropout': [1.0, 1.0, 1.0],
    'decoder_output_dropout': [1.0, 1.0, 1.0],
    'decoder_state_dropout': [0.995, 0.995, 0.995],
    'decoder_variational_dropout': [False, False, False],
    'decoder_candidate_l2': 0.0,
    'decoder_gates_l2': 0.0,
    'fingerprint_fc_dropout': 0.8232342370695286,
    'gate_dropout': 0.8961710392091516,
    'gate_activation': 'none',
    'encoder_dropout': 0.030490422531402273,
    'encoder_stability_loss': 0.0,
    'encoder_activation_loss': 1e-05,
    'decoder_stability_loss': 0.0,
    'decoder_activation_loss': 1e-05,
}
# Incumbent found at loss 0.35503610596060753:
# decoder_activation_loss=1e-05, decoder_output_dropout:0=1.0, decoder_rnn_layers=1,
# decoder_state_dropout:0=0.995, encoder_activation_loss=1e-05, encoder_rnn_layers=1,
# gate_dropout=0.7934826952854418, rnn_depth=243, train_window=135, use_attn=1,
# attention_depth=17, attention_heads=2, encoder_readout_dropout=0.7711751356092252,
# fingerprint_fc_dropout=0.9693950737901414
def_params_foundinc = {
    'batch_size': 256,
    'train_window': 135,
    'train_skip_first': 0,
    'rnn_depth': 243,
    'use_attn': True,
    'attention_depth': 17,
    'attention_heads': 2,
    'encoder_readout_dropout': 0.7711751356092252,
    'encoder_rnn_layers': 1,
    'decoder_rnn_layers': 1,
    'decoder_input_dropout': [1.0, 1.0, 1.0],
    'decoder_output_dropout': [1.0, 1.0, 1.0],
    'decoder_state_dropout': [0.995, 0.995, 0.995],
    'decoder_variational_dropout': [False, False, False],
    'decoder_candidate_l2': 0.0,
    'decoder_gates_l2': 0.0,
    'fingerprint_fc_dropout': 0.9693950737901414,
    'gate_dropout': 0.7934826952854418,
    'gate_activation': 'none',
    'encoder_dropout': 0.0,
    'encoder_stability_loss': 0.0,
    'encoder_activation_loss': 1e-05,
    'decoder_stability_loss': 0.0,
    'decoder_activation_loss': 1e-05,
}
# Iteration 81 on smac_run0 (loss 0.3552077534247418 over 7 repeats):
# {'decoder_activation_loss': 0.0, 'decoder_output_dropout:0': 0.85, 'decoder_rnn_layers': 2,
#  'decoder_state_dropout:0': 0.995, 'encoder_activation_loss': 0.0, 'encoder_rnn_layers': 2,
#  'gate_dropout': 0.7665920904244501, 'rnn_depth': 201, 'train_window': 143, 'use_attn': 1,
#  'attention_depth': 17, 'attention_heads': 2, 'decoder_output_dropout:1': 0.975,
#  'decoder_state_dropout:1': 0.99, 'encoder_dropout': 0.0304904225,
#  'encoder_readout_dropout': 0.4444295965935664, 'fingerprint_fc_dropout': 0.26412480387331017}
def_params_inst81 = {
    'batch_size': 256,
    'train_window': 143,
    'train_skip_first': 0,
    'rnn_depth': 201,
    'use_attn': True,
    'attention_depth': 17,
    'attention_heads': 2,
    'encoder_readout_dropout': 0.4444295965935664,
    'encoder_rnn_layers': 2,
    'decoder_rnn_layers': 2,
    'decoder_input_dropout': [1.0, 1.0, 1.0],
    'decoder_output_dropout': [0.85, 0.975, 1.0],
    'decoder_state_dropout': [0.995, 0.99, 0.995],
    'decoder_variational_dropout': [False, False, False],
    'decoder_candidate_l2': 0.0,
    'decoder_gates_l2': 0.0,
    'fingerprint_fc_dropout': 0.26412480387331017,
    'gate_dropout': 0.7665920904244501,
    'gate_activation': 'none',
    'encoder_dropout': 0.0304904225,
    'encoder_stability_loss': 0.0,
    'encoder_activation_loss': 0.0,
    'decoder_stability_loss': 0.0,
    'decoder_activation_loss': 0.0,
}
# Iteration 121 on smac_run0 (loss 0.3548671560628074 over 3 repeats) — no dict kept:
# {'decoder_activation_loss': 1e-05, 'decoder_output_dropout:0': 0.975, 'decoder_rnn_layers': 2,
#  'decoder_state_dropout:0': 1.0, 'encoder_activation_loss': 1e-05, 'encoder_rnn_layers': 1,
#  'gate_dropout': 0.8631496699358483, 'rnn_depth': 122, 'train_window': 269, 'use_attn': 1,
#  'attention_depth': 29, 'attention_heads': 4, 'decoder_output_dropout:1': 0.975,
#  'decoder_state_dropout:1': 0.975, 'encoder_readout_dropout': 0.9835390239895767,
#  'fingerprint_fc_dropout': 0.7452161827064421}
# Iteration 83 on smac_run1 (loss 0.355050330259362 over 7 repeats) — captured below:
# {'decoder_activation_loss': 1e-06, 'decoder_output_dropout:0': 0.925, 'decoder_rnn_layers': 2,
#  'decoder_state_dropout:0': 0.98, 'encoder_activation_loss': 1e-06, 'encoder_rnn_layers': 1,
#  'gate_dropout': 0.9275441207192259, 'rnn_depth': 138, 'train_window': 84, 'use_attn': 1,
#  'attention_depth': 52, 'attention_heads': 2, 'decoder_output_dropout:1': 0.925,
#  'decoder_state_dropout:1': 0.98, 'encoder_readout_dropout': 0.6415488109353416,
#  'fingerprint_fc_dropout': 0.2581296623398802}
def_params_inst83 = {
    'batch_size': 256,
    'train_window': 84,
    'train_skip_first': 0,
    'rnn_depth': 138,
    'use_attn': True,
    'attention_depth': 52,
    'attention_heads': 2,
    'encoder_readout_dropout': 0.6415488109353416,
    'encoder_rnn_layers': 1,
    'decoder_rnn_layers': 2,
    'decoder_input_dropout': [1.0, 1.0, 1.0],
    'decoder_output_dropout': [0.925, 0.925, 1.0],
    'decoder_state_dropout': [0.98, 0.98, 0.995],
    'decoder_variational_dropout': [False, False, False],
    'decoder_candidate_l2': 0.0,
    'decoder_gates_l2': 0.0,
    'fingerprint_fc_dropout': 0.2581296623398802,
    'gate_dropout': 0.9275441207192259,
    'gate_activation': 'none',
    'encoder_dropout': 0.0,
    'encoder_stability_loss': 0.0,
    'encoder_activation_loss': 1e-06,
    'decoder_stability_loss': 0.0,
    'decoder_activation_loss': 1e-06,
}
# Active default parameter set: the SMAC search space and
# apply_configuration() fall back to these values.
def_params = def_params_s32


def default_hparams(params=def_params):
    """Build a tf.contrib.training.HParams object from a parameter dict.

    Note: the default is bound once at definition time — rebinding the
    module-level def_params later does not change this default.
    """
    return training.HParams(**params)
def SMAC_configuration() -> ConfigurationSpace:
    """Build the ConfigSpace search space for the SMAC run.

    Defaults are taken from the module-level def_params dict.  Parameters
    that hold one value per RNN layer are registered as indexed
    hyperparameters named 'name:idx'; apply_configuration() folds them
    back into the list-valued entries of def_params.
    """
    cs = ConfigurationSpace()

    # Helpers: create a hyperparameter (default from def_params) and
    # register it on cs in one step.

    def uniform_int(name:str, lower:int, upper:int, log=False):
        param = UniformIntegerHyperparameter(name, lower, upper, default=def_params[name], log=log)
        cs.add_hyperparameter(param)
        return param

    def uniform_float(name: str, lower: float, upper: float, log=False):
        param = UniformFloatHyperparameter(name, lower, upper, default=def_params[name], log=log)
        cs.add_hyperparameter(param)
        return param

    def categorical(name: str, choices, n_params=1):
        # n_params > 1: register one 'name:idx' parameter per index, with
        # per-index defaults taken from the list def_params[name].
        if n_params == 1:
            param = CategoricalHyperparameter(name, choices, default=def_params[name])
            cs.add_hyperparameter(param)
            return param
        else:
            params = [CategoricalHyperparameter(f'{name}:{idx}', choices, default=def_params[name][idx]) for idx in range(n_params)]
            cs.add_hyperparameters(params)
            return params

    def ordinal(name: str, choices, n_params=1):
        # Same as categorical(), but the choices carry an ordering.
        if n_params == 1:
            param = OrdinalHyperparameter(name, choices, default=def_params[name])
            cs.add_hyperparameter(param)
            return param
        else:
            params = [OrdinalHyperparameter(f'{name}:{idx}', choices, default=def_params[name][idx]) for idx in range(n_params)]
            cs.add_hyperparameters(params)
            return params

    # Search dimensions.
    uniform_int('train_window', 80, 380, log=True)
    uniform_int('rnn_depth', 64, 512, log=True)
    use_attn = categorical('use_attn', [1, 0])
    attention_depth = uniform_int('attention_depth', 2, 96, log=True)
    attention_heads = uniform_int('attention_heads', 1, 4)
    fingerprint_fc_dropout = uniform_float('fingerprint_fc_dropout', 0.25, 1)
    encoder_readout_dropout = uniform_float('encoder_readout_dropout', 0.3, 1.0)
    encoder_rnn_layers = ordinal('encoder_rnn_layers', [1, 2])
    decoder_rnn_layers = ordinal('decoder_rnn_layers', [1, 2])
    # Two indexed values each: one per possible decoder layer.
    decoder_output_dropout = ordinal('decoder_output_dropout', [0.75, 0.8, 0.85, 0.9, 0.925, 0.95, 0.975, 0.99, 1.0], 2)
    decoder_state_dropout = ordinal('decoder_state_dropout', [0.97, 0.975, 0.98, 0.985, 0.99, 0.995, 1.0], 2)
    #decoder_variational_dropout = CategoricalHyperparameter('decoder_variational_dropout', [True, False], default=False)
    uniform_float('gate_dropout', 0.75, 1.0)
    encoder_dropout = uniform_float('encoder_dropout', 0.0, 0.5)
    # decoder_state_dropout_type = CategoricalHyperparameter('decoder_state_dropout_type', ['inside', 'outside'], default='outside')
    ordinal('encoder_activation_loss', [0.0, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4])
    ordinal('decoder_activation_loss', [0.0, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4])
    # Attention sub-parameters are only active when use_attn == 1.
    cs.add_condition(EqualsCondition(child=attention_depth, parent=use_attn, value=1))
    cs.add_condition(EqualsCondition(child=attention_heads, parent=use_attn, value=1))
    cs.add_condition(EqualsCondition(child=fingerprint_fc_dropout, parent=use_attn, value=1))
    cs.add_condition(EqualsCondition(child=encoder_readout_dropout, parent=use_attn, value=1))
    # Second-layer / inter-layer dropouts are only active with 2 RNN layers.
    cs.add_condition(GreaterThanCondition(child=encoder_dropout, parent=encoder_rnn_layers, value=1))
    cs.add_condition(GreaterThanCondition(child=decoder_output_dropout[1], parent=decoder_rnn_layers, value=1))
    cs.add_condition(GreaterThanCondition(child=decoder_state_dropout[1], parent=decoder_rnn_layers, value=1))
    # NOTE(review): a large block of commented-out experimental parameters and
    # conditions (variational dropout, dropout-type, stability losses, l2
    # penalties) was removed here; recover it from version history if needed.
    return cs
def apply_configuration(cfg: Configuration) -> training.HParams:
    """Merge a SMAC configuration into def_params and return HParams.

    Keys of the form 'name:idx' (see SMAC_configuration) update element
    *idx* of the list-valued parameter *name*; plain keys replace scalars.
    Each value is cast to the type of the default it replaces, since SMAC
    may hand values back as strings or numpy scalars.

    Args:
        cfg: configuration sampled by SMAC; inactive conditional
            parameters come back as None and are skipped.

    Returns:
        training.HParams with def_params overridden by cfg.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    index_re = re.compile(r'(.+):(\d+)$')
    # BUG FIX: dict.copy() is shallow, so the list values were shared with
    # the global def_params and the 'name:idx' branch below mutated the
    # defaults in place, corrupting every subsequent call.  Copy each list.
    params = {k: (list(v) if isinstance(v, list) else v)
              for k, v in def_params.items()}
    for key in cfg.keys():
        value = cfg[key]
        if value is None:
            continue  # conditional hyperparameter that is inactive
        match = index_re.match(key)
        if match:
            name, idx = match.groups()
            values = params[name]
            orig_type = type(values[0])
            values[int(idx)] = orig_type(value)
        else:
            orig_type = type(params[key])
            params[key] = orig_type(value)
    return training.HParams(**params)
if __name__ == '__main__':
    # Smoke test: print the search space, sample one random configuration
    # and show the resulting HParams.
    c = SMAC_configuration()
    print(c)
    config = c.sample_configuration()
    print(config)
    print(apply_configuration(config))
# INFO:smac.intensification.intensification.Intensifier:Changes in incumbent:
# INFO:smac.intensification.intensification.Intensifier: decoder_activation_loss : 1e-05 -> 5e-05
# INFO:smac.intensification.intensification.Intensifier: decoder_stability_loss : 0.05 -> 0.0
# INFO:smac.intensification.intensification.Intensifier: decoder_state_dropout : 0.96 -> 0.995
# DEBUG:smac.intensification.intensification.Intensifier: decoder_variational_dropout remains unchanged: False
# INFO:smac.intensification.intensification.Intensifier: encoder_activation_loss : 5e-05 -> 1e-05
# INFO:smac.intensification.intensification.Intensifier: encoder_dropout : 0.07037477789188357 -> 0.030490422531402273
# INFO:smac.intensification.intensification.Intensifier: encoder_readout_dropout : 0.32175183536718266 -> 0.4768781146510798
# DEBUG:smac.intensification.intensification.Intensifier: encoder_stability_loss remains unchanged: 0.0
# INFO:smac.intensification.intensification.Intensifier: fingerprint_fc_dropout : 0.815060269058276 -> 0.8232342370695286
# INFO:smac.intensification.intensification.Intensifier: gate_dropout : 0.9388152550390564 -> 0.8961710392091516
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment