Skip to content

Instantly share code, notes, and snippets.

@vikramsoni2
Created February 27, 2019 11:46
Show Gist options
  • Save vikramsoni2/001709817720c442be4f3139d85a5b7f to your computer and use it in GitHub Desktop.
import optuna
from collections import Counter
# optuna
# Derive column roles from the training frame's naming conventions.
# NOTE(review): build_predictors_naming_conventions is defined elsewhere in the
# project — presumably it splits columns into feature names (`predictors`), the
# binary label (`target`), and grouping/ordering keys; confirm against its source.
predictors, target, key_id, entity, timestamp, order_by_entity, group_within_entity = build_predictors_naming_conventions(list(df_train.columns.values))
def get_pos_weight(train, label=None):
    """Return a damped positive-class weight for an imbalanced binary label.

    Computes ``(negatives / positives) * 0.90`` rounded to 2 decimals, i.e.
    LightGBM's ``scale_pos_weight`` slightly under-weighted to avoid
    over-correcting the imbalance.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame containing the label column.
    label : str, optional
        Label column name. Defaults to the module-level ``target`` resolved
        at call time (the original eagerly bound ``target`` at def time,
        which made the definition order-fragile).

    Raises
    ------
    ZeroDivisionError
        If the label column contains no positive (``1``) samples.
    """
    if label is None:
        label = target  # late-bind the module-level column name
    counts = Counter(train[label])
    return round((counts[0] / counts[1]) * 0.90, 2)
# --- Training matrices and CV configuration -------------------------------
# (Indentation of this section was flattened in the paste; restored here.)
train = df_train[predictors].copy()   # feature matrix
groups = df_train[entity]             # group key for GroupKFold
label = df_train[target].copy()       # binary target

SEED = 2019
FOLDS = 5
pos_weight = get_pos_weight(df_train)

# Columns prefixed 'i_c_' are categorical by the project's naming convention
# — TODO confirm the prefix meaning against build_predictors_naming_conventions.
for col in [v for v in predictors if v.startswith('i_c_')]:
    train[col] = train[col].astype('category')
def optuna_lightgbm(trial):
    """Optuna objective: grouped-CV LightGBM, returns ``1 - OOF AUC``.

    Samples LightGBM hyper-parameters from ``trial``, runs GroupKFold
    cross-validation over the module-level ``train``/``label``/``groups``,
    collects out-of-fold probabilities, and returns ``1 - roc_auc_score``
    so that Optuna's default minimization maximizes AUC.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Active Optuna trial used to sample hyper-parameters.

    Returns
    -------
    float
        ``1 - AUC`` of the out-of-fold predictions.
    """
    seed = SEED
    # Hyper-parameters sampled by Optuna. suggest_uniform is the pre-v3 API
    # kept for compatibility with the optuna version this gist targets.
    # Note: the trial key 'min_gain_split' deliberately maps onto LightGBM's
    # 'min_split_gain', and 'sub_sample' onto 'subsample'.
    param = {
        'task': 'train',
        'objective': 'binary',
        'learning_rate': trial.suggest_uniform('learning_rate', 0.001, 1.0),
        'subsample': trial.suggest_uniform('sub_sample', 0, 1.0),
        'max_depth': trial.suggest_int('max_depth', 5, 100),
        'num_leaves': trial.suggest_int('num_leaves', 5, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 5, 500),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 200),
        # search window centred on the imbalance ratio computed from the data
        'scale_pos_weight': trial.suggest_uniform(
            'scale_pos_weight', pos_weight * 0.8, pos_weight * 1.2),
        'reg_alpha': trial.suggest_uniform('reg_alpha', 0, 20),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0, 1.0),
        'min_split_gain': trial.suggest_uniform('min_gain_split', 0, 100),
        'reg_lambda': trial.suggest_uniform('reg_lambda', 5, 500),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 200),
        'verbose': -1,
        'seed': seed,
        'bagging_seed': seed,
        'drop_seed': seed,
        'n_estimators': 2000,       # upper bound; early stopping trims it
        'subsample_freq': 2,
        'metric_freq': 10,
        'n_jobs': 6,
    }

    # Out-of-fold prediction buffer, one slot per training row.
    oof = np.zeros(len(train))
    folds = GroupKFold(n_splits=FOLDS).split(train, label, groups)
    for trn_idx, val_idx in folds:
        model = lgb.LGBMClassifier(**param)
        model.fit(train.iloc[trn_idx].values, label.iloc[trn_idx].values,
                  eval_set=[(train.iloc[val_idx].values,
                             label.iloc[val_idx].values)],
                  early_stopping_rounds=50, verbose=0, eval_metric='auc')
        # BUG FIX: the original passed best_iteration_ positionally, which
        # lands on predict_proba's raw_score parameter, not num_iteration.
        oof[val_idx] = model.predict_proba(
            train.iloc[val_idx],
            num_iteration=model.best_iteration_)[:, 1]
        gc.collect()

    # Score once on the fully populated OOF vector (the original recomputed
    # it every fold on a partially filled array).
    oof_loss = roc_auc_score(label, oof)
    return 1 - oof_loss
# --- Run the study and report the best trial ------------------------------
optuna.logging.set_verbosity(optuna.logging.INFO)
# create_study defaults to direction='minimize', which matches the
# objective's 1 - AUC return value.
study = optuna.create_study()
study.optimize(optuna_lightgbm, n_trials=100)

print('Best trial:')
print(' Value: {}'.format(study.best_trial.value))
print(' Params: ')
for key, value in study.best_trial.params.items():
    print(' {}: {}'.format(key, value))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment