@steermomo · Created March 12, 2019 03:24
BayesianOptimization lightgbm — tune LightGBM hyperparameters with bayes_opt, maximizing 5-fold cross-validated AUC.
import argparse
import warnings
from functools import partial

import lightgbm as lgb
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from bayes_opt import BayesianOptimization

import data_loader

warnings.filterwarnings("ignore")
parser = argparse.ArgumentParser()
parser.add_argument('--jobs', type=int, default=8,
                    help='number of threads for LightGBM')
args = parser.parse_args()

n_jobs = args.jobs
n_class = 2
def obj_fun(
        learning_rate,
        num_leaves,               # int
        feature_fraction,         # float in (0, 1]
        bagging_freq,             # int
        bagging_fraction,
        min_data_in_leaf,         # int
        min_sum_hessian_in_leaf,
        X, y                      # data
):
    """5-fold CV objective: returns the out-of-fold AUC for one parameter set."""
    # bayes_opt only proposes floats, so round the integer-valued parameters
    num_leaves, min_data_in_leaf, bagging_freq = map(int, map(
        round, [num_leaves, min_data_in_leaf, bagging_freq]))
    param = {
        'learning_rate': learning_rate,
        'num_leaves': num_leaves,
        'max_depth': -1,
        'feature_fraction': feature_fraction,
        'bagging_freq': bagging_freq,
        'bagging_fraction': bagging_fraction,
        'min_data_in_leaf': min_data_in_leaf,
        'min_sum_hessian_in_leaf': min_sum_hessian_in_leaf,
        'n_estimators': 1000000,  # effectively unbounded; early stopping decides
        'metric': 'auc',
        'objective': 'binary',
        'verbose': -1,
        'n_jobs': n_jobs,
    }
    # shuffle=True is required when passing a random_state to StratifiedKFold
    skf = StratifiedKFold(n_splits=5, shuffle=True,
                          random_state=np.random.randint(900))
    val_prob = np.zeros(X.shape[0], dtype=float)  # n_samples * 1; np.float was removed in recent NumPy
    for train_index, val_index in skf.split(X, y):
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]
        model = lgb.LGBMClassifier(**param)
        model.fit(X_train, y_train,
                  eval_set=[(X_val, y_val)],
                  verbose=-1,
                  early_stopping_rounds=3000)
        # note: lightgbm >= 4.0 drops these fit kwargs; pass
        # callbacks=[lgb.early_stopping(3000)] there instead
        val_prob[val_index] = model.predict_proba(X_val)[:, 1]  # out-of-fold probabilities on the validation split
    return roc_auc_score(y, val_prob)
if __name__ == "__main__":
    X, y = data_loader.read_data()
    x_t, idc = data_loader.read_test_data()

    # (lower, upper) bounds for each hyperparameter
    search_spaces = {
        'learning_rate': (0.005, 0.01),    # bounds must be (low, high)
        'num_leaves': (2, 500),
        'feature_fraction': (0.05, 0.5),   # must be > 0 for LightGBM
        'bagging_freq': (3, 10),
        'bagging_fraction': (0.2, 0.8),
        'min_data_in_leaf': (50, 100),
        'min_sum_hessian_in_leaf': (5, 20),
    }

    black_box_function = partial(obj_fun, X=X, y=y)  # bind the data first
    optimizer = BayesianOptimization(
        f=black_box_function,
        pbounds=search_spaces,
        verbose=2,
        random_state=7,
    )
    # seed the search with a known-reasonable configuration
    optimizer.probe(
        params={'learning_rate': 0.01,
                'num_leaves': 13,
                'feature_fraction': 0.05,
                'bagging_freq': 5,
                'bagging_fraction': 0.4,
                'min_data_in_leaf': 80,
                'min_sum_hessian_in_leaf': 10.0},
        lazy=True,
    )
    optimizer.maximize(init_points=5, n_iter=50)