Skip to content

Instantly share code, notes, and snippets.

@JoshuaC3
Created October 28, 2019 11:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JoshuaC3/b3b656120540c2962aecc9dd49c07358 to your computer and use it in GitHub Desktop.
Save JoshuaC3/b3b656120540c2962aecc9dd49c07358 to your computer and use it in GitHub Desktop.
import numpy as np
import lightgbm as lgb
# Generate simulation data.
#
# Each of the first N_DISTRIBUTIONS rows of ``data`` is the histogram
# (N_BINS equal-width bins over HIST_RANGE, raw counts) of N_DRAWS samples
# from a normal distribution whose mean and standard deviation are both drawn
# uniformly from [0, 1).  Samples falling outside HIST_RANGE are not counted,
# so a row may sum to less than N_DRAWS.
#
# The second N_DISTRIBUTIONS rows are copies of the first half in which the
# bin counts of every row have been independently shuffled.  Further down the
# script the first half is labelled 1 and the second half 0.
N_DISTRIBUTIONS = 5000  # number of real histograms (and of shuffled copies)
N_DRAWS = 1000          # samples drawn per distribution
N_BINS = 10             # histogram bins == number of features
HIST_RANGE = (-1, 1)    # histogram support

para = np.random.random((N_DISTRIBUTIONS, 2))  # column 0: mu, column 1: sigma
data = np.zeros((2 * N_DISTRIBUTIONS, N_BINS))
for i in range(N_DISTRIBUTIONS):
    mu, sigma = para[i, :]
    s = np.random.normal(mu, sigma, N_DRAWS)
    # np.histogram returns (counts, bin_edges); only the counts are kept.
    data[i, :] = np.histogram(s, bins=N_BINS, density=False, range=HIST_RANGE)[0]

# Work on a copy so the original histograms in the first half stay intact.
data_shuffle = data[:N_DISTRIBUTIONS, :].copy()
for i in range(N_DISTRIBUTIONS):
    # data_shuffle[i, :] is a view, so the in-place shuffle permutes the row
    # of data_shuffle itself.
    np.random.shuffle(data_shuffle[i, :])
data[N_DISTRIBUTIONS:, :] = data_shuffle
# Build the full 10000-row Dataset, write it to a LightGBM binary file and
# reload it from that file; from here on ``train_data`` is the file-backed
# Dataset.
row_labels = [1] * 5000 + [0] * 5000  # rows 0-4999 -> 1, rows 5000-9999 -> 0
train_data = lgb.Dataset(data, label=row_labels, free_raw_data=False)
train_data.save_binary('train_data.bin')
train_data = lgb.Dataset("train_data.bin")

all_rows = np.arange(10000)

# Subset 1: 5000 distinct random rows taken from the file-backed Dataset.
subset_index = np.random.choice(all_rows, 5000, replace=False)
lazy_subset = train_data.subset(subset_index)
subset_train_data_1 = lazy_subset.construct()

# Subset 2: same recipe with a freshly drawn subset_index.  The same
# subset_index is reused for subsets 3 and 4 below.
subset_index = np.random.choice(all_rows, 5000, replace=False)
lazy_subset = train_data.subset(subset_index)
subset_train_data_2 = lazy_subset.construct()

# Subset 3: subset of an in-memory Dataset that keeps its raw data and uses
# the file-backed Dataset as reference.
train_data_3 = lgb.Dataset(
    data,
    label=row_labels,
    free_raw_data=False,
    reference=train_data,
)
lazy_subset = train_data_3.subset(subset_index)
subset_train_data_3 = lazy_subset.construct()

# Subset 4: no .subset() call at all -- the rows and labels are sliced with
# NumPy first and a new Dataset is built directly from the slices.
subset_labels = np.array(row_labels)[subset_index]
subset_train_data_4 = lgb.Dataset(
    data[subset_index, :],
    label=subset_labels,
    free_raw_data=False,
    reference=train_data,
).construct()
# LightGBM parameters passed to every lgb.train call in this script:
# a single-threaded gradient-boosted binary classifier evaluated with both
# binary error and binary log-loss.
# NOTE(review): 'metric_freq' may only be honoured by the LightGBM CLI --
# confirm against the LightGBM parameter documentation.
params = dict(
    boosting_type='gbdt',
    objective='binary',
    metric=["binary_error", 'binary_logloss'],
    metric_freq=10,
    num_leaves=31,
    num_threads=1,
    learning_rate=0.1,
    feature_fraction=1,
    boost_from_average=False,
    verbose=1,
)
# Keyword arguments common to every lgb.train call below: the shared
# parameter dict, 10 boosting rounds, and the full file-backed Dataset as the
# only validation set.
shared_train_args = dict(
    params=params,
    num_boost_round=10,
    valid_sets=[train_data],
)

# Initial fit on subset_train_data_1 -- per the original author this works.
# keep_training_booster=True is requested for this first booster only.
gbm = lgb.train(
    train_set=subset_train_data_1,
    keep_training_booster=True,
    **shared_train_args,
)

# Each call below continues from the booster returned by the previous call
# (init_model=gbm) and rebinds gbm.  Per the original author every one of
# them fails; the cause is not established in this script.

# Continue training with subset_train_data_2 -- fails.
gbm = lgb.train(
    train_set=subset_train_data_2,
    init_model=gbm,
    **shared_train_args,
)

# Continue training with subset_train_data_3 -- fails.
gbm = lgb.train(
    train_set=subset_train_data_3,
    init_model=gbm,
    **shared_train_args,
)

# Continue training with subset_train_data_4 -- fails.
gbm = lgb.train(
    train_set=subset_train_data_4,
    init_model=gbm,
    **shared_train_args,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment