Last active
December 14, 2022 22:24
-
-
Save Wann-Jiun/d5e6f55682eb5ef21f8cd2e46c0b6cc1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plain NumPy matrices for the scikit-learn / xgboost estimators.
x_train, x_test = np.array(train_df_munged), np.array(test_df_munged)
y_train = label_df.values

# Row counts, used to size the out-of-fold prediction buffers.
ntrain, ntest = x_train.shape[0], x_test.shape[0]

# Shuffled K-fold splitter over the training rows.
# NOTE(review): passing the sample count positionally together with
# ``n_folds`` looks like the legacy ``sklearn.cross_validation.KFold``
# signature -- confirm against the file's imports.
kf = KFold(ntrain, n_folds=NFOLDS, shuffle=True, random_state=SEED)
class SklearnWrapper(object):
    """Uniform ``train``/``predict`` facade over a scikit-learn style estimator.

    Args:
        clf: Estimator class (not an instance). It is called with the
            keyword arguments built from ``params``.
        seed: Value passed to the estimator as ``random_state``.
        params: Optional mapping of constructor keyword arguments. It is
            copied, so the caller's mapping is left untouched. ``None``
            means "no extra arguments".
    """

    def __init__(self, clf, seed=0, params=None):
        # Copy so that the default ``params=None`` works and the caller's
        # dict does not silently gain a ``random_state`` entry.
        kwargs = dict(params) if params is not None else {}
        kwargs['random_state'] = seed
        self.clf = clf(**kwargs)

    def train(self, x_train, y_train):
        """Fit the wrapped estimator on the given features and targets."""
        self.clf.fit(x_train, y_train)

    def predict(self, x):
        """Return the wrapped estimator's predictions for ``x``."""
        return self.clf.predict(x)
class XgbWrapper(object):
    """``train``/``predict`` facade over an xgboost booster.

    Args:
        seed: Value stored under the ``'seed'`` key of the booster
            parameters (overrides any ``'seed'`` already in ``params``).
        params: Optional mapping of booster parameters. An ``'nrounds'``
            entry, if present, is taken out and used as the number of
            boosting rounds (default 250). The mapping is copied, so the
            caller's dict is left untouched. ``None`` means "no extra
            parameters".
    """

    def __init__(self, seed=0, params=None):
        # Private copy: setting 'seed' and popping 'nrounds' must not leak
        # back into the dict the caller handed in, and ``None`` must work.
        self.param = dict(params) if params is not None else {}
        self.param['seed'] = seed
        self.nrounds = self.param.pop('nrounds', 250)

    def train(self, x_train, y_train):
        """Train a booster for ``self.nrounds`` rounds on the given data."""
        dtrain = xgb.DMatrix(x_train, label=y_train)
        self.gbdt = xgb.train(self.param, dtrain, self.nrounds)

    def predict(self, x):
        """Return the trained booster's predictions for ``x``."""
        return self.gbdt.predict(xgb.DMatrix(x))
def get_oof(clf):
    """Compute out-of-fold predictions for one base model.

    Reads the module-level ``x_train``, ``y_train``, ``x_test``, ``ntrain``,
    ``ntest``, ``NFOLDS`` and ``kf``.

    Args:
        clf: Object exposing ``train(x, y)`` and ``predict(x)``.

    Returns:
        A pair ``(oof_train, oof_test)`` of column vectors shaped
        ``(ntrain, 1)`` and ``(ntest, 1)``: the held-out prediction for each
        training row, and the test prediction averaged over the folds.
    """
    oof_train = np.zeros((ntrain,))
    oof_test_skf = np.empty((NFOLDS, ntest))

    # A legacy ``sklearn.cross_validation.KFold`` (or any plain iterable of
    # index pairs) is iterated directly; a ``sklearn.model_selection.KFold``
    # produces its index pairs through ``split`` instead.
    folds = kf.split(x_train) if hasattr(kf, 'split') else kf

    for i, (train_index, test_index) in enumerate(folds):
        clf.train(x_train[train_index], y_train[train_index])
        # ravel() so a model that answers with an (n, 1) column still fits
        # the one-dimensional buffers; 1-D predictions pass through as-is.
        oof_train[test_index] = np.ravel(clf.predict(x_train[test_index]))
        oof_test_skf[i, :] = np.ravel(clf.predict(x_test))

    # Average the per-fold test predictions into a single vector.
    oof_test = oof_test_skf.mean(axis=0)
    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)
# Settings for the first-level XGBoost model. 'nrounds' is not a booster
# parameter: XgbWrapper takes it out and uses it as the number of rounds.
xgb_params = dict(
    seed=0,
    colsample_bytree=0.7,
    silent=1,
    subsample=0.7,
    learning_rate=0.075,
    objective='reg:linear',
    max_depth=4,
    num_parallel_tree=1,
    min_child_weight=1,
    eval_metric='rmse',
    nrounds=500,
)

# Constructor keyword arguments for the scikit-learn base models.
rd_params = dict(alpha=10)
ls_params = dict(alpha=0.05)
gbm_params = dict()
# First-level models, all built with the same seed.
xg = XgbWrapper(params=xgb_params, seed=SEED)
rd = SklearnWrapper(Ridge, seed=SEED, params=rd_params)
ls = SklearnWrapper(Lasso, seed=SEED, params=ls_params)
gbm = SklearnWrapper(GradientBoostingRegressor, seed=SEED, params=gbm_params)

# Out-of-fold train predictions and fold-averaged test predictions per model.
xg_oof_train, xg_oof_test = get_oof(xg)
rd_oof_train, rd_oof_test = get_oof(rd)
ls_oof_train, ls_oof_test = get_oof(ls)
gbm_oof_train, gbm_oof_test = get_oof(gbm)

# RMSE of every model's out-of-fold predictions against the training target.
for _fmt, _oof in (
    ("XG-CV: {}", xg_oof_train),
    ("RD-CV: {}", rd_oof_train),
    ("LS-CV: {}", ls_oof_train),
    ("GB-CV: {}", gbm_oof_train),
):
    print(_fmt.format(sqrt(mean_squared_error(y_train, _oof))))

# The base-model predictions, one column per model, become the features of
# the second-level model.
x_train = np.concatenate(
    [xg_oof_train, rd_oof_train, ls_oof_train, gbm_oof_train], axis=1)
x_test = np.concatenate(
    [xg_oof_test, rd_oof_test, ls_oof_test, gbm_oof_test], axis=1)
print("{},{}".format(x_train.shape, x_test.shape))

dtrain = xgb.DMatrix(x_train, label=y_train)
dtest = xgb.DMatrix(x_test)
# Settings for the second-level (stacking) XGBoost model, which is trained
# on the base models' predictions held in ``dtrain``.
xgb_params = {
    'seed': 0,
    'colsample_bytree': 0.8,
    'silent': 1,
    'subsample': 0.6,
    'learning_rate': 0.01,
    'objective': 'reg:linear',
    'max_depth': 1,
    'num_parallel_tree': 1,
    'min_child_weight': 1,
    'eval_metric': 'rmse',
}

# 4-fold cross-validation with early stopping to choose the round count.
res = xgb.cv(xgb_params, dtrain, num_boost_round=1000, nfold=4, seed=SEED, stratified=False,
             early_stopping_rounds=25, verbose_eval=10, show_stdv=True)

# NOTE(review): with early stopping the result frame is presumably already
# cut at the best round, so the ``- 1`` may drop one round -- confirm before
# changing it.
best_nrounds = res.shape[0] - 1

# Read the held-out metric by column name: the position of the test columns
# in the cv result is not the same in every xgboost release, so positional
# indexing can end up reporting the training score instead.
cv_mean = res['test-rmse-mean'].iloc[-1]
cv_std = res['test-rmse-std'].iloc[-1]
print('Ensemble-CV: {0} + {1}'.format(cv_mean, cv_std))

# Final second-level model, trained on all rows.
gbdt = xgb.train(xgb_params, dtrain, best_nrounds)
output_file = 'oof_stacking'
final_file = '0110_' + output_file + '.csv'

# Start from the submission template so the remaining columns are kept.
submission = pd.read_csv(SUBMISSION_FILE)

# Address the target column by name for both the write and the read (the
# positional ``iloc[:, 1]`` write only agreed with the named read when
# 'SalePrice' happened to be the second column). The cast to float64 keeps
# the exponentiation in double precision whatever dtype predict() returns.
submission['SalePrice'] = np.asarray(gbdt.predict(dtest), dtype=np.float64)

# The model output is exponentiated before saving -- presumably the target
# was log-transformed upstream; verify against the training code.
saleprice = np.exp(submission['SalePrice'])
submission['SalePrice'] = saleprice

# index=False: do not write the row index as an extra column.
submission.to_csv(path + final_file, index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment