Last active
March 20, 2020 22:17
-
-
Save miki998/de9de8228af7c66208f9e4e12c6f93b9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#imported libs | |
import numpy as np | |
import pandas as pd | |
from xgboost import XGBClassifier | |
import matplotlib.pyplot as plt | |
from scipy import stats | |
import seaborn as sns | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.pipeline import Pipeline | |
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV | |
import sys | |
# Load the training table from the working directory.
train = pd.read_csv("train.csv")

# 'DEFCON_Level' is the label to predict; it and the 'ID' column are
# excluded from the feature matrix.
y = train['DEFCON_Level']
X = train.drop(columns=['DEFCON_Level', 'ID'])

# Keep 30% of the rows aside for the final scoring step; the fixed seed
# makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
#For classification
#Random Search
# Standardize the features, then fit an XGBoost classifier on them.
xgb_pipeline = Pipeline([('scaler', StandardScaler()), ('classifier', XGBClassifier())])

# The estimator handed to the search is a Pipeline, so each hyper-parameter
# has to be addressed as '<step name>__<parameter>'. Un-prefixed keys such as
# 'gamma' are rejected by Pipeline.set_params with
# "ValueError: Invalid parameter ..." as soon as the first candidate is built.
params = {
    'classifier__min_child_weight': [1, 5, 10],
    'classifier__gamma': [0.5, 1, 1.5, 2, 5],
    'classifier__subsample': [0.6, 0.8, 1.0],
    'classifier__colsample_bytree': [0.6, 0.8, 1.0],
    'classifier__max_depth': [3, 4, 5],
}

# Sample 100 of the 3 * 5 * 3 * 3 * 3 = 405 possible combinations and rank
# them by weighted F1; random_state fixes which candidates are drawn.
random_search = RandomizedSearchCV(
    xgb_pipeline,
    param_distributions=params,
    n_iter=100,
    scoring='f1_weighted',
    n_jobs=4,
    verbose=3,
    random_state=1001,
)
random_search.fit(X_train, y_train)
#OR
#Grid Search
# A fresh scaler + classifier pipeline, so this search does not share an
# estimator object with the randomized search.
xgb_pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('classifier', XGBClassifier()),
    ]
)

# Candidate values for the 'classifier' step; every combination is evaluated
# (2 * 4 * 3 * 8 * 1 * 1 = 192 candidates).
gbm_param_grid = {
    'classifier__learning_rate': np.array([0.01, 0.001]),
    'classifier__n_estimators': np.array([100, 200, 300, 400]),
    'classifier__subsample': np.array([0.7, 0.8, 0.9]),
    'classifier__max_depth': np.array([10, 11, 12, 13, 14, 15, 16, 17]),
    'classifier__lambda': np.array([1]),
    'classifier__gamma': np.array([0]),
    # Not searched (disabled candidate grid kept for reference):
    #'classifier__colsample_bytree': np.arange(0,1.1,.2)
}

# Rank candidates by weighted F1, using all available cores (n_jobs=-1).
grid_search = GridSearchCV(
    xgb_pipeline,
    gbm_param_grid,
    scoring='f1_weighted',
    n_jobs=-1,
    verbose=10,
)
grid_search.fit(X_train, y_train)
# Report the winning hyper-parameters of each search, randomized first.
for search in (random_search, grid_search):
    print(search.best_params_)

# Then report how each refitted best estimator scores on the held-out
# 30% split (using the search's own scoring, 'f1_weighted').
for search in (random_search, grid_search):
    print(search.score(X_test, y_test))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment