Skip to content

Instantly share code, notes, and snippets.

@miki998
Last active March 20, 2020 22:17
Show Gist options
  • Save miki998/de9de8228af7c66208f9e4e12c6f93b9 to your computer and use it in GitHub Desktop.
Save miki998/de9de8228af7c66208f9e4e12c6f93b9 to your computer and use it in GitHub Desktop.
#imported libs
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
import sys
# Load the training table, separate the target column from the features,
# and hold out 30% of the rows for the final scoring step.
train = pd.read_csv("train.csv")
y = train['DEFCON_Level']
# 'ID' is dropped alongside the target so it is not used as a feature.
X = train.drop(columns=['DEFCON_Level', 'ID'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
# For classification
# Random Search
# Randomized hyper-parameter search over a scaler + XGBoost pipeline.
xgb_pipeline = Pipeline([('scaler', StandardScaler()), ('classifier', XGBClassifier())])
# Hyper-parameters of a Pipeline step must be addressed as
# '<step name>__<parameter>'. Un-prefixed keys such as 'gamma' are not
# parameters of the Pipeline itself and make the search fail with an
# "Invalid parameter" error, so every key targets the 'classifier' step.
params = {
    'classifier__min_child_weight': [1, 5, 10],
    'classifier__gamma': [0.5, 1, 1.5, 2, 5],
    'classifier__subsample': [0.6, 0.8, 1.0],
    'classifier__colsample_bytree': [0.6, 0.8, 1.0],
    'classifier__max_depth': [3, 4, 5],
}
# The grid above has 3 * 5 * 3 * 3 * 3 = 405 combinations; n_iter=100 samples
# a subset of them. random_state makes the sampled candidates reproducible.
random_search = RandomizedSearchCV(
    xgb_pipeline,
    param_distributions=params,
    n_iter=100,
    scoring='f1_weighted',
    n_jobs=4,
    verbose=3,
    random_state=1001,
)
random_search.fit(X_train, y_train)
# OR
# Grid Search
# Exhaustive search over every combination in the grid below, using a fresh
# scaler + XGBoost pipeline.
xgb_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', XGBClassifier()),
])
# Keys use the '<step name>__<parameter>' form so they reach the
# 'classifier' step of the pipeline.
# 2 * 4 * 3 * 8 * 1 * 1 = 192 candidate combinations.
gbm_param_grid = {
    'classifier__learning_rate': np.array([0.01, 0.001]),
    'classifier__n_estimators': np.array([100, 200, 300, 400]),
    'classifier__subsample': np.array([0.7, 0.8, 0.9]),
    'classifier__max_depth': np.array([10, 11, 12, 13, 14, 15, 16, 17]),
    'classifier__lambda': np.array([1]),
    'classifier__gamma': np.array([0]),
    # Currently not searched:
    # 'classifier__colsample_bytree': np.arange(0, 1.1, .2)
}
grid_search = GridSearchCV(
    xgb_pipeline,
    gbm_param_grid,
    scoring='f1_weighted',
    n_jobs=-1,
    verbose=10,
)
grid_search.fit(X_train, y_train)
# Report both searches side by side: first the winning parameter sets,
# then each search's score on the held-out split (X_test / y_test).
searches = (random_search, grid_search)
# Best parameters found by each search
for search in searches:
    print(search.best_params_)
# Score of each refit best estimator on the held-out split
for search in searches:
    print(search.score(X_test, y_test))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment