Skip to content

Instantly share code, notes, and snippets.

@amankharwal
Created January 16, 2021 11:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amankharwal/9b701614a3d6940cb141251e1a3ad0a1 to your computer and use it in GitHub Desktop.
Save amankharwal/9b701614a3d6940cb141251e1a3ad0a1 to your computer and use it in GitHub Desktop.
import numpy as np # linear algebra
import pandas as pd # data processing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import matplotlib.style as style
from matplotlib import pyplot
from matplotlib.ticker import ScalarFormatter
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
import lightgbm as lgb
from lightgbm import LGBMClassifier
import random
pd.set_option("display.max_rows",None)
pd.set_option("display.max_columns",None)
# --- Load the data and build the train/validation split ---------------------
# Both CSVs are read, but only train_data is used by the statements visible in
# this script; test_data is loaded and left untouched here.
train_data = pd.read_csv("train.csv.zip")
test_data = pd.read_csv("test.csv.zip")
train_data.head()  # only displays something when run as the last line of a notebook cell

# One-hot encode the categorical "color" column into color_* indicator columns.
train_data = pd.get_dummies(train_data, columns=["color"], prefix=["color"])

# Encode the target labels as integers: Ghost -> 0, Ghoul -> 1, Goblin -> 2.
map_type = {"Ghoul": 1, "Goblin": 2, "Ghost": 0}
# Assign the column directly rather than through `.loc[:, "type"] = ...`:
# on pandas >= 2.0 the `.loc[:, col]` form writes into the existing column in
# place, so it keeps the original object dtype and the classifier then refuses
# the target as an unknown label type. Direct assignment replaces the column,
# giving it the integer dtype produced by `map`.
train_data["type"] = train_data["type"].map(map_type)

# Use the "id" column as the index so it is not treated as a feature.
train_data = train_data.set_index('id')

# Features are every remaining column except the target.
X = train_data.drop(["type"], axis=1)
y = train_data["type"]

# Hold out 40% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)
# Candidate values per LGBMClassifier hyper-parameter, keyed by parameter name.
# NOTE(review): nothing in the visible part of this script consumes this grid;
# presumably it was meant for the imported GridSearchCV - confirm before relying on it.
param_grid = dict(
    class_weight=[None, "balanced"],
    boosting_type=["gbdt", "goss", "dart"],
    num_leaves=[*range(30, 150)],  # every integer from 30 to 149 inclusive
    learning_rate=[0.01, 0.1, 0.5],
    subsample_for_bin=[20000, 50000, 100000, 120000, 150000],
    min_child_samples=[20, 50, 100, 200, 500],
    colsample_bytree=[0.6, 0.8, 1],
    max_depth=[5, 10, 50, 100],
)
# Baseline model: LGBMClassifier with its default settings, fitted on the
# training split. It is not used again in the visible part of the script.
lgbm = LGBMClassifier()
lgbm.fit(X_train, y_train)

# Second model with explicitly chosen hyper-parameters, fitted on the same
# training split.
lgbm_tuned = LGBMClassifier(
    n_estimators=40,
    learning_rate=0.01,
    num_leaves=30,
    max_depth=10,
    min_child_samples=20,
    colsample_bytree=0.6,
    subsample_for_bin=20000,
    boosting_type='gbdt',
    class_weight=None,
)
lgbm_tuned.fit(X_train, y_train)

# Predict the held-out rows and report the accuracy rounded to three decimals.
y_test_pred = lgbm_tuned.predict(X_test)
score = round(accuracy_score(y_test, y_test_pred), 3)
print(score)
# Plot styling: seaborn "talk" context first, then the matplotlib
# "fivethirtyeight" style sheet (kept in this order).
sns.set_context("talk")
style.use('fivethirtyeight')

# One row per feature column of X, paired with the gain-based importance
# reported by the fitted booster behind lgbm_tuned.
fi = pd.DataFrame(
    {
        'features': X.columns.values.tolist(),
        'importance': lgbm_tuned.booster_.feature_importance(importance_type='gain'),
    }
)

# Horizontal bar chart of the features, sorted by ascending importance.
sns.barplot(
    data=fi.sort_values(by='importance', ascending=True),
    x='importance',
    y='features',
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment