Skip to content

Instantly share code, notes, and snippets.

@dev-tyta
Last active November 9, 2022 21:49
Show Gist options
  • Save dev-tyta/b7349ac0fe4096b2e339e991727887d1 to your computer and use it in GitHub Desktop.
Save dev-tyta/b7349ac0fe4096b2e339e991727887d1 to your computer and use it in GitHub Desktop.
# importing python libraries
import pandas as pd
import pickle as pkl
import lightgbm as lgb
from lightgbm.sklearn import LGBMClassifier
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import RobustScaler, OrdinalEncoder
from sklearn.metrics import f1_score
import warnings
warnings.filterwarnings("ignore")
# loading diabetes data into variable data
data = pd.read_csv("./dataset/diabetes.csv")
# wrangling and feature extraction.
data.chol_hdl_ratio = round(data.cholesterol / data.hdl_chol, 2)
data.waist_hip_ratio = round(data.waist / data.hip, 2)
# correcting comma separated number to decimal separated number.
data.bmi = pd.to_numeric(data.bmi.str.replace(",", "."))
print(data.head())
# encoding columns with object values using Ordinal Encoding
s = (data.dtypes == "object")
obj_col = s[s].index
print("Ordinal Encoding")
orde = OrdinalEncoder()
data[obj_col] = orde.fit_transform(data[obj_col])
print("Splitting features and target.")
# dropping off target and unnecessary columns (diabetes and patient number columns)
X = data.drop(["patient_number", "diabetes"], axis=1)
y = data.diabetes
print("Robust Scaling on X, y.")
# scaling data using RobustScaler
scale = RobustScaler()
scaled_X = scale.fit_transform(X, y)
print("Stratified Split.")
# StratifiedShuffleSplit on Data
split = StratifiedShuffleSplit(n_splits=4, random_state=42)
for train_index, test_index in split.split(scaled_X, y):
X_train, X_test = scaled_X[train_index], scaled_X[test_index]
y_train, y_test = y[train_index], y[test_index]
# Loading LightGBM classifier to be used for training model
lgbm = LGBMClassifier(n_estimators=200, max_depth=-2, random_state=42)
lgbm.fit(X_train, y_train)
pred = lgbm.predict(X_test)
f1 = f1_score(pred, y_test)
print(f"F1 Score for LightGBM: {f1}.")
# Using pickle to save model
lightgbm = open("../deployment/lightgbm.pickle", "wb")
pkl.dump(lgbm, lightgbm)
lightgbm.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment