This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from matplotlib import rcParams | |
# Hide the top and right axes spines on every figure created from here on.
for spine in ('top', 'right'):
    rcParams[f'axes.spines.{spine}'] = False

# The white-wine quality file is semicolon-delimited.
df = pd.read_csv('winequality-white.csv', sep=';')
df.head()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split BEFORE oversampling. Running SMOTE on the full data set and splitting
# afterwards lets synthetic points interpolated from test rows end up in the
# training set (and vice versa), which inflates the reported scores.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Oversample the minority class in the training portion only; the test set
# keeps the original, untouched class distribution.
# (`SMOTE` is imported in the resampling cell of this file.)
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train, y_train)

# Train on the balanced training data and predict on real observations.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
preds = model.predict(X_test)

# Evaluate
print(f'Accuracy = {accuracy_score(y_test, preds):.2f}\nRecall = {recall_score(y_test, preds):.2f}\n')
cm = confusion_matrix(y_test, preds)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from imblearn.over_sampling import SMOTE | |
# Oversample with SMOTE using a fixed seed so the resampling is reproducible.
sm = SMOTE(random_state=42)
X_sm, y_sm = sm.fit_resample(X, y)

# Report how many rows the resampling added. The two-line message is built
# with an explicit newline so no stray characters end up inside the string.
print(f'Shape of X before SMOTE: {X.shape}\nShape of X after SMOTE: {X_sm.shape}')

print('\nBalance of positive and negative classes (%):')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.metrics import accuracy_score, recall_score, confusion_matrix | |
# Fit a random forest with a fixed seed on the training split, then predict
# the held-out split.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
preds = model.predict(X_test)

# Score the predictions against the true test labels and report both metrics.
accuracy = accuracy_score(y_test, preds)
recall = recall_score(y_test, preds)
print(f'Accuracy = {accuracy:.2f}\nRecall = {recall:.2f}\n')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split | |
# Target is the 'TARGET' column; every other column of `merged` is a feature.
y = merged['TARGET']
X = merged.drop(labels='TARGET', axis=1)

# Hold out 25% of the rows for testing, with a fixed seed for reproducibility.
split = train_test_split(X, y, test_size=0.25, random_state=42)
X_train, X_test, y_train, y_test = split
print(f'''% Positive class in Train = {np.round(y_train.value_counts(normalize=True)[1] * 100, 2)} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.preprocessing import MinMaxScaler | |
# Scale only columns that have values greater than 1.
# The columns are picked from `merged` itself, the frame that is actually
# scaled, so the selection cannot drift from the data being transformed.
# NOTE(review): assumes every column of `merged` supports `.max() > 1`
# (numeric or boolean) - confirm against the cell that builds `merged`.
to_scale = [col for col in merged.columns if merged[col].max() > 1]

mms = MinMaxScaler()
scaled = mms.fit_transform(merged[to_scale])

# fit_transform returns a bare array; rebuild the frame with merged's own
# index so that assigning these columns back aligns row-for-row even when
# `merged` does not carry a default 0..n-1 index.
scaled = pd.DataFrame(scaled, columns=to_scale, index=merged.index)
# Replace original columns with scaled ones | |
for col in scaled: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Binary text columns become 0/1 integers:
#   GENDER  -> 0 for 'M', 1 for anything else
#   CAR     -> 1 for 'Y', 0 for anything else
#   REALITY -> 1 for 'Y', 0 for anything else
df['GENDER'] = [int(value != 'M') for value in df['GENDER']]
df['CAR'] = [int(value == 'Y') for value in df['CAR']]
df['REALITY'] = [int(value == 'Y') for value in df['REALITY']]

# One-hot encode each multi-category column, dropping the first level of each
# so the resulting indicator columns are not redundant.
dummy_income_type, dummy_edu_type, dummy_family_type, dummy_house_type = (
    pd.get_dummies(df[column], prefix=prefix, drop_first=True)
    for column, prefix in (
        ('INCOME_TYPE', 'INC_TYPE'),
        ('EDUCATION_TYPE', 'EDU_TYPE'),
        ('FAMILY_TYPE', 'FAM_TYPE'),
        ('HOUSE_TYPE', 'HOUSE_TYPE'),
    )
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bar chart of how many rows fall into each TARGET class.
class_counts = df['TARGET'].value_counts()
ax = class_counts.plot(kind='bar', figsize=(10, 6), fontsize=13, color='#087E8B')
ax.set_title('Credit card fraud (0 = normal, 1 = fraud)', size=20, pad=30)
ax.set_ylabel('Number of transactions', fontsize=14)

# Write each bar's count just above it (offsets are in data coordinates).
for bar in ax.patches:
    height = bar.get_height()
    ax.text(bar.get_x() + 0.19, height + 700, str(round(height, 2)), fontsize=15)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# Load the credit data set (default comma delimiter) and preview the first rows.
credit_csv = 'credit_dataset.csv'
df = pd.read_csv(credit_csv)
df.head()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
plt.figure() | |
ax, fig = joyplot( | |
data=sydney[['MinTemp', 'MaxTemp', 'Month']], | |
by='Month', | |
column=['MinTemp', 'MaxTemp'], | |
color=['#686de0', '#eb4d4b'], | |
legend=True, | |
alpha=0.85, | |
figsize=(12, 8) |