Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
accuracy_score,
precision_score,
recall_score,
confusion_matrix,
)
from sklearn.compose import ColumnTransformer, make_column_selector as selector
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, FunctionTransformer
import torch
import torch.nn as nn
import torch.nn.functional as F
from skorch import NeuralNetClassifier
# Our Custom Neural Net Classifier
class NN(nn.Module):
    """Feed-forward classifier with two hidden layers and a softmax output.

    The forward pass is ``Linear -> ReLU -> Dropout -> Linear -> ReLU ->
    Linear -> Softmax``, so the module returns class probabilities rather
    than raw logits.

    Args:
        input_dim: Number of input features per sample.
        first_layer_ns: Number of units in the first hidden layer.
        second_layer_ns: Number of units in the second hidden layer.
        output_dim: Number of output classes.
        dropout: Probability of zeroing an activation after the first
            hidden layer. Defaults to 0.5, the value that used to be
            hard-coded.
    """

    def __init__(
        self,
        input_dim: int = 7,
        first_layer_ns: int = 25,
        second_layer_ns: int = 25,
        output_dim: int = 3,
        dropout: float = 0.5,
    ) -> None:
        super().__init__()
        self.first_layer = nn.Linear(input_dim, first_layer_ns)
        self.dropout = nn.Dropout(dropout)
        self.second_layer = nn.Linear(first_layer_ns, second_layer_ns)
        self.output = nn.Linear(second_layer_ns, output_dim)
        # Normalize over the last axis so each sample's scores sum to 1.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, X: torch.Tensor, **kwargs) -> torch.Tensor:
        """Return per-class probabilities for a batch of samples.

        Args:
            X: Input tensor whose last dimension has ``input_dim`` entries.
            **kwargs: Accepted for call compatibility and ignored.

        Returns:
            Tensor with ``output_dim`` entries along the last dimension,
            each row summing to 1.
        """
        hidden = F.relu(self.first_layer(X))
        # Dropout is applied only after the first hidden layer.
        hidden = self.dropout(hidden)
        hidden = F.relu(self.second_layer(hidden))
        return self.softmax(self.output(hidden))
# Dataset location and the roles of its columns.
PENGUINS_DATA = (
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)
TARGET = "species"
FEATURES = [
    "bill_length_mm",
    "bill_depth_mm",
    "flipper_length_mm",
    "body_mass_g",
    "sex",
    "island",
]

# Load the CSV straight from the URL into a DataFrame.
df = pd.read_csv(PENGUINS_DATA)
# Pre-processing: drop rows in which every feature is missing, then store all
# object-typed columns as pandas categoricals.
df.dropna(axis="index", how="all", subset=FEATURES, inplace=True)
object_columns = df.select_dtypes(include=["object"]).columns
df[object_columns] = df[object_columns].astype("category")

# Split data into features and target, then into train and test sets.
X = df[FEATURES]
y = df[TARGET].astype("category")
y_ = y.cat.codes  # integer code for each species name
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_,
    train_size=0.7,
    stratify=y,
    random_state=42,
)
# Initialize the skorch classifier that wraps our NN module.
net = NeuralNetClassifier(
    NN,
    max_epochs=200,
    lr=1e-1,
    iterator_train__shuffle=True,
)

# Numeric transformation: fill gaps with the column median, then min-max scale.
numeric_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", MinMaxScaler()),
    ]
)

# Categorical transformation: fill gaps with the most frequent value, then
# one-hot encode, dropping the first level of each column.
categorical_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(drop="first")),
    ]
)

# Combine transformations: category-typed columns go to the categorical
# branch, every other column to the numeric branch.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, selector(dtype_exclude="category")),
        ("cat", categorical_transformer, selector(dtype_include="category")),
    ],
    # OneHotEncoder produces a sparse matrix by default. Always stack the
    # branches into a dense array so the float32 cast in the next pipeline
    # step receives an ndarray regardless of how dense the data happens to be.
    sparse_threshold=0,
)

# A pipeline with preprocessing, dtype cast and model.
pipe = Pipeline(
    [
        ("preprocess", preprocessor),
        # Cast the preprocessed features to float32 before they reach the net.
        ("transform", FunctionTransformer(np.float32)),
        ("net", net),
    ]
)
# Seed torch so repeated runs are reproducible, in line with the fixed
# random_state used for the train/test split.
torch.manual_seed(42)

# Preprocess data and train our model. Labels are passed as an int64 NumPy
# array: building a tensor directly from the pandas Series relies on
# label-based ``series[0]`` lookups, which is fragile once train_test_split
# has shuffled the index.
_ = pipe.fit(X_train, y_train.to_numpy(dtype=np.int64))
yhat = pipe.predict(X_test)

# Values reused by every report below.
y_true = y_test.values
species_names = y.cat.categories
n_total = y_test.shape[0]
n_correct = (y_true == yhat).sum()

print("Model Evaluation:")
print(
    f"{n_correct} out of {n_total} correct predictions"
    f" | Test Accuracy = {n_correct / n_total:.2%}\n"
)
print(
    f"Accuracy: {accuracy_score(y_true, yhat):.2%}"
    f" | Recall: {recall_score(y_true, yhat, average='macro'):.2%} "
    f"| Precision {precision_score(y_true, yhat, average='macro'):.2%}"
)

print("\nConfusion Matrix")
print(
    pd.DataFrame(
        # One row/column per species code, in category order.
        confusion_matrix(
            y_true, yhat, labels=list(range(len(species_names)))
        ),
        columns=species_names,
        index=species_names,
    )
)

# Per-sample listing: map integer codes back to species names.
print("\nCorrect | Actual Species | Predicted Species")
for actual_species, predicted_species in zip(
    species_names[y_true], species_names[yhat]
):
    mark = '✔️' if actual_species == predicted_species else '❌'
    print(f"{mark:<10} {actual_species:>5} {predicted_species:>16}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment