Skip to content

Instantly share code, notes, and snippets.

View douglaspsteen's full-sized avatar

Doug Steen douglaspsteen

View GitHub Profile
import numpy as np
import pandas as pd
# Visualization
import matplotlib.pyplot as plt
# Data processing, modeling, and model evaluation
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score, plot_confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
# NOTE(review): plot_confusion_matrix was removed in scikit-learn >= 1.2;
# on newer versions use sklearn.metrics.ConfusionMatrixDisplay.from_estimator.
# Load and check data (display() assumes a Jupyter/IPython session)
df = pd.read_csv('data.csv')
display(df.head())
# Drop first column of id information
df = df.drop('Unnamed: 0', axis=1)
# Re-cast target variable as binary: 1 = seizure, 0 = everything else.
# Vectorized replacement for the original row-by-row df.at loop (whose
# indentation was also broken in the paste) — same result, one C-level pass.
df['y'] = (df['y'] == 1).astype(int)
# Fill null values with each column mean
df = df.fillna(df.mean())
# Define feature matrix X and binary target y
X = df.drop('y', axis=1)
y = df.y
# Visualize class distribution
# Standardize features to zero mean / unit variance (PCA is scale-sensitive)
scaler = StandardScaler()
X = scaler.fit_transform(X)
# PCA retaining all 178 components so the full variance curve can be inspected
pca = PCA(n_components=178)
pca.fit(X)
X_pca = pca.transform(X)
# Cumulative explained variance across all PCs.
# np.cumsum replaces the original manual accumulation loop (whose
# indentation was broken in the paste); .tolist() keeps the original
# list type for any downstream code.
cum_exp_var = np.cumsum(pca.explained_variance_ratio_).tolist()
# Plot cumulative explained variance for all PCs
# Sweep the number of retained principal components (1..20) and record
# train/test F1 for a default KNN classifier at each setting.
# NOTE(review): the source paste was truncated mid-loop (the
# train_test_split call had no closing paren and the loop had no body);
# the body below reconstructs the evident intent from the train_f1/test_f1
# accumulators — confirm against the original gist.
train_f1 = []
test_f1 = []
for n_pc in range(1, 21):
    X_sub = X_pca[:, :n_pc]
    # Train-test split (fixed seed, matching the final model below)
    X_train, X_test, y_train, y_test = train_test_split(X_sub, y,
                                                        test_size=0.25,
                                                        random_state=42)
    knn = KNeighborsClassifier()
    knn.fit(X_train, y_train)
    train_f1.append(f1_score(y_train, knn.predict(X_train)))
    test_f1.append(f1_score(y_test, knn.predict(X_test)))
# Final model: keep only the first 4 principal components
X = X_pca[:, 0:4]
# Train-test-split
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.25,
                                                    random_state=42)
# Feature scaling: fit on the training set only, then apply the SAME
# transform to the test set. (The original fit_transform'ed X_train and
# never scaled X_test — evaluating an unscaled test set against a model
# trained on scaled data is a bug.)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Fit the classifier. NOTE(review): `knn` was referenced below but never
# defined in the paste; a default KNeighborsClassifier is assumed — confirm
# hyperparameters against the original gist.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
# Confusion matrix normalized over true labels (rows sum to 1)
plot_confusion_matrix(knn, X_test, y_test, display_labels=['No Seizure',
                                                           'Seizure'],
                      normalize='true', cmap='Blues');