Skip to content

Instantly share code, notes, and snippets.

@thejevans
Created March 10, 2018 21:58
Show Gist options
  • Save thejevans/dea252e5fca130651032dcdfa01c2029 to your computer and use it in GitHub Desktop.
Save thejevans/dea252e5fca130651032dcdfa01c2029 to your computer and use it in GitHub Desktop.
"""
John Evans
PHYS476
Homework 2
Problem 1
3/10/2018
"""
import sys
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
# Seed NumPy's global random generator so the random row sampling below
# is repeatable from run to run.
seed = 7
np.random.seed(seed)

# Load the spreadsheet whose path is the first command-line argument.
data = pd.read_excel(sys.argv[1])

# One-hot encode the categorical columns. The dummy columns generated from
# 'class' (their names start with 'class') are used as the targets below.
# NOTE(review): if 'class' is derived from Genotype/Treatment/Behavior,
# keeping their dummies as inputs would leak the label -- confirm.
data = pd.get_dummies(
    data,
    columns=['Genotype', 'Treatment', 'Behavior', 'class'],
)

# Drop the identifier column and the other listed columns, then discard
# every row that still contains a missing value.
data = data.drop(columns=[
    'MouseID',
    'BAD_N',
    'BCL2_N',
    'pS6_N',
    'pCFOS_N',
    'SYP_N',
    'H3AcK18_N',
    'EGR1_N',
    'H3MeK4_N',
])
data = data.dropna()

# Randomly pick 90% of the rows for training; the rows not picked become
# the test set.
train = data.sample(frac=0.9, axis=0)
test = data.drop(index=train.index)

# Names of the one-hot target columns produced from 'class'.
output_cols = [name for name in data.columns if name.startswith('class')]

# The full frame is no longer needed once the two subsets exist.
del data
def split_sample(sample, cols):
    """Split a DataFrame into separate input and output arrays.

    Args:
        sample: DataFrame containing both the input and the output columns.
            It is left unmodified.
        cols: List of column labels in ``sample`` to use as outputs.

    Returns:
        A tuple ``(inputs, outputs, n_inputs, n_outputs)`` where ``inputs``
        holds the values of every column not named in ``cols``, ``outputs``
        holds the values of the ``cols`` columns, and the two integers are
        the number of input and output columns respectively.

    Raises:
        KeyError: If a label in ``cols`` is not a column of ``sample``.
    """
    outputs = sample[cols].values
    # Build a new frame instead of deleting columns in place, so the
    # caller's DataFrame keeps its output columns after this call.
    features = sample.drop(columns=cols)
    inputs = features.values
    return inputs, outputs, len(features.columns), len(cols)
# Separate each subset into input and output arrays. The trailing column
# counts returned by split_sample are collected but not used afterwards.
train_in, train_out, *_ = split_sample(train, output_cols)
test_in, test_out, *dims = split_sample(test, output_cols)

# The DataFrames are no longer needed once the arrays are extracted.
del train, test

# Build a 15-nearest-neighbours classifier and fit it on the training
# arrays (fit returns the estimator itself, so the call can be chained).
neigh = KNeighborsClassifier(n_neighbors=15).fit(train_in, train_out)

# Score the fitted model on the held-out rows and print the result.
scores = neigh.score(test_in, test_out)
print(scores)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment