Skip to content

Instantly share code, notes, and snippets.

@thejevans
Last active March 10, 2018 21:44
Show Gist options
  • Save thejevans/2cec098e295e598915ca39242c1ec8f5 to your computer and use it in GitHub Desktop.
Save thejevans/2cec098e295e598915ca39242c1ec8f5 to your computer and use it in GitHub Desktop.
"""
John Evans
PHYS476
Homework 2
Problem 2
3/10/2018
"""
import sys
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
import pandas as pd
import numpy as np
# Fix the seed of NumPy's global random generator so the run is repeatable;
# the sampling below passes no random_state of its own.
seed = 7
np.random.seed(seed)

# Load the CSV named by the first command-line argument. The file has no
# header row, so the column names are supplied here. '?' marks a missing
# value; declaring it through na_values makes pandas parse the affected
# columns as numbers with NaN instead of leaving them as strings.
header_names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num']
data = pd.read_csv(sys.argv[1], header=None, names=header_names,
                   na_values='?')

# Munge: discard three columns outright, and only then drop every row that
# still contains a missing value (the order matters: rows are judged on the
# remaining columns only).
data = data.drop(columns=['slope', 'ca', 'thal'])
data = data.dropna()

# Build training and testing sets: a random 9/10 of the rows for training,
# every row not selected for training goes to the test set.
train = data.sample(frac=9 / 10, axis=0)
test = data.drop(train.index)

# Column(s) that hold the value to predict; everything else is an input.
output_cols = ['num']

# The full frame is no longer needed once both subsets exist.
del data
def split_sample(sample, cols):
    """Split a DataFrame into an input matrix and an output vector.

    Args:
        sample: DataFrame holding both input and output columns. It is left
            unmodified.
        cols: list of column names to treat as outputs.

    Returns:
        A 4-tuple ``(inputs, outputs, n_inputs, n_outputs)``:
        ``inputs`` is the array of all columns not named in ``cols``;
        ``outputs`` is the values of the first column named in ``cols``
        (only that column is returned, even if ``cols`` names several);
        ``n_inputs`` is the number of input columns;
        ``n_outputs`` is ``len(cols)``.
    """
    outputs = sample[cols].values
    # drop() builds a new frame, so the caller's DataFrame keeps its output
    # columns instead of having them deleted in place.
    input_frame = sample.drop(columns=cols)
    inputs = input_frame.values
    # outputs has one column per entry in cols; .T[0] selects the first one
    # as a flat vector.
    return inputs, outputs.T[0], len(input_frame.columns), len(cols)
# Separate each subset into inputs and outputs. split_sample returns four
# values; the trailing two (the column counts) are collected into `_` for
# the training set and into `dims` for the test set.
train_in, train_out, *_ = split_sample(sample=train, cols=output_cols)
test_in, test_out, *dims = split_sample(sample=test, cols=output_cols)

# Only the extracted arrays are used from here on.
del train, test
# Build models: one decision tree, one random forest and one Gaussian naive
# Bayes classifier, all evaluated the same way further down.
# NOTE: `presort` is deliberately not passed to DecisionTreeClassifier. The
# parameter was deprecated and then removed from scikit-learn, so passing it
# raises TypeError on current releases; on older releases False was already
# the default, so omitting it does not change the model.
classifiers = [
    DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=20,
                           min_samples_split=2, min_samples_leaf=1,
                           min_weight_fraction_leaf=0.0, max_features=4,
                           random_state=None, max_leaf_nodes=None,
                           class_weight=None),
    RandomForestClassifier(n_estimators=10, criterion='gini', max_depth=50,
                           min_samples_split=2, min_samples_leaf=1,
                           min_weight_fraction_leaf=0.0, max_features=1,
                           max_leaf_nodes=None, bootstrap=True,
                           oob_score=False, n_jobs=1, random_state=None,
                           verbose=0, warm_start=False, class_weight=None),
    GaussianNB(priors=None),
]
# Fit every model on the training arrays, then print the value returned by
# its score() method on the held-out test arrays, one line per model in
# list order.
for model in classifiers:
    model.fit(train_in, train_out)
    test_score = model.score(test_in, test_out)
    print(test_score)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment