Skip to content

Instantly share code, notes, and snippets.

@benjamincohen1
Created April 21, 2017 03:04
Show Gist options
  • Save benjamincohen1/a47b881369d588fd2eb404c52e9bb88b to your computer and use it in GitHub Desktop.
Save benjamincohen1/a47b881369d588fd2eb404c52e9bb88b to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 20 20:53:05 2017
@author: ben
"""
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 13 21:51:23 2017
@author: ben
"""
import csv
import numpy
import matplotlib.pyplot as plt
# Load every row of the training CSV up front. Reading inside a `with` block
# guarantees the file handle is closed (the handle was previously left open
# for the lifetime of the process). `myreader` remains an iterable of
# per-row dicts keyed by the CSV header, so downstream loops are unaffected.
with open('train.csv') as myfile:
    myreader = list(csv.DictReader(myfile))

# Fallback age substituted by get_age() when a row's 'Age' field is empty.
# NOTE(review): the name suggests this is the data set's mean age -- confirm.
avg_age = 29.5
def get_age(line, default=None):
    """Return the age recorded in one CSV row as a float.

    Args:
        line: Mapping for a single row; must contain the key 'Age'.
        default: Value returned when the age is missing. When omitted
            (None), the module-level ``avg_age`` is used.

    Returns:
        ``float(line['Age'])`` when an age is present, otherwise the
        fallback value described above.

    Raises:
        KeyError: If the row has no 'Age' key.
        ValueError: If the age is present but is not a valid number.
    """
    raw_age = line['Age']
    # csv.DictReader pads short rows with None, and hand-edited files may
    # contain whitespace-only cells; treat both like the empty string.
    is_blank = raw_age is None or (
        hasattr(raw_age, 'strip') and not raw_age.strip()
    )
    if is_blank:
        return avg_age if default is None else default
    return float(raw_age)
def get_feature_vector(datapoint):
    """Build the numeric feature list for one CSV row.

    Args:
        datapoint: Mapping for a single row; must contain the keys
            'Age', 'Sex' and 'Pclass'.

    Returns:
        A three-element list ``[age, sex_flag, pclass]`` where ``age`` is
        the result of ``get_age(datapoint)``, ``sex_flag`` is 0 when 'Sex'
        is exactly 'female' and 1 for any other value, and ``pclass`` is
        'Pclass' converted to int.

    Raises:
        KeyError: If a required key is missing.
        ValueError: If 'Pclass' (or a non-blank 'Age') is not numeric.
    """
    age = get_age(datapoint)
    # Read the class from the argument. The earlier code indexed the
    # module-level loop variable `line`, which only gave the right answer
    # when the caller happened to pass that very same row.
    pclass = int(datapoint['Pclass'])
    sex_flag = 0 if datapoint['Sex'] == 'female' else 1
    return [age, sex_flag, pclass]
# Parallel lists built in one pass over the rows: features_list[k] is the
# feature vector for row k and true_values[k] is that row's survival label.
features_list, true_values = [], []
for line in myreader:
    features = get_feature_vector(line)
    features_list.append(features)
    # A row is labelled True only when 'Survived' is exactly the string '1';
    # every other value is labelled False.
    survived = (line['Survived'] == '1')
    true_values.append(survived)
# Alternative model, currently disabled:
#from sklearn.linear_model import LogisticRegression
#mymodel = LogisticRegression()
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbour classifier with distance-weighted voting.
mymodel = KNeighborsClassifier(n_neighbors=3, weights='distance')
mymodel.fit(features_list, true_values)

# NOTE: predictions are made for the same rows the model was fitted on, so
# the figure reported below is training-set accuracy, not held-out accuracy.
predictions = mymodel.predict(features_list)

# Number of rows whose prediction matches the label. Stored as a float so
# the division below is true division on Python 2 as well as Python 3.
correct = float(
    sum(1 for guess, truth in zip(predictions, true_values) if guess == truth)
)

# A single pre-formatted argument keeps this line valid on both Python 2 and
# Python 3 while producing the same text the Python 2 print statement did.
print('Accuracy:  %s' % ((correct / len(predictions)) * 100,))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment