Skip to content

Instantly share code, notes, and snippets.

@benjamincohen1
Created June 16, 2017 03:00
Show Gist options
  • Save benjamincohen1/47088ae0ffe41b05ee8f6bc1bf020787 to your computer and use it in GitHub Desktop.
Save benjamincohen1/47088ae0ffe41b05ee8f6bc1bf020787 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 8 21:46:30 2017
@author: ben
"""
import csv
from first_class import mean
import matplotlib.pyplot as plt
# Input CSV, opened relative to the current working directory; it is wrapped
# in a csv.DictReader further down to read the rows.
myfile = open('train.csv')
def get_feature_vector(datapoint):
    """Convert one CSV row into a list of numeric feature values.

    Args:
        datapoint: mapping with the string-valued keys 'Pclass', 'Age',
            'Sex' and 'Parch' (for example a row from csv.DictReader).

    Returns:
        ``[pclass, age, sex, parents]`` where ``pclass`` and ``parents`` are
        ints, ``age`` is whatever ``fix_age`` returns for the 'Age' field,
        and ``sex`` is 1 for 'male' and 0 for any other value.

    Raises:
        KeyError: if one of the required keys is missing.
        ValueError: if 'Pclass' or 'Parch' is not an integer string.
    """
    pclass = int(datapoint['Pclass'])
    age = fix_age(datapoint['Age'])
    sex = 1 if datapoint['Sex'] == 'male' else 0
    # Read 'Parch' from the argument rather than from any module-level loop
    # variable, so the function works for whichever row it is handed.
    parents = int(datapoint['Parch'])
    return [pclass, age, sex, parents]
def fix_age(value, default=29.5):
    """Parse an age field, substituting a default when it is missing.

    Args:
        value: raw age, normally the text of a CSV cell. ``''``,
            whitespace-only text and ``None`` are all treated as missing.
            Non-string values (e.g. an int) are passed straight to ``float``.
        default: value returned, as-is, when the age is missing.

    Returns:
        ``default`` for a missing age, otherwise ``float(value)``.

    Raises:
        ValueError: if ``value`` is non-blank text that is not a number.
    """
    # Strip only things that can be stripped, so numeric inputs still work.
    text = value.strip() if hasattr(value, 'strip') else value
    if text is None or text == '':
        return default
    return float(text)
# Parallel lists built from the CSV: datapoints[i] is the feature vector of
# row i, and corrects[i] is True when that row's 'Survived' column is '1'.
datapoints = []
corrects = []
try:
    mycsvreader = csv.DictReader(myfile)
    for line in mycsvreader:
        corrects.append(line['Survived'] == '1')
        datapoints.append(get_feature_vector(line))
finally:
    # Every row has been copied into the lists above (or reading failed), so
    # the file handle is no longer needed.
    myfile.close()
# Instead of hand-writing a prediction function, let a machine-learning model
# learn one: the first TRAIN_SIZE rows are used for fitting and the remaining
# rows are held out for scoring.
TRAIN_SIZE = 800
train_points, test_points = datapoints[:TRAIN_SIZE], datapoints[TRAIN_SIZE:]
train_answers, test_answers = corrects[:TRAIN_SIZE], corrects[TRAIN_SIZE:]
#from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

# Fit a 3-nearest-neighbour classifier (neighbours weighted by distance) on
# the training slice, then predict the held-out slice.
classifier = KNeighborsClassifier(n_neighbors=3, weights='distance')
classifier.fit(train_points, train_answers)
predictions = classifier.predict(test_points)

# Count predictions that match the known answers. Kept as a float so the
# percentage below uses true division on Python 2 as well as Python 3.
num_correct = float(sum(
    1 for predicted, actual in zip(predictions, test_answers)
    if predicted == actual
))
# A parenthesised single-argument print is valid both as the Python 2
# statement and as the Python 3 function.
print('You got ' + str(num_correct * 100 / len(test_answers)) + ' right')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment