benjamincohen1/class3.py

## class3.py
# -*- coding: utf-8 -*-
"""
Created on Thu Jun  8 21:46:30 2017

@author: ben
"""
import csv
from first_class import mean

import matplotlib.pyplot as plt

# Training data, opened relative to the current working directory; the
# rows are read further down with csv.DictReader.
myfile = open('train.csv')

def get_feature_vector(datapoint):
    """Convert one passenger record into a list of numeric features.

    datapoint is a mapping (for example a row produced by csv.DictReader)
    holding the string fields 'Pclass', 'Age', 'Sex' and 'Parch'.

    Returns [pclass, age, sex, parents]: pclass and parents are ints, age
    is whatever fix_age returns for the 'Age' field, and sex is 1 when the
    'Sex' field is exactly 'male' and 0 otherwise.

    Raises KeyError if a field is missing and ValueError if 'Pclass' or
    'Parch' is not an integer string.
    """
    pclass = int(datapoint['Pclass'])
    age = fix_age(datapoint['Age'])
    sex = 1 if datapoint['Sex'] == 'male' else 0
    # Read from the argument rather than the module-level loop variable
    # `line`, so the result depends only on the record passed in.
    parents = int(datapoint['Parch'])
    return [pclass, age, sex, parents]

def fix_age(value):
    """Parse an age field, falling back to 29.5 when it is blank.

    value is the raw string from the data file. An empty string yields
    the fallback 29.5 (presumably an approximate average age -- confirm);
    any other value is converted with float().
    """
    return float(value) if value != '' else 29.5

# Feature vectors and the matching survival labels, kept in row order.
datapoints = []
corrects = []

mycsvreader = csv.DictReader(myfile)
for line in mycsvreader:
    survived = line['Survived']
    # The label is True only when the 'Survived' column is exactly '1'.
    corrects.append(survived == '1')
    datapoints.append(get_feature_vector(line))
# Every row has been read, so release the file handle.
myfile.close()

# Instead of manually writing a prediction function, use machine learning
# to do it for us.

# The first TRAIN_SIZE rows train the model; the remainder is held out
# for scoring.
TRAIN_SIZE = 800
train_points = datapoints[:TRAIN_SIZE]
test_points = datapoints[TRAIN_SIZE:]

train_answers = corrects[:TRAIN_SIZE]
test_answers = corrects[TRAIN_SIZE:]

#from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=3, weights='distance')
classifier.fit(train_points, train_answers)

predictions = classifier.predict(test_points)

# Count the predictions that match the held-out labels. The count is kept
# as a float so the percentage below uses true division on Python 2 too.
num_correct = float(sum(
    1 for predicted, actual in zip(predictions, test_answers)
    if predicted == actual
))
# A parenthesised single-argument print is valid on Python 2 and Python 3.
print('You got ' + str(num_correct * 100 / len(test_answers)) + ' right')
	# -*- coding: utf-8 -*-
	"""
	Created on Thu Jun 8 21:46:30 2017

	@author: ben
	"""
	import csv
	from first_class import mean

	import matplotlib.pyplot as plt

	# Training data, opened relative to the current working directory; the
	# rows are read further down with csv.DictReader.
	myfile = open('train.csv')

	def get_feature_vector(datapoint):
	    """Convert one passenger record into a list of numeric features.

	    datapoint is a mapping (for example a row produced by
	    csv.DictReader) holding the string fields 'Pclass', 'Age', 'Sex'
	    and 'Parch'.

	    Returns [pclass, age, sex, parents]: pclass and parents are ints,
	    age is whatever fix_age returns for the 'Age' field, and sex is 1
	    when the 'Sex' field is exactly 'male' and 0 otherwise.

	    Raises KeyError if a field is missing and ValueError if 'Pclass'
	    or 'Parch' is not an integer string.
	    """
	    pclass = int(datapoint['Pclass'])
	    age = fix_age(datapoint['Age'])
	    sex = 1 if datapoint['Sex'] == 'male' else 0
	    # Read from the argument rather than the module-level loop
	    # variable `line`, so the result depends only on the record
	    # passed in.
	    parents = int(datapoint['Parch'])
	    return [pclass, age, sex, parents]

	def fix_age(value):
	    """Parse an age field, falling back to 29.5 when it is blank.

	    value is the raw string from the data file. An empty string
	    yields the fallback 29.5 (presumably an approximate average age
	    -- confirm); any other value is converted with float().
	    """
	    if value == '':
	        return 29.5
	    return float(value)

	# Feature vectors and the matching survival labels, kept in row order.
	datapoints = []
	corrects = []

	mycsvreader = csv.DictReader(myfile)
	for line in mycsvreader:
	    survived = line['Survived']
	    # The label is True only when the 'Survived' column is exactly '1'.
	    corrects.append(survived == '1')
	    datapoints.append(get_feature_vector(line))
	# Every row has been read, so release the file handle.
	myfile.close()

	# Instead of manually writing a prediction function, use machine
	# learning to do it for us.

	# The first TRAIN_SIZE rows train the model; the remainder is held out
	# for scoring.
	TRAIN_SIZE = 800
	train_points = datapoints[:TRAIN_SIZE]
	test_points = datapoints[TRAIN_SIZE:]

	train_answers = corrects[:TRAIN_SIZE]
	test_answers = corrects[TRAIN_SIZE:]

	#from sklearn.linear_model import LogisticRegression
	from sklearn.neighbors import KNeighborsClassifier
	classifier = KNeighborsClassifier(n_neighbors=3, weights='distance')
	classifier.fit(train_points, train_answers)

	predictions = classifier.predict(test_points)

	# Count the predictions that match the held-out labels. The count is
	# kept as a float so the percentage below uses true division on
	# Python 2 too.
	num_correct = float(sum(
	    1 for predicted, actual in zip(predictions, test_answers)
	    if predicted == actual
	))
	# A parenthesised single-argument print is valid on Python 2 and 3.
	print('You got ' + str(num_correct * 100 / len(test_answers)) + ' right')