Created
April 22, 2016 03:03
-
-
Save benjamincohen1/3c29fdafbaddae1cf86081d3500f814d to your computer and use it in GitHub Desktop.
week 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Thu Apr 21 21:45:14 2016 | |
@author: ben | |
""" | |
import csv | |
import matplotlib.pyplot as plt | |
# Open the training data read-only.  'rw' is not a valid open() mode
# (Python 3 raises ValueError for it); nothing here writes to the file.
# The handle is left open on purpose: DictReader reads rows lazily, and they
# are consumed by the loop further down.
my_file = csv.DictReader(open('train.csv', 'r'))
#http://scikit-learn.org/ | |
def clean_data(datapoint, average):
    """Convert a raw ``[age, pclass]`` pair into numeric features.

    Args:
        datapoint: Two-item sequence ``[age, pclass]``.  In this script both
            items are strings taken from a CSV row.
        average: Value substituted for the age when the age is missing.

    Returns:
        A new list ``[age, pclass]`` where ``age`` is a float (or ``average``
        unchanged when the age was missing) and ``pclass`` is an int.

    Raises:
        ValueError: If a non-missing age or the class cannot be parsed.
    """
    raw_age = datapoint[0]
    raw_pclass = datapoint[1]

    # A missing age may show up as None or as an empty / whitespace-only
    # string; treat all of those as "missing" instead of crashing in float().
    # Non-string ages (already numeric) fall through to float() untouched.
    is_missing = raw_age is None or (
        hasattr(raw_age, "strip") and not raw_age.strip()
    )
    age = average if is_missing else float(raw_age)

    return [age, int(raw_pclass)]
#def predict(datapoint): | |
# age = datapoint[0] | |
# pclass = datapoint[1] | |
# if age >= 60: | |
# return '0' | |
# if pclass == 1: | |
# return '1' | |
# elif pclass == 3: | |
# return '0' | |
# elif pclass == 2: | |
# if age <= 15: | |
# return '1' | |
# else: | |
# return '0' | |
# Build the training set from the CSV rows: one [age, pclass] feature row
# (missing ages filled with 29) and one integer `Survived` label per record.
information = []
outcomes = []
predictions = []
for row in my_file:
    features = clean_data([row['Age'], row['Pclass']], 29)
    information.append(features)
    outcomes.append(int(row['Survived']))

# Fit a linear regression of the survival labels on the cleaned features.
from sklearn import linear_model
regr = linear_model.LinearRegression()
regr.fit(information, outcomes)
def predict(datapoint):
    """Classify one cleaned feature row using the fitted regression.

    ``datapoint`` is handed to the module-level ``regr`` model as a
    single-row batch; the result is thresholded so that a predicted value
    above 0.5 yields 1 and anything else yields 0.
    """
    return 1 if regr.predict([datapoint]) > .5 else 0
# Second pass over the training file: run the fitted model on the same rows
# it was trained on and report how many predictions match the labels.
information = []
outcomes = []
predictions = []
# Plain read mode ('rw' is not a valid open() mode), wrapped in ``with`` so
# the file handle is closed once every row has been consumed.
with open('train.csv', 'r') as train_file:
    my_file = csv.DictReader(train_file)
    for line in my_file:
        datapoint = [line['Age'], line['Pclass']]
        pred = predict(clean_data(datapoint, 29))
        predictions.append(pred)
        outcomes.append(int(line['Survived']))

from sklearn.metrics import accuracy_score
# The parenthesised single-argument form is valid as both the Python 2
# print statement and the Python 3 print function.
print(accuracy_score(outcomes, predictions))
#num_correct = 0. | |
#for x in range(len(outcomes)): | |
# if outcomes[x] == predictions[x]: | |
# num_correct = num_correct + 1 | |
# | |
#print (num_correct/len(predictions)) * 100 | |
#ages = [] | |
#classes = [] | |
#for datapoint in information: | |
# ages.append(datapoint[0]) | |
# classes.append(datapoint[1]) | |
# | |
#colors = [] | |
#for datapoint in outcomes: | |
# if datapoint == '0': | |
# colors.append('r') | |
# elif datapoint == '1': | |
# colors.append('g') | |
# | |
#plt.scatter(ages, classes, color=colors) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Spyder Editor | |
hjkalskjdhasdk askjdhasldjads | |
This is a temporary script file. | |
""" | |
import numpy # ashdjkahdkjalsdh asdhkajsd asdhjkl | |
import random | |
#definitions = { | |
# 'building': 'a thing that has windows and doors', | |
# 'house': 'a building that people live in', | |
# 'dog': 'a furry animal with 4 legs that barks' | |
#} | |
#definitions['cat'] = 'a furry animal with 4 legs that meows' | |
#definitions['dog'] = 'a VERY VERY furry animal with 4 legs that barks' | |
# | |
##print definitions['dog'] | |
# | |
#my_dict = { | |
# 1: [1,2,3], # this sets the key 1 | |
# 2: [4,5,6] | |
#} | |
# | |
# | |
#students = { | |
# 'Ben': [90, 100, 87], | |
# 'Sarah': [90, 85, 68, 100], | |
# 'Pete': [70, 45, 90] | |
#} | |
# Lists can hold mixed types: start with integers, then append a string.
my_list = [0, 1, 3, 6, 9, 20]
my_list.append('asdasd')
# The parenthesised form prints the same text under Python 2 and Python 3;
# the bare ``print my_list`` statement is a SyntaxError on Python 3.
print(my_list)
#for key in students.keys(): | |
# print key | |
# average = numpy.mean(students[key]) | |
# goodgrade = average > 85 | |
# print goodgrade | |
# if (goodgrade or key[0] == "P"): | |
# print "Good job!" | |
# else: | |
# print "Could use some work" | |
# print '**********' | |
# print goodgrade | |
# print max(['abc', 'cde', 'Zzz', 'xyz']) | |
#myval = True | |
#counter = 0 | |
#while (myval == True and counter < 5): | |
# #do this stuff | |
# counter += 1 | |
# r = random.random() | |
# print r | |
# if r > .9: | |
# myval = False | |
# if myval is STILL True (and counter < 5):
#     jump back to the `while` line and run again
# else:
#     keep going
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment