Skip to content

Instantly share code, notes, and snippets.

@benjamincohen1
Created April 22, 2016 03:03
Show Gist options
  • Save benjamincohen1/3c29fdafbaddae1cf86081d3500f814d to your computer and use it in GitHub Desktop.
Save benjamincohen1/3c29fdafbaddae1cf86081d3500f814d to your computer and use it in GitHub Desktop.
week 2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 21 21:45:14 2016
@author: ben
"""
import csv
import matplotlib.pyplot as plt
# Load the training rows. The original mode string 'rw' is not a valid open()
# mode (Python 3 rejects it with ValueError); the file is only ever read, so
# plain 'r' is used. The rows are read eagerly into a list so the handle can be
# closed immediately instead of staying open for the life of the script;
# my_file is still iterable as a sequence of per-row dicts.
with open('train.csv', 'r') as train_handle:
    my_file = list(csv.DictReader(train_handle))
#http://scikit-learn.org/
def clean_data(datapoint, average):
    """Convert a raw ``[age, pclass]`` pair into numeric values.

    Parameters
    ----------
    datapoint : sequence
        ``datapoint[0]`` is the age field and ``datapoint[1]`` is the
        passenger-class field, as read from the CSV.
    average : number
        Value substituted for the age when the age field is missing.

    Returns
    -------
    list
        ``[age, pclass]`` where ``age`` is always a float and ``pclass`` is
        an int.

    Raises
    ------
    ValueError
        If a non-missing age, or the class field, is not numeric text.
    """
    raw_age, raw_pclass = datapoint[0], datapoint[1]

    # A field counts as missing when it is None (short CSV row) or contains
    # nothing but whitespace; previously only the exact empty string was
    # recognised and anything else blank crashed inside float().
    is_missing = raw_age is None or (
        hasattr(raw_age, "strip") and not raw_age.strip()
    )

    # Convert the fallback too, so the age slot has one consistent type.
    age = float(average) if is_missing else float(raw_age)
    return [age, int(raw_pclass)]
#def predict(datapoint):
# age = datapoint[0]
# pclass = datapoint[1]
# if age >= 60:
# return '0'
# if pclass == 1:
# return '1'
# elif pclass == 3:
# return '0'
# elif pclass == 2:
# if age <= 15:
# return '1'
# else:
# return '0'
# Training pass: turn every CSV row into an [age, pclass] feature pair plus its
# integer 'Survived' label, then fit a linear regression on the result.
# 29 is the age handed to clean_data() for rows whose 'Age' field is blank
# (presumably an approximate average age -- confirm against the data).
predictions = []
labelled_rows = [
    (clean_data([row['Age'], row['Pclass']], 29), int(row['Survived']))
    for row in my_file
]
information = [features for features, _ in labelled_rows]
outcomes = [label for _, label in labelled_rows]

from sklearn import linear_model

regr = linear_model.LinearRegression()
regr.fit(information, outcomes)
def predict(datapoint):
    """Classify one cleaned ``[age, pclass]`` row as 1 or 0.

    The row is passed to the module-level ``regr`` model as a single-sample
    batch; the regression output is thresholded at 0.5.

    Parameters
    ----------
    datapoint : list
        One feature row in the same layout the model was fitted on.

    Returns
    -------
    int
        1 if the model's output is strictly greater than 0.5, otherwise 0.
    """
    score = regr.predict([datapoint])
    return 1 if score > .5 else 0
# Evaluation pass: re-read train.csv, classify each row with predict(), and
# print the fraction of rows whose prediction matches the 'Survived' column.
# NOTE: this is the same file the model was fitted on, so the figure is
# training-set accuracy, not a held-out estimate.
#
# Changes from the original:
#   * 'rw' is not a valid open() mode (Python 3 raises ValueError); the file
#     is only read, so 'r' is used.
#   * the file is opened in a with-block so the handle is closed afterwards.
#   * print is written in parenthesized form, which gives identical output
#     under Python 2 and also runs under Python 3.
information = []
outcomes = []
predictions = []
with open('train.csv', 'r') as train_handle:
    my_file = csv.DictReader(train_handle)
    for line in my_file:
        datapoint = [line['Age'], line['Pclass']]
        # 29 is the stand-in age for rows whose 'Age' field is blank.
        pred = predict(clean_data(datapoint, 29))
        predictions.append(pred)
        outcomes.append(int(line['Survived']))

from sklearn.metrics import accuracy_score

print(accuracy_score(outcomes, predictions))
#num_correct = 0.
#for x in range(len(outcomes)):
# if outcomes[x] == predictions[x]:
# num_correct = num_correct + 1
#
#print (num_correct/len(predictions)) * 100
#ages = []
#classes = []
#for datapoint in information:
# ages.append(datapoint[0])
# classes.append(datapoint[1])
#
#colors = []
#for datapoint in outcomes:
# if datapoint == '0':
# colors.append('r')
# elif datapoint == '1':
# colors.append('g')
#
#plt.scatter(ages, classes, color=colors)
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
import numpy # ashdjkahdkjalsdh asdhkajsd asdhjkl
import random
#definitions = {
# 'building': 'a thing that has windows and doors',
# 'house': 'a building that people live in',
# 'dog': 'a furry animal with 4 legs that barks'
#}
#definitions['cat'] = 'a furry animal with 4 legs that meows'
#definitions['dog'] = 'a VERY VERY furry animal with 4 legs that barks'
#
##print definitions['dog']
#
#my_dict = {
# 1: [1,2,3], # this sets the key 1
# 2: [4,5,6]
#}
#
#
#students = {
# 'Ben': [90, 100, 87],
# 'Sarah': [90, 85, 68, 100],
# 'Pete': [70, 45, 90]
#}
# List demo: build a list of ints, append a string to show that lists may hold
# mixed types, and print the result. print is written in parenthesized form so
# the line produces the same output on Python 2 and also runs on Python 3.
my_list = [0, 1, 3, 6, 9, 20]
my_list.append('asdasd')
print(my_list)
#for key in students.keys():
# print key
# average = numpy.mean(students[key])
# goodgrade = average > 85
# print goodgrade
# if (goodgrade or key[0] == "P"):
# print "Good job!"
# else:
# print "Could use some work"
# print '**********'
# print goodgrade
# print max(['abc', 'cde', 'Zzz', 'xyz'])
#myval = True
#counter = 0
#while (myval == True and counter < 5):
# #do this stuff
# counter += 1
# r = random.random()
# print r
# if r > .9:
# myval = False
# if myval is STILL false:
# jump to line 51 and run again
# else:
# keep going
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment