benjamincohen1/data science

## data science
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 21 21:45:14 2016

@author: ben
"""
import csv
import matplotlib.pyplot as plt

# Load every training row up front so the file handle is closed right away.
# The mode is plain 'r': the previous 'rw' is not a valid open() mode (it
# raises ValueError on Python 3 and is platform-dependent on Python 2).
# my_file is a list of row dicts, which the loop further down iterates
# exactly as it would iterate the DictReader itself.
with open('train.csv', 'r') as train_file:
    my_file = list(csv.DictReader(train_file))


#http://scikit-learn.org/

def clean_data(datapoint, average):
    """Convert one raw ``[age, pclass]`` pair into numeric features.

    Args:
        datapoint: Two-item sequence ``[age, pclass]``. ``age`` may be blank
            (empty or whitespace-only string, or ``None``); ``pclass`` must
            be a value ``int()`` accepts.
        average: Fallback age substituted when ``age`` is blank.

    Returns:
        A new list ``[age, pclass]`` in which ``age`` is always a ``float``
        and ``pclass`` is an ``int``.

    Raises:
        ValueError, TypeError: Propagated from ``float()`` / ``int()`` when a
            non-blank ``age``, the fallback, or ``pclass`` is not numeric.
    """
    age, pclass = datapoint[0], datapoint[1]
    # csv.DictReader yields '' for an empty cell and None for a cell that is
    # missing from a short row; both are treated as "no age recorded".
    if age is None or not str(age).strip():
        age = average
    # The fallback is coerced as well, so the age feature has a single type
    # regardless of whether it came from the data or from `average`.
    return [float(age), int(pclass)]

#def predict(datapoint):
#    age = datapoint[0]
#    pclass = datapoint[1]
#    if age >= 60:
#        return '0'
#    if pclass == 1:
#        return '1'
#    elif pclass == 3:
#        return '0'
#    elif pclass == 2:
#        if age <= 15:
#            return '1'
#        else:
#            return '0'


# Training pass: collect one cleaned [age, pclass] feature row and one
# integer 'Survived' label for every record in my_file.
information, outcomes, predictions = [], [], []
for line in my_file:
    datapoint = [line['Age'], line['Pclass']]
    # 29 is handed to clean_data as the `average` used for a blank Age.
    information += [clean_data(datapoint, 29)]
    outcomes += [int(line['Survived'])]


from sklearn import linear_model

# Fit an ordinary linear regression on the features against the labels.
regr = linear_model.LinearRegression()

regr.fit(information, outcomes)

def predict(datapoint):
    """Classify one cleaned feature row as 1 or 0.

    The row is wrapped in a list and passed to the module-level ``regr``
    model; a result above 0.5 maps to 1 and anything else maps to 0.
    """
    score = regr.predict([datapoint])
    return 1 if score > .5 else 0

# Evaluation pass: re-read the same CSV that the model was fitted on and
# compare thresholded predictions with the recorded outcomes.
# NOTE: this scores the model on its own training data, so the printed
# accuracy is an optimistic figure rather than a held-out estimate.
information = []  # cleared here; this pass does not refill it
outcomes = []
predictions = []
# Mode is plain 'r': the previous 'rw' is not a valid open() mode. The
# with-block closes the handle once every row has been consumed.
with open('train.csv', 'r') as train_file:
    my_file = csv.DictReader(train_file)
    for line in my_file:
        datapoint = [line['Age'], line['Pclass']]
        pred = predict(clean_data(datapoint, 29))

        predictions.append(pred)
        outcomes.append(int(line['Survived']))

from sklearn.metrics import accuracy_score
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(accuracy_score(outcomes, predictions))
#num_correct = 0.
#for x in range(len(outcomes)):
#    if outcomes[x] == predictions[x]:
#        num_correct = num_correct + 1
#
#print (num_correct/len(predictions)) * 100
#ages = []
#classes = []
#for datapoint in information:
#    ages.append(datapoint[0])
#    classes.append(datapoint[1])
#
#colors = []
#for datapoint in outcomes:
#    if datapoint == '0':
#        colors.append('r')
#    elif datapoint == '1':
#        colors.append('g')
#
#plt.scatter(ages, classes, color=colors)

## python
# -*- coding: utf-8 -*-
"""
Spyder Editor
hjkalskjdhasdk askjdhasldjads
This is a temporary script file.
"""
import numpy  # ashdjkahdkjalsdh asdhkajsd asdhjkl
import random
#definitions = {
#    'building': 'a thing that has windows and doors',
#    'house': 'a building that people live in',
#    'dog': 'a furry animal with 4 legs that barks'
#}


#definitions['cat'] = 'a furry animal with 4 legs that meows'
#definitions['dog'] = 'a VERY VERY furry animal with 4 legs that barks'
#
##print definitions['dog']
#
#my_dict = {
#    1: [1,2,3], # this sets the key 1
#    2: [4,5,6]
#}
#
#
#students = {
#    'Ben': [90, 100, 87],
#    'Sarah': [90, 85, 68, 100],
#    'Pete': [70, 45, 90]
#}
# Lists may hold mixed types: a string is appended to a list of ints.
my_list = [0, 1, 3, 6, 9, 20]
my_list.append('asdasd')

# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(my_list)
#for key in students.keys():
#    print key
#    average =  numpy.mean(students[key])
#    goodgrade = average > 85
#    print goodgrade
#    if (goodgrade or key[0] == "P"):
#        print "Good job!"
#    else:
#        print "Could use some work"
#    print '**********'

# print goodgrade
# print max(['abc', 'cde', 'Zzz', 'xyz'])


#myval = True
#counter = 0
#while (myval == True and counter < 5):
#    #do this stuff
#    counter += 1
#    r = random.random()
#    print r
#    if r > .9:
#        myval = False

#    if myval is STILL false:
#        jump to line 51 and run again
#    else:
#        keep going
	# -- coding: utf-8 --
	"""
	Created on Thu Apr 21 21:45:14 2016

	@author: ben
	"""
	import csv
	import matplotlib.pyplot as plt

	# Load every training row up front so the file handle is closed right away.
	# The mode is plain 'r': the previous 'rw' is not a valid open() mode (it
	# raises ValueError on Python 3 and is platform-dependent on Python 2).
	# my_file is a list of row dicts, which the loop further down iterates
	# exactly as it would iterate the DictReader itself.
	with open('train.csv', 'r') as train_file:
	    my_file = list(csv.DictReader(train_file))


	#http://scikit-learn.org/

	def clean_data(datapoint, average):
	    """Convert one raw ``[age, pclass]`` pair into numeric features.

	    Args:
	        datapoint: Two-item sequence ``[age, pclass]``. ``age`` may be
	            blank (empty or whitespace-only string, or ``None``);
	            ``pclass`` must be a value ``int()`` accepts.
	        average: Fallback age substituted when ``age`` is blank.

	    Returns:
	        A new list ``[age, pclass]`` in which ``age`` is always a
	        ``float`` and ``pclass`` is an ``int``.

	    Raises:
	        ValueError, TypeError: Propagated from ``float()`` / ``int()``
	            when a non-blank ``age``, the fallback, or ``pclass`` is not
	            numeric.
	    """
	    age, pclass = datapoint[0], datapoint[1]
	    # csv.DictReader yields '' for an empty cell and None for a cell that
	    # is missing from a short row; both mean "no age recorded".
	    if age is None or not str(age).strip():
	        age = average
	    # The fallback is coerced as well, so the age feature has one type.
	    return [float(age), int(pclass)]

	#def predict(datapoint):
	# age = datapoint[0]
	# pclass = datapoint[1]
	# if age >= 60:
	# return '0'
	# if pclass == 1:
	# return '1'
	# elif pclass == 3:
	# return '0'
	# elif pclass == 2:
	# if age <= 15:
	# return '1'
	# else:
	# return '0'



	# Training pass: collect one cleaned [age, pclass] feature row and one
	# integer 'Survived' label for every record in my_file.
	information = []
	outcomes = []
	predictions = []
	for line in my_file:
	    datapoint = [line['Age'], line['Pclass']]
	    # 29 is handed to clean_data as the `average` used for a blank Age.
	    information.append(clean_data(datapoint, 29))
	    outcomes.append(int(line['Survived']))


	from sklearn import linear_model

	# Fit an ordinary linear regression on the features against the labels.
	regr = linear_model.LinearRegression()

	regr.fit(information, outcomes)

	def predict(datapoint):
	    """Classify one cleaned feature row as 1 or 0.

	    The row is wrapped in a list and passed to the module-level ``regr``
	    model; a result above 0.5 maps to 1 and anything else maps to 0.
	    """
	    pred = regr.predict([datapoint])
	    if pred > .5:
	        return 1
	    else:
	        return 0

	# Evaluation pass: re-read the same CSV that the model was fitted on and
	# compare thresholded predictions with the recorded outcomes.
	# NOTE: this scores the model on its own training data, so the printed
	# accuracy is an optimistic figure rather than a held-out estimate.
	information = []  # cleared here; this pass does not refill it
	outcomes = []
	predictions = []
	# Mode is plain 'r': the previous 'rw' is not a valid open() mode. The
	# with-block closes the handle once every row has been consumed.
	with open('train.csv', 'r') as train_file:
	    my_file = csv.DictReader(train_file)
	    for line in my_file:
	        datapoint = [line['Age'], line['Pclass']]
	        pred = predict(clean_data(datapoint, 29))

	        predictions.append(pred)
	        outcomes.append(int(line['Survived']))

	from sklearn.metrics import accuracy_score
	# A parenthesised single-argument print behaves the same on Python 2 and 3.
	print(accuracy_score(outcomes, predictions))
	#num_correct = 0.
	#for x in range(len(outcomes)):
	# if outcomes[x] == predictions[x]:
	# num_correct = num_correct + 1
	#
	#print (num_correct/len(predictions)) * 100
	#ages = []
	#classes = []
	#for datapoint in information:
	# ages.append(datapoint[0])
	# classes.append(datapoint[1])
	#
	#colors = []
	#for datapoint in outcomes:
	# if datapoint == '0':
	# colors.append('r')
	# elif datapoint == '1':
	# colors.append('g')
	#
	#plt.scatter(ages, classes, color=colors)
	# -- coding: utf-8 --
	"""
	Spyder Editor
	hjkalskjdhasdk askjdhasldjads
	This is a temporary script file.
	"""
	import numpy # ashdjkahdkjalsdh asdhkajsd asdhjkl
	import random
	#definitions = {
	# 'building': 'a thing that has windows and doors',
	# 'house': 'a building that people live in',
	# 'dog': 'a furry animal with 4 legs that barks'
	#}


	#definitions['cat'] = 'a furry animal with 4 legs that meows'
	#definitions['dog'] = 'a VERY VERY furry animal with 4 legs that barks'
	#
	##print definitions['dog']
	#
	#my_dict = {
	# 1: [1,2,3], # this sets the key 1
	# 2: [4,5,6]
	#}
	#
	#
	#students = {
	# 'Ben': [90, 100, 87],
	# 'Sarah': [90, 85, 68, 100],
	# 'Pete': [70, 45, 90]
	#}
	# Lists may hold mixed types: a string is appended to a list of ints.
	my_list = [0, 1, 3, 6, 9, 20]
	my_list.append('asdasd')

	# A parenthesised single-argument print behaves the same on Python 2 and 3.
	print(my_list)
	#for key in students.keys():
	# print key
	# average = numpy.mean(students[key])
	# goodgrade = average > 85
	# print goodgrade
	# if (goodgrade or key[0] == "P"):
	# print "Good job!"
	# else:
	# print "Could use some work"
	# print '**********'

	# print goodgrade
	# print max(['abc', 'cde', 'Zzz', 'xyz'])


	#myval = True
	#counter = 0
	#while (myval == True and counter < 5):
	# #do this stuff
	# counter += 1
	# r = random.random()
	# print r
	# if r > .9:
	# myval = False

	# if myval is STILL false:
	# jump to line 51 and run again
	# else:
	# keep going