Created
September 9, 2016 03:09
-
-
Save benjamincohen1/17d63e1aa1e352d6f4b4534d9ae17931 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#basic guessing game | |
import csv | |
import random | |
import re | |
import numpy | |
import matplotlib.pyplot as plt | |
#num_to_guess = random.randint(0,100) | |
# | |
##print "Youre trying to guess {}".format(num_to_guess) | |
##3 options | |
##1 - guess = num_to_guess, user wins, guess < num_to_guess, print too little, guess > num_to_guess, print too high | |
#num_guesses = 0 | |
#guessed = False | |
#while guessed == False: | |
# num_guesses += 1 | |
# guess = int(raw_input('Enter your number: ')) | |
# | |
# if (guess == num_to_guess) or (5 < 7) or ____: | |
# print "You got it" | |
# print "It took you {num_guesses} guesses to guess the number {num_to_guess}.".format(num_guesses=num_guesses, num_to_guess=num_to_guess) | |
# guessed = True | |
# | |
# elif guess > num_to_guess * 2: | |
# print "You guessed WAY too high" | |
# | |
# elif guess > num_to_guess: | |
# print "You guessed too high" | |
# | |
# else: | |
# print "You guessed too low" | |
# | |
# | |
#print "You won!" | |
#elif guess <= num_to_guess: | |
# print "You guessed too low" | |
# | |
# Collect every non-blank passenger age from the training file so that rows
# with a blank 'Age' cell can fall back to the mean age.
ages = []
# `with` closes the file as soon as reading finishes, even if a row fails to
# parse; the original left the handle open for the life of the script.
with open('/Users/Ben/Desktop/train.csv') as myfile:
    for line in csv.DictReader(myfile):
        if line['Age'] != '':
            ages.append(float(line['Age']))
# Mean of the known ages (numpy yields nan if no age was present at all).
average = numpy.mean(ages)
# Build the inputs for a scatter plot of passenger class against sex, with
# each point coloured by outcome.
classes = []
sexes = []
survived = []
# `with` guarantees the file is closed once the rows have been read.
with open('/Users/Ben/Desktop/train.csv') as myfile:
    for line in csv.DictReader(myfile):
        # Rows missing either plotted field are left out of the plot.
        if line['Pclass'] == '' or line['Sex'] == '':
            continue
        classes.append(float(line['Pclass']))
        # Sex is encoded numerically: 1 for 'male', 0 for any other value.
        sexes.append(1 if line['Sex'] == 'male' else 0)
        # Colour code per point: 'g' when Survived is '1', otherwise 'r'.
        survived.append('g' if line['Survived'] == '1' else 'r')
plt.scatter(classes, sexes, color=survived)
def predict(datapoint, default_age=None):
    """Predict whether a passenger survives from simple age/sex rules.

    Rules, in order:
      * age under 18: predicted to survive only if male;
      * age over 65: predicted not to survive;
      * otherwise: predicted to survive only if not male.

    Args:
        datapoint: Mapping with string values under the 'Age' and 'Sex'
            keys (for example a row produced by csv.DictReader). A blank
            'Age' is replaced by the fallback age. Any 'Sex' value other
            than 'male', including a blank one, is treated as not male.
        default_age: Age to use when 'Age' is blank. When None, the
            module-level ``average`` is used instead.

    Returns:
        True if the passenger is predicted to survive, otherwise False.
    """
    # Read the fields from the argument itself. The original looked at the
    # module-level `line` variable, so the result depended on whichever row a
    # previous loop happened to leave behind rather than on `datapoint`.
    if datapoint['Age'] != '':
        age = float(datapoint['Age'])
    elif default_age is not None:
        age = default_age
    else:
        age = average

    # Always bound, so a blank 'Sex' no longer raises UnboundLocalError.
    male = datapoint['Sex'] == 'male'

    if age < 18:
        return male
    if age > 65:
        return False
    return not male
# Split the rows of the training file at random into a training set and a
# held-out test set of feature vectors with matching 0/1 survival labels.
train_datapoints = []
train_results = []
test_datapoints = []
test_results = []
# `with` guarantees the file is closed once the rows have been read.
with open('/Users/Ben/Desktop/train.csv') as myfile:
    for line in csv.DictReader(myfile):
        # A blank age falls back to the mean age held in `average`.
        if line['Age'] != '':
            age = float(line['Age'])
        else:
            age = average
        # 1 for 'male', 0 for anything else. Computed afresh for every row so
        # a blank 'Sex' cell cannot silently reuse the previous row's value
        # (or raise NameError on the first row), as the original could.
        sex = 1 if line['Sex'] == 'male' else 0
        # Feature vector layout: [age, sex, Pclass, SibSp].
        datapoint = [age, sex, int(line['Pclass']), int(line['SibSp'])]
        label = 1 if line['Survived'] == '1' else 0
        # randint(1, 10) is inclusive on both ends, so only 9 and 10 pass the
        # test: roughly 2 rows in 10 are held out.
        if random.randint(1, 10) > 8:
            test_datapoints.append(datapoint)
            test_results.append(label)
        else:
            train_datapoints.append(datapoint)
            train_results.append(label)
# NOTE(review): the original indentation was lost, so it is unclear whether
# this debug print ran once per row or once after the loop. It is placed after
# the loop here (showing the last row); move it into the loop if per-row
# output was intended.
print(datapoint)
print('-----')
# %-formatting keeps the "N M" output identical on Python 2 and Python 3.
print('%d %d' % (len(test_results), len(train_results)))
from sklearn.linear_model import LinearRegression
from sklearn.metrics import confusion_matrix

# Fit a model on the training rows and score it on the held-out rows.
classifier = LinearRegression()
# Alternative classifiers left disabled by the author:
#from sklearn.neighbors import KNeighborsClassifier
#classifier = KNeighborsClassifier(n_neighbors=4, weights='distance')
#from sklearn.tree import DecisionTreeClassifier
#classifier = DecisionTreeClassifier()
classifier.fit(train_datapoints, train_results)
predictions = classifier.predict(test_datapoints)

# Convert each raw prediction into a 0/1 answer: values at or below .1 become
# 0, everything else becomes 1.
# NOTE(review): .1 looks like a low cut-off for 0/1 targets -- confirm that it
# is intentional before relying on the matrix below.
answers = [0 if pred <= .1 else 1 for pred in predictions]
# (An earlier, disabled variant instead recorded whether each answer matched
# test_results and printed the fraction that were correct.)

print(predictions)
# confusion_matrix takes (y_true, y_pred). The original passed the predicted
# answers first, which transposed the matrix; with the true labels first the
# rows are actual outcomes and the columns are predicted outcomes.
print(confusion_matrix(test_results, answers))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment