Last active
August 29, 2015 13:57
-
-
Save rouli/9407500 to your computer and use it in GitHub Desktop.
My brother's python homework
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
# Honorifics recognised in a passenger's name. Only the first three characters
# after the comma are compared, so "Mr " keeps its trailing space (which also
# keeps it distinct from "Mrs") and "Miss" is matched through "Mis".
prefix_list = ["Mr ", "Mrs", "Mis"]

# Demographic categories that passengers and survivors can be counted by.
# For each category the first item is the column number in the file (starting
# at 0) and the remaining items are the possible values of that column.
details = {
    "embarked": [5, "Southampton", "Cherbourg", "Queenstown", ""],
    "sex": [10, "male", "female"],
    "pclass": [1, "1st", "2nd", "3rd"],
    "prefix": [11, "Mr ", "Mrs", "Miss", "Unknown"],
}


def _name_prefix(name):
    """Return the honorific found in ``name``, or "Unknown" if there is none.

    The honorific is expected right after the first comma and a single space.
    Only its first three characters are inspected; "Mis" is expanded back to
    "Miss" and anything not listed in ``prefix_list`` yields "Unknown".
    """
    start = name.find(",") + 2  # skip the comma and the space that follows it
    prefix = name[start:start + 3]
    if prefix not in prefix_list:
        return "Unknown"
    return "Miss" if prefix == "Mis" else prefix


# Read the whole file into processed_list (the csv reader can only be iterated
# once), adding a "Prefix" column to the header and to every passenger row.
# The header row is kept as processed_list[0].
processed_list = []
# "rb" is the mode the Python 2 csv module expects. The with-block guarantees
# the file is closed even if a malformed row raises while it is being read.
with open('titanic.txt', "rb") as titanic:
    reader = csv.reader(titanic)
    for rownum, row in enumerate(reader):
        if rownum == 0:
            header = row
            header.append("Prefix")
        else:
            row.append(_name_prefix(row[3]))  # column 3 holds the passenger's name
        processed_list.append(row)
# grab_values() will run on all rows and count the number of passengers in each demographic and the number of survivors | |
def grab_values(col_num, i, rows=None):
    """Count the passengers, and the survivors among them, in one group.

    Args:
        col_num: index of the column to inspect in each row.
        i: the value that column must equal for a row to belong to the group.
        rows: optional iterable of rows (lists of strings) to scan. Defaults
            to the module-level ``processed_list``.

    Returns:
        A ``(num_on_board, num_survived)`` tuple. A row counts as a survivor
        when its column 2 equals the string "1".
    """
    if rows is None:
        rows = processed_list
    num_on_board = 0
    num_survived = 0
    for row in rows:
        if row[col_num] != i:
            continue
        num_on_board += 1
        if row[2] == "1":
            num_survived += 1
    return num_on_board, num_survived
# get_avg() finds the column number and possible values of demographic x. | |
def get_avg(x):  # x is the category (sex, embarked...)
    """Print passenger and survivor statistics for every value of category ``x``.

    ``x`` must be a key of the module-level ``details`` mapping (a KeyError is
    raised otherwise). For each possible value of that category the number of
    passengers, the number of survivors and the percentage that survived are
    printed. When a group has no passengers, an explanatory message is printed
    in place of the percentage.

    Each print() call receives one pre-built string, so the text is the same
    whether ``print`` is a statement (Python 2) or a function (Python 3).
    """
    col_num = details[x][0]
    for i in details[x][1:]:
        print("Looking for:  %s at col num:  %s" % (i, col_num))
        num_on_board, num_survived = grab_values(col_num, i)
        if num_on_board:
            proportion_survived = "%.2f%%" % (float(num_survived) / num_on_board * 100)
        else:
            # Nothing to divide by: report that instead of a percentage.
            proportion_survived = "Cannot be calculated. I have no record of passengers in this group."
        print(" ".join([
            "Number of %s passengers on board: " % i, str(num_on_board),
            "\nNumber of %s passengers survived: " % i, str(num_survived),
            "\nProportion of %s passengers survived: " % i, proportion_survived,
            "\n",
        ]))
# Greet the user and list the categories that can be analysed.
print("Hello! I can calculate the proportion of passengers that survived according to these parameters: \n "
      "Embarked \n Sex \n Pclass \n Prefix" + " \n")
def get_choice(prompt_fn=None):
    """Ask for a category until a supported one is entered, and return it.

    The answer is stripped of surrounding whitespace and lower-cased before it
    is checked, so input is neither case- nor padding-sensitive.

    Args:
        prompt_fn: optional callable that takes the prompt text and returns
            the user's answer. Defaults to ``raw_input`` (``input`` where
            ``raw_input`` does not exist).

    Returns:
        One of "embarked", "sex", "pclass" or "prefix".
    """
    if prompt_fn is None:
        try:
            prompt_fn = raw_input
        except NameError:  # Python 3 renamed raw_input to input
            prompt_fn = input
    possible_choices = ["embarked", "sex", "pclass", "prefix"]
    # Loop rather than recurse, so repeated invalid answers cannot exhaust
    # the recursion limit.
    while True:
        choice = prompt_fn("Please enter your choice: ").strip().lower()
        if choice in possible_choices:
            return choice
        print("Sorry, I can only work with Embarked / Sex / Pclass / Prefix. Please try again. \n I'm not case-sensitive btw, so don't worry about capitalization. I'm cool that way.")
# Ask which category to break down, then print its survival statistics.
user_choice = get_choice()
get_avg(x=user_choice)
# | |
# Finding the average age for those survived / perished | |
# Yup, forgot to include this part originally.... | |
# | |
def avg_age(y, rows=None):
    """Total the known ages of the passengers whose survival flag equals ``y``.

    Args:
        y: value that column 2 of a row must equal for the row to be counted
            (the script calls this with "0" and "1").
        rows: optional iterable of data rows (no header row) to scan. Defaults
            to ``processed_list`` without its first row, which holds the
            headers.

    Returns:
        A ``(num_passengers, tot_age, num_unknown_age_passengers)`` tuple:
        the number of matching rows with a numeric age in column 4, the sum
        of those ages, and the number of matching rows whose age is "NA".
        Matching rows whose age is neither "NA" nor numeric are not counted.
    """
    if rows is None:
        rows = processed_list[1:]  # Remember the first row has headers
    num_passengers = 0
    tot_age = 0
    num_unknown_age_passengers = 0
    for row in rows:
        if row[2] != y:
            continue
        if row[4] == "NA":
            num_unknown_age_passengers += 1
            continue
        # The age is read as a string. Parse it with float() rather than
        # eval(), which would execute whatever expression the cell contains.
        try:
            age = float(row[4])
        except ValueError:
            continue
        num_passengers += 1
        tot_age += age
    return num_passengers, tot_age, num_unknown_age_passengers
# Report the average age for each value of the survival flag ("0", then "1").
# Each print() call receives one pre-built string, so the text is the same
# whether print is a statement (Python 2) or a function (Python 3).
print("And now, to the average age of passengers that survived (and those that didn't quite..)" "\n")
for i in ["0", "1"]:
    num_passengers, tot_age, num_unknown_age_passengers = avg_age(i)
    if num_passengers:
        average_age = "%.2f" % (float(tot_age) / num_passengers)
    else:
        # No passenger in this group has a known age, so there is nothing to
        # average; report that instead of dividing by zero.
        average_age = "N/A"
    print("Survived= %s \nNumber of passengers: %s \nAverage age: %s \n"
          "Number of passengers with age unknown: %s \n"
          % (i, num_passengers, average_age, num_unknown_age_passengers))
titanic.close()  # harmless if the file has already been closed
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment