Skip to content

Instantly share code, notes, and snippets.

@rouli
Last active August 29, 2015 13:57
Show Gist options
  • Save rouli/9407500 to your computer and use it in GitHub Desktop.
Save rouli/9407500 to your computer and use it in GitHub Desktop.
My brother's python homework
import csv
# Three-character name prefixes looked for in a passenger's name. "Mr" is
# padded with its trailing space so that every entry is three characters long.
prefix_list = ["Mr ", "Mrs", "Mis"]

# Demographic details we can count passengers and survivors by. For each key
# the first item is the column number in a row (starting at 0) and the other
# items are the possible values of that column.
details = {
    "embarked": [5, "Southampton", "Cherbourg", "Queenstown", ""],
    "sex": [10, "male", "female"],
    "pclass": [1, "1st", "2nd", "3rd"],
    # The prefix column is the one appended to every row below.
    # NOTE(review): index 11 assumes titanic.txt has 11 columns -- confirm.
    "prefix": [11, "Mr ", "Mrs", "Miss", "Unknown"],
}


def _name_prefix(name):
    """Return the prefix ("Mr ", "Mrs", "Miss") found in name, or "Unknown".

    The prefix is expected right after the first comma and a space; only its
    first three characters are compared against prefix_list.
    """
    start = name.find(",") + 2  # skip the comma and the space that follows it
    candidate = name[start:start + 3]
    if candidate not in prefix_list:
        return "Unknown"
    # "Mis" is only the three-character stand-in for "Miss".
    return "Miss" if candidate == "Mis" else candidate


# Here we do 2 things:
# I  - add another column holding the passenger's prefix, and
# II - copy every row into processed_list, since the reader can only be
#      iterated once while the rest of the script scans the rows repeatedly.
processed_list = []
# "rb" is the mode Python 2's csv module expects. The with-block closes the
# file as soon as it has been read, even if a row raises; the name `titanic`
# stays bound afterwards, so a later titanic.close() is a harmless no-op.
with open('titanic.txt', "rb") as titanic:
    reader = csv.reader(titanic)
    for rownum, row in enumerate(reader):
        if rownum == 0:
            # The first row is the header; it gets the new column's title.
            header = row
            header.append("Prefix")
        elif not row:
            # csv.reader yields [] for a blank line; there is no name to inspect.
            continue
        else:
            row.append(_name_prefix(row[3]))
        processed_list.append(row)
# grab_values() runs over all rows and counts the number of passengers in a
# demographic group and the number of survivors among them.
def grab_values(col_num, i, rows=None):
    """Count the rows whose column col_num equals i, and the survivors among them.

    col_num -- index of the column to compare.
    i       -- the value that column must hold for a row to be counted.
    rows    -- optional sequence of rows to scan; defaults to the module-level
               processed_list.

    Returns (num_on_board, num_survived). A row counts as a survivor when its
    column 2 is the string "1". Rows too short to hold both columns (such as
    blank lines) are skipped.
    """
    if rows is None:
        rows = processed_list
    needed = max(col_num, 2) + 1  # a row must reach both col_num and column 2
    num_on_board = 0
    num_survived = 0
    for row in rows:
        if len(row) < needed:
            continue
        if row[col_num] == i:
            num_on_board += 1
            if row[2] == "1":
                num_survived += 1
    return num_on_board, num_survived
# get_avg() finds the column number and possible values of demographic x and
# prints the survival statistics of every one of those values.
def get_avg(x):  # x is the category (sex, embarked...)
    """Print on-board count, survivor count and survival rate per value of x.

    x must be a key of the module-level details dictionary (a KeyError is
    raised otherwise). Nothing is returned; the report goes to standard output.
    A group with no passengers gets an explanatory message in place of the
    percentage.
    """
    col_num = details[x][0]
    for value in details[x][1:]:
        # The double spaces and the space before each newline are what a
        # comma-separated Python 2 print statement emits for these items.
        print("Looking for:  %s at col num:  %s" % (value, col_num))
        num_on_board, num_survived = grab_values(col_num, value)
        if num_on_board:
            proportion_survived = "%.2f%%" % (float(num_survived) / num_on_board * 100)
        else:
            # Nothing to divide by: show the explanation, not a number.
            proportion_survived = "Cannot be calculated. I have no record of passengers in this group."
        print(
            "Number of %s passengers on board:  %s \n"
            "Number of %s passengers survived:  %s \n"
            "Proportion of %s passengers survived:  %s \n"
            % (value, num_on_board, value, num_survived, value, proportion_survived)
        )
# Greet the user and list the demographics that can be analysed.
print(
    "Hello! I can calculate the proportion of passengers that survived according to these parameters: \n"
    " Embarked \n Sex \n Pclass \n Prefix \n"
)
def get_choice():
    """Ask the user for a demographic until a supported one is entered.

    Returns the choice in lower case: one of "embarked", "sex", "pclass" or
    "prefix". Surrounding whitespace and capitalization in the answer are
    ignored.
    """
    possible_choices = ["embarked", "sex", "pclass", "prefix"]
    # Loop rather than recurse, so repeated invalid answers cannot grow the
    # call stack.
    while True:
        choice = raw_input("Please enter your choice: ").strip().lower()
        if choice in possible_choices:
            return choice
        print("Sorry, I can only work with Embarked / Sex / Pclass / Prefix. Please try again. \n I'm not case-sensitive btw, so don't worry about capitalization. I'm cool that way.")
# Ask the user which demographic to report on, then print its statistics.
user_choice = get_choice()
get_avg(user_choice)
#
# Finding the average age of those who survived / perished
# Yup, forgot to include this part originally....
#
def _parse_age(text):
    """Return text as an int or a float, or None when it is not a plain number."""
    # int is tried first so whole-number ages stay ints, as they did before.
    for convert in (int, float):
        try:
            return convert(text)
        except ValueError:
            pass
    return None


def avg_age(y, rows=None):
    """Total up the known ages of the passengers whose survival flag equals y.

    y    -- value to match against column 2 of each row ("0" or "1" as called
            in this script).
    rows -- optional table to scan, header row first; defaults to the
            module-level processed_list.

    Returns (num_passengers, tot_age, num_unknown_age_passengers):
    num_passengers counts the matching rows with a numeric age in column 4,
    tot_age is the sum of those ages, and num_unknown_age_passengers counts
    the matching rows whose age is "NA". Any other non-numeric age is ignored,
    as are rows too short to hold an age column.
    """
    if rows is None:
        rows = processed_list
    num_passengers = 0
    tot_age = 0
    num_unknown_age_passengers = 0
    for row in rows[1:]:  # remember the first row has headers
        if len(row) < 5 or row[2] != y:
            continue
        if row[4] == "NA":
            num_unknown_age_passengers += 1
            continue
        # The age is read as a string; parse it as a number instead of
        # eval()-ing text that comes straight from the file.
        age = _parse_age(row[4])
        if age is not None:
            num_passengers += 1
            tot_age += age
    return num_passengers, tot_age, num_unknown_age_passengers
# Report the average age of the passengers who perished ("0") and of those
# who survived ("1").
print("And now, to the average age of passengers that survived (and those that didn't quite..)\n")
for i in ["0", "1"]:
    num_passengers, tot_age, num_unknown_age_passengers = avg_age(i)
    if num_passengers:
        average_age = "%.2f" % (float(tot_age) / num_passengers)
    else:
        # No known ages in this group: there is nothing to divide by.
        average_age = "Cannot be calculated. I have no record of passengers with a known age in this group."
    # The spacing around each value is what a comma-separated Python 2 print
    # statement emits for these items.
    print(
        "Survived= %s \n"
        "Number of passengers: %s \n"
        "Average age: %s \n"
        "Number of passengers with age unknown: %s \n"
        % (i, num_passengers, average_age, num_unknown_age_passengers)
    )
titanic.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment