rouli/gist:9407500

## gistfile1.py
import csv

# Titles recognised in a passenger's name.  Only the three characters that
# follow ", " in the name are compared (three because "Mr " is three
# characters long), so "Miss" is listed in its truncated form "Mis".
prefix_list = ["Mr ", "Mrs", "Mis"]


# Demographic details we can count passengers and survivors by.  For every
# category the first item is the column number in the file (starting at 0)
# and the remaining items are the values that column can take.
details = {"embarked":[5, "Southampton", "Cherbourg", "Queenstown", ""],
            "sex":[10, "male","female"], "pclass":[1,"1st","2nd","3rd"],
           "prefix":[11,"Mr ", "Mrs", "Miss", "Unknown"]}


# A csv reader can only be consumed once, so every row is copied into
# processed_list (header row first) and the rest of the script works on that
# list.  While copying, one extra column is appended to each row: "Prefix" on
# the header, and the passenger's title ("Mr ", "Mrs", "Miss" or "Unknown")
# on every data row.
processed_list = []
header = None

# "with" closes the file as soon as it has been read, even if a row raises.
# The name titanic stays bound afterwards, so a later titanic.close() is a
# harmless no-op on the already-closed file.
with open('titanic.txt', "rb") as titanic:
    reader = csv.reader(titanic)
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; indexing it would crash.
            continue

        if header is None:
            # The first non-blank row is the header.
            header = row
            header.append("Prefix")
        else:
            name = row[3]
            comma_at = name.find(",")
            if comma_at == -1:
                # No "Surname, Title ..." structure, so there is no title to
                # read.  (find() returning -1 used to make the slice below
                # start inside the surname.)
                prefix = ""
            else:
                # The title starts after the comma and one space (+2) and is
                # compared on its first three characters only.
                prefix = name[comma_at + 2:comma_at + 5]

            if prefix in prefix_list:
                # "Mis" is the truncated form of "Miss"; store the full word.
                row.append("Miss" if prefix == "Mis" else prefix)
            else:
                row.append("Unknown")

        processed_list.append(row)

# grab_values() will run on all rows and count the number of passengers in each demographic and the number of survivors
def grab_values(col_num,i):
    """Count passengers, and survivors among them, whose column col_num equals i.

    Every row of the module-level processed_list is examined.  A row counts as
    a survivor when its column 2 is the string "1".

    Returns a tuple (num_on_board, num_survived).
    """
    matching_rows = [record for record in processed_list if record[col_num] == i]
    survivor_count = sum(1 for record in matching_rows if record[2] == "1")
    return len(matching_rows), survivor_count


# get_avg() finds the column number and possible values of demographic x.

def get_avg(x):             # x is the category (sex, embarked...)
    col_num = details[x][0]
    for i in details[x][1:]:
        print "Looking for: ", i, "at col num: ", col_num

        num_on_board,num_survived = grab_values(col_num,i)

        try:
            proportion_survived = float(num_survived)/num_on_board
        except ZeroDivisionError:
            proportion_survived = "Cannot be calculated. I have no record of passengers in this group."


        print "Number of %s passengers on board: " %i ,  num_on_board, "\n" \
              "Number of %s passengers survived: " %i, num_survived, "\n" \
              "Proportion of %s passengers survived: " %i,  "%.2f%%" % (proportion_survived * 100), "\n"


# Greeting that lists the categories the user can pick from.
print ("Hello! I can calculate the proportion of passengers that survived according to these parameters: \n"
       " Embarked \n Sex \n Pclass \n Prefix"), "\n"

def get_choice():
    possible_choices = ["embarked","sex","pclass","prefix"]
    choice = raw_input("Please enter your choice: ").lower()
    if choice not in possible_choices:
        print "Sorry, I can only work with Embarked / Sex / Pclass / Prefix. Please try again. \n I'm not case-sensitive btw, so don't worry about capitalization. I'm cool that way."
        return get_choice()                                 # note to self - in case you wonder why return is there, read: http://stackoverflow.com/questions/22113457/where-to-place-return-in-a-function-which-uses-raw-input-and-validates-the-input  ||||  http://stackoverflow.com/a/18866201/2594546

    return choice

# Ask the user for a category, then print the statistics for that category.
user_choice = get_choice()

get_avg(user_choice)


#
# Finding the average age of those who survived / perished
# Yup, forgot to include this part originally....
#


def _parse_age(text):
    """Return text as an int or float, or None when it is not a finite number."""
    try:
        return int(text)
    except ValueError:
        pass
    try:
        age = float(text)
    except ValueError:
        return None
    # float() also accepts "nan" and "inf"; neither can be averaged.
    if age != age or abs(age) == float("inf"):
        return None
    return age


def avg_age(y, rows=None):
    """Total the known ages of the passengers whose survival flag equals y.

    y    -- value compared against column 2 of each row (the script passes
            the strings "0" and "1").
    rows -- optional list of rows whose first entry is the header row;
            defaults to the module-level processed_list.

    Column 4 holds the age as text.  "NA", and any other text that is not a
    finite number, counts the passenger as having an unknown age.  Ages are
    parsed with int()/float() and never eval(), so the contents of the data
    file cannot execute code or crash the script.

    Returns a tuple (num_passengers, tot_age, num_unknown_age_passengers),
    where num_passengers counts only the passengers with a known age.
    """
    if rows is None:
        rows = processed_list

    num_passengers = 0
    tot_age = 0
    num_unknown_age_passengers = 0
    for row in rows[1:]:         # Remember the first row has headers
        if row[2] != y:
            continue
        age = None if row[4] == "NA" else _parse_age(row[4])
        if age is None:
            num_unknown_age_passengers += 1
        else:
            num_passengers += 1
            tot_age += age

    return num_passengers, tot_age, num_unknown_age_passengers

print "And now, to the average age of passengers that survived (and those that didn't quite..)" "\n"

for i in ["0","1"]:
    num_passengers, tot_age, num_unknown_age_passengers = avg_age(i)
    average_age = "%.2f" % (float(tot_age)/num_passengers)

    print "Survived=", i, "\n" \
          "Number of passengers:",num_passengers, "\n" \
          "Average age:", average_age, "\n" \
          ,"Number of passengers with age unknown:", num_unknown_age_passengers, "\n"


titanic.close()
	import csv

	# Titles recognised in a passenger's name.  Only the three characters that
	# follow ", " in the name are compared (three because "Mr " is three
	# characters long), so "Miss" is listed in its truncated form "Mis".
	prefix_list = ["Mr ", "Mrs", "Mis"]


	# Demographic details we can count passengers and survivors by.  For every
	# category the first item is the column number in the file (starting at 0)
	# and the remaining items are the values that column can take.
	details = {"embarked":[5, "Southampton", "Cherbourg", "Queenstown", ""],
	           "sex":[10, "male","female"], "pclass":[1,"1st","2nd","3rd"],
	           "prefix":[11,"Mr ", "Mrs", "Miss", "Unknown"]}


	# A csv reader can only be consumed once, so every row is copied into
	# processed_list (header row first) and the rest of the script works on
	# that list.  While copying, one extra column is appended to each row:
	# "Prefix" on the header, and the passenger's title ("Mr ", "Mrs", "Miss"
	# or "Unknown") on every data row.
	processed_list = []
	header = None

	# "with" closes the file as soon as it has been read, even if a row raises.
	# The name titanic stays bound afterwards, so a later titanic.close() is a
	# harmless no-op on the already-closed file.
	with open('titanic.txt', "rb") as titanic:
	    reader = csv.reader(titanic)
	    for row in reader:
	        if not row:
	            # csv.reader yields [] for a blank line; indexing it would crash.
	            continue

	        if header is None:
	            # The first non-blank row is the header.
	            header = row
	            header.append("Prefix")
	        else:
	            name = row[3]
	            comma_at = name.find(",")
	            if comma_at == -1:
	                # No "Surname, Title ..." structure, so there is no title
	                # to read.
	                prefix = ""
	            else:
	                # The title starts after the comma and one space (+2) and
	                # is compared on its first three characters only.
	                prefix = name[comma_at + 2:comma_at + 5]

	            if prefix in prefix_list:
	                # "Mis" is the truncated form of "Miss"; store the full word.
	                row.append("Miss" if prefix == "Mis" else prefix)
	            else:
	                row.append("Unknown")

	        processed_list.append(row)

	# grab_values() will run on all rows and count the number of passengers in each demographic and the number of survivors
	def grab_values(col_num,i):
	    """Count passengers, and survivors among them, whose column col_num equals i.

	    Every row of the module-level processed_list is examined.  A row counts
	    as a survivor when its column 2 is the string "1".

	    Returns a tuple (num_on_board, num_survived).
	    """
	    num_on_board = 0
	    num_survived = 0
	    for row in processed_list:
	        if row[col_num] == i:
	            num_on_board += 1
	            if row[2] == "1":
	                num_survived += 1
	    return num_on_board, num_survived



	# get_avg() finds the column number and possible values of demographic x.

	def get_avg(x): # x is the category (sex, embarked...)
	col_num = details[x][0]
	for i in details[x][1:]:
	print "Looking for: ", i, "at col num: ", col_num

	num_on_board,num_survived = grab_values(col_num,i)

	try:
	proportion_survived = float(num_survived)/num_on_board
	except ZeroDivisionError:
	proportion_survived = "Cannot be calculated. I have no record of passengers in this group."


	print "Number of %s passengers on board: " %i , num_on_board, "\n" \
	"Number of %s passengers survived: " %i, num_survived, "\n" \
	"Proportion of %s passengers survived: " %i, "%.2f%%" % (proportion_survived * 100), "\n"



	# Greeting that lists the categories the user can pick from.  The literal is
	# split with implicit concatenation so that the leading whitespace of the
	# second source line cannot end up inside the printed text.
	print ("Hello! I can calculate the proportion of passengers that survived according to these parameters: \n"
	       " Embarked \n Sex \n Pclass \n Prefix"), "\n"

	def get_choice():
	possible_choices = ["embarked","sex","pclass","prefix"]
	choice = raw_input("Please enter your choice: ").lower()
	if choice not in possible_choices:
	print "Sorry, I can only work with Embarked / Sex / Pclass / Prefix. Please try again. \n I'm not case-sensitive btw, so don't worry about capitalization. I'm cool that way."
	return get_choice() # note to self - in case you wonder why return is there, read: http://stackoverflow.com/questions/22113457/where-to-place-return-in-a-function-which-uses-raw-input-and-validates-the-input \|\|\|\| http://stackoverflow.com/a/18866201/2594546

	return choice

	# Ask the user for a category, then print the statistics for that category.
	user_choice = get_choice()

	get_avg(user_choice)




	#
	# Finding the average age of those who survived / perished
	# Yup, forgot to include this part originally....
	#


	def _parse_age(text):
	    """Return text as an int or float, or None when it is not a finite number."""
	    try:
	        return int(text)
	    except ValueError:
	        pass
	    try:
	        age = float(text)
	    except ValueError:
	        return None
	    # float() also accepts "nan" and "inf"; neither can be averaged.
	    if age != age or abs(age) == float("inf"):
	        return None
	    return age


	def avg_age(y, rows=None):
	    """Total the known ages of the passengers whose survival flag equals y.

	    y    -- value compared against column 2 of each row (the script passes
	            the strings "0" and "1").
	    rows -- optional list of rows whose first entry is the header row;
	            defaults to the module-level processed_list.

	    Column 4 holds the age as text.  "NA", and any other text that is not a
	    finite number, counts the passenger as having an unknown age.  Ages are
	    parsed with int()/float() and never eval(), so the contents of the data
	    file cannot execute code or crash the script.

	    Returns a tuple (num_passengers, tot_age, num_unknown_age_passengers),
	    where num_passengers counts only the passengers with a known age.
	    """
	    if rows is None:
	        rows = processed_list

	    num_passengers = 0
	    tot_age = 0
	    num_unknown_age_passengers = 0
	    for row in rows[1:]: # Remember the first row has headers
	        if row[2] != y:
	            continue
	        age = None if row[4] == "NA" else _parse_age(row[4])
	        if age is None:
	            num_unknown_age_passengers += 1
	        else:
	            num_passengers += 1
	            tot_age += age

	    return num_passengers, tot_age, num_unknown_age_passengers

	print "And now, to the average age of passengers that survived (and those that didn't quite..)" "\n"

	for i in ["0","1"]:
	num_passengers, tot_age, num_unknown_age_passengers = avg_age(i)
	average_age = "%.2f" % (float(tot_age)/num_passengers)

	print "Survived=", i, "\n" \
	"Number of passengers:",num_passengers, "\n" \
	"Average age:", average_age, "\n" \
	,"Number of passengers with age unknown:", num_unknown_age_passengers, "\n"


	titanic.close()