Skip to content

Instantly share code, notes, and snippets.

@vaibhav-jain
Created July 30, 2017 17:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vaibhav-jain/ed0d10082a6ab3be136958be94104d7e to your computer and use it in GitHub Desktop.
Save vaibhav-jain/ed0d10082a6ab3be136958be94104d7e to your computer and use it in GitHub Desktop.
Using TFLearn and TensorFlow to estimate the surviving chance of Titanic passengers.
#!/usr/bin/env python
import os
import numpy as np
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tflearn
from tflearn.data_utils import load_csv
from tflearn.datasets import titanic
"""
VARIABLE DESCRIPTIONS:
survived Survived
(0 = No; 1 = Yes)
pclass Passenger Class
(1 = 1st; 2 = 2nd; 3 = 3rd)
name Name
sex Sex
age Age
sibsp Number of Siblings/Spouses Aboard
parch Number of Parents/Children Aboard
ticket Ticket Number
fare Passenger Fare
"""
# Fetch the Titanic CSV and read it back in. Column 0 is used as the label
# column and is returned as categorical labels over 2 classes.
DATASET_FILE = 'titanic_dataset.csv'
titanic.download_dataset(DATASET_FILE)
data, labels = load_csv(DATASET_FILE, target_column=0,
                        categorical_labels=True, n_classes=2)
# Preprocessing function
def preprocess(data, columns_to_ignore):
    """Turn raw passenger rows into a float32 feature matrix.

    Args:
        data: Sequence of passenger rows with the label column already
            removed. The rows are copied, never modified in place.
        columns_to_ignore: Iterable of column indices to drop from every
            row. Duplicate indices are treated as a single index.

    Returns:
        A ``numpy.float32`` array with one row per passenger. After the
        ignored columns are dropped, column 1 (the 'sex' field) holds 1.0
        for 'female' and 0.0 for anything else.

    Raises:
        IndexError: If an index in ``columns_to_ignore`` does not exist in
            a row.
    """
    # Delete from the highest index down so an earlier deletion cannot
    # shift the position of a column that still has to be removed.
    drop_order = sorted(set(columns_to_ignore), reverse=True)
    features = []
    for row in data:
        kept = list(row)  # work on a copy so the caller's row stays intact
        for column in drop_order:
            del kept[column]
        # Converting 'sex' field to float (index 1 after removing the
        # labels column and the ignored columns)
        kept[1] = 1. if kept[1] == 'female' else 0.
        features.append(kept)
    return np.array(features, dtype=np.float32)
# Ignore 'name' and 'ticket' columns (id 1 & 6 of data array)
to_ignore = [1, 6]
# Preprocess data: drop the ignored columns and encode 'sex' as a float
data = preprocess(data, to_ignore)
# Build neural network.
# Input width is 6: the 8 passenger fields minus the 2 ignored columns.
net = tflearn.input_data(shape=[None, 6])
# Two stacked fully connected layers, each created with size argument 32
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
# Output layer with softmax activation; its size of 2 matches the
# n_classes=2 used when the labels were loaded
net = tflearn.fully_connected(net, 2, activation='softmax')
# Regression layer, left at tflearn's defaults since no options are passed
net = tflearn.regression(net)
# Define model
model = tflearn.DNN(net)
# Start training (apply gradient descent algorithm):
# 10 epochs, batches of 16 rows, with show_metric enabled
model.fit(data, labels, n_epoch=10, batch_size=16, show_metric=True)
# Two hand-made passenger rows, laid out like the dataset columns
# (pclass, name, sex, age, sibsp, parch, ticket, fare).
sample_rows = [
    [3, 'Jack Dawson', 'male', 19, 0, 0, 'N/A', 5.0000],  # DiCaprio
    [1, 'Rose DeWitt Bukater', 'female', 17, 1, 2, 'N/A', 100.0000],  # Winslet
]
# Run them through the same preprocessing as the training data
dicaprio, winslet = preprocess(sample_rows, to_ignore)
# Index 1 of each prediction is the class-1 (survived) output
pred = model.predict([dicaprio, winslet])
print("DiCaprio Surviving Rate:", pred[0][1])
print("Winslet Surviving Rate:", pred[1][1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment