# Transform the class probabilities to class assignments.
survived_prediction[survived_prediction > 0.5] = 1
survived_prediction[survived_prediction < 1] = 0
survived_prediction = survived_prediction.astype(np.int32)
print('PassengerId,Survived')
for i, predicted_class in enumerate(survived_prediction):
    print('%s,%s' % (i + 892, predicted_class[0]))
# Store the predictions in a dictionary.
pred_dict = {}
pred_dict['survived_prediction'] = survived_prediction
# If no path is specified, print the predictions to STDOUT.
if savepath is None:
    for k, v in pred_dict.items():
        print(k, v)
else:  # Dump the predictions at the specified location.
    with open(savepath, 'wb') as f:
        pickle.dump(pred_dict, f)
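# Minimal round-trip sketch (not part of the original gist): the dumped file
# can be read back with `pickle.load`; `savepath` is reused from above.
if savepath is not None:
    with open(savepath, 'rb') as f:
        restored = pickle.load(f)
    assert (restored['survived_prediction'] == survived_prediction).all()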
def load_source_file_1(source_file_1):
    """Load source data from `source_file_1`.

    Parameters
    ----------
    source_file_1 : str
        The file path of `source_file_1`. `source_file_1` contains the sources
        `sibsp_and_parch`, `gender`, `age_and_fare`, `embarked`, `pclass`.

    Returns
    -------
    The sources contained in `source_file_1`.
    """
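    # Minimal body sketch, assuming `source_file_1` is a pickled dict keyed by
    # the source names listed above; the original body is cut off in this
    # fragment, and `pickle` is assumed to be imported at module level.
    with open(source_file_1, 'rb') as f:
        sources = pickle.load(f)
    return sources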
def encode_categorical(train_data, test_data, feature_name):
    # Get the unique elements from the training set.
    unique_elements = train_data[feature_name].dropna().unique()
    for data in [train_data, test_data]:
        element_indices = []
        for unique_element in unique_elements:
            # Collect all row indices the element occurs in the data.
            element_indices += [data[feature_name].index[
                data[feature_name].apply(lambda x: x == unique_element)]]
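        # Assumed completion (not part of the original fragment): use the
        # collected indices to add one 0/1 indicator column per category,
        # one common way to finish a categorical encoding like this.
        for unique_element, indices in zip(unique_elements, element_indices):
            indicator_name = '%s_%s' % (feature_name, unique_element)
            data[indicator_name] = 0
            data.loc[indices, indicator_name] = 1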
def fill_categorical_nan(train_data, test_data, feature_name):
    """Fills the NaN values by randomly choosing a known category."""
    # Get the unique elements from the training set.
    unique_elements = train_data[feature_name].dropna().unique()
    # Fill the nan values.
    for data in [train_data, test_data]:
        nan_indices = data[feature_name].index[
            data[feature_name].apply(pd.isnull)]
        fill_values = rng.choice(unique_elements, size=len(nan_indices))
        # Write the sampled categories back (assumed missing step in this
        # fragment); `rng` is expected to be a seeded numpy RandomState
        # defined elsewhere in the script.
        data.loc[nan_indices, feature_name] = fill_values
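# Hypothetical usage (not from the original gist): 'Embarked' is the usual
# categorical Titanic column with missing values; `train_data` and `test_data`
# are assumed to be the loaded DataFrames.
fill_categorical_nan(train_data, test_data, 'Embarked')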
def fill_numerical_nan(train_data, test_data, feature_name):
    """Fills the NaN values by the mean."""
    # Compute the mean from the training set.
    fill_value = train_data[feature_name].mean()
    # Fill the nan values.
    for data in [train_data, test_data]:
        data[feature_name] = data[feature_name].fillna(fill_value)
# Fill the nan values for 'Age' and 'Fare'.
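# The calls themselves are missing from this fragment; these two lines are the
# assumed continuation of the comment above.
fill_numerical_nan(train_data, test_data, 'Age')
fill_numerical_nan(train_data, test_data, 'Fare')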
from __future__ import print_function
from builtins import range
import pandas as pd
def inspect_data(data):
    """Shows an overview of the data set."""
    # Check how many examples the data has.
    num_examples = data.shape[0]
    # The rest of the overview is cut off in this fragment; printing the
    # example count and per-column NaN counts is an assumed, minimal version.
    print('Number of examples: %d' % num_examples)
    print(data.isnull().sum())
def main(source_file_1, learning_rate, checkpoint_file, mbsize, num_epochs):
    """Entrypoint for training the model.

    This function loads the training data, builds the computation graph of the
    model and of the loss to be optimized, creates the optimizer and trains
    the model.

    Parameters
    ----------
    source_file_1 : str
    learning_rate : float
    checkpoint_file : str
    mbsize : int
    num_epochs : int
    """
import numpy as np
import tensorflow as tf
# Build the model architecture.
input1 = tf.placeholder(tf.float32, [None, 5], name='input1')
input2 = tf.placeholder(tf.float32, [None, 10], name='input2')
conc = tf.concat([input1, input2], axis=-1)
weights = tf.get_variable(
    'weights',
    # Shape and initializer are cut off in this fragment; [15, 1] matches the
    # concatenated 5 + 10 input features and a single output unit (assumption).
    shape=[15, 1],
    initializer=tf.random_normal_initializer())
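# Assumed continuation (not from the original gist): a single sigmoid output
# for the survival probability, a cross-entropy loss and a gradient-descent
# optimizer. The `labels` placeholder is an assumption; `learning_rate` refers
# to the value passed into `main`.
labels = tf.placeholder(tf.float32, [None, 1], name='labels')
logits = tf.matmul(conc, weights)
survival_probability = tf.sigmoid(logits, name='survival_probability')
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)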
import numpy as np
import theano as th
import theano.tensor as T
# Build the model architecture.
input1 = T.fmatrix('input1')
input2 = T.fmatrix('input2')
conc = T.concatenate([input1, input2], axis=-1)
weights = th.shared(
    # The initial value is cut off in this fragment; small random float32
    # values of shape (15, 1) are an assumption matching the 5 + 10 inputs.
    np.random.randn(15, 1).astype(np.float32),
    name='weights')
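# Assumed continuation (not from the original gist), mirroring the TensorFlow
# variant above: sigmoid output, binary cross-entropy loss and plain gradient
# descent. The `targets` variable is an assumption; `learning_rate` refers to
# the value passed into `main` and is cast to float32 to match the weights.
targets = T.fmatrix('targets')
output = T.nnet.sigmoid(T.dot(conc, weights))
loss = T.nnet.binary_crossentropy(output, targets).mean()
grad = T.grad(loss, weights)
updates = [(weights, weights - np.float32(learning_rate) * grad)]
train_fn = th.function([input1, input2, targets], loss, updates=updates)
predict_fn = th.function([input1, input2], output)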