This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Short alias for the TensorFlow feature-column module.
fc = tf.feature_column
# Feature names grouped as categorical inputs.
CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',
                       'embark_town', 'alone']
# Feature names grouped as numeric inputs.
NUMERIC_COLUMNS = ['age', 'fare']
def one_hot_cat_column(feature_name, vocab):
    """Wrap a vocabulary-list categorical column in an indicator column.

    Args:
        feature_name: Key forwarded to
            ``tf.feature_column.categorical_column_with_vocabulary_list``.
        vocab: Vocabulary list forwarded to the same call.

    Returns:
        The value returned by ``tf.feature_column.indicator_column``.
    """
    return tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(feature_name,
                                                                  vocab))
# Starts empty; presumably filled with feature columns further down the
# original snippet (not visible here).
feature_columns = []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd
import tensorflow as tf

# NOTE(review): `tf.enable_eager_execution` is the TF 1.x top-level name;
# confirm the TensorFlow version this snippet targets.
tf.enable_eager_execution()

# Load dataset.
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')
# Remove the 'survived' column from each frame and keep it separately.
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## TL;DR:
# NOTE: `feature_columns`, `n_batches_per_layer`, `train_input_fn` and
# `pred_input_fn` are not defined in this snippet; they must exist before
# it runs.

# Train model.
est = tf.estimator.BoostedTreesClassifier(feature_columns, n_batches_per_layer)
est.train(train_input_fn)

# Per instance model interpretability:
pred_dict = est.experimental_predict_with_explanations(pred_input_fn)

# Global gain-based feature importances:
importances = est.experimental_feature_importances()
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
survived pclass sex age sibsp parch fare embarked class who adult_male deck embark_town alive alone
0 0 3 male 22.0 1 0 7.25 S Third man True NaN Southampton no False
1 1 1 female 38.0 1 0 71.2833 C First woman False C Cherbourg yes False
2 1 3 female 26.0 0 0 7.925 S Third woman False NaN Southampton yes True
3 1 1 female 35.0 1 0 53.1 S First woman False C Southampton yes False
4 0 3 male 35.0 0 0 8.05 S Third man True NaN Southampton no True
5 0 3 male NaN 0 0 8.4583 Q Third man True NaN Queenstown no True
6 0 1 male 54.0 0 0 51.8625 S First man True E Southampton no True
7 0 3 male 2.0 3 1 21.075 S Third child False NaN Southampton no False
8 1 3 female 27.0 0 2 11.1333 S Third woman False NaN Southampton yes False
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _numeric_column_normalized(column_name, normalizer_fn):
    """Build a numeric feature column with a normalizer attached.

    Args:
        column_name: Key forwarded to ``tf.feature_column.numeric_column``.
        normalizer_fn: Callable forwarded as the ``normalizer_fn`` keyword.

    Returns:
        The value returned by ``tf.feature_column.numeric_column``.
    """
    return tf.feature_column.numeric_column(column_name, normalizer_fn=normalizer_fn)
def _make_zscaler(mean, std): | |
def zscaler(col): | |
return (col - mean)/std | |
return zscaler | |
# Define your feature columns | |
def create_feature_cols(features, use_normalization): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_normalization_parameters(traindf, features): | |
"""Get the normalization parameters (E.g., mean, std) for traindf for | |
features. We will use these parameters for training, eval, and serving.""" | |
def _z_score_params(column): | |
mean = traindf[column].mean() | |
std = traindf[column].std() | |
return {'mean': mean, 'std': std} | |
normalization_parameters = {} |
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def random_null(col, keep=0.9):
    """Return ``col`` unchanged, or ``None`` at random.

    Args:
        col: The value to pass through.
        keep: Threshold compared against one ``random.random()`` draw. The
            value is kept when the draw is less than or equal to ``keep``.

    Returns:
        ``None`` when ``keep`` is less than the draw, otherwise ``col``.
    """
    import random
    if keep < random.random():
        return None
    return col
# Apply random_null to every cell. The return value of applymap is not
# assigned here, so `df` itself is left as it was.
df.applymap(random_null)