Skip to content

Instantly share code, notes, and snippets.

View crawles's full-sized avatar

Chris Rawles crawles

View GitHub Profile
# Short alias for the TensorFlow feature-column module.
fc = tf.feature_column
# Names of the input columns handled as categorical features.
CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',
'embark_town', 'alone']
# Names of the input columns handled as numeric features.
NUMERIC_COLUMNS = ['age', 'fare']
def one_hot_cat_column(feature_name, vocab):
    """Build a one-hot (indicator) feature column for a categorical feature.

    Args:
        feature_name: Key identifying the feature in the input data.
        vocab: Vocabulary list passed to
            ``categorical_column_with_vocabulary_list``.

    Returns:
        The indicator column wrapping the vocabulary-list categorical column.
    """
    categorical = tf.feature_column.categorical_column_with_vocabulary_list(
        feature_name, vocab)
    return tf.feature_column.indicator_column(categorical)
# Starts out empty; the code that fills this list is not part of this excerpt.
feature_columns = []
import numpy as np
import pandas as pd
import tensorflow as tf
# NOTE(review): tf.enable_eager_execution is the TF 1.x spelling; TF 2.x runs
# eagerly by default and does not expose this name at the top level — confirm
# which TensorFlow version this snippet targets.
tf.enable_eager_execution()
# Load dataset.
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')
# pop() removes the 'survived' column from each frame and returns it, leaving
# the remaining columns in dftrain / dfeval.
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')
@crawles
crawles / boosted_trees_tldr.py
Created February 28, 2019 08:38
TensorFlow Boosted Trees
## TL;DR:
# NOTE: feature_columns, n_batches_per_layer, train_input_fn and pred_input_fn
# are not defined in this snippet; the surrounding code must provide them.
# Train model.
est = tf.estimator.BoostedTreesClassifier(feature_columns, n_batches_per_layer)
est.train(train_input_fn)
# Per instance model interpretability:
pred_dict = est.experimental_predict_with_explanations(pred_input_fn)
# Global gain-based feature importances:
importances = est.experimental_feature_importances()
@crawles
crawles / titanic.csv
Created October 17, 2018 20:41
sns.load_dataset('titanic')
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
survived pclass sex age sibsp parch fare embarked class who adult_male deck embark_town alive alone
0 0 3 male 22.0 1 0 7.25 S Third man True Southampton no False
1 1 1 female 38.0 1 0 71.2833 C First woman False C Cherbourg yes False
2 1 3 female 26.0 0 0 7.925 S Third woman False Southampton yes True
3 1 1 female 35.0 1 0 53.1 S First woman False C Southampton yes False
4 0 3 male 35.0 0 0 8.05 S Third man True Southampton no True
5 0 3 male 0 0 8.4583 Q Third man True Queenstown no True
6 0 1 male 54.0 0 0 51.8625 S First man True E Southampton no True
7 0 3 male 2.0 3 1 21.075 S Third child False Southampton no False
8 1 3 female 27.0 0 2 11.1333 S Third woman False Southampton yes False
@crawles
crawles / local-explanations.ipynb
Created September 20, 2018 19:43
Local Explanations.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
def _numeric_column_normalized(column_name, normalizer_fn):
    """Create a numeric feature column that uses the given normalizer.

    Args:
        column_name: Key identifying the feature in the input data.
        normalizer_fn: Callable forwarded to ``numeric_column`` as its
            ``normalizer_fn`` argument.

    Returns:
        The column produced by ``tf.feature_column.numeric_column``.
    """
    return tf.feature_column.numeric_column(
        column_name, normalizer_fn=normalizer_fn)
def _make_zscaler(mean, std):
    """Return a function that z-scores its input with fixed statistics.

    Args:
        mean: Value subtracted from the input.
        std: Value the centered input is divided by.

    Returns:
        A one-argument callable computing ``(col - mean) / std``. The
        statistics are captured by closure, so the same scaling is applied
        on every call.
    """
    def zscaler(col):
        return (col - mean) / std

    return zscaler
# Define your feature columns
def create_feature_cols(features, use_normalization):
def get_normalization_parameters(traindf, features):
"""Get the normalization parameters (E.g., mean, std) for traindf for
features. We will use these parameters for training, eval, and serving."""
def _z_score_params(column):
mean = traindf[column].mean()
std = traindf[column].std()
return {'mean': mean, 'std': std}
normalization_parameters = {}
@crawles
crawles / feature_normalization.ipynb
Created June 28, 2018 01:04
feature_normalization.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@crawles
crawles / feature_normalization.ipynb
Created June 27, 2018 13:38
feature_normalization.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@crawles
crawles / drop.py
Created June 25, 2018 15:53
Randomly replace elements of a pandas DataFrame with None
def random_null(col, keep=0.9):
    """Return ``col`` unchanged or, at random, ``None``.

    A fresh ``random.random()`` draw is compared against ``keep`` on every
    call; the value is replaced by ``None`` when the draw exceeds ``keep``.

    Args:
        col: The value to pass through or null out.
        keep: Threshold for the draw; the value is kept when the draw is
            less than or equal to it (default 0.9).

    Returns:
        ``col`` itself, or ``None`` when the draw exceeds ``keep``.
    """
    # Imported locally so the function is self-contained when pasted alone.
    import random

    if keep < random.random():
        return None
    return col
# Run random_null on every cell with its default keep value. The resulting
# frame is not assigned here, so df itself is left as it was.
df.applymap(lambda x: random_null(x))