This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Can work for other delimiters as well | |
# Tab delimiter | |
bq load --source_format=CSV --field_delimiter=tab \ | |
--skip_leading_rows 1 -<destination_table> <source> \ | |
<schema> | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A simple cheat sheet of Spark Dataframe syntax
# Current for Spark 1.6.1

# import statements
from pyspark.sql import SQLContext
from pyspark.sql.types import *
from pyspark.sql.functions import *

# creating dataframes
# Build a small DataFrame from in-memory rows, naming the columns "A" and "B".
df = sqlContext.createDataFrame([(1, 4), (2, 5), (3, 6)], ["A", "B"])  # from manual data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import inspect | |
import pandas.io.sql as psql | |
def parametrized(dec): | |
def layer(*args, **kwargs): | |
""" | |
Wrapper for creating plpython functions. Argument types are provided in the docstring | |
:param conn: Psycopg connection object |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
explanation_metadata = { | |
"inputs": { | |
"dayofweek": { | |
"input_tensor_name": "dayofweek:0", | |
"input_baselines": [baselines_mode[0][0]] # Thursday | |
}, | |
"hourofday": { | |
"input_tensor_name": "hourofday:0", | |
"input_baselines": [baselines_mode[0][1]] # 8pm | |
}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from io import StringIO | |
def head(table, n=10): | |
head_list = !bq head --n $n --table $table | |
head_str = '\n'.join([head_list[1]] + head_list[3:-1]) | |
return pd.read_csv(StringIO(head_str), delimiter="|").iloc[:, 1:-1] | |
df = head('publicdata:samples.natality') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _numeric_column_normalized(column_name, normalizer_fn):
    """Return a tf.feature_column numeric column with `normalizer_fn` attached."""
    return tf.feature_column.numeric_column(column_name, normalizer_fn=normalizer_fn)
def _make_zscaler(mean, std): | |
def zscaler(col): | |
return (col - mean)/std | |
return zscaler | |
# Define your feature columns | |
def create_feature_cols(features, use_normalization): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd
import tensorflow as tf

# TF 1.x: turn on eager execution so ops evaluate immediately.
tf.enable_eager_execution()

# Load dataset: Titanic passenger records, pre-split into train/eval CSVs.
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')

# Pop the binary target off each frame so only feature columns remain.
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def permutation_importances(est, X_eval, y_eval, metric, features): | |
"""Column by column, shuffle values and observe effect on eval set. | |
source: http://explained.ai/rf-importance/index.html | |
A similar approach can be done during training. See "Drop-column importance" | |
in the above article.""" | |
def accuracy_metric(est, X, y): | |
"""TensorFlow estimator accuracy.""" | |
eval_input_fn = make_input_fn(X, | |
y=y, | |
shuffle=False, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get importances
importances = est.experimental_feature_importances(normalize=True)
df_imp = pd.Series(importances)

# Visualize importances.
N = 8  # number of top features to display
# Reverse the slice so the most important feature sits at the top of the barh chart.
ax = df_imp.iloc[:N][::-1].plot(kind='barh')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make predictions.
pred_dicts = list(est.experimental_predict_with_explanations(eval_input_fn))
# Gather the per-example directional feature contributions (DFCs) into rows.
df_dfc = pd.DataFrame([p['dfc'] for p in pred_dicts])

# Plot results.
ID = 182       # index of the evaluation example to explain
example = df_dfc.iloc[ID]  # Choose ith example from evaluation set.
TOP_N = 8      # View top 8 features.
# Rank features by absolute contribution and keep the TOP_N largest.
sorted_ix = example.abs().sort_values()[-TOP_N:].index
ax = example[sorted_ix].plot(kind='barh')
Newer / Older