Skip to content

Instantly share code, notes, and snippets.

@siboehm
Created June 9, 2019 14:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save siboehm/dba29f2294d85ec7e10af4fda517539c to your computer and use it in GitHub Desktop.
Save siboehm/dba29f2294d85ec7e10af4fda517539c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.\n",
"For more information, please see:\n",
" * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n",
" * https://github.com/tensorflow/addons\n",
"If you depend on functionality not listed there, please file an issue.\n",
"\n"
]
}
],
"source": [
"import tensorflow as tf\n",
"import tensorflow_probability as tfp\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import scipy\n",
"import sklearn\n",
"import pandas as pd\n",
"from sklearn.model_selection import RandomizedSearchCV\n",
"tf.enable_v2_behavior()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.13.1\n"
]
}
],
"source": [
"print(tf.__version__)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# a sine wave + constant noise + extra |x|-scaled noise on the leftmost quarter of the inputs\n",
"def gen_data(num, noise_std=0.2, heterosced_noise=0.3, seed=22):\n",
"    \"\"\"Generate a noisy 1-D regression dataset on the interval [-6, 6].\n",
"\n",
"    The targets are ``3 * sin(x)`` plus Gaussian noise with standard deviation\n",
"    ``noise_std`` on every point, plus Gaussian noise with standard deviation\n",
"    ``heterosced_noise`` that is scaled by ``|x|`` on the first ``num // 4`` points only.\n",
"\n",
"    Args:\n",
"        num: number of evenly spaced input points.\n",
"        noise_std: standard deviation of the noise added to every target.\n",
"        heterosced_noise: standard deviation of the |x|-scaled noise.\n",
"        seed: value passed to ``np.random.seed`` before sampling. Calls that share a\n",
"            seed (including the default) return identical data; pass a different\n",
"            seed to draw an independent sample.\n",
"\n",
"    Returns:\n",
"        Tuple ``(x, y)`` of float32 arrays, each of shape ``(num, 1)``.\n",
"\n",
"    Note:\n",
"        This re-seeds NumPy's global random number generator as a side effect.\n",
"    \"\"\"\n",
"    np.random.seed(seed)\n",
"    x = np.linspace(-6, 6, num=num)\n",
"    # only the first quarter of the points receives the |x|-scaled noise\n",
"    quarter = num // 4\n",
"    noise_scale = np.zeros(x.shape)\n",
"    noise_scale[:quarter] = np.abs(x[:quarter])\n",
"    # the two normal() calls stay in this order so a given seed reproduces the same data\n",
"    y = (\n",
"        3 * np.sin(x)\n",
"        + np.random.normal(0, noise_std, size=num)\n",
"        + noise_scale * np.random.normal(0, heterosced_noise, size=num)\n",
"    )\n",
"    x = x.astype(np.float32).reshape((num, 1))\n",
"    y = y.astype(np.float32).reshape((num, 1))\n",
"    assert x.shape == y.shape\n",
"    return x, y\n",
"\n",
"\n",
"def plot_dist(x_train, y_train, x_test, model, labels=True):\n",
"    \"\"\"Plot the training points together with the model's predictive distribution.\n",
"\n",
"    The model is evaluated on ``x_test``; the predicted mean and a band of two\n",
"    standard deviations around it are drawn against ``x_test``.\n",
"\n",
"    Args:\n",
"        x_train: training inputs, shown as a scatter plot.\n",
"        y_train: training targets, shown as a scatter plot.\n",
"        x_test: inputs the model is evaluated on.\n",
"        model: callable that maps ``x_test`` to an object providing ``mean()`` and ``stddev()``.\n",
"        labels: whether to draw the legend.\n",
"    \"\"\"\n",
"    output_dist = model(x_test)\n",
"    mean = np.squeeze(output_dist.mean())\n",
"    std = np.squeeze(output_dist.stddev())\n",
"    # the predictions were computed for x_test, so that is the axis they are drawn against\n",
"    x_pred = np.squeeze(x_test)\n",
"    plt.figure(figsize=[12, 6])\n",
"    plt.scatter(x_train, y_train, s=4, label='training data')\n",
"    plt.plot(x_pred, mean, \"r\", lw=2, label=r'mean')\n",
"    plt.fill_between(x_pred, mean + 2 * std, mean - 2 * std, alpha=0.1, label='2 stddev')\n",
"    if labels:\n",
"        plt.legend()\n",
"    plt.ylim(-6, 6)\n",
"    plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is the data we'll fit: a sine wave with some heteroscedastic noise."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 864x504 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# gen_data() seeds NumPy with the same fixed value on every call, so calling it twice\n",
"# would make the test set an exact copy of the training set. Draw one dataset and\n",
"# split it into interleaved halves instead: both halves cover the full input range\n",
"# but share no points.\n",
"x_all, y_all = gen_data(200)\n",
"x_train, y_train = x_all[::2], y_all[::2]\n",
"x_test, y_test = x_all[1::2], y_all[1::2]\n",
"plt.figure(figsize=[12, 7])\n",
"plt.scatter(x_train, y_train, s=12, label='training_data')\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Define the model. It is a small dense neural network that outputs a Gaussian distribution."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"class DensityModel(tf.keras.Sequential):\n",
"    \"\"\"Dense ReLU network that outputs a Normal distribution and is trained on its negative log likelihood.\"\"\"\n",
"\n",
"    def __init__(self, hidden_sizes=(32,32), learning_rate=0.003):\n",
"        def to_normal(params):\n",
"            # first output column is the mean, second goes through a softplus to become the scale\n",
"            return tfp.distributions.Normal(loc=params[..., 0:1], scale=tf.nn.softplus(params[..., 1:2]))\n",
"\n",
"        def negative_log_likelihood(y, p_y):\n",
"            # p_y is the distribution produced by the last layer\n",
"            return -p_y.log_prob(y)\n",
"\n",
"        stack = []\n",
"        for width in hidden_sizes:\n",
"            stack.append(tf.keras.layers.Dense(width, activation='relu'))\n",
"        # two linear outputs: the raw parameters of the predicted distribution\n",
"        stack.append(tf.keras.layers.Dense(2, activation='linear'))\n",
"        # turns those raw parameters into a tfp distribution object\n",
"        stack.append(tfp.layers.DistributionLambda(to_normal))\n",
"        super().__init__(stack)\n",
"        self.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss=negative_log_likelihood)\n",
"\n",
"    @staticmethod\n",
"    def build_fn(hidden_sizes=(10,10), learning_rate=0.03):\n",
"        \"\"\"Factory for the scikit-learn wrapper: reset the Keras session, then build a fresh model.\"\"\"\n",
"        # clearing the session drops the current tf graph so repeated builds do not pile up in memory\n",
"        tf.keras.backend.clear_session()\n",
"        return DensityModel(hidden_sizes, learning_rate)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x7f4c80550d68>"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"initial_model = DensityModel(hidden_sizes=(16, 16), learning_rate=0.0003)\n",
"initial_model.fit(x_train, y_train, epochs=200, verbose=0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Our initial model doesn't fit the data too well. The learning rate is probably too low. Let's see if we can improve on that."
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 864x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plot_dist(x_train, y_train, x_test, initial_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We sample 30 different parameter settings at random and evaluate their goodness-of-fit via 4-fold cross-validation. By setting `n_jobs=-1`, scikit-learn will spawn as many jobs as there are processors on your system and evaluate the parameter configurations in parallel."
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# search space: a uniformly distributed learning rate and one or two hidden layers of equal width\n",
"layer_widths = range(5, 30, 5)\n",
"param_grid = {\n",
"    'learning_rate': scipy.stats.uniform(loc=0.0001, scale=0.05),\n",
"    'hidden_sizes': [(width, width) for width in layer_widths] + [(width,) for width in layer_widths],\n",
"}\n",
"# the wrapper lets scikit-learn build a fresh DensityModel for every sampled configuration\n",
"regressor = tf.keras.wrappers.scikit_learn.KerasRegressor(build_fn=DensityModel.build_fn)\n",
"cv = RandomizedSearchCV(\n",
"    estimator=regressor,\n",
"    param_distributions=param_grid,\n",
"    n_jobs=-1,\n",
"    n_iter=30,\n",
"    cv=4,\n",
"    iid=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RandomizedSearchCV(cv=4, error_score='raise-deprecating',\n",
" estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor object at 0x7f4c80487cc0>,\n",
" iid=True, n_iter=30, n_jobs=-1,\n",
" param_distributions={'hidden_sizes': [(5, 5), (10, 10),\n",
" (15, 15), (20, 20),\n",
" (25, 25), (5,), (10,),\n",
" (15,), (20,), (25,)],\n",
" 'learning_rate': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f4c804876d8>},\n",
" pre_dispatch='2*n_jobs', random_state=None, refit=True,\n",
" return_train_score=False, scoring=None, verbose=0)"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cv.fit(x_train, y_train, verbose=0, epochs=200)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The best parameters we found have a significantly higher learning rate compared to our initial blind guess"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'hidden_sizes': (25, 25), 'learning_rate': 0.024464187877681353}"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cv.best_params_"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 864x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"tuned_model = DensityModel(**cv.best_params_)\n",
"# train with the same 200-epoch budget used by the search and by the initial model, so the\n",
"# selected learning rate is applied under the conditions it was tuned for\n",
"tuned_model.fit(x_train, y_train, epochs=200, verbose=0)\n",
"plot_dist(x_train, y_train, x_test, tuned_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And our loss went down"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100/100 [==============================] - 0s 1ms/sample - loss: 2.2928\n",
"Initial model test loss: 2.2928287410736083\n",
"100/100 [==============================] - 0s 2ms/sample - loss: 1.2909\n",
"Tuned model test loss: 1.290901689529419\n"
]
}
],
"source": [
"# evaluate and report one model at a time so each progress bar is followed by its own summary line\n",
"initial_test_loss = initial_model.evaluate(x_test, y_test)\n",
"print(f'Initial model test loss: {initial_test_loss!s}')\n",
"tuned_test_loss = tuned_model.evaluate(x_test, y_test)\n",
"print(f'Tuned model test loss: {tuned_test_loss!s}')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "hyper",
"language": "python",
"name": "hyper"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment