Skip to content

Instantly share code, notes, and snippets.

@ilyarudyak
Created July 8, 2020 13:47
Show Gist options
  • Save ilyarudyak/92655c2defc85b09887b5b74176ad866 to your computer and use it in GitHub Desktop.
Save ilyarudyak/92655c2defc85b09887b5b74176ad866 to your computer and use it in GitHub Desktop.
bias_initializer.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "bias_initializer.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyOfxmWQ28ethxg8Ky8Dn+E1",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/ilyarudyak/92655c2defc85b09887b5b74176ad866/bias_initializer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fyTuZ2bE-8lh",
"colab_type": "text"
},
"source": [
"## 0 - import"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Z6pnh3ip6kZd",
"colab_type": "code",
"colab": {}
},
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"\n",
"import os\n",
"import tempfile\n",
"\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"\n",
"import sklearn\n",
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"mpl.rcParams['figure.figsize'] = (12, 10)\n",
"colors = plt.rcParams['axes.prop_cycle'].by_key()['color']"
],
"execution_count": 38,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "VH4Z9i8Q8FQz",
"colab_type": "text"
},
"source": [
"## 1 - get data"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ZcMoAYq_7H43",
"colab_type": "code",
"colab": {}
},
"source": [
"# Fixed seed for the sklearn splits below so that a fresh Run All reproduces\n",
"# the same train/val/test partition.\n",
"SPLIT_SEED = 42\n",
"\n",
"raw_df = pd.read_csv('https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv')\n",
"\n",
"# Work on a copy so `raw_df` stays untouched for the class counts computed later.\n",
"cleaned_df = raw_df.copy()\n",
"\n",
"# You don't want the `Time` column.\n",
"cleaned_df.pop('Time')\n",
"\n",
"# The `Amount` column covers a huge range. Convert to log-space.\n",
"# eps keeps log() finite when the amount is zero (0 => 0.1¢).\n",
"eps = 0.001\n",
"cleaned_df['Log_Amount'] = np.log(cleaned_df.pop('Amount') + eps)\n",
"\n",
"# Use a utility from sklearn to split and shuffle our dataset:\n",
"# 20% is held out for test, then 20% of the remainder for validation.\n",
"train_df, test_df = train_test_split(cleaned_df, test_size=0.2, random_state=SPLIT_SEED)\n",
"train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=SPLIT_SEED)\n",
"\n",
"# Form np arrays of labels and features.\n",
"# pop() removes `Class` from each frame, so only feature columns remain afterwards.\n",
"train_labels = np.array(train_df.pop('Class'))\n",
"bool_train_labels = train_labels != 0\n",
"val_labels = np.array(val_df.pop('Class'))\n",
"test_labels = np.array(test_df.pop('Class'))\n",
"\n",
"train_features = np.array(train_df)\n",
"val_features = np.array(val_df)\n",
"test_features = np.array(test_df)\n",
"\n",
"# Fit the scaler on the training set only; validation and test reuse its statistics.\n",
"scaler = StandardScaler()\n",
"train_features = scaler.fit_transform(train_features)\n",
"\n",
"val_features = scaler.transform(val_features)\n",
"test_features = scaler.transform(test_features)\n",
"\n",
"# Clamp the standardized values to [-5, 5].\n",
"train_features = np.clip(train_features, -5, 5)\n",
"val_features = np.clip(val_features, -5, 5)\n",
"test_features = np.clip(test_features, -5, 5)"
],
"execution_count": 39,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "3HFDOdP2Akkb",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "f92d8ef0-eedf-4be1-f535-108987a1a2a7"
},
"source": [
"train_features.shape, train_features.shape[-1], train_features.dtype"
],
"execution_count": 40,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"((182276, 29), 29, dtype('float64'))"
]
},
"metadata": {
"tags": []
},
"execution_count": 40
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "HJtSEeYnGcYc",
"colab_type": "code",
"colab": {}
},
"source": [
"train_features = tf.cast(train_features, dtype=tf.float32)"
],
"execution_count": 41,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "xFnIoK5sh6ak",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"outputId": "fc75feba-553d-401e-99d7-e59bbcb886d1"
},
"source": [
"# bincount returns the number of rows with label 0 first, then label 1.\n",
"neg, pos = np.bincount(raw_df['Class'])\n",
"total = neg + pos\n",
"print(\n",
"    'Examples:\\n Total: {}\\n Positive: {} ({:.2f}% of total)\\n'\n",
"    .format(total, pos, 100 * pos / total)\n",
")"
],
"execution_count": 48,
"outputs": [
{
"output_type": "stream",
"text": [
"Examples:\n",
" Total: 284807\n",
" Positive: 492 (0.17% of total)\n",
"\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4v8NdFYl-2Xn",
"colab_type": "text"
},
"source": [
"## 2 - get the model - no bias"
]
},
{
"cell_type": "code",
"metadata": {
"id": "pwMdBsDx7IWf",
"colab_type": "code",
"colab": {}
},
"source": [
"# Seed TensorFlow's global RNG before the layers are created.\n",
"tf.random.set_seed(42)\n",
"\n",
"dense_in = keras.layers.Dense(\n",
"    units=16,\n",
"    activation='relu',\n",
"    input_shape=(train_features.shape[-1],),\n",
")\n",
"dropout = keras.layers.Dropout(0.5)\n",
"\n",
"# Output layer: the bias initializer is spelled out as 'zeros' (the default).\n",
"dense_out = keras.layers.Dense(\n",
"    units=1,\n",
"    activation='sigmoid',\n",
"    bias_initializer='zeros',\n",
")"
],
"execution_count": 42,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "eCsK4xK87In2",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "86c857f9-a6dd-452f-a0f4-997bc9393c5d"
},
"source": [
"dense_in_output = dense_in(train_features[:10])\n",
"dense_out_output = dense_out(dense_in_output)\n",
"dense_in_output.shape, dense_out_output.shape"
],
"execution_count": 43,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(TensorShape([10, 16]), TensorShape([10, 1]))"
]
},
"metadata": {
"tags": []
},
"execution_count": 43
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "u4VrrEfKK4IM",
"colab_type": "text"
},
"source": [
"The last layer just multiplies its input by the weights, adds the bias (in our case the bias is initialized to `0`) and applies `sigmoid`. Let's verify this simple fact."
]
},
{
"cell_type": "code",
"metadata": {
"id": "XJaeLekQMBUd",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "0f9d326b-4850-476c-d6cb-ac690c61f688"
},
"source": [
"weights, bias = dense_out.weights\n",
"weights.shape, bias"
],
"execution_count": 44,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(TensorShape([16, 1]),\n",
" <tf.Variable 'dense_6/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>)"
]
},
"metadata": {
"tags": []
},
"execution_count": 44
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "k-kSzm2RNOwZ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "2f84a682-a4f1-44db-bc38-cab9211cab27"
},
"source": [
"dense_in_output.shape, dense_in_output[0].shape"
],
"execution_count": 45,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(TensorShape([10, 16]), TensorShape([16]))"
]
},
"metadata": {
"tags": []
},
"execution_count": 45
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "F-wNH34C7IyA",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 68
},
"outputId": "5313845a-3f2b-4a7a-d208-6526bcd9c86e"
},
"source": [
"# Re-do the output layer by hand: sigmoid(x @ W + b). The bias is all zeros here,\n",
"# but it is added anyway so the check covers the whole computation of the layer.\n",
"logits = tf.matmul(dense_in_output, weights) + bias\n",
"output = tf.math.sigmoid(logits)\n",
"\n",
"# Fail loudly if the manual result differs from what the layer itself produced.\n",
"np.testing.assert_allclose(output.numpy(), dense_out_output.numpy(), rtol=1e-5)\n",
"logits.shape, output.shape, output[0]"
],
"execution_count": 46,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(TensorShape([10, 1]),\n",
" TensorShape([10, 1]),\n",
" <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.6018876], dtype=float32)>)"
]
},
"metadata": {
"tags": []
},
"execution_count": 46
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "hfGNLhsC7I58",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "4caf0355-bfa3-491d-f29e-44dc6b8af112"
},
"source": [
"dense_out_output[0]"
],
"execution_count": 47,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.6018876], dtype=float32)>"
]
},
"metadata": {
"tags": []
},
"execution_count": 47
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "LiUdgUJbhR7I",
"colab_type": "text"
},
"source": [
"## 3 - get the model - with bias"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "e65bg9ksiGBl",
"colab_type": "text"
},
"source": [
"So we need to set the bias. How should we approach this problem? In other words, what is the formula for the computation with a bias? We set the `bias` of the output layer (previously it was `0`):\n",
"\n",
"$$ logits = Wx + b$$\n",
"$$ prob = \\frac{1}{1 + e^{-Wx-b}}$$\n",
"\n",
"In the tutorial's computation they set $Wx$ to `0`. In that case, if we want $prob = pos / total$, we have to solve:\n",
"\n",
"$$ pos / total = \\frac{1}{1 + e^{-b}}$$\n",
"\n",
"Rearranging gives $e^{-b} = (total - pos) / pos = neg / pos$, so\n",
"\n",
"$$ b = \\log(pos / neg)$$\n",
"\n",
"That's exactly the equation they use in the tutorial, and it is what the next cell computes."
]
},
{
"cell_type": "code",
"metadata": {
"id": "hFTUay7l7JH3",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "7e1c89a7-bcf0-4350-8350-4319a46132c0"
},
"source": [
"initial_bias = np.log([pos/neg])\n",
"initial_bias"
],
"execution_count": 49,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([-6.35935934])"
]
},
"metadata": {
"tags": []
},
"execution_count": 49
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "JmxKGjhg7JO-",
"colab_type": "code",
"colab": {}
},
"source": [
"# Seed TensorFlow's global RNG before the layers are created.\n",
"tf.random.set_seed(42)\n",
"\n",
"dense_in = keras.layers.Dense(\n",
"    units=16,\n",
"    activation='relu',\n",
"    input_shape=(train_features.shape[-1],),\n",
")\n",
"dropout = keras.layers.Dropout(0.5)\n",
"\n",
"# Output layer whose bias starts at `initial_bias` instead of zero.\n",
"output_bias = keras.initializers.Constant(initial_bias)\n",
"dense_out = keras.layers.Dense(\n",
"    units=1,\n",
"    activation='sigmoid',\n",
"    bias_initializer=output_bias,\n",
")"
],
"execution_count": 52,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Pt5M1LLD7JWn",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "cc67bff1-d628-4ae3-c292-204e62b40f1a"
},
"source": [
"dense_in_output = dense_in(train_features[:10])\n",
"dense_out_output = dense_out(dense_in_output)\n",
"dense_in_output.shape, dense_out_output.shape"
],
"execution_count": 53,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(TensorShape([10, 16]), TensorShape([10, 1]))"
]
},
"metadata": {
"tags": []
},
"execution_count": 53
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "XExAURz97JeX",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "7357efec-9c78-461e-9cf5-1487fc4be2c1"
},
"source": [
"# so now the bias is NOT 0, it's set according to our computations\n",
"weights, bias = dense_out.weights\n",
"weights.shape, bias"
],
"execution_count": 54,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(TensorShape([16, 1]),\n",
" <tf.Variable 'dense_11/bias:0' shape=(1,) dtype=float32, numpy=array([-6.3593593], dtype=float32)>)"
]
},
"metadata": {
"tags": []
},
"execution_count": 54
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "9eaXSIsA7JmA",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 68
},
"outputId": "48833a6f-d666-483e-ccfa-882e7fdebb76"
},
"source": [
"# Re-do the output layer by hand, plugging in `initial_bias` ourselves:\n",
"# sigmoid(x @ W + b) with b = log(pos / neg).\n",
"manual_bias = tf.convert_to_tensor(initial_bias, dtype=tf.float32)\n",
"logits = tf.matmul(dense_in_output, weights) + manual_bias\n",
"output = tf.math.sigmoid(logits)\n",
"\n",
"# Matching the layer's own output shows the initializer really put `initial_bias` into the layer.\n",
"np.testing.assert_allclose(output.numpy(), dense_out_output.numpy(), rtol=1e-5)\n",
"logits.shape, output.shape, output[0]"
],
"execution_count": 56,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(TensorShape([10, 1]),\n",
" TensorShape([10, 1]),\n",
" <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.00260943], dtype=float32)>)"
]
},
"metadata": {
"tags": []
},
"execution_count": 56
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "5O6sW-b_7JtP",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "98341952-5293-46e6-9a5f-e05e2f5b9c71"
},
"source": [
"dense_out_output[0]"
],
"execution_count": 57,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.00260943], dtype=float32)>"
]
},
"metadata": {
"tags": []
},
"execution_count": 57
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "iE3Rm8R07J0e",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 47,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment