ilyarudyak/bias_initializer.ipynb

## bias_initializer.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "bias_initializer.ipynb",
      "provenance": [],
      "authorship_tag": "ABX9TyOfxmWQ28ethxg8Ky8Dn+E1",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/ilyarudyak/92655c2defc85b09887b5b74176ad866/bias_initializer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "fyTuZ2bE-8lh",
        "colab_type": "text"
      },
      "source": [
        "## 0 - import"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Z6pnh3ip6kZd",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import tensorflow as tf\n",
        "from tensorflow import keras\n",
        "\n",
        "import os\n",
        "import tempfile\n",
        "\n",
        "import matplotlib as mpl\n",
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import seaborn as sns\n",
        "\n",
        "import sklearn\n",
        "from sklearn.metrics import confusion_matrix\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.preprocessing import StandardScaler\n",
        "\n",
        "mpl.rcParams['figure.figsize'] = (12, 10)\n",
        "colors = plt.rcParams['axes.prop_cycle'].by_key()['color']"
      ],
      "execution_count": 38,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "VH4Z9i8Q8FQz",
        "colab_type": "text"
      },
      "source": [
        "## 1 - get data"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZcMoAYq_7H43",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "file = tf.keras.utils\n",
        "raw_df = pd.read_csv('https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv')\n",
        "\n",
        "cleaned_df = raw_df.copy()\n",
        "\n",
        "# You don't want the `Time` column.\n",
        "cleaned_df.pop('Time')\n",
        "\n",
        "# The `Amount` column covers a huge range. Convert to log-space.\n",
        "eps=0.001 # 0 => 0.1¢\n",
        "cleaned_df['Log_Amount'] = np.log(cleaned_df.pop('Amount')+eps)\n",
        "\n",
        "# Use a utility from sklearn to split and shuffle our dataset.\n",
        "train_df, test_df = train_test_split(cleaned_df, test_size=0.2)\n",
        "train_df, val_df = train_test_split(train_df, test_size=0.2)\n",
        "\n",
        "# Form np arrays of labels and features.\n",
        "train_labels = np.array(train_df.pop('Class'))\n",
        "bool_train_labels = train_labels != 0\n",
        "val_labels = np.array(val_df.pop('Class'))\n",
        "test_labels = np.array(test_df.pop('Class'))\n",
        "\n",
        "train_features = np.array(train_df)\n",
        "val_features = np.array(val_df)\n",
        "test_features = np.array(test_df)\n",
        "\n",
        "scaler = StandardScaler()\n",
        "train_features = scaler.fit_transform(train_features)\n",
        "\n",
        "val_features = scaler.transform(val_features)\n",
        "test_features = scaler.transform(test_features)\n",
        "\n",
        "train_features = np.clip(train_features, -5, 5)\n",
        "val_features = np.clip(val_features, -5, 5)\n",
        "test_features = np.clip(test_features, -5, 5)"
      ],
      "execution_count": 39,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "3HFDOdP2Akkb",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "f92d8ef0-eedf-4be1-f535-108987a1a2a7"
      },
      "source": [
        "train_features.shape, train_features.shape[-1], train_features.dtype"
      ],
      "execution_count": 40,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "((182276, 29), 29, dtype('float64'))"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 40
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HJtSEeYnGcYc",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "train_features = tf.cast(train_features, dtype=tf.float32)"
      ],
      "execution_count": 41,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xFnIoK5sh6ak",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 85
        },
        "outputId": "fc75feba-553d-401e-99d7-e59bbcb886d1"
      },
      "source": [
        "neg, pos = np.bincount(raw_df['Class'])\n",
        "total = neg + pos\n",
        "print('Examples:\\n    Total: {}\\n    Positive: {} ({:.2f}% of total)\\n'.format(\n",
        "    total, pos, 100 * pos / total))"
      ],
      "execution_count": 48,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Examples:\n",
            "    Total: 284807\n",
            "    Positive: 492 (0.17% of total)\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "4v8NdFYl-2Xn",
        "colab_type": "text"
      },
      "source": [
        "## 2 - get the model - no bias"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "pwMdBsDx7IWf",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "tf.random.set_seed(42)\n",
        "dense_in = tf.keras.layers.Dense(units=16, \n",
        "                                activation='relu', \n",
        "                                input_shape=(train_features.shape[-1],))\n",
        "dropout = keras.layers.Dropout(0.5)\n",
        "# initialize with default bias_initializer\n",
        "dense_out = keras.layers.Dense(units=1, \n",
        "                               activation='sigmoid',\n",
        "                               bias_initializer='zeros')"
      ],
      "execution_count": 42,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "eCsK4xK87In2",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "86c857f9-a6dd-452f-a0f4-997bc9393c5d"
      },
      "source": [
        "dense_in_output = dense_in(train_features[:10])\n",
        "dense_out_output = dense_out(dense_in_output)\n",
        "dense_in_output.shape, dense_out_output.shape"
      ],
      "execution_count": 43,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(TensorShape([10, 16]), TensorShape([10, 1]))"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 43
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "u4VrrEfKK4IM",
        "colab_type": "text"
      },
      "source": [
        "The last layer just multiplies its input by the weights, adds the bias (in our case the bias is initialized to `0`), and applies the `sigmoid`. Let's verify this simple fact."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XJaeLekQMBUd",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 51
        },
        "outputId": "0f9d326b-4850-476c-d6cb-ac690c61f688"
      },
      "source": [
        "weights, bias = dense_out.weights\n",
        "weights.shape, bias"
      ],
      "execution_count": 44,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(TensorShape([16, 1]),\n",
              " <tf.Variable 'dense_6/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 44
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "k-kSzm2RNOwZ",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "2f84a682-a4f1-44db-bc38-cab9211cab27"
      },
      "source": [
        "dense_in_output.shape, dense_in_output[0].shape"
      ],
      "execution_count": 45,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(TensorShape([10, 16]), TensorShape([16]))"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 45
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "F-wNH34C7IyA",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 68
        },
        "outputId": "5313845a-3f2b-4a7a-d208-6526bcd9c86e"
      },
      "source": [
        "logits = tf.matmul(dense_in_output, weights)\n",
        "output = tf.math.sigmoid(logits)\n",
        "logits.shape, output.shape, output[0]"
      ],
      "execution_count": 46,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(TensorShape([10, 1]),\n",
              " TensorShape([10, 1]),\n",
              " <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.6018876], dtype=float32)>)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 46
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hfGNLhsC7I58",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "4caf0355-bfa3-491d-f29e-44dc6b8af112"
      },
      "source": [
        "dense_out_output[0]"
      ],
      "execution_count": 47,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.6018876], dtype=float32)>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 47
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "LiUdgUJbhR7I",
        "colab_type": "text"
      },
      "source": [
        "## 3 - get the model - with bias"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "e65bg9ksiGBl",
        "colab_type": "text"
      },
      "source": [
        "So we need to set the bias. How should we approach this problem? In other words, what is the formula for the computation with a bias? It turns out that we set the `bias` of the output layer (previously it was `0`):\n",
        "\n",
        "$$ logits = Wx + b$$\n",
        "$$ prob = \\frac{1}{1 + e^{-Wx-b}}$$\n",
        "\n",
        "In the tutorial's computation they set $Wx$ to `0`. In this case, if we want $prob = pos / total$, we have to solve:\n",
        "\n",
        "$$ pos / total = \\frac{1}{1 + e^{-b}}$$\n",
        "\n",
        "Solving for $b$, with $total = pos + neg$:\n",
        "\n",
        "$$ b = -\\log\\left(\\frac{total}{pos} - 1\\right) = \\log\\left(\\frac{pos}{neg}\\right)$$\n",
        "\n",
        "That's exactly the equation they use in the tutorial."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hFTUay7l7JH3",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "7e1c89a7-bcf0-4350-8350-4319a46132c0"
      },
      "source": [
        "initial_bias = np.log([pos/neg])\n",
        "initial_bias"
      ],
      "execution_count": 49,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([-6.35935934])"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 49
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "JmxKGjhg7JO-",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "tf.random.set_seed(42)\n",
        "dense_in = tf.keras.layers.Dense(units=16, \n",
        "                                activation='relu', \n",
        "                                input_shape=(train_features.shape[-1],))\n",
        "dropout = keras.layers.Dropout(0.5)\n",
        "# initialize with initial_bias\n",
        "output_bias = tf.keras.initializers.Constant(initial_bias)\n",
        "dense_out = keras.layers.Dense(units=1, \n",
        "                               activation='sigmoid',\n",
        "                               bias_initializer=output_bias)"
      ],
      "execution_count": 52,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Pt5M1LLD7JWn",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "cc67bff1-d628-4ae3-c292-204e62b40f1a"
      },
      "source": [
        "dense_in_output = dense_in(train_features[:10])\n",
        "dense_out_output = dense_out(dense_in_output)\n",
        "dense_in_output.shape, dense_out_output.shape"
      ],
      "execution_count": 53,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(TensorShape([10, 16]), TensorShape([10, 1]))"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 53
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XExAURz97JeX",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 51
        },
        "outputId": "7357efec-9c78-461e-9cf5-1487fc4be2c1"
      },
      "source": [
        "# so now the bias is NOT 0, it's set according to our computations\n",
        "weights, bias = dense_out.weights\n",
        "weights.shape, bias"
      ],
      "execution_count": 54,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(TensorShape([16, 1]),\n",
              " <tf.Variable 'dense_11/bias:0' shape=(1,) dtype=float32, numpy=array([-6.3593593], dtype=float32)>)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 54
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9eaXSIsA7JmA",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 68
        },
        "outputId": "48833a6f-d666-483e-ccfa-882e7fdebb76"
      },
      "source": [
        "logits = tf.matmul(dense_in_output, weights) + tf.convert_to_tensor(initial_bias, dtype=tf.float32)\n",
        "output = tf.math.sigmoid(logits)\n",
        "logits.shape, output.shape, output[0]"
      ],
      "execution_count": 56,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(TensorShape([10, 1]),\n",
              " TensorShape([10, 1]),\n",
              " <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.00260943], dtype=float32)>)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 56
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5O6sW-b_7JtP",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "98341952-5293-46e6-9a5f-e05e2f5b9c71"
      },
      "source": [
        "dense_out_output[0]"
      ],
      "execution_count": 57,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.00260943], dtype=float32)>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 57
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "iE3Rm8R07J0e",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 47,
      "outputs": []
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "bias_initializer.ipynb",
	"provenance": [],
	"authorship_tag": "ABX9TyOfxmWQ28ethxg8Ky8Dn+E1",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/ilyarudyak/92655c2defc85b09887b5b74176ad866/bias_initializer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "fyTuZ2bE-8lh",
	"colab_type": "text"
	},
	"source": [
	"## 0 - import"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "Z6pnh3ip6kZd",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"import tensorflow as tf\n",
	"from tensorflow import keras\n",
	"\n",
	"import os\n",
	"import tempfile\n",
	"\n",
	"import matplotlib as mpl\n",
	"import matplotlib.pyplot as plt\n",
	"import numpy as np\n",
	"import pandas as pd\n",
	"import seaborn as sns\n",
	"\n",
	"import sklearn\n",
	"from sklearn.metrics import confusion_matrix\n",
	"from sklearn.model_selection import train_test_split\n",
	"from sklearn.preprocessing import StandardScaler\n",
	"\n",
	"mpl.rcParams['figure.figsize'] = (12, 10)\n",
	"colors = plt.rcParams['axes.prop_cycle'].by_key()['color']"
	],
	"execution_count": 38,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "VH4Z9i8Q8FQz",
	"colab_type": "text"
	},
	"source": [
	"## 1 - get data"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "ZcMoAYq_7H43",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"file = tf.keras.utils\n",
	"raw_df = pd.read_csv('https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv')\n",
	"\n",
	"cleaned_df = raw_df.copy()\n",
	"\n",
	"# You don't want the `Time` column.\n",
	"cleaned_df.pop('Time')\n",
	"\n",
	"# The `Amount` column covers a huge range. Convert to log-space.\n",
	"eps=0.001 # 0 => 0.1¢\n",
	"cleaned_df['Log_Amount'] = np.log(cleaned_df.pop('Amount')+eps)\n",
	"\n",
	"# Use a utility from sklearn to split and shuffle our dataset.\n",
	"train_df, test_df = train_test_split(cleaned_df, test_size=0.2)\n",
	"train_df, val_df = train_test_split(train_df, test_size=0.2)\n",
	"\n",
	"# Form np arrays of labels and features.\n",
	"train_labels = np.array(train_df.pop('Class'))\n",
	"bool_train_labels = train_labels != 0\n",
	"val_labels = np.array(val_df.pop('Class'))\n",
	"test_labels = np.array(test_df.pop('Class'))\n",
	"\n",
	"train_features = np.array(train_df)\n",
	"val_features = np.array(val_df)\n",
	"test_features = np.array(test_df)\n",
	"\n",
	"scaler = StandardScaler()\n",
	"train_features = scaler.fit_transform(train_features)\n",
	"\n",
	"val_features = scaler.transform(val_features)\n",
	"test_features = scaler.transform(test_features)\n",
	"\n",
	"train_features = np.clip(train_features, -5, 5)\n",
	"val_features = np.clip(val_features, -5, 5)\n",
	"test_features = np.clip(test_features, -5, 5)"
	],
	"execution_count": 39,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "3HFDOdP2Akkb",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "f92d8ef0-eedf-4be1-f535-108987a1a2a7"
	},
	"source": [
	"train_features.shape, train_features.shape[-1], train_features.dtype"
	],
	"execution_count": 40,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"((182276, 29), 29, dtype('float64'))"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 40
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "HJtSEeYnGcYc",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"train_features = tf.cast(train_features, dtype=tf.float32)"
	],
	"execution_count": 41,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "xFnIoK5sh6ak",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 85
	},
	"outputId": "fc75feba-553d-401e-99d7-e59bbcb886d1"
	},
	"source": [
	"neg, pos = np.bincount(raw_df['Class'])\n",
	"total = neg + pos\n",
	"print('Examples:\\n Total: {}\\n Positive: {} ({:.2f}% of total)\\n'.format(\n",
	" total, pos, 100 * pos / total))"
	],
	"execution_count": 48,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"Examples:\n",
	" Total: 284807\n",
	" Positive: 492 (0.17% of total)\n",
	"\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "4v8NdFYl-2Xn",
	"colab_type": "text"
	},
	"source": [
	"## 2 - get the model - no bias"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "pwMdBsDx7IWf",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"tf.random.set_seed(42)\n",
	"dense_in = tf.keras.layers.Dense(units=16, \n",
	" activation='relu', \n",
	" input_shape=(train_features.shape[-1],))\n",
	"dropout = keras.layers.Dropout(0.5)\n",
	"# initialize with default bias_initializer\n",
	"dense_out = keras.layers.Dense(units=1, \n",
	" activation='sigmoid',\n",
	" bias_initializer='zeros')"
	],
	"execution_count": 42,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "eCsK4xK87In2",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "86c857f9-a6dd-452f-a0f4-997bc9393c5d"
	},
	"source": [
	"dense_in_output = dense_in(train_features[:10])\n",
	"dense_out_output = dense_out(dense_in_output)\n",
	"dense_in_output.shape, dense_out_output.shape"
	],
	"execution_count": 43,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"(TensorShape([10, 16]), TensorShape([10, 1]))"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 43
	}
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "u4VrrEfKK4IM",
	"colab_type": "text"
	},
	"source": [
	"The last layer just multiplies its input by the weights, adds the bias (in our case the bias is initialized to `0`), and applies the `sigmoid`. Let's verify this simple fact."
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "XJaeLekQMBUd",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 51
	},
	"outputId": "0f9d326b-4850-476c-d6cb-ac690c61f688"
	},
	"source": [
	"weights, bias = dense_out.weights\n",
	"weights.shape, bias"
	],
	"execution_count": 44,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"(TensorShape([16, 1]),\n",
	" <tf.Variable 'dense_6/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>)"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 44
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "k-kSzm2RNOwZ",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "2f84a682-a4f1-44db-bc38-cab9211cab27"
	},
	"source": [
	"dense_in_output.shape, dense_in_output[0].shape"
	],
	"execution_count": 45,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"(TensorShape([10, 16]), TensorShape([16]))"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 45
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "F-wNH34C7IyA",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 68
	},
	"outputId": "5313845a-3f2b-4a7a-d208-6526bcd9c86e"
	},
	"source": [
	"logits = tf.matmul(dense_in_output, weights)\n",
	"output = tf.math.sigmoid(logits)\n",
	"logits.shape, output.shape, output[0]"
	],
	"execution_count": 46,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"(TensorShape([10, 1]),\n",
	" TensorShape([10, 1]),\n",
	" <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.6018876], dtype=float32)>)"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 46
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "hfGNLhsC7I58",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "4caf0355-bfa3-491d-f29e-44dc6b8af112"
	},
	"source": [
	"dense_out_output[0]"
	],
	"execution_count": 47,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.6018876], dtype=float32)>"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 47
	}
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "LiUdgUJbhR7I",
	"colab_type": "text"
	},
	"source": [
	"## 3 - get the model - with bias"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "e65bg9ksiGBl",
	"colab_type": "text"
	},
	"source": [
	"So we need to set the bias. How should we approach this problem? In other words, what is the formula for the computation with a bias? It turns out that we set the `bias` of the output layer (previously it was `0`):\n",
	"\n",
	"$$ logits = Wx + b$$\n",
	"$$ prob = \\frac{1}{1 + e^{-Wx-b}}$$\n",
	"\n",
	"In the tutorial's computation they set $Wx$ to `0`. In this case, if we want $prob = pos / total$, we have to solve:\n",
	"\n",
	"$$ pos / total = \\frac{1}{1 + e^{-b}}$$\n",
	"\n",
	"Solving for $b$, with $total = pos + neg$:\n",
	"\n",
	"$$ b = -\\log\\left(\\frac{total}{pos} - 1\\right) = \\log\\left(\\frac{pos}{neg}\\right)$$\n",
	"\n",
	"That's exactly the equation they use in the tutorial."
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "hFTUay7l7JH3",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "7e1c89a7-bcf0-4350-8350-4319a46132c0"
	},
	"source": [
	"initial_bias = np.log([pos/neg])\n",
	"initial_bias"
	],
	"execution_count": 49,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"array([-6.35935934])"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 49
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "JmxKGjhg7JO-",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"tf.random.set_seed(42)\n",
	"dense_in = tf.keras.layers.Dense(units=16, \n",
	" activation='relu', \n",
	" input_shape=(train_features.shape[-1],))\n",
	"dropout = keras.layers.Dropout(0.5)\n",
	"# initialize with initial_bias\n",
	"output_bias = tf.keras.initializers.Constant(initial_bias)\n",
	"dense_out = keras.layers.Dense(units=1, \n",
	" activation='sigmoid',\n",
	" bias_initializer=output_bias)"
	],
	"execution_count": 52,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "Pt5M1LLD7JWn",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "cc67bff1-d628-4ae3-c292-204e62b40f1a"
	},
	"source": [
	"dense_in_output = dense_in(train_features[:10])\n",
	"dense_out_output = dense_out(dense_in_output)\n",
	"dense_in_output.shape, dense_out_output.shape"
	],
	"execution_count": 53,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"(TensorShape([10, 16]), TensorShape([10, 1]))"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 53
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "XExAURz97JeX",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 51
	},
	"outputId": "7357efec-9c78-461e-9cf5-1487fc4be2c1"
	},
	"source": [
	"# so now the bias is NOT 0, it's set according to our computations\n",
	"weights, bias = dense_out.weights\n",
	"weights.shape, bias"
	],
	"execution_count": 54,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"(TensorShape([16, 1]),\n",
	" <tf.Variable 'dense_11/bias:0' shape=(1,) dtype=float32, numpy=array([-6.3593593], dtype=float32)>)"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 54
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "9eaXSIsA7JmA",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 68
	},
	"outputId": "48833a6f-d666-483e-ccfa-882e7fdebb76"
	},
	"source": [
	"logits = tf.matmul(dense_in_output, weights) + tf.convert_to_tensor(initial_bias, dtype=tf.float32)\n",
	"output = tf.math.sigmoid(logits)\n",
	"logits.shape, output.shape, output[0]"
	],
	"execution_count": 56,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"(TensorShape([10, 1]),\n",
	" TensorShape([10, 1]),\n",
	" <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.00260943], dtype=float32)>)"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 56
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "5O6sW-b_7JtP",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "98341952-5293-46e6-9a5f-e05e2f5b9c71"
	},
	"source": [
	"dense_out_output[0]"
	],
	"execution_count": 57,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.00260943], dtype=float32)>"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 57
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "iE3Rm8R07J0e",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	""
	],
	"execution_count": 47,
	"outputs": []
	}
	]
	}