pavanky/dummy_keras_model.ipynb

## dummy_keras_model.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "dummy_keras_model.ipynb",
      "provenance": [],
      "authorship_tag": "ABX9TyO4sT7HQNN3508dPYaVSsT6",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/pavanky/53e893ec0dc5c2f7464e95e7bba145f9/dummy_keras_model.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "wjSFjMCXkgBx",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import tensorflow as tf\n",
        "import numpy as np\n",
        "\n",
        "TOTAL_BATCHES = 10240  # number of dummy examples (rows in every array below)\n",
        "MAX_CATEGORIES = 2**16\n",
        "NUMERICAL_DIMENSION = 64\n",
        "EMBEDDING_DIMENSION = 64\n",
        "SEED = 0\n",
        "\n",
        "# Create dummy data from a seeded generator so a fresh run reproduces it.\n",
        "_rng = np.random.RandomState(SEED)\n",
        "# One token per example, e.g. \"word12345\": draw a single integer id each.\n",
        "# (str() of a whole MAX_CATEGORIES-long random array would only give the\n",
        "# truncated repr of that array, and costs 65536 draws per example.)\n",
        "CATEGORICAL = np.array(\n",
        "    [\"word\" + str(category_id)\n",
        "     for category_id in _rng.randint(MAX_CATEGORIES, size=TOTAL_BATCHES)])\n",
        "# float32 matches the dtype declared for the \"numerical\" feature column.\n",
        "NUMERICAL = _rng.random_sample(\n",
        "    (TOTAL_BATCHES, NUMERICAL_DIMENSION)).astype(np.float32)\n",
        "# Binary labels in {0, 1}.\n",
        "LABELS = _rng.randint(0, 2, size=TOTAL_BATCHES)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "3mhVvEkrtMLB",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def get_feature_columns():\n",
        "  \"\"\"Build the feature columns consumed by the model's DenseFeatures layer.\n",
        "\n",
        "  Returns:\n",
        "    A two-element list: an embedding column over the hashed \"categorical\"\n",
        "    string feature, followed by the numeric \"numerical\" column.\n",
        "  \"\"\"\n",
        "  hashed_words = tf.feature_column.categorical_column_with_hash_bucket(\n",
        "      key=\"categorical\",\n",
        "      hash_bucket_size=MAX_CATEGORIES,\n",
        "      dtype=tf.dtypes.string)\n",
        "  dense_values = tf.feature_column.numeric_column(\n",
        "      key=\"numerical\",\n",
        "      shape=(NUMERICAL_DIMENSION,),\n",
        "      dtype=tf.dtypes.float32)\n",
        "  # Each hash bucket gets an EMBEDDING_DIMENSION-wide vector, combined by sum.\n",
        "  word_embedding = tf.feature_column.embedding_column(\n",
        "      categorical_column=hashed_words,\n",
        "      dimension=EMBEDDING_DIMENSION,\n",
        "      combiner=\"sum\")\n",
        "  return [word_embedding, dense_values]\n",
        "\n",
        "def get_dataset(batch_size, prefetch=4):\n",
        "  \"\"\"Build a repeating, shuffled, batched tf.data pipeline over the dummy data.\n",
        "\n",
        "  Args:\n",
        "    batch_size: Number of examples per batch. The shuffle buffer holds\n",
        "      10 * batch_size examples.\n",
        "    prefetch: Number of batches to prefetch.\n",
        "\n",
        "  Returns:\n",
        "    A tf.data.Dataset yielding (features, labels) batches, where features\n",
        "    is a dict with \"categorical\" and \"numerical\" entries.\n",
        "  \"\"\"\n",
        "  features = {\"categorical\": CATEGORICAL, \"numerical\": NUMERICAL}\n",
        "  dataset = tf.data.Dataset.from_tensor_slices((features, LABELS))\n",
        "  # repeat() makes the stream endless, so callers bound it with take().\n",
        "  dataset = dataset.repeat().shuffle(buffer_size=batch_size * 10)\n",
        "  # Honor the caller's prefetch depth instead of a hard-coded 4.\n",
        "  return dataset.batch(batch_size).prefetch(prefetch)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yMReoUZ6tM8U",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Use sequential API to easily convert to estimator later\n",
        "def get_model(feature_columns):\n",
        "  \"\"\"Return an uncompiled Sequential model fed by `feature_columns`.\n",
        "\n",
        "  The stack is a DenseFeatures input layer, two 64-unit ReLU layers and a\n",
        "  single-unit output layer with no activation.\n",
        "  \"\"\"\n",
        "  layer_stack = [tf.keras.layers.DenseFeatures(feature_columns)]\n",
        "  for _ in range(2):\n",
        "    layer_stack.append(tf.keras.layers.Dense(64, activation=\"relu\"))\n",
        "  layer_stack.append(tf.keras.layers.Dense(1))\n",
        "  return tf.keras.Sequential(layer_stack)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Q-FghB32wDhW",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Inputs: feature columns plus train/eval pipelines built by the same helper.\n",
        "feature_columns = get_feature_columns()\n",
        "train_dataset = get_dataset(256)\n",
        "eval_dataset = get_dataset(256)\n",
        "\n",
        "# Plain SGD; the loss is told to expect raw logits from the model.\n",
        "optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)\n",
        "loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "SFOq9rxuxeLv",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 235
        },
        "outputId": "e7affea4-59f6-4cbc-e841-c301bd6c6495"
      },
      "source": [
        "dummy_model = get_model(feature_columns=feature_columns)\n",
        "# Metrics are configured in compile(); fit() has no `metrics` argument.\n",
        "# The loss is built with from_logits=True, so accuracy thresholds the raw\n",
        "# output at 0.0, which corresponds to a predicted probability of 0.5.\n",
        "dummy_model.compile(\n",
        "    optimizer=optimizer, loss=loss,\n",
        "    metrics=[tf.keras.metrics.BinaryAccuracy(name=\"accuracy\", threshold=0.0)])\n",
        "# The datasets repeat forever, so take() fixes the steps per epoch.\n",
        "dummy_model.fit(\n",
        "    train_dataset.take(1024), epochs=2,\n",
        "    validation_data=eval_dataset.take(256))"
      ],
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Epoch 1/2\n",
            "WARNING:tensorflow:Layer dense_features_5 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.\n",
            "\n",
            "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
            "\n",
            "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
            "\n",
            "1024/1024 [==============================] - 11s 11ms/step - loss: 0.6958 - val_loss: 0.6956\n",
            "Epoch 2/2\n",
            "1024/1024 [==============================] - 11s 11ms/step - loss: 0.6953 - val_loss: 0.6951\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<tensorflow.python.keras.callbacks.History at 0x7f6cd42adb70>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 16
        }
      ]
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "dummy_keras_model.ipynb",
	"provenance": [],
	"authorship_tag": "ABX9TyO4sT7HQNN3508dPYaVSsT6",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/pavanky/53e893ec0dc5c2f7464e95e7bba145f9/dummy_keras_model.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "wjSFjMCXkgBx",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"import tensorflow as tf\n",
	"import numpy as np\n",
	"\n",
	"TOTAL_BATCHES = 10240  # number of dummy examples (rows in every array below)\n",
	"MAX_CATEGORIES = 2**16\n",
	"NUMERICAL_DIMENSION = 64\n",
	"EMBEDDING_DIMENSION = 64\n",
	"SEED = 0\n",
	"\n",
	"# Create dummy data from a seeded generator so a fresh run reproduces it.\n",
	"_rng = np.random.RandomState(SEED)\n",
	"# One token per example, e.g. \"word12345\": draw a single integer id each.\n",
	"# (str() of a whole MAX_CATEGORIES-long random array would only give the\n",
	"# truncated repr of that array, and costs 65536 draws per example.)\n",
	"CATEGORICAL = np.array(\n",
	"    [\"word\" + str(category_id)\n",
	"     for category_id in _rng.randint(MAX_CATEGORIES, size=TOTAL_BATCHES)])\n",
	"# float32 matches the dtype declared for the \"numerical\" feature column.\n",
	"NUMERICAL = _rng.random_sample(\n",
	"    (TOTAL_BATCHES, NUMERICAL_DIMENSION)).astype(np.float32)\n",
	"# Binary labels in {0, 1}.\n",
	"LABELS = _rng.randint(0, 2, size=TOTAL_BATCHES)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "3mhVvEkrtMLB",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"def get_feature_columns():\n",
	"  \"\"\"Build the feature columns consumed by the model's DenseFeatures layer.\n",
	"\n",
	"  Returns:\n",
	"    A two-element list: an embedding column over the hashed \"categorical\"\n",
	"    string feature, followed by the numeric \"numerical\" column.\n",
	"  \"\"\"\n",
	"  hashed_words = tf.feature_column.categorical_column_with_hash_bucket(\n",
	"      key=\"categorical\",\n",
	"      hash_bucket_size=MAX_CATEGORIES,\n",
	"      dtype=tf.dtypes.string)\n",
	"  dense_values = tf.feature_column.numeric_column(\n",
	"      key=\"numerical\",\n",
	"      shape=(NUMERICAL_DIMENSION,),\n",
	"      dtype=tf.dtypes.float32)\n",
	"  # Each hash bucket gets an EMBEDDING_DIMENSION-wide vector, combined by sum.\n",
	"  word_embedding = tf.feature_column.embedding_column(\n",
	"      categorical_column=hashed_words,\n",
	"      dimension=EMBEDDING_DIMENSION,\n",
	"      combiner=\"sum\")\n",
	"  return [word_embedding, dense_values]\n",
	"\n",
	"def get_dataset(batch_size, prefetch=4):\n",
	"  \"\"\"Build a repeating, shuffled, batched tf.data pipeline over the dummy data.\n",
	"\n",
	"  Args:\n",
	"    batch_size: Number of examples per batch. The shuffle buffer holds\n",
	"      10 * batch_size examples.\n",
	"    prefetch: Number of batches to prefetch.\n",
	"\n",
	"  Returns:\n",
	"    A tf.data.Dataset yielding (features, labels) batches, where features\n",
	"    is a dict with \"categorical\" and \"numerical\" entries.\n",
	"  \"\"\"\n",
	"  features = {\"categorical\": CATEGORICAL, \"numerical\": NUMERICAL}\n",
	"  dataset = tf.data.Dataset.from_tensor_slices((features, LABELS))\n",
	"  # repeat() makes the stream endless, so callers bound it with take().\n",
	"  dataset = dataset.repeat().shuffle(buffer_size=batch_size * 10)\n",
	"  # Honor the caller's prefetch depth instead of a hard-coded 4.\n",
	"  return dataset.batch(batch_size).prefetch(prefetch)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "yMReoUZ6tM8U",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Use sequential API to easily convert to estimator later\n",
	"def get_model(feature_columns):\n",
	"  \"\"\"Return an uncompiled Sequential model fed by `feature_columns`.\n",
	"\n",
	"  The stack is a DenseFeatures input layer, two 64-unit ReLU layers and a\n",
	"  single-unit output layer with no activation.\n",
	"  \"\"\"\n",
	"  layer_stack = [tf.keras.layers.DenseFeatures(feature_columns)]\n",
	"  for _ in range(2):\n",
	"    layer_stack.append(tf.keras.layers.Dense(64, activation=\"relu\"))\n",
	"  layer_stack.append(tf.keras.layers.Dense(1))\n",
	"  return tf.keras.Sequential(layer_stack)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "Q-FghB32wDhW",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Inputs: feature columns plus train/eval pipelines built by the same helper.\n",
	"feature_columns = get_feature_columns()\n",
	"train_dataset = get_dataset(256)\n",
	"eval_dataset = get_dataset(256)\n",
	"\n",
	"# Plain SGD; the loss is told to expect raw logits from the model.\n",
	"optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)\n",
	"loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "SFOq9rxuxeLv",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 235
	},
	"outputId": "e7affea4-59f6-4cbc-e841-c301bd6c6495"
	},
	"source": [
	"dummy_model = get_model(feature_columns=feature_columns)\n",
	"# Metrics are configured in compile(); fit() has no `metrics` argument.\n",
	"# The loss is built with from_logits=True, so accuracy thresholds the raw\n",
	"# output at 0.0, which corresponds to a predicted probability of 0.5.\n",
	"dummy_model.compile(\n",
	"    optimizer=optimizer, loss=loss,\n",
	"    metrics=[tf.keras.metrics.BinaryAccuracy(name=\"accuracy\", threshold=0.0)])\n",
	"# The datasets repeat forever, so take() fixes the steps per epoch.\n",
	"dummy_model.fit(\n",
	"    train_dataset.take(1024), epochs=2,\n",
	"    validation_data=eval_dataset.take(256))"
	],
	"execution_count": 16,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"Epoch 1/2\n",
	"WARNING:tensorflow:Layer dense_features_5 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n",
	"\n",
	"If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
	"\n",
	"To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
	"\n",
	"1024/1024 [==============================] - 11s 11ms/step - loss: 0.6958 - val_loss: 0.6956\n",
	"Epoch 2/2\n",
	"1024/1024 [==============================] - 11s 11ms/step - loss: 0.6953 - val_loss: 0.6951\n"
	],
	"name": "stdout"
	},
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"<tensorflow.python.keras.callbacks.History at 0x7f6cd42adb70>"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 16
	}
	]
	}
	]
	}