pavanky/dummy_estimator_model.ipynb

## dummy_estimator_model.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "dummy_estimator_model.ipynb",
      "provenance": [],
      "authorship_tag": "ABX9TyP302mkzLP3UVmX/YY6taTS",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/pavanky/6dcf6adc1ee7ba1ca24dc290ca59bc16/dummy_estimator_model.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xnW0EoOV5j49",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import tensorflow as tf\n",
        "tf.compat.v1.disable_eager_execution()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HG3DjnRF510Z",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import numpy as np"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "IGNMvAh753dK",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Sizes of the synthetic problem.\n",
        "TOTAL_BATCHES = 10240  # number of examples (rows) generated below\n",
        "MAX_CATEGORIES = 2**16  # also used as the hash bucket count of the categorical column\n",
        "NUMERICAL_DIMENSION = 64\n",
        "EMBEDDING_DIMENSION = 64\n",
        "SEED = 0  # fixed seed so Restart & Run All regenerates the same dummy data\n",
        "\n",
        "# Create dummy data\n",
        "rng = np.random.RandomState(SEED)\n",
        "# One random category id per example (a single draw per row, not a whole array).\n",
        "CATEGORICAL = np.array([\"word\" + str(category_id)\n",
        "                        for category_id in rng.randint(0, MAX_CATEGORIES, size=TOTAL_BATCHES)])\n",
        "# float32 matches the dtype declared on the numeric feature column.\n",
        "NUMERICAL = rng.random_sample((TOTAL_BATCHES, NUMERICAL_DIMENSION)).astype(np.float32)\n",
        "LABELS = rng.randint(0, 2, size=TOTAL_BATCHES)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "QrcS4mPq58QW",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def get_feature_columns():\n",
        "  \"\"\"Builds the model inputs: an embedded hashed string column and a dense numeric column.\n",
        "\n",
        "  Returns:\n",
        "    A two-element list `[embedding_column, numeric_column]`.\n",
        "  \"\"\"\n",
        "  hashed_categories = tf.feature_column.categorical_column_with_hash_bucket(\n",
        "      key=\"categorical\",\n",
        "      hash_bucket_size=MAX_CATEGORIES,\n",
        "      dtype=tf.dtypes.string)\n",
        "  dense_features = tf.feature_column.numeric_column(\n",
        "      key=\"numerical\",\n",
        "      shape=(NUMERICAL_DIMENSION,),\n",
        "      dtype=tf.dtypes.float32)\n",
        "  embedded_categories = tf.feature_column.embedding_column(\n",
        "      categorical_column=hashed_categories,\n",
        "      dimension=EMBEDDING_DIMENSION,\n",
        "      combiner=\"sum\")\n",
        "  return [embedded_categories, dense_features]\n",
        "\n",
        "def get_dataset(batch_size, prefetch=4):\n",
        "  \"\"\"Builds an endlessly repeating, shuffled, batched dataset over the dummy data.\n",
        "\n",
        "  Args:\n",
        "    batch_size: Number of examples per batch; the shuffle buffer holds 10x this many.\n",
        "    prefetch: Number of batches to prefetch.\n",
        "\n",
        "  Returns:\n",
        "    A `tf.data.Dataset` yielding `(features, labels)` batches, where `features`\n",
        "    is a dict with keys \"categorical\" and \"numerical\".\n",
        "  \"\"\"\n",
        "  features = {\"categorical\": CATEGORICAL, \"numerical\": NUMERICAL}\n",
        "  dataset = tf.data.Dataset.from_tensor_slices((features, LABELS))\n",
        "  dataset = dataset.repeat().shuffle(buffer_size=batch_size * 10)\n",
        "  # Honor the caller's `prefetch` argument rather than a hard-coded depth.\n",
        "  return dataset.batch(batch_size).prefetch(prefetch)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "NH-94u7v6v-Z",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 199
        },
        "outputId": "4d64e6c8-0130-4df2-e665-b6c51a22072b"
      },
      "source": [
        "# Binary classifier: two 64-unit hidden layers on top of the feature columns.\n",
        "# The optimizer is handed over as a zero-argument factory that builds SGD.\n",
        "def _sgd_optimizer():\n",
        "  return tf.keras.optimizers.SGD(learning_rate=1E-3)\n",
        "\n",
        "binary_head = tf.estimator.BinaryClassHead()\n",
        "model_columns = get_feature_columns()\n",
        "estimator = tf.estimator.DNNEstimator(\n",
        "    head=binary_head,\n",
        "    feature_columns=model_columns,\n",
        "    hidden_units=[64, 64],\n",
        "    optimizer=_sgd_optimizer)"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:tensorflow:Using default config.\n",
            "WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmpr_2vnsk3\n",
            "INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpr_2vnsk3', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true\n",
            "graph_options {\n",
            "  rewrite_options {\n",
            "    meta_optimizer_iterations: ONE\n",
            "  }\n",
            "}\n",
            ", '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "1zpuAHLJ7OnT",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "ddbde117-9524-45d2-e718-260241f711ab"
      },
      "source": [
        "# Train for num_steps * num_epochs steps on batches of 256 examples.\n",
        "num_epochs, num_steps = 2, 1024\n",
        "\n",
        "def _train_input_fn():\n",
        "  return get_dataset(256)\n",
        "\n",
        "estimator.train(input_fn=_train_input_fn, steps=num_steps * num_epochs)"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "INFO:tensorflow:Calling model_fn.\n",
            "WARNING:tensorflow:Layer dnn is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.\n",
            "\n",
            "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
            "\n",
            "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
            "\n",
            "INFO:tensorflow:Done calling model_fn.\n",
            "INFO:tensorflow:Create CheckpointSaverHook.\n",
            "INFO:tensorflow:Graph was finalized.\n",
            "INFO:tensorflow:Restoring parameters from /tmp/tmpr_2vnsk3/model.ckpt-1024\n",
            "INFO:tensorflow:Running local_init_op.\n",
            "INFO:tensorflow:Done running local_init_op.\n",
            "INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 1024...\n",
            "INFO:tensorflow:Saving checkpoints for 1024 into /tmp/tmpr_2vnsk3/model.ckpt.\n",
            "INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 1024...\n",
            "INFO:tensorflow:loss = 0.6949047, step = 1024\n",
            "INFO:tensorflow:global_step/sec: 331.811\n",
            "INFO:tensorflow:loss = 0.69176507, step = 1124 (0.303 sec)\n",
            "INFO:tensorflow:global_step/sec: 472.349\n",
            "INFO:tensorflow:loss = 0.6969578, step = 1224 (0.211 sec)\n",
            "INFO:tensorflow:global_step/sec: 466.318\n",
            "INFO:tensorflow:loss = 0.6991083, step = 1324 (0.217 sec)\n",
            "INFO:tensorflow:global_step/sec: 441.172\n",
            "INFO:tensorflow:loss = 0.69564223, step = 1424 (0.225 sec)\n",
            "INFO:tensorflow:global_step/sec: 448.116\n",
            "INFO:tensorflow:loss = 0.6956173, step = 1524 (0.222 sec)\n",
            "INFO:tensorflow:global_step/sec: 468.844\n",
            "INFO:tensorflow:loss = 0.6948998, step = 1624 (0.215 sec)\n",
            "INFO:tensorflow:global_step/sec: 459.55\n",
            "INFO:tensorflow:loss = 0.69096094, step = 1724 (0.215 sec)\n",
            "INFO:tensorflow:global_step/sec: 464.959\n",
            "INFO:tensorflow:loss = 0.6917819, step = 1824 (0.215 sec)\n",
            "INFO:tensorflow:global_step/sec: 427.788\n",
            "INFO:tensorflow:loss = 0.6913247, step = 1924 (0.234 sec)\n",
            "INFO:tensorflow:global_step/sec: 457.438\n",
            "INFO:tensorflow:loss = 0.693395, step = 2024 (0.218 sec)\n",
            "INFO:tensorflow:global_step/sec: 447.911\n",
            "INFO:tensorflow:loss = 0.6980244, step = 2124 (0.226 sec)\n",
            "INFO:tensorflow:global_step/sec: 473.493\n",
            "INFO:tensorflow:loss = 0.69154155, step = 2224 (0.212 sec)\n",
            "INFO:tensorflow:global_step/sec: 454.247\n",
            "INFO:tensorflow:loss = 0.69146204, step = 2324 (0.217 sec)\n",
            "INFO:tensorflow:global_step/sec: 460.333\n",
            "INFO:tensorflow:loss = 0.68608826, step = 2424 (0.219 sec)\n",
            "INFO:tensorflow:global_step/sec: 459.86\n",
            "INFO:tensorflow:loss = 0.6934238, step = 2524 (0.218 sec)\n",
            "INFO:tensorflow:global_step/sec: 477.252\n",
            "INFO:tensorflow:loss = 0.69501543, step = 2624 (0.207 sec)\n",
            "INFO:tensorflow:global_step/sec: 459.195\n",
            "INFO:tensorflow:loss = 0.6989089, step = 2724 (0.218 sec)\n",
            "INFO:tensorflow:global_step/sec: 480.741\n",
            "INFO:tensorflow:loss = 0.6963976, step = 2824 (0.208 sec)\n",
            "INFO:tensorflow:global_step/sec: 453.999\n",
            "INFO:tensorflow:loss = 0.69740707, step = 2924 (0.221 sec)\n",
            "INFO:tensorflow:global_step/sec: 428.81\n",
            "INFO:tensorflow:loss = 0.699007, step = 3024 (0.231 sec)\n",
            "INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 3072...\n",
            "INFO:tensorflow:Saving checkpoints for 3072 into /tmp/tmpr_2vnsk3/model.ckpt.\n",
            "INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 3072...\n",
            "INFO:tensorflow:Loss for final step: 0.69158924.\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<tensorflow_estimator.python.estimator.canned.dnn.DNNEstimatorV2 at 0x7ff309dcd4e0>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 13
        }
      ]
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "dummy_estimator_model.ipynb",
	"provenance": [],
	"authorship_tag": "ABX9TyP302mkzLP3UVmX/YY6taTS",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/pavanky/6dcf6adc1ee7ba1ca24dc290ca59bc16/dummy_estimator_model.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "xnW0EoOV5j49",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"import tensorflow as tf\n",
	"tf.compat.v1.disable_eager_execution()"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "HG3DjnRF510Z",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"import numpy as np"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "IGNMvAh753dK",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Sizes of the synthetic problem.\n",
	"TOTAL_BATCHES = 10240  # number of examples (rows) generated below\n",
	"MAX_CATEGORIES = 2**16  # also used as the hash bucket count of the categorical column\n",
	"NUMERICAL_DIMENSION = 64\n",
	"EMBEDDING_DIMENSION = 64\n",
	"SEED = 0  # fixed seed so Restart & Run All regenerates the same dummy data\n",
	"\n",
	"# Create dummy data\n",
	"rng = np.random.RandomState(SEED)\n",
	"# One random category id per example (a single draw per row, not a whole array).\n",
	"CATEGORICAL = np.array([\"word\" + str(category_id)\n",
	"                        for category_id in rng.randint(0, MAX_CATEGORIES, size=TOTAL_BATCHES)])\n",
	"# float32 matches the dtype declared on the numeric feature column.\n",
	"NUMERICAL = rng.random_sample((TOTAL_BATCHES, NUMERICAL_DIMENSION)).astype(np.float32)\n",
	"LABELS = rng.randint(0, 2, size=TOTAL_BATCHES)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "QrcS4mPq58QW",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"def get_feature_columns():\n",
	"  \"\"\"Builds the model inputs: an embedded hashed string column and a dense numeric column.\n",
	"\n",
	"  Returns:\n",
	"    A two-element list `[embedding_column, numeric_column]`.\n",
	"  \"\"\"\n",
	"  hashed_categories = tf.feature_column.categorical_column_with_hash_bucket(\n",
	"      key=\"categorical\",\n",
	"      hash_bucket_size=MAX_CATEGORIES,\n",
	"      dtype=tf.dtypes.string)\n",
	"  dense_features = tf.feature_column.numeric_column(\n",
	"      key=\"numerical\",\n",
	"      shape=(NUMERICAL_DIMENSION,),\n",
	"      dtype=tf.dtypes.float32)\n",
	"  embedded_categories = tf.feature_column.embedding_column(\n",
	"      categorical_column=hashed_categories,\n",
	"      dimension=EMBEDDING_DIMENSION,\n",
	"      combiner=\"sum\")\n",
	"  return [embedded_categories, dense_features]\n",
	"\n",
	"def get_dataset(batch_size, prefetch=4):\n",
	"  \"\"\"Builds an endlessly repeating, shuffled, batched dataset over the dummy data.\n",
	"\n",
	"  Args:\n",
	"    batch_size: Number of examples per batch; the shuffle buffer holds 10x this many.\n",
	"    prefetch: Number of batches to prefetch.\n",
	"\n",
	"  Returns:\n",
	"    A `tf.data.Dataset` yielding `(features, labels)` batches, where `features`\n",
	"    is a dict with keys \"categorical\" and \"numerical\".\n",
	"  \"\"\"\n",
	"  features = {\"categorical\": CATEGORICAL, \"numerical\": NUMERICAL}\n",
	"  dataset = tf.data.Dataset.from_tensor_slices((features, LABELS))\n",
	"  dataset = dataset.repeat().shuffle(buffer_size=batch_size * 10)\n",
	"  # Honor the caller's `prefetch` argument rather than a hard-coded depth.\n",
	"  return dataset.batch(batch_size).prefetch(prefetch)"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "NH-94u7v6v-Z",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 199
	},
	"outputId": "4d64e6c8-0130-4df2-e665-b6c51a22072b"
	},
	"source": [
	"# Binary classifier: two 64-unit hidden layers on top of the feature columns.\n",
	"# The optimizer is handed over as a zero-argument factory that builds SGD.\n",
	"def _sgd_optimizer():\n",
	"  return tf.keras.optimizers.SGD(learning_rate=1E-3)\n",
	"\n",
	"binary_head = tf.estimator.BinaryClassHead()\n",
	"model_columns = get_feature_columns()\n",
	"estimator = tf.estimator.DNNEstimator(\n",
	"    head=binary_head,\n",
	"    feature_columns=model_columns,\n",
	"    hidden_units=[64, 64],\n",
	"    optimizer=_sgd_optimizer)"
	],
	"execution_count": 10,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Using default config.\n",
	"WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmpr_2vnsk3\n",
	"INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpr_2vnsk3', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true\n",
	"graph_options {\n",
	" rewrite_options {\n",
	" meta_optimizer_iterations: ONE\n",
	" }\n",
	"}\n",
	", '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "1zpuAHLJ7OnT",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 1000
	},
	"outputId": "ddbde117-9524-45d2-e718-260241f711ab"
	},
	"source": [
	"# Train for num_steps * num_epochs steps on batches of 256 examples.\n",
	"num_epochs, num_steps = 2, 1024\n",
	"\n",
	"def _train_input_fn():\n",
	"  return get_dataset(256)\n",
	"\n",
	"estimator.train(input_fn=_train_input_fn, steps=num_steps * num_epochs)"
	],
	"execution_count": 13,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Calling model_fn.\n",
	"WARNING:tensorflow:Layer dnn is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n",
	"\n",
	"If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
	"\n",
	"To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
	"\n",
	"INFO:tensorflow:Done calling model_fn.\n",
	"INFO:tensorflow:Create CheckpointSaverHook.\n",
	"INFO:tensorflow:Graph was finalized.\n",
	"INFO:tensorflow:Restoring parameters from /tmp/tmpr_2vnsk3/model.ckpt-1024\n",
	"INFO:tensorflow:Running local_init_op.\n",
	"INFO:tensorflow:Done running local_init_op.\n",
	"INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 1024...\n",
	"INFO:tensorflow:Saving checkpoints for 1024 into /tmp/tmpr_2vnsk3/model.ckpt.\n",
	"INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 1024...\n",
	"INFO:tensorflow:loss = 0.6949047, step = 1024\n",
	"INFO:tensorflow:global_step/sec: 331.811\n",
	"INFO:tensorflow:loss = 0.69176507, step = 1124 (0.303 sec)\n",
	"INFO:tensorflow:global_step/sec: 472.349\n",
	"INFO:tensorflow:loss = 0.6969578, step = 1224 (0.211 sec)\n",
	"INFO:tensorflow:global_step/sec: 466.318\n",
	"INFO:tensorflow:loss = 0.6991083, step = 1324 (0.217 sec)\n",
	"INFO:tensorflow:global_step/sec: 441.172\n",
	"INFO:tensorflow:loss = 0.69564223, step = 1424 (0.225 sec)\n",
	"INFO:tensorflow:global_step/sec: 448.116\n",
	"INFO:tensorflow:loss = 0.6956173, step = 1524 (0.222 sec)\n",
	"INFO:tensorflow:global_step/sec: 468.844\n",
	"INFO:tensorflow:loss = 0.6948998, step = 1624 (0.215 sec)\n",
	"INFO:tensorflow:global_step/sec: 459.55\n",
	"INFO:tensorflow:loss = 0.69096094, step = 1724 (0.215 sec)\n",
	"INFO:tensorflow:global_step/sec: 464.959\n",
	"INFO:tensorflow:loss = 0.6917819, step = 1824 (0.215 sec)\n",
	"INFO:tensorflow:global_step/sec: 427.788\n",
	"INFO:tensorflow:loss = 0.6913247, step = 1924 (0.234 sec)\n",
	"INFO:tensorflow:global_step/sec: 457.438\n",
	"INFO:tensorflow:loss = 0.693395, step = 2024 (0.218 sec)\n",
	"INFO:tensorflow:global_step/sec: 447.911\n",
	"INFO:tensorflow:loss = 0.6980244, step = 2124 (0.226 sec)\n",
	"INFO:tensorflow:global_step/sec: 473.493\n",
	"INFO:tensorflow:loss = 0.69154155, step = 2224 (0.212 sec)\n",
	"INFO:tensorflow:global_step/sec: 454.247\n",
	"INFO:tensorflow:loss = 0.69146204, step = 2324 (0.217 sec)\n",
	"INFO:tensorflow:global_step/sec: 460.333\n",
	"INFO:tensorflow:loss = 0.68608826, step = 2424 (0.219 sec)\n",
	"INFO:tensorflow:global_step/sec: 459.86\n",
	"INFO:tensorflow:loss = 0.6934238, step = 2524 (0.218 sec)\n",
	"INFO:tensorflow:global_step/sec: 477.252\n",
	"INFO:tensorflow:loss = 0.69501543, step = 2624 (0.207 sec)\n",
	"INFO:tensorflow:global_step/sec: 459.195\n",
	"INFO:tensorflow:loss = 0.6989089, step = 2724 (0.218 sec)\n",
	"INFO:tensorflow:global_step/sec: 480.741\n",
	"INFO:tensorflow:loss = 0.6963976, step = 2824 (0.208 sec)\n",
	"INFO:tensorflow:global_step/sec: 453.999\n",
	"INFO:tensorflow:loss = 0.69740707, step = 2924 (0.221 sec)\n",
	"INFO:tensorflow:global_step/sec: 428.81\n",
	"INFO:tensorflow:loss = 0.699007, step = 3024 (0.231 sec)\n",
	"INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 3072...\n",
	"INFO:tensorflow:Saving checkpoints for 3072 into /tmp/tmpr_2vnsk3/model.ckpt.\n",
	"INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 3072...\n",
	"INFO:tensorflow:Loss for final step: 0.69158924.\n"
	],
	"name": "stdout"
	},
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"<tensorflow_estimator.python.estimator.canned.dnn.DNNEstimatorV2 at 0x7ff309dcd4e0>"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 13
	}
	]
	}
	]
	}