byronyi/standalone_client.ipynb Secret

## standalone_client.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting job # 0 in a separate thread.\n"
     ]
    }
   ],
   "source": [
    "%%python --bg\n",
    "\n",
    "# Worker task 0 of the local two-worker cluster, launched as a background\n",
    "# script; join() is expected to keep it alive until %killbgscripts ends it.\n",
    "import json\n",
    "import os\n",
    "\n",
    "import tensorflow as tf\n",
    "\n",
    "cluster = {'worker': ['localhost:5000', 'localhost:5001']}\n",
    "task = {'type': 'worker', 'index': 0}\n",
    "\n",
    "# Publish the cluster layout and this task's identity through TF_CONFIG\n",
    "# before the server is started.\n",
    "os.environ['TF_CONFIG'] = json.dumps({'cluster': cluster, 'task': task})\n",
    "\n",
    "server = tf.contrib.distribute.run_standard_tensorflow_server()\n",
    "server.join()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting job # 2 in a separate thread.\n"
     ]
    }
   ],
   "source": [
    "%%python --bg\n",
    "\n",
    "# Worker task 1 of the local two-worker cluster, launched as a background\n",
    "# script; join() is expected to keep it alive until %killbgscripts ends it.\n",
    "import json\n",
    "import os\n",
    "\n",
    "import tensorflow as tf\n",
    "\n",
    "cluster = {'worker': ['localhost:5000', 'localhost:5001']}\n",
    "task = {'type': 'worker', 'index': 1}\n",
    "\n",
    "# Publish the cluster layout and this task's identity through TF_CONFIG\n",
    "# before the server is started.\n",
    "os.environ['TF_CONFIG'] = json.dumps({'cluster': cluster, 'task': task})\n",
    "\n",
    "server = tf.contrib.distribute.run_standard_tensorflow_server()\n",
    "server.join()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from tensorflow.data import Dataset as tfd\n",
    "from tensorflow.contrib.distribute import CollectiveAllReduceStrategy\n",
    "from tensorflow.contrib.distribute import DistributeConfig\n",
    "\n",
    "# Must list the same addresses as TF_CONFIG in the two worker cells.\n",
    "WORKER_HOSTS = ['localhost:5000', 'localhost:5001']\n",
    "\n",
    "\n",
    "def input_fn():\n",
    "    \"\"\"Return a dataset holding the example ([[1.]], 1) repeated 32 times.\"\"\"\n",
    "    features = tfd.from_tensors([[1.]]).repeat(32)\n",
    "    labels = tfd.from_tensors(1).repeat(32)\n",
    "    return tfd.zip((features, labels))\n",
    "\n",
    "\n",
    "def model_fn(features, labels, mode):\n",
    "    \"\"\"Build the EstimatorSpec for a one-unit dense regression model.\n",
    "\n",
    "    Args:\n",
    "        features: input tensor fed to the Dense(1) layer.\n",
    "        labels: regression targets; not used when mode is PREDICT.\n",
    "        mode: one of tf.estimator.ModeKeys.{PREDICT, EVAL, TRAIN}.\n",
    "\n",
    "    Returns:\n",
    "        A tf.estimator.EstimatorSpec populated for the given mode.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if mode is not one of the three ModeKeys above.\n",
    "    \"\"\"\n",
    "    layer = tf.layers.Dense(1)\n",
    "    logits = layer(features)\n",
    "\n",
    "    if mode == tf.estimator.ModeKeys.PREDICT:\n",
    "        predictions = {\"logits\": logits}\n",
    "        return tf.estimator.EstimatorSpec(mode, predictions=predictions)\n",
    "\n",
    "    # Each example yields a single logit, so it is reshaped to a scalar to\n",
    "    # line up with the scalar label.\n",
    "    loss = tf.losses.mean_squared_error(\n",
    "        labels=labels, predictions=tf.reshape(logits, []))\n",
    "\n",
    "    if mode == tf.estimator.ModeKeys.EVAL:\n",
    "        return tf.estimator.EstimatorSpec(mode, loss=loss)\n",
    "\n",
    "    if mode == tf.estimator.ModeKeys.TRAIN:\n",
    "        # Pass the global step so that minimize() increments it. Without it\n",
    "        # the step stays at 0, TensorFlow warns that the global step \"has\n",
    "        # not been increased\", and checkpoints/logs are all tagged step 0.\n",
    "        train_op = tf.train.GradientDescentOptimizer(0.2).minimize(\n",
    "            loss, global_step=tf.train.get_or_create_global_step())\n",
    "        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)\n",
    "\n",
    "    # Fail loudly instead of silently returning None for an unknown mode.\n",
    "    raise ValueError('Unsupported mode: {!r}'.format(mode))\n",
    "\n",
    "\n",
    "train_spec = tf.estimator.TrainSpec(input_fn=input_fn)\n",
    "eval_spec = tf.estimator.EvalSpec(input_fn=input_fn)\n",
    "\n",
    "distribution = CollectiveAllReduceStrategy(num_gpus_per_worker=0)\n",
    "\n",
    "# remote_cluster names the already running worker servers that this\n",
    "# notebook process drives as a standalone client.\n",
    "config = tf.estimator.RunConfig(\n",
    "    experimental_distribute=DistributeConfig(\n",
    "        train_distribute=distribution,\n",
    "        eval_distribute=distribution,\n",
    "        remote_cluster={'worker': WORKER_HOSTS},\n",
    "    )\n",
    ")\n",
    "\n",
    "estimator = tf.estimator.Estimator(model_fn=model_fn, config=config)\n",
    "tf.estimator.train_and_evaluate(estimator=estimator,\n",
    "                                train_spec=train_spec,\n",
    "                                eval_spec=eval_spec)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "All background processes were killed.\n"
     ]
    }
   ],
   "source": [
    "%killbgscripts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Starting job # 0 in a separate thread.\n"
	]
	}
	],
	"source": [
	"%%python --bg\n",
	"\n",
	"# Worker task 0 of the local two-worker cluster, launched as a background\n",
	"# script; join() is expected to keep it alive until %killbgscripts ends it.\n",
	"import json\n",
	"import os\n",
	"\n",
	"import tensorflow as tf\n",
	"\n",
	"cluster = {'worker': ['localhost:5000', 'localhost:5001']}\n",
	"task = {'type': 'worker', 'index': 0}\n",
	"\n",
	"# Publish the cluster layout and this task's identity through TF_CONFIG\n",
	"# before the server is started.\n",
	"os.environ['TF_CONFIG'] = json.dumps({'cluster': cluster, 'task': task})\n",
	"\n",
	"server = tf.contrib.distribute.run_standard_tensorflow_server()\n",
	"server.join()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Starting job # 2 in a separate thread.\n"
	]
	}
	],
	"source": [
	"%%python --bg\n",
	"\n",
	"# Worker task 1 of the local two-worker cluster, launched as a background\n",
	"# script; join() is expected to keep it alive until %killbgscripts ends it.\n",
	"import json\n",
	"import os\n",
	"\n",
	"import tensorflow as tf\n",
	"\n",
	"cluster = {'worker': ['localhost:5000', 'localhost:5001']}\n",
	"task = {'type': 'worker', 'index': 1}\n",
	"\n",
	"# Publish the cluster layout and this task's identity through TF_CONFIG\n",
	"# before the server is started.\n",
	"os.environ['TF_CONFIG'] = json.dumps({'cluster': cluster, 'task': task})\n",
	"\n",
	"server = tf.contrib.distribute.run_standard_tensorflow_server()\n",
	"server.join()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import tensorflow as tf\n",
	"from tensorflow.data import Dataset as tfd\n",
	"from tensorflow.contrib.distribute import CollectiveAllReduceStrategy\n",
	"from tensorflow.contrib.distribute import DistributeConfig\n",
	"\n",
	"# Must list the same addresses as TF_CONFIG in the two worker cells.\n",
	"WORKER_HOSTS = ['localhost:5000', 'localhost:5001']\n",
	"\n",
	"\n",
	"def input_fn():\n",
	"    \"\"\"Return a dataset holding the example ([[1.]], 1) repeated 32 times.\"\"\"\n",
	"    features = tfd.from_tensors([[1.]]).repeat(32)\n",
	"    labels = tfd.from_tensors(1).repeat(32)\n",
	"    return tfd.zip((features, labels))\n",
	"\n",
	"\n",
	"def model_fn(features, labels, mode):\n",
	"    \"\"\"Build the EstimatorSpec for a one-unit dense regression model.\n",
	"\n",
	"    Args:\n",
	"        features: input tensor fed to the Dense(1) layer.\n",
	"        labels: regression targets; not used when mode is PREDICT.\n",
	"        mode: one of tf.estimator.ModeKeys.{PREDICT, EVAL, TRAIN}.\n",
	"\n",
	"    Returns:\n",
	"        A tf.estimator.EstimatorSpec populated for the given mode.\n",
	"\n",
	"    Raises:\n",
	"        ValueError: if mode is not one of the three ModeKeys above.\n",
	"    \"\"\"\n",
	"    layer = tf.layers.Dense(1)\n",
	"    logits = layer(features)\n",
	"\n",
	"    if mode == tf.estimator.ModeKeys.PREDICT:\n",
	"        predictions = {\"logits\": logits}\n",
	"        return tf.estimator.EstimatorSpec(mode, predictions=predictions)\n",
	"\n",
	"    # Each example yields a single logit, so it is reshaped to a scalar to\n",
	"    # line up with the scalar label.\n",
	"    loss = tf.losses.mean_squared_error(\n",
	"        labels=labels, predictions=tf.reshape(logits, []))\n",
	"\n",
	"    if mode == tf.estimator.ModeKeys.EVAL:\n",
	"        return tf.estimator.EstimatorSpec(mode, loss=loss)\n",
	"\n",
	"    if mode == tf.estimator.ModeKeys.TRAIN:\n",
	"        # Pass the global step so that minimize() increments it. Without it\n",
	"        # the step stays at 0, TensorFlow warns that the global step \"has\n",
	"        # not been increased\", and checkpoints/logs are all tagged step 0.\n",
	"        train_op = tf.train.GradientDescentOptimizer(0.2).minimize(\n",
	"            loss, global_step=tf.train.get_or_create_global_step())\n",
	"        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)\n",
	"\n",
	"    # Fail loudly instead of silently returning None for an unknown mode.\n",
	"    raise ValueError('Unsupported mode: {!r}'.format(mode))\n",
	"\n",
	"\n",
	"train_spec = tf.estimator.TrainSpec(input_fn=input_fn)\n",
	"eval_spec = tf.estimator.EvalSpec(input_fn=input_fn)\n",
	"\n",
	"distribution = CollectiveAllReduceStrategy(num_gpus_per_worker=0)\n",
	"\n",
	"# remote_cluster names the already running worker servers that this\n",
	"# notebook process drives as a standalone client.\n",
	"config = tf.estimator.RunConfig(\n",
	"    experimental_distribute=DistributeConfig(\n",
	"        train_distribute=distribution,\n",
	"        eval_distribute=distribution,\n",
	"        remote_cluster={'worker': WORKER_HOSTS},\n",
	"    )\n",
	")\n",
	"\n",
	"estimator = tf.estimator.Estimator(model_fn=model_fn, config=config)\n",
	"tf.estimator.train_and_evaluate(estimator=estimator,\n",
	"                                train_spec=train_spec,\n",
	"                                eval_spec=eval_spec)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"All background processes were killed.\n"
	]
	}
	],
	"source": [
	"%killbgscripts"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"---"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.15"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}