shang-vikas/feeding_data_totf_part2.ipynb

## feeding_data_totf_part2.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## <font color=brown>Simple Graph Execution</font>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "## import necessary stuff\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import os,sys\n",
    "import time"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Building simple DNN Model and feeding the numpy/pandas data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Simple MNIST Model of Dense layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Using default config.\n",
      "WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmpvcgi43dc\n",
      "INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpvcgi43dc', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f9ec826ce10>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n"
     ]
    }
   ],
   "source": [
    "# A single dense numeric feature, keyed 'image', holding a 784-element vector.\n",
    "feature_column = [\n",
    "    tf.feature_column.numeric_column(key='image', shape=(784,)),\n",
    "]\n",
    "\n",
    "# Canned DNN classifier: two hidden layers of 100 units, 10 output classes.\n",
    "model = tf.estimator.DNNClassifier(\n",
    "    hidden_units=[100, 100],\n",
    "    feature_columns=feature_column,\n",
    "    n_classes=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "### Same MNIST model using TFRecords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Absolute path of the TFRecord file, resolved against the current working directory.\n",
    "mnist_tfrecord_path = os.path.abspath('./mnist_train.tfrecords')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Oops! I forgot the feature names stored in the TFRecords file.\n",
    "A quick hack is to run `head -n10 mnist_train.tfrecords` in bash\n",
    "and look for the feature names in its output.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "def _parse_(serialized_example):\n",
    "    '''Turn one serialized tf.Example into a ({'image': ...}, label) pair.\n",
    "\n",
    "    Reads the string feature 'image_raw' and the int64 feature 'label',\n",
    "    decodes the raw bytes as int64 values and casts the label to int32.\n",
    "    '''\n",
    "    schema = {\n",
    "        'image_raw': tf.FixedLenFeature([], tf.string),\n",
    "        'label': tf.FixedLenFeature([], tf.int64),\n",
    "    }\n",
    "    parsed = tf.parse_single_example(serialized_example, schema)\n",
    "    # Decode as int64: the original author notes that decoding as float raises an error.\n",
    "    pixels = tf.decode_raw(parsed['image_raw'], tf.int64)\n",
    "    target = tf.cast(parsed['label'], tf.int32)\n",
    "    return ({'image': pixels}, target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "def tfrecord_train_input_fn(batch_size=32, shuffle_buffer_size=10000):\n",
    "    '''Build the training input pipeline from the TFRecord file at `mnist_tfrecord_path`.\n",
    "\n",
    "    Args:\n",
    "        batch_size: number of examples per batch.\n",
    "        shuffle_buffer_size: number of serialized records held in the shuffle buffer.\n",
    "\n",
    "    Returns:\n",
    "        The (features, labels) tensors produced by a one-shot iterator.\n",
    "    '''\n",
    "    tfrecord_dataset = tf.data.TFRecordDataset(mnist_tfrecord_path)\n",
    "    # shuffle() expects a buffer size. The former shuffle(True) was a buffer of 1,\n",
    "    # which leaves the records in file order. Shuffling the still-serialized records\n",
    "    # keeps the buffer small compared to shuffling decoded images.\n",
    "    tfrecord_dataset = (\n",
    "        tfrecord_dataset\n",
    "        .shuffle(buffer_size=shuffle_buffer_size)\n",
    "        .map(_parse_)\n",
    "        .batch(batch_size)\n",
    "    )\n",
    "    tfrecord_iterator = tfrecord_dataset.make_one_shot_iterator()\n",
    "    return tfrecord_iterator.get_next()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Create CheckpointSaverHook.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpjfa3etpe/model.ckpt.\n",
      "INFO:tensorflow:loss = 3407.3716, step = 0\n",
      "INFO:tensorflow:global_step/sec: 458.111\n",
      "INFO:tensorflow:loss = 49.886246, step = 100 (0.219 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 200 into /tmp/tmpjfa3etpe/model.ckpt.\n",
      "INFO:tensorflow:Loss for final step: 35.36163.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7fcada6accf8>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Train for 200 steps on batches of 32 examples from the TFRecord pipeline.\n",
    "model.train(input_fn=lambda: tfrecord_train_input_fn(32), steps=200)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### *What if I have the raw images on disk?*"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we need to use the `from_tensor_slices` function of the `tf.data.Dataset` API again."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# File names (not full paths) of the training images, in sorted order.\n",
    "# Pure Python instead of the `!ls` shell escape; dot-files are skipped, as plain `ls` does.\n",
    "f = sorted(name for name in os.listdir('./digit-recognizer/train/')\n",
    "           if not name.startswith('.'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _ondisk_parse_(filename):\n",
    "    '''Read one image from ./digit-recognizer/train and parse its label from the file name.\n",
    "\n",
    "    The label is the second '_'-separated token of `filename`, cut at its first '.'\n",
    "    (presumably names look like 'img_7.jpg' -- confirm against the data folder).\n",
    "\n",
    "    Args:\n",
    "        filename: scalar string tensor holding a bare file name.\n",
    "\n",
    "    Returns:\n",
    "        ({'image': float32 tensor of shape (784,)}, int32 label tensor of shape (1,)).\n",
    "    '''\n",
    "    filename = tf.cast([filename], tf.string)\n",
    "    name_token = tf.string_split(filename, '_').values[1]\n",
    "    label = tf.string_split([name_token], '.').values[0]\n",
    "    label = tf.string_to_number([label], tf.int32)\n",
    "    path = tf.cast([os.path.abspath('./digit-recognizer/train')], tf.string)\n",
    "    final_path = tf.string_join((path, tf.cast(['/'], tf.string), filename))\n",
    "    image_string = tf.read_file(final_path[0])\n",
    "    # channels=1 pins the decode to one grayscale channel so the reshape below is well defined\n",
    "    # (assumes 28x28 images -- TODO confirm).\n",
    "    image = tf.image.decode_jpeg(image_string, channels=1)\n",
    "    # decode_jpeg yields uint8; cast straight to float32. The former int8 detour\n",
    "    # corrupted every pixel value above 127.\n",
    "    image = tf.cast(image, tf.float32)\n",
    "    # Flatten to the (784,) shape declared by the 'image' feature column. The reshaped\n",
    "    # tensor used to be computed but the un-reshaped image was returned instead.\n",
    "    image_reshaped = tf.reshape(image, (784,))\n",
    "    return ({'image': image_reshaped}, label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def ondisk_train_input_fn(filenames, batch_size=32, shuffle_buffer_size=10000):\n",
    "    '''Build the training input pipeline from image files on disk.\n",
    "\n",
    "    Args:\n",
    "        filenames: file names handed to Dataset.from_tensor_slices, one element per file.\n",
    "        batch_size: number of examples per batch.\n",
    "        shuffle_buffer_size: number of file names held in the shuffle buffer.\n",
    "\n",
    "    Returns:\n",
    "        The (features, labels) tensors produced by a one-shot iterator.\n",
    "    '''\n",
    "    dataset = tf.data.Dataset.from_tensor_slices(filenames)\n",
    "    # shuffle() expects a buffer size. The former shuffle(True) was a buffer of 1,\n",
    "    # i.e. no shuffling. File names are shuffled before decoding so the buffer\n",
    "    # holds short strings rather than decoded images.\n",
    "    dataset = (\n",
    "        dataset\n",
    "        .shuffle(buffer_size=shuffle_buffer_size)\n",
    "        .map(_ondisk_parse_)\n",
    "        .batch(batch_size)\n",
    "    )\n",
    "    ondisk_iterator = dataset.make_one_shot_iterator()\n",
    "    return ondisk_iterator.get_next()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Create CheckpointSaverHook.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpvcgi43dc/model.ckpt.\n",
      "INFO:tensorflow:loss = 1007.99396, step = 0\n",
      "INFO:tensorflow:Saving checkpoints for 2 into /tmp/tmpvcgi43dc/model.ckpt.\n",
      "INFO:tensorflow:Loss for final step: 3291.0413.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f9ec826ccf8>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Two-step run to check the on-disk pipeline end to end.\n",
    "model.train(input_fn=lambda: ondisk_train_input_fn(f, 32), steps=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Calling model_fn.\n",
      "INFO:tensorflow:Done calling model_fn.\n",
      "INFO:tensorflow:Create CheckpointSaverHook.\n",
      "INFO:tensorflow:Graph was finalized.\n",
      "INFO:tensorflow:Restoring parameters from /tmp/tmpvcgi43dc/model.ckpt-2\n",
      "INFO:tensorflow:Running local_init_op.\n",
      "INFO:tensorflow:Done running local_init_op.\n",
      "INFO:tensorflow:Saving checkpoints for 2 into /tmp/tmpvcgi43dc/model.ckpt.\n",
      "INFO:tensorflow:loss = 7812.8594, step = 2\n",
      "INFO:tensorflow:global_step/sec: 138.149\n",
      "INFO:tensorflow:loss = 76.17598, step = 102 (0.725 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 202 into /tmp/tmpvcgi43dc/model.ckpt.\n",
      "INFO:tensorflow:Loss for final step: 74.08304.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f9ec826ccf8>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Train for 200 steps on batches of 32 images read from disk.\n",
    "model.train(input_fn=lambda: ondisk_train_input_fn(f, 32), steps=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## <font color=brown>Simple Graph Execution</font>"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"## import necessary stuff\n",
	"import tensorflow as tf\n",
	"import numpy as np\n",
	"import os,sys\n",
	"import time"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Building simple DNN Model and feeding the numpy/pandas data"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"#### Simple MNIST Model of Dense layers"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Using default config.\n",
	"WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmpvcgi43dc\n",
	"INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpvcgi43dc', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f9ec826ce10>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n"
	]
	}
	],
	"source": [
	"# A single dense numeric feature, keyed 'image', holding a 784-element vector.\n",
	"feature_column = [\n",
	"    tf.feature_column.numeric_column(key='image', shape=(784,)),\n",
	"]\n",
	"\n",
	"# Canned DNN classifier: two hidden layers of 100 units, 10 output classes.\n",
	"model = tf.estimator.DNNClassifier(\n",
	"    hidden_units=[100, 100],\n",
	"    feature_columns=feature_column,\n",
	"    n_classes=10,\n",
	")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"heading_collapsed": true
	},
	"source": [
	"### Same MNIST model using TFRecords"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"hidden": true
	},
	"outputs": [],
	"source": [
	"# Absolute path of the TFRecord file, resolved against the current working directory.\n",
	"mnist_tfrecord_path = os.path.abspath('./mnist_train.tfrecords')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"hidden": true
	},
	"source": [
	"Oops! I forgot the feature names stored in the TFRecords file.\n",
	"A quick hack is to run `head -n10 mnist_train.tfrecords` in bash\n",
	"and look for the feature names in its output.\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"hidden": true
	},
	"outputs": [],
	"source": [
	"def _parse_(serialized_example):\n",
	"    '''Turn one serialized tf.Example into a ({'image': ...}, label) pair.\n",
	"\n",
	"    Reads the string feature 'image_raw' and the int64 feature 'label',\n",
	"    decodes the raw bytes as int64 values and casts the label to int32.\n",
	"    '''\n",
	"    schema = {\n",
	"        'image_raw': tf.FixedLenFeature([], tf.string),\n",
	"        'label': tf.FixedLenFeature([], tf.int64),\n",
	"    }\n",
	"    parsed = tf.parse_single_example(serialized_example, schema)\n",
	"    # Decode as int64: the original author notes that decoding as float raises an error.\n",
	"    pixels = tf.decode_raw(parsed['image_raw'], tf.int64)\n",
	"    target = tf.cast(parsed['label'], tf.int32)\n",
	"    return ({'image': pixels}, target)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"hidden": true
	},
	"outputs": [],
	"source": [
	"def tfrecord_train_input_fn(batch_size=32, shuffle_buffer_size=10000):\n",
	"    '''Build the training input pipeline from the TFRecord file at `mnist_tfrecord_path`.\n",
	"\n",
	"    Args:\n",
	"        batch_size: number of examples per batch.\n",
	"        shuffle_buffer_size: number of serialized records held in the shuffle buffer.\n",
	"\n",
	"    Returns:\n",
	"        The (features, labels) tensors produced by a one-shot iterator.\n",
	"    '''\n",
	"    tfrecord_dataset = tf.data.TFRecordDataset(mnist_tfrecord_path)\n",
	"    # shuffle() expects a buffer size. The former shuffle(True) was a buffer of 1,\n",
	"    # which leaves the records in file order. Shuffling the still-serialized records\n",
	"    # keeps the buffer small compared to shuffling decoded images.\n",
	"    tfrecord_dataset = (\n",
	"        tfrecord_dataset\n",
	"        .shuffle(buffer_size=shuffle_buffer_size)\n",
	"        .map(_parse_)\n",
	"        .batch(batch_size)\n",
	"    )\n",
	"    tfrecord_iterator = tfrecord_dataset.make_one_shot_iterator()\n",
	"    return tfrecord_iterator.get_next()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"hidden": true
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Calling model_fn.\n",
	"INFO:tensorflow:Done calling model_fn.\n",
	"INFO:tensorflow:Create CheckpointSaverHook.\n",
	"INFO:tensorflow:Graph was finalized.\n",
	"INFO:tensorflow:Running local_init_op.\n",
	"INFO:tensorflow:Done running local_init_op.\n",
	"INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpjfa3etpe/model.ckpt.\n",
	"INFO:tensorflow:loss = 3407.3716, step = 0\n",
	"INFO:tensorflow:global_step/sec: 458.111\n",
	"INFO:tensorflow:loss = 49.886246, step = 100 (0.219 sec)\n",
	"INFO:tensorflow:Saving checkpoints for 200 into /tmp/tmpjfa3etpe/model.ckpt.\n",
	"INFO:tensorflow:Loss for final step: 35.36163.\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7fcada6accf8>"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Train for 200 steps on batches of 32 examples from the TFRecord pipeline.\n",
	"model.train(input_fn=lambda: tfrecord_train_input_fn(32), steps=200)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### What if I have the raw images on disk?"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Here we need to use the `from_tensor_slices` function of the `tf.data.Dataset` API again."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"# File names (not full paths) of the training images, in sorted order.\n",
	"# Pure Python instead of the `!ls` shell escape; dot-files are skipped, as plain `ls` does.\n",
	"f = sorted(name for name in os.listdir('./digit-recognizer/train/')\n",
	"           if not name.startswith('.'))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"def _ondisk_parse_(filename):\n",
	"    '''Read one image from ./digit-recognizer/train and parse its label from the file name.\n",
	"\n",
	"    The label is the second '_'-separated token of `filename`, cut at its first '.'\n",
	"    (presumably names look like 'img_7.jpg' -- confirm against the data folder).\n",
	"\n",
	"    Args:\n",
	"        filename: scalar string tensor holding a bare file name.\n",
	"\n",
	"    Returns:\n",
	"        ({'image': float32 tensor of shape (784,)}, int32 label tensor of shape (1,)).\n",
	"    '''\n",
	"    filename = tf.cast([filename], tf.string)\n",
	"    name_token = tf.string_split(filename, '_').values[1]\n",
	"    label = tf.string_split([name_token], '.').values[0]\n",
	"    label = tf.string_to_number([label], tf.int32)\n",
	"    path = tf.cast([os.path.abspath('./digit-recognizer/train')], tf.string)\n",
	"    final_path = tf.string_join((path, tf.cast(['/'], tf.string), filename))\n",
	"    image_string = tf.read_file(final_path[0])\n",
	"    # channels=1 pins the decode to one grayscale channel so the reshape below is well defined\n",
	"    # (assumes 28x28 images -- TODO confirm).\n",
	"    image = tf.image.decode_jpeg(image_string, channels=1)\n",
	"    # decode_jpeg yields uint8; cast straight to float32. The former int8 detour\n",
	"    # corrupted every pixel value above 127.\n",
	"    image = tf.cast(image, tf.float32)\n",
	"    # Flatten to the (784,) shape declared by the 'image' feature column. The reshaped\n",
	"    # tensor used to be computed but the un-reshaped image was returned instead.\n",
	"    image_reshaped = tf.reshape(image, (784,))\n",
	"    return ({'image': image_reshaped}, label)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"def ondisk_train_input_fn(filenames, batch_size=32, shuffle_buffer_size=10000):\n",
	"    '''Build the training input pipeline from image files on disk.\n",
	"\n",
	"    Args:\n",
	"        filenames: file names handed to Dataset.from_tensor_slices, one element per file.\n",
	"        batch_size: number of examples per batch.\n",
	"        shuffle_buffer_size: number of file names held in the shuffle buffer.\n",
	"\n",
	"    Returns:\n",
	"        The (features, labels) tensors produced by a one-shot iterator.\n",
	"    '''\n",
	"    dataset = tf.data.Dataset.from_tensor_slices(filenames)\n",
	"    # shuffle() expects a buffer size. The former shuffle(True) was a buffer of 1,\n",
	"    # i.e. no shuffling. File names are shuffled before decoding so the buffer\n",
	"    # holds short strings rather than decoded images.\n",
	"    dataset = (\n",
	"        dataset\n",
	"        .shuffle(buffer_size=shuffle_buffer_size)\n",
	"        .map(_ondisk_parse_)\n",
	"        .batch(batch_size)\n",
	"    )\n",
	"    ondisk_iterator = dataset.make_one_shot_iterator()\n",
	"    return ondisk_iterator.get_next()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Calling model_fn.\n",
	"INFO:tensorflow:Done calling model_fn.\n",
	"INFO:tensorflow:Create CheckpointSaverHook.\n",
	"INFO:tensorflow:Graph was finalized.\n",
	"INFO:tensorflow:Running local_init_op.\n",
	"INFO:tensorflow:Done running local_init_op.\n",
	"INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpvcgi43dc/model.ckpt.\n",
	"INFO:tensorflow:loss = 1007.99396, step = 0\n",
	"INFO:tensorflow:Saving checkpoints for 2 into /tmp/tmpvcgi43dc/model.ckpt.\n",
	"INFO:tensorflow:Loss for final step: 3291.0413.\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f9ec826ccf8>"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Two-step run to check the on-disk pipeline end to end.\n",
	"model.train(input_fn=lambda: ondisk_train_input_fn(f, 32), steps=2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"INFO:tensorflow:Calling model_fn.\n",
	"INFO:tensorflow:Done calling model_fn.\n",
	"INFO:tensorflow:Create CheckpointSaverHook.\n",
	"INFO:tensorflow:Graph was finalized.\n",
	"INFO:tensorflow:Restoring parameters from /tmp/tmpvcgi43dc/model.ckpt-2\n",
	"INFO:tensorflow:Running local_init_op.\n",
	"INFO:tensorflow:Done running local_init_op.\n",
	"INFO:tensorflow:Saving checkpoints for 2 into /tmp/tmpvcgi43dc/model.ckpt.\n",
	"INFO:tensorflow:loss = 7812.8594, step = 2\n",
	"INFO:tensorflow:global_step/sec: 138.149\n",
	"INFO:tensorflow:loss = 76.17598, step = 102 (0.725 sec)\n",
	"INFO:tensorflow:Saving checkpoints for 202 into /tmp/tmpvcgi43dc/model.ckpt.\n",
	"INFO:tensorflow:Loss for final step: 74.08304.\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f9ec826ccf8>"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Train for 200 steps on batches of 32 images read from disk.\n",
	"model.train(input_fn=lambda: ondisk_train_input_fn(f, 32), steps=200)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.5"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}