AlexNet TensorFlow Declaration
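The model function below assumes that TensorFlow (1.x) has been imported as `tf` and that a FLAGS object carries the image dimensions and class count. A minimal sketch of those definitions, with flag names taken from the code and default values from the AlexNet/43-class comments, might look like this:

import tensorflow as tf

#Hedged sketch: the flag names are those referenced by cnn_model_fn; the defaults follow the comments below
tf.app.flags.DEFINE_integer('image_width', 227, 'Input image width')
tf.app.flags.DEFINE_integer('image_height', 227, 'Input image height')
tf.app.flags.DEFINE_integer('image_channels', 3, 'Number of input image channels')
tf.app.flags.DEFINE_integer('num_of_classes', 43, 'Number of target classes')
FLAGS = tf.app.flags.FLAGS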
#CREATE CNN STRUCTURE
"""----------------------------------------------------------------------------------------------------------------------------------------------------------------"""
def cnn_model_fn(features, labels, mode):
    """INPUT LAYER"""
    input_layer = tf.reshape(features["x"], [-1, FLAGS.image_width, FLAGS.image_height, FLAGS.image_channels], name="input_layer") #AlexNet uses a 227x227x3 input layer. '-1' lets the batch dimension be inferred from the input
    #print(input_layer)
"""%FIRST CONVOLUTION BLOCK
The first convolutional layer filters the 227×227×3 input image with
96 kernels of size 11×11 with a stride of 4 pixels. Bias of 1."""
conv1 = tf.layers.conv2d(inputs=input_layer, filters=96, kernel_size=[11, 11], strides=4, padding="valid", activation=tf.nn.relu)
lrn1 = tf.nn.lrn(input=conv1, depth_radius=5, bias=1.0, alpha=0.0001/5.0, beta=0.75); #Normalization layer
pool1_conv1 = tf.layers.max_pooling2d(inputs=lrn1, pool_size=[3, 3], strides=2) #Max Pool Layer
#print(pool1_conv1)
"""SECOND CONVOLUTION BLOCK
Divide the 96 channel blob input from block one into 48 and process independently"""
conv2 = tf.layers.conv2d(inputs=pool1_conv1, filters=256, kernel_size=[5, 5], strides=1, padding="same", activation=tf.nn.relu)
lrn2 = tf.nn.lrn(input=conv2, depth_radius=5, bias=1.0, alpha=0.0001/5.0, beta=0.75); #Normalization layer
pool2_conv2 = tf.layers.max_pooling2d(inputs=lrn2, pool_size=[3, 3], strides=2) #Max Pool Layer
#print(pool2_conv2)
"""THIRD CONVOLUTION BLOCK
Note that the third, fourth, and fifth convolution layers are connected to one
another without any intervening pooling or normalization layers.
The third convolutional layer has 384 kernels of size 3 × 3
connected to the (normalized, pooled) outputs of the second convolutional layer"""
conv3 = tf.layers.conv2d(inputs=pool2_conv2, filters=384, kernel_size=[3, 3], strides=1, padding="same", activation=tf.nn.relu)
#print(conv3)
#FOURTH CONVOLUTION BLOCK
"""%The fourth convolutional layer has 384 kernels of size 3 × 3"""
conv4 = tf.layers.conv2d(inputs=conv3, filters=384, kernel_size=[3, 3], strides=1, padding="same", activation=tf.nn.relu)
#print(conv4)
#FIFTH CONVOLUTION BLOCK
"""%the fifth convolutional layer has 256 kernels of size 3 × 3"""
conv5 = tf.layers.conv2d(inputs=conv4, filters=256, kernel_size=[3, 3], strides=1, padding="same", activation=tf.nn.relu)
pool3_conv5 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[3, 3], strides=2, padding="valid") #Max Pool Layer
#print(pool3_conv5)
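    #Shape check (a rough trace, assuming the 227×227×3 input described above):
    # conv1 (11×11, stride 4, valid): (227 - 11)/4 + 1 = 55  -> 55×55×96
    # pool1 (3×3, stride 2, valid):   (55 - 3)/2 + 1   = 27  -> 27×27×96
    # conv2 (5×5, same padding):                               27×27×256
    # pool2 (3×3, stride 2, valid):   (27 - 3)/2 + 1   = 13  -> 13×13×256
    # conv3/conv4/conv5 (same padding):                        13×13×384/384/256
    # pool3 (3×3, stride 2, valid):   (13 - 3)/2 + 1   = 6   -> 6×6×256, matching the flatten below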
    #FULLY CONNECTED LAYER 1
    """The fully-connected layers have 4096 neurons each"""
    pool3_conv5_flat = tf.reshape(pool3_conv5, [-1, 6 * 6 * 256]) #output of the conv blocks is 6x6x256, so we flatten it out before connecting it to a fully connected layer
    fc1 = tf.layers.dense(inputs=pool3_conv5_flat, units=4096, activation=tf.nn.relu)
    #fc1 = tf.layers.conv2d(inputs=pool3_conv5, filters=4096, kernel_size=[6, 6], strides=1, padding="valid", activation=tf.nn.relu) #representing the FC layer with a convolution block (no need for 'pool3_conv5_flat' above)
    #print(fc1)
    #FULLY CONNECTED LAYER 2
    """the output from above is a 4096-wide vector per example"""
    fc2 = tf.layers.dense(inputs=fc1, units=4096, activation=tf.nn.relu)
    #fc2 = tf.layers.conv2d(inputs=fc1, filters=4096, kernel_size=[1, 1], strides=1, padding="valid", activation=tf.nn.relu)
    #print(fc2)
    #FULLY CONNECTED LAYER 3 (LOGITS)
    """again a 4096-wide vector per example, projected down to one logit per class"""
    logits = tf.layers.dense(inputs=fc2, units=FLAGS.num_of_classes, name="logits_layer")
    #fc3 = tf.layers.conv2d(inputs=fc2, filters=43, kernel_size=[1, 1], strides=1, padding="valid")
    #logits = tf.layers.dense(inputs=fc3, units=FLAGS.num_of_classes) #converting the convolutional block (tf.layers.conv2d) to a dense layer (tf.layers.dense). Only needed if we had used tf.layers.conv2d to represent the FC layers
    #print(logits)
    #PASS OUTPUT OF LAST FC LAYER TO A SOFTMAX LAYER
    """convert these raw logits into two different formats that our model function can return:
    The predicted class for each example: an integer index in [0, num_of_classes)
    The probabilities for each possible target class for each example
    tf.argmax(input=logits, axis=1): generate predictions from the logits; axis 1 applies argmax across the class dimension
    tf.nn.softmax(logits, name="softmax_tensor"): generate the probability distribution
    """
    predictions = {
        "classes": tf.argmax(input=logits, axis=1, name="classes_tensor"),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    #Return the predictions straight away if we are in prediction mode rather than training
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    #CALCULATE OUR LOSS
    """For both training and evaluation, we need to define a loss function that measures how closely the
    model's predictions match the target classes. For multiclass classification, cross entropy is typically used as the loss metric."""
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=FLAGS.num_of_classes)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)
    tf.summary.scalar('Loss Per Stride', loss) #log the loss at each training step (visible on TensorBoard)
    #CONFIGURE TRAINING
    """Since the loss of the CNN is the softmax cross-entropy between the logits layer
    and our labels, let's configure our model to optimize this loss value during
    training. Here we use the Adam optimizer with a learning rate of 0.00001
    (a stochastic gradient descent alternative is left commented out below):"""
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.00001)
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) #global_step needed for a proper graph on TensorBoard
        #optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.00005) #Very small learning rate used. Training will be slower to converge but better
        #train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    #ADD EVALUATION METRICS
    eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
"""-----------------------------------------------------------------------------------------------------------------------------------------------------------------"""