import numpy as np

def conv(image, label, params, conv_s, pool_f, pool_s):
    [f1, f2, w3, w4, b1, b2, b3, b4] = params

    ################################################
    ############## Forward Operation ###############
    ################################################
    conv1 = convolution(image, f1, b1, conv_s)  # first convolution operation
    conv1[conv1 <= 0] = 0  # pass through ReLU non-linearity
    conv2 = convolution(conv1, f2, b2, conv_s)  # second convolution operation
    conv2[conv2 <= 0] = 0  # pass through ReLU non-linearity
    pooled = maxpool(conv2, pool_f, pool_s)  # maxpooling operation

    (nf2, dim2, _) = pooled.shape
    fc = pooled.reshape((nf2 * dim2 * dim2, 1))  # flatten pooled layer

    z = w3.dot(fc) + b3  # first dense layer
    z[z <= 0] = 0  # pass through ReLU non-linearity

    out = w4.dot(z) + b4  # second dense layer
    probs = softmax(out)  # predict class probabilities with the softmax activation function

    ################################################
    #################### Loss ######################
    ################################################
    loss = categoricalCrossEntropy(probs, label)  # categorical cross-entropy loss

    ################################################
    ############# Backward Operation ###############
    ################################################
    dout = probs - label  # derivative of loss w.r.t. final dense layer output
    dw4 = dout.dot(z.T)  # loss gradient of final dense layer weights
    db4 = np.sum(dout, axis=1).reshape(b4.shape)  # loss gradient of final dense layer biases

    dz = w4.T.dot(dout)  # loss gradient of first dense layer outputs
    dz[z <= 0] = 0  # backpropagate through ReLU
    dw3 = dz.dot(fc.T)  # loss gradient of first dense layer weights
    db3 = np.sum(dz, axis=1).reshape(b3.shape)  # loss gradient of first dense layer biases

    dfc = w3.T.dot(dz)  # loss gradient of the flattened (fully-connected) input
    dpool = dfc.reshape(pooled.shape)  # reshape flattened gradient into dimensions of pooling layer

    dconv2 = maxpoolBackward(dpool, conv2, pool_f, pool_s)  # backprop through the max-pooling layer (only neurons with the highest activation in each window receive gradient)
    dconv2[conv2 <= 0] = 0  # backpropagate through ReLU

    dconv1, df2, db2 = convolutionBackward(dconv2, conv1, f2, conv_s)  # backpropagate previous gradient through second convolutional layer
    dconv1[conv1 <= 0] = 0  # backpropagate through ReLU

    dimage, df1, db1 = convolutionBackward(dconv1, image, f1, conv_s)  # backpropagate previous gradient through first convolutional layer

    grads = [df1, df2, dw3, dw4, db1, db2, db3, db4]

    return grads, loss
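
The function above relies on helpers (convolution, maxpool, softmax, categoricalCrossEntropy, maxpoolBackward, convolutionBackward) that are defined elsewhere in the author's project and not shown in this snippet. As a rough orientation only, here is a minimal sketch of what the two simplest helpers might look like; the exact implementations in the gist may differ.

import numpy as np

def softmax(raw_preds):
    # exponentiate the raw scores and normalise so they sum to 1;
    # subtracting the max first keeps the exponentials numerically stable
    out = np.exp(raw_preds - np.max(raw_preds))
    return out / np.sum(out)

def categoricalCrossEntropy(probs, label):
    # label is assumed to be a one-hot column vector with the same shape as probs
    return -np.sum(label * np.log(probs))

Whatever parameter shapes are used when calling conv, the flattened pooling output fc must have as many rows as w3 has columns, and label must be a one-hot column vector matching the shape of the softmax output, otherwise the dense-layer products and the loss gradient dout = probs - label will not line up.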