MXNet gluon.nn.BatchNorm issue report
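Reproduction script: the same network is built with and without BatchNorm layers in net.tail, one training step and one inference pass are run on both the CPU and the GPU context, and the resulting class predictions and embeddings are collected in a DataFrame for comparison.
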
import os

import gluoncv
import mxnet as mx
import mxnet.ndarray as nd
import numpy as np
import pandas as pd
from mxnet import init, autograd
from mxnet.gluon import nn

WITH_RESNET = True
N_EMBEDDINGS = 128
N_CLASSES = 10

np.random.seed(42)
os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"


class MyNet(nn.HybridBlock):
    """ Network with a feature extractor, a custom tail (embeddings layer) and an output
    layer. Set the global variable WITH_RESNET to True to use a ResNet50 as feature
    extractor, or to False for a single conv block.
    """

    def __init__(self, with_batchnorm, with_resnet=False):
        super(MyNet, self).__init__()
        with self.name_scope():
            # Choose a large feature extractor...
            if with_resnet:
                backbone = gluoncv.model_zoo.get_model("resnet50_v1", pretrained=False)
                self.features = backbone.features
            # ... or a custom small one.
            else:
                self.features = nn.HybridSequential()
                self.features.add(nn.Conv2D(3, kernel_size=(3, 3), strides=(2, 2), padding=(1, 1), use_bias=False))
                self.features.add(nn.BatchNorm())
                self.features.add(nn.Activation("relu"))
                self.features.add(nn.AvgPool2D(pool_size=(2, 2), strides=(2, 2)))
            # Tail: Flatten -> (BatchNorm) -> Dense embeddings -> (BatchNorm).
            self.tail = nn.HybridSequential()
            self.tail.add(nn.Flatten())
            if with_batchnorm:
                self.tail.add(nn.BatchNorm(momentum=0.9, epsilon=1e-5))
            self.tail.add(nn.Dense(N_EMBEDDINGS, weight_initializer=init.Normal(0.01)))
            if with_batchnorm:
                self.tail.add(nn.BatchNorm(momentum=0.9, epsilon=1e-5))
            self.output = nn.Dense(N_CLASSES, weight_initializer=init.Normal(0.01))

    def hybrid_forward(self, F, x, *args, **kwargs):
        x = self.features(x)
        x = self.tail(x)
        # During a training step (recording), also apply the output layer to get class
        # predictions. During inference, return the embeddings from the tail.
        if autograd.is_recording():
            x = self.output(x)
        return x
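
# Illustrative note (not in the original gist): because of the autograd.is_recording()
# switch in hybrid_forward, the same net yields differently shaped outputs in training
# and inference mode, e.g. for an input x of shape (N, 3, 224, 224):
#
#     with autograd.record():
#         logits = net(x)        # shape (N, N_CLASSES), output layer applied
#     embeddings = net(x)        # shape (N, N_EMBEDDINGS), output layer skipped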


def get_net(with_batchnorm, input_shape):
    """ Build the network and initialize the parameters.

    PARAMETERS
    ----------
    with_batchnorm: bool
        Adds BatchNorm layers to the tail of the network, if set to True.
    input_shape: tuple of int
        Shape of the input tensor, used for the one-time initialization forward pass.

    RETURNS
    -------
    MyNet
    """
    net = MyNet(with_batchnorm, WITH_RESNET)
    net.initialize()
    # Since the parameters are lazily initialized, do a one-time forward pass to complete
    # the initialization. Use autograd.record() so that the pass also reaches the output
    # layer (see MyNet.hybrid_forward).
    x = nd.zeros(shape=input_shape)
    with autograd.record():
        _ = net(x)
    return net
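
# Illustrative note (not in the original gist): skipping the dummy forward pass above
# would leave the parameters deferred-initialized, and a later access such as
#
#     net = MyNet(True, WITH_RESNET)
#     net.initialize()
#     net.output.weight.data()
#
# would raise mxnet.gluon.parameter.DeferredInitializationError, which is what
# check_params() below guards against.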


def check_params(net, mx_ctx):
    """ Check that all parameters have been initialized on the given contexts. If a
    parameter has not been initialized, the call to param.data() raises an exception.

    PARAMETERS
    ----------
    net: MyNet
        Object of MyNet.
    mx_ctx: list of mxnet.context.Context
        MXNet compute contexts.
    """
    for ctx in mx_ctx:
        for name, param in net.collect_params().items():
            _ = param.data(ctx)


if __name__ == '__main__':
    ctx_test_list = [mx.cpu(), mx.gpu(0)]
    input_shape_list = [(1, 3, 224, 224), (2, 3, 224, 224)]
    res_list = []
    loss_fn = mx.gluon.loss.SoftmaxCrossEntropyLoss()
    # Loop over two different networks, one with BatchNorm layers in net.tail and one without.
    for input_shape in input_shape_list:
        x = nd.random.randn(*input_shape)
        y = nd.random.randint(0, N_CLASSES, shape=(input_shape[0],))
        for with_batchnorm in [True, False]:
            # Define and initialize the network on the CPU context.
            net = get_net(with_batchnorm, input_shape)
            # Test on CPU and GPU context. The results should be the same.
            for ctx in ctx_test_list:
                res = {}
                res["ctx"] = str(ctx)
                res["with_batchnorm"] = with_batchnorm
                # Copy the input data and the network parameters to the context of choice.
                x = x.as_in_context(ctx)
                y = y.as_in_context(ctx)
                net.collect_params().reset_ctx([ctx])
                check_params(net, [ctx])
                trainer = mx.gluon.Trainer(net.collect_params(), "sgd",
                                           {"learning_rate": 0.01, "momentum": 0.9, "wd": 0.0005})
                # Simulate a forward training step, computing the class predictions.
                with autograd.record():
                    y_out = net(x)
                    loss = loss_fn(y_out, y)
                loss.backward()
                trainer.step(batch_size=input_shape[0])
                # Simulate a forward validation step, computing the embeddings.
                y_embeddings = net(x)
                # Do some validation comparison with the embeddings here, e.g.:
                # distance = np.sum(np.square(y_embeddings - y_embeddings_val), axis=1)
                # acc = ...
                # res["x"] = x.asnumpy()
                res["input_shape"] = input_shape
                res["y_out"] = y_out.asnumpy()
                res["y_embeddings"] = y_embeddings.asnumpy()
                res_list.append(res)
    df = pd.DataFrame(res_list)
    pd.set_option('display.max_rows', 50)
    pd.set_option('display.max_columns', 50)
    pd.set_option('display.width', 1000)
    print(df)
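
    # Hedged sketch (not part of the original report): with the loop order above, the
    # rows for the two contexts land in consecutive pairs in res_list, so a rough
    # per-pair summary of the CPU/GPU differences can be printed like this. Note that
    # a training step runs in each context before the embeddings are taken, so the two
    # rows of a pair are not expected to match bit for bit.
    for i in range(0, len(res_list), 2):
        res_cpu, res_gpu = res_list[i], res_list[i + 1]
        diff = np.max(np.abs(res_cpu["y_embeddings"] - res_gpu["y_embeddings"]))
        print("shape={}, with_batchnorm={}, max |cpu - gpu| embeddings diff: {:.6e}".format(
            res_cpu["input_shape"], res_cpu["with_batchnorm"], diff))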