MXNet gluon.nn.BatchNorm issue report
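Reproduction script: the same network is built with and without BatchNorm layers in net.tail, one training step and one inference pass are run on both the CPU and the GPU context, and the resulting class predictions and embeddings are collected in a DataFrame for comparison.
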
import os

import gluoncv
import mxnet as mx
import mxnet.ndarray as nd
import numpy as np
import pandas as pd
from mxnet import init, autograd
from mxnet.gluon import nn

WITH_RESNET = True
N_EMBEDDINGS = 128
N_CLASSES = 10

np.random.seed(42)
os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"


class MyNet(nn.HybridBlock):
    """ Network with a feature extractor, a custom tail (embeddings layer) and an output
    layer. Set the global variable WITH_RESNET to True to use a ResNet50 as feature
    extractor, or to False for a single conv block.
    """

    def __init__(self, with_batchnorm, with_resnet=False):
        super(MyNet, self).__init__()
        with self.name_scope():
            # Choose a large feature extractor...
            if with_resnet:
                backbone = gluoncv.model_zoo.get_model("resnet50_v1", pretrained=False)
                self.features = backbone.features
            # ... or a custom small one.
            else:
                self.features = nn.HybridSequential()
                self.features.add(nn.Conv2D(3, kernel_size=(3, 3), strides=(2, 2), padding=(1, 1), use_bias=False))
                self.features.add(nn.BatchNorm())
                self.features.add(nn.Activation("relu"))
                self.features.add(nn.AvgPool2D(pool_size=(2, 2), strides=(2, 2)))
            # Tail: Flatten -> (BatchNorm) -> Dense embeddings -> (BatchNorm).
            self.tail = nn.HybridSequential()
            self.tail.add(nn.Flatten())
            if with_batchnorm:
                self.tail.add(nn.BatchNorm(momentum=0.9, epsilon=1e-5))
            self.tail.add(nn.Dense(N_EMBEDDINGS, weight_initializer=init.Normal(0.01)))
            if with_batchnorm:
                self.tail.add(nn.BatchNorm(momentum=0.9, epsilon=1e-5))
            self.output = nn.Dense(N_CLASSES, weight_initializer=init.Normal(0.01))

    def hybrid_forward(self, F, x, *args, **kwargs):
        x = self.features(x)
        x = self.tail(x)
        # During a training step (recording), also apply the output layer to get class
        # predictions. During inference, return the embeddings from the tail.
        if autograd.is_recording():
            x = self.output(x)
        return x
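
# Illustrative note (not in the original gist): because of the autograd.is_recording()
# switch in hybrid_forward, the same net yields differently shaped outputs in training
# and inference mode, e.g. for an input x of shape (N, 3, 224, 224):
#
#     with autograd.record():
#         logits = net(x)        # shape (N, N_CLASSES), output layer applied
#     embeddings = net(x)        # shape (N, N_EMBEDDINGS), output layer skipped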


def get_net(with_batchnorm, input_shape):
    """ Build the network and initialize the parameters.

    PARAMETERS
    ----------
    with_batchnorm: bool
        Adds BatchNorm layers to the tail of the network, if set to True.
    input_shape: tuple of int
        Shape of the input tensor, used for the one-time initialization forward pass.

    RETURNS
    -------
    MyNet
    """
    net = MyNet(with_batchnorm, WITH_RESNET)
    net.initialize()
    # Since the parameters are lazily initialized, do a one-time forward pass to complete
    # the initialization. Use autograd.record() so that the pass also reaches the output
    # layer (see MyNet.hybrid_forward).
    x = nd.zeros(shape=input_shape)
    with autograd.record():
        _ = net(x)
    return net
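
# Illustrative note (not in the original gist): skipping the dummy forward pass above
# would leave the parameters deferred-initialized, and a later access such as
#
#     net = MyNet(True, WITH_RESNET)
#     net.initialize()
#     net.output.weight.data()
#
# would raise mxnet.gluon.parameter.DeferredInitializationError, which is what
# check_params() below guards against.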


def check_params(net, mx_ctx):
    """ Check that all parameters have been initialized on the given contexts. If a
    parameter has not been initialized, the call to param.data() raises an exception.

    PARAMETERS
    ----------
    net: MyNet
        Object of MyNet.
    mx_ctx: list of mxnet.context.Context
        MXNet compute contexts.
    """
    for ctx in mx_ctx:
        for name, param in net.collect_params().items():
            _ = param.data(ctx)


if __name__ == '__main__':
    ctx_test_list = [mx.cpu(), mx.gpu(0)]
    input_shape_list = [(1, 3, 224, 224), (2, 3, 224, 224)]
    res_list = []
    loss_fn = mx.gluon.loss.SoftmaxCrossEntropyLoss()
    # Loop over two different networks, one with BatchNorm layers in net.tail and one without.
    for input_shape in input_shape_list:
        x = nd.random.randn(*input_shape)
        y = nd.random.randint(0, N_CLASSES, shape=(input_shape[0],))
        for with_batchnorm in [True, False]:
            # Define and initialize the network on the CPU context.
            net = get_net(with_batchnorm, input_shape)
            # Test on CPU and GPU context. The results should be the same.
            for ctx in ctx_test_list:
                res = {}
                res["ctx"] = str(ctx)
                res["with_batchnorm"] = with_batchnorm
                # Copy the input data and the network parameters to the context of choice.
                x = x.as_in_context(ctx)
                y = y.as_in_context(ctx)
                net.collect_params().reset_ctx([ctx])
                check_params(net, [ctx])
                trainer = mx.gluon.Trainer(net.collect_params(), "sgd",
                                           {"learning_rate": 0.01, "momentum": 0.9, "wd": 0.0005})
                # Simulate a forward training step, computing the class predictions.
                with autograd.record():
                    y_out = net(x)
                    loss = loss_fn(y_out, y)
                loss.backward()
                trainer.step(batch_size=input_shape[0])
                # Simulate a forward validation step, computing the embeddings.
                y_embeddings = net(x)
                # Do some validation comparison with the embeddings here, e.g.:
                # distance = np.sum(np.square(y_embeddings - y_embeddings_val), axis=1)
                # acc = ...
                # res["x"] = x.asnumpy()
                res["input_shape"] = input_shape
                res["y_out"] = y_out.asnumpy()
                res["y_embeddings"] = y_embeddings.asnumpy()
                res_list.append(res)
    df = pd.DataFrame(res_list)
    pd.set_option('display.max_rows', 50)
    pd.set_option('display.max_columns', 50)
    pd.set_option('display.width', 1000)
    print(df)
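
    # Hedged sketch (not part of the original report): with the loop order above, the
    # rows for the two contexts land in consecutive pairs in res_list, so a rough
    # per-pair summary of the CPU/GPU differences can be printed like this. Note that
    # a training step runs in each context before the embeddings are taken, so the two
    # rows of a pair are not expected to match bit for bit.
    for i in range(0, len(res_list), 2):
        res_cpu, res_gpu = res_list[i], res_list[i + 1]
        diff = np.max(np.abs(res_cpu["y_embeddings"] - res_gpu["y_embeddings"]))
        print("shape={}, with_batchnorm={}, max |cpu - gpu| embeddings diff: {:.6e}".format(
            res_cpu["input_shape"], res_cpu["with_batchnorm"], diff))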