# -*- coding: utf-8 -*-
"""
Demo: effect of weight initialization (naive random, Xavier, He) on the
per-layer activation statistics of a stack of fully connected layers.

@author: Chih-Sheng Huang (Tommy), chih.sheng.huang821@gmail.com
"""
import math
from collections import OrderedDict

import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np

np.random.seed(1)  # reproducible

# 2000 samples of 500-dimensional standard-normal input data.
data = np.random.randn(2000, 500).astype('float32')

# Layer widths shrink by 50 per layer: [500, 450, 400, 350, 300, 250].
layer_sizes = [500 - 50 * i for i in range(0, 6)]
num_layers = len(layer_sizes)
def weight_init_random(model, is_normal=0):
    # Naive random init: N(0, 1) if is_normal, otherwise U(-1, 1).
    for m in model.modules():
        if isinstance(m, nn.Linear):
            stdv = 1
            if is_normal:
                m.weight.data.normal_(0, stdv)
                if m.bias is not None:
                    m.bias.data.normal_(0, stdv)
            else:
                m.weight.data.uniform_(-stdv, stdv)
                if m.bias is not None:
                    m.bias.data.uniform_(-stdv, stdv)
def weight_init_xavier(model, is_normal=0):
    # Xavier (Glorot) init; nn.Linear weight has shape (fan_out, fan_in).
    for m in model.modules():
        if isinstance(m, nn.Linear):
            if is_normal == 1:
                # Xavier normal: std = sqrt(2 / (fan_in + fan_out)),
                # equivalent to nn.init.xavier_normal_(m.weight).
                stdv = math.sqrt(2 / (m.weight.size(0) + m.weight.size(1)))
                m.weight.data.normal_(0, stdv)
                if m.bias is not None:
                    m.bias.data.normal_(0, stdv)
            else:
                # Xavier uniform: bound = sqrt(6 / (fan_in + fan_out)),
                # equivalent to nn.init.xavier_uniform_(m.weight).
                stdv = math.sqrt(6 / (m.weight.size(0) + m.weight.size(1)))
                m.weight.data.uniform_(-stdv, stdv)
                if m.bias is not None:
                    m.bias.data.uniform_(-stdv, stdv)
def weight_init_He(model, is_normal=0):
    # He (Kaiming) init for ReLU; the standard formula uses fan_in, which is
    # m.weight.size(1) for nn.Linear (the original used size(0), i.e. fan_out).
    for m in model.modules():
        if isinstance(m, nn.Linear):
            if is_normal == 1:
                # He normal: std = sqrt(2 / fan_in),
                # equivalent to nn.init.kaiming_normal_(m.weight).
                stdv = math.sqrt(2 / m.weight.size(1))
                m.weight.data.normal_(0, stdv)
                if m.bias is not None:
                    m.bias.data.normal_(0, stdv)
            else:
                # He uniform: bound = sqrt(6 / fan_in), equivalent to
                # nn.init.kaiming_uniform_(m.weight, nonlinearity='relu').
                stdv = math.sqrt(6 / m.weight.size(1))
                m.weight.data.uniform_(-stdv, stdv)
                if m.bias is not None:
                    m.bias.data.uniform_(-stdv, stdv)
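# For reference, torch.nn.init ships built-in equivalents of the hand-rolled
# initializers above. A minimal sketch (this helper is not part of the
# original script and is not called below):
def weight_init_builtin(model, mode='xavier'):
    for m in model.modules():
        if isinstance(m, nn.Linear):
            if mode == 'xavier':
                nn.init.xavier_uniform_(m.weight)
            elif mode == 'he':
                nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
            if m.bias is not None:
                nn.init.zeros_(m.bias)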
class _fcblock(nn.Module):
    # Fully connected block: Linear -> BatchNorm1d -> ReLU.
    def __init__(self, num_input_features, num_output_features):
        super(_fcblock, self).__init__()
        self.fc = nn.Linear(num_input_features, num_output_features, bias=False)
        self.bn = nn.BatchNorm1d(num_output_features)
        # self.act = nn.Tanh()
        self.act = nn.ReLU()

    def forward(self, x):
        x = self.fc(x)
        x = self.bn(x)
        x = self.act(x)
        return x
class Testmodel(nn.Module):
    # Stack of _fcblocks whose widths follow layer_sizes.
    def __init__(self):
        super(Testmodel, self).__init__()
        self.model = nn.Sequential(OrderedDict([]))
        for i in range(0, num_layers - 1):
            node_in = layer_sizes[i]
            node_out = layer_sizes[i + 1]
            block = _fcblock(node_in, node_out)
            self.model.add_module('dc%d' % (i + 1), block)

    def forward(self, x):
        # Collect every block's activation so per-layer statistics can be
        # inspected afterwards.
        output = []
        for i in range(len(self.model)):
            x = self.model[i](x)
            output.append(x)
        return output
model = Testmodel()
# Pick one initializer; the other two are left here for comparison.
weight_init_random(model, is_normal=1)
# weight_init_xavier(model, is_normal=0)
# weight_init_He(model, is_normal=1)

x = torch.Tensor(data)
output = model(x)
output = [tmp.detach().numpy() for tmp in output]

print('input mean {0:.5f} and std {1:.5f}'.format(np.mean(data),
                                                  np.std(data)))
for idx, fc in enumerate(output):
    print('layer {0} mean {1:.5f} and std {2:.5f}'.format(idx + 1, np.mean(fc),
                                                          np.std(fc)))
# Histogram of each layer's activations over [-1, 1].
plt.figure()
for idx, fc in enumerate(output):
    plt.subplot(1, len(output), idx + 1)
    plt.hist(fc.flatten(), 30, range=[-1, 1])
    plt.xlabel('layer ' + str(idx + 1))
    plt.yticks([])
plt.show()
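# Note: because each _fcblock applies BatchNorm1d before the ReLU, the
# post-activation statistics printed above stay roughly the same regardless
# of which initializer is chosen. To see the classic vanishing/exploding
# effect of the initializers themselves, drop the BatchNorm1d from _fcblock
# (e.g. return self.act(self.fc(x)) in its forward).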