# Two-layer fully connected network with a ReLU hidden layer, trained
# by hand-coded backpropagation using NumPy only
import numpy as np
# b_size is batch size
# inp_dim is input dimension
# hid_dim is hidden dimension
# out_dim is output dimension
b_size, inp_dim, hid_dim, out_dim = 64, 1000, 100, 10
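# Shapes used below: X is (b_size, inp_dim), w1 is (inp_dim, hid_dim),
# w2 is (hid_dim, out_dim), predictions are (b_size, out_dim)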
# Create a random input dataset X and random targets y
X = np.random.randn(b_size, inp_dim)
y = np.random.randn(b_size, out_dim)
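# X and y are independent random draws, so there is no real signal to
# learn; the example only demonstrates the mechanics of backpropagation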
# Randomly initialise the weights
w1 = np.random.randn(inp_dim, hid_dim)
w2 = np.random.randn(hid_dim, out_dim)
# Learning rate (rates much above 1e-6 make the loss diverge with this
# unscaled random initialisation)
lr = 1e-6
# Train for 20 epochs (each step uses the full batch)
for t in range(20):
    # Forward pass: input -> hidden -> ReLU -> output
    h = X.dot(w1)                  # (b_size, hid_dim)
    h_relu = np.maximum(h, 0)      # element-wise ReLU
    y_pred = h_relu.dot(w2)        # (b_size, out_dim)
    # Compute the sum-of-squares loss
    loss = np.square(y_pred - y).sum()
    print(t, loss)
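    # The loss sums squared errors over both the batch and the output
    # dimensions, so its scale grows with b_size and out_dim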
    # Backpropagation: compute gradients of the loss with respect to
    # w1 and w2 by applying the chain rule layer by layer
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0              # ReLU passes gradient only where h > 0
    grad_w1 = X.T.dot(grad_h)
    # Update weights
    w1 -= lr * grad_w1
    w2 -= lr * grad_w2
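
# Optional sanity check (not part of the original gist): a minimal
# sketch comparing the backprop gradient of w2 against a central
# finite-difference estimate at one arbitrary entry. The probed entry
# (0, 0) and the step size eps are assumptions made for illustration.
def total_loss(w1_, w2_):
    # Recompute the forward pass and sum-of-squares loss from scratch
    return np.square(np.maximum(X.dot(w1_), 0).dot(w2_) - y).sum()

eps = 1e-4
i, j = 0, 0
w2_plus, w2_minus = w2.copy(), w2.copy()
w2_plus[i, j] += eps
w2_minus[i, j] -= eps
numeric = (total_loss(w1, w2_plus) - total_loss(w1, w2_minus)) / (2 * eps)

# Analytic gradient of the loss w.r.t. w2 at the current weights
h_relu = np.maximum(X.dot(w1), 0)
grad_w2 = h_relu.T.dot(2.0 * (h_relu.dot(w2) - y))
print('finite difference:', numeric, 'backprop:', grad_w2[i, j])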