@liginity
Last active July 3, 2023
Use PyTorch to perform gradient descent on a linear mapping (i.e., linear regression).
"""use pytorch in this example.
input: dimension = (16,)
output: dimension = (4,)
matrix of linear mapping: dimension = (16, 4)
"""
import torch
from torch import nn
# variables for dimensions (only d3 and d4 are used in this example)
d1 = 64
d2 = 32
d3 = 16
d4 = 4
d5 = 1
# number of steps for gradient descent.
n_steps = 5
learning_rate = 0.0005
# the linear mapping matrix; nn.Parameter sets requires_grad=True itself,
# so passing requires_grad=True to torch.zeros is unnecessary
W2 = nn.Parameter(torch.zeros((d3, d4), dtype=torch.float64))
# optimizer and loss function.
# stochastic gradient descent optimizer
optimizer2 = torch.optim.SGD([W2], lr=learning_rate)
# print(optimizer2)
# mean squared error (squared L2 norm)
loss_fn2 = torch.nn.MSELoss()
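# note (added for clarity): with the default reduction="mean", MSELoss computes
#   loss = mean((x2 @ W2 - y2) ** 2)   (mean over the d4 = 4 output entries),
# so the gradient with respect to W2 is
#   dloss/dW2 = (2 / d4) * torch.outer(x2, x2 @ W2 - y2),
# which loss.backward() accumulates and optimizer2.step() applies below.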
# input is x2, output is y2.
# x2 is 1D array. length of x2 = d3.
x2 = torch.arange(-d3, d3, 2, dtype=torch.float64)
print(f"input:\n x2 = {x2}")
print(f" x2.shape = {x2.shape}")
print()
nums = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53]
# y2 is 1D array. length of y2 = d4.
y2 = torch.tensor(nums[:d4], dtype=torch.float64)
print(f"output:\n y2 = {y2}")
print(f" y2.shape = {y2.shape}")
print()
# before gradient descent
print("before gradient descent")
print("W2 (W2 is the linear mapping matrix) =")
print(W2)
print(f"W2.shape = {W2.shape}")
print()
# do gradient descent for n steps
print("do gradient descent")
for step in range(n_steps):
    print("-" * 80)
    print(f"step {step:>2}")
    # zero the stored gradients
    optimizer2.zero_grad()
    # make a prediction
    ## tensordot would compute the same prediction:
    # y2_prediction_stepi = torch.tensordot(x2, W2, dims=[[-1], [0]])
    ## here, matrix multiplication is used; dimensions: (16) x (16, 4) -> (4)
    y2_prediction_stepi = x2 @ W2
    # calculate the loss between prediction and target
    loss2_stepi = loss_fn2(y2_prediction_stepi, y2)
    print(f"loss = {loss2_stepi.item()}")
    # backward propagation
    loss2_stepi.backward()
    # this adjusts the parameters in the matrix
    optimizer2.step()
    print("W2 (W2 is the linear mapping matrix) =")
    print(W2)
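
For comparison, here is a minimal sketch of the same training loop built on nn.Linear instead of a bare nn.Parameter. This variant is not part of the original gist; note that nn.Linear stores its weight with shape (out_features, in_features), i.e. the transpose of W2 above. Since the computation is mathematically identical, it should reproduce the same loss sequence.

import torch
from torch import nn

lin = nn.Linear(16, 4, bias=False, dtype=torch.float64)
nn.init.zeros_(lin.weight)  # start from the zero matrix, as above
optimizer = torch.optim.SGD(lin.parameters(), lr=0.0005)
loss_fn = nn.MSELoss()

x = torch.arange(-16, 16, 2, dtype=torch.float64)
y = torch.tensor([2.0, 3.0, 5.0, 7.0], dtype=torch.float64)

for step in range(5):
    optimizer.zero_grad()
    loss = loss_fn(lin(x), y)  # lin(x) == x @ lin.weight.T
    loss.backward()
    optimizer.step()
    print(f"step {step}: loss = {loss.item()}")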
liginity commented Jul 3, 2023

Executing the script produces the following output:

$ python gradient-descent-on-linear-mapping.py
input:
  x2 = tensor([-16., -14., -12., -10.,  -8.,  -6.,  -4.,  -2.,   0.,   2.,   4.,   6.,
          8.,  10.,  12.,  14.], dtype=torch.float64)
  x2.shape = torch.Size([16])

output:
  y2 = tensor([2., 3., 5., 7.], dtype=torch.float64)
  y2.shape = torch.Size([4])

before gradient descent
W2 (W2 is the linear mapping matrix) =
Parameter containing:
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]], dtype=torch.float64, requires_grad=True)
W2.shape = torch.Size([16, 4])

do gradient descent
--------------------------------------------------------------------------------
step  0
loss = 21.75
W2 (W2 is the linear mapping matrix) =
Parameter containing:
tensor([[-0.0080, -0.0120, -0.0200, -0.0280],
        [-0.0070, -0.0105, -0.0175, -0.0245],
        [-0.0060, -0.0090, -0.0150, -0.0210],
        [-0.0050, -0.0075, -0.0125, -0.0175],
        [-0.0040, -0.0060, -0.0100, -0.0140],
        [-0.0030, -0.0045, -0.0075, -0.0105],
        [-0.0020, -0.0030, -0.0050, -0.0070],
        [-0.0010, -0.0015, -0.0025, -0.0035],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0010,  0.0015,  0.0025,  0.0035],
        [ 0.0020,  0.0030,  0.0050,  0.0070],
        [ 0.0030,  0.0045,  0.0075,  0.0105],
        [ 0.0040,  0.0060,  0.0100,  0.0140],
        [ 0.0050,  0.0075,  0.0125,  0.0175],
        [ 0.0060,  0.0090,  0.0150,  0.0210],
        [ 0.0070,  0.0105,  0.0175,  0.0245]], dtype=torch.float64,
       requires_grad=True)
--------------------------------------------------------------------------------
step  1
loss = 9.359808
W2 (W2 is the linear mapping matrix) =
Parameter containing:
tensor([[-0.0132, -0.0199, -0.0331, -0.0464],
        [-0.0116, -0.0174, -0.0290, -0.0406],
        [-0.0099, -0.0149, -0.0248, -0.0348],
        [-0.0083, -0.0124, -0.0207, -0.0290],
        [-0.0066, -0.0099, -0.0166, -0.0232],
        [-0.0050, -0.0075, -0.0124, -0.0174],
        [-0.0033, -0.0050, -0.0083, -0.0116],
        [-0.0017, -0.0025, -0.0041, -0.0058],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0017,  0.0025,  0.0041,  0.0058],
        [ 0.0033,  0.0050,  0.0083,  0.0116],
        [ 0.0050,  0.0075,  0.0124,  0.0174],
        [ 0.0066,  0.0099,  0.0166,  0.0232],
        [ 0.0083,  0.0124,  0.0207,  0.0290],
        [ 0.0099,  0.0149,  0.0248,  0.0348],
        [ 0.0116,  0.0174,  0.0290,  0.0406]], dtype=torch.float64,
       requires_grad=True)
--------------------------------------------------------------------------------
step  2
loss = 4.027862335488
W2 (W2 is the linear mapping matrix) =
Parameter containing:
tensor([[-0.0167, -0.0250, -0.0417, -0.0584],
        [-0.0146, -0.0219, -0.0365, -0.0511],
        [-0.0125, -0.0188, -0.0313, -0.0438],
        [-0.0104, -0.0156, -0.0261, -0.0365],
        [-0.0083, -0.0125, -0.0209, -0.0292],
        [-0.0063, -0.0094, -0.0156, -0.0219],
        [-0.0042, -0.0063, -0.0104, -0.0146],
        [-0.0021, -0.0031, -0.0052, -0.0073],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0021,  0.0031,  0.0052,  0.0073],
        [ 0.0042,  0.0063,  0.0104,  0.0146],
        [ 0.0063,  0.0094,  0.0156,  0.0219],
        [ 0.0083,  0.0125,  0.0209,  0.0292],
        [ 0.0104,  0.0156,  0.0261,  0.0365],
        [ 0.0125,  0.0188,  0.0313,  0.0438],
        [ 0.0146,  0.0219,  0.0365,  0.0511]], dtype=torch.float64,
       requires_grad=True)
--------------------------------------------------------------------------------
step  3
loss = 1.733334166004564
W2 (W2 is the linear mapping matrix) =
Parameter containing:
tensor([[-0.0189, -0.0284, -0.0474, -0.0663],
        [-0.0166, -0.0249, -0.0415, -0.0580],
        [-0.0142, -0.0213, -0.0355, -0.0497],
        [-0.0118, -0.0178, -0.0296, -0.0415],
        [-0.0095, -0.0142, -0.0237, -0.0332],
        [-0.0071, -0.0107, -0.0178, -0.0249],
        [-0.0047, -0.0071, -0.0118, -0.0166],
        [-0.0024, -0.0036, -0.0059, -0.0083],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0024,  0.0036,  0.0059,  0.0083],
        [ 0.0047,  0.0071,  0.0118,  0.0166],
        [ 0.0071,  0.0107,  0.0178,  0.0249],
        [ 0.0095,  0.0142,  0.0237,  0.0332],
        [ 0.0118,  0.0178,  0.0296,  0.0415],
        [ 0.0142,  0.0213,  0.0355,  0.0497],
        [ 0.0166,  0.0249,  0.0415,  0.0580]], dtype=torch.float64,
       requires_grad=True)
--------------------------------------------------------------------------------
step  4
loss = 0.7459160916617404
W2 (W2 is the linear mapping matrix) =
Parameter containing:
tensor([[-0.0204, -0.0306, -0.0511, -0.0715],
        [-0.0179, -0.0268, -0.0447, -0.0626],
        [-0.0153, -0.0230, -0.0383, -0.0536],
        [-0.0128, -0.0192, -0.0319, -0.0447],
        [-0.0102, -0.0153, -0.0255, -0.0358],
        [-0.0077, -0.0115, -0.0192, -0.0268],
        [-0.0051, -0.0077, -0.0128, -0.0179],
        [-0.0026, -0.0038, -0.0064, -0.0089],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0026,  0.0038,  0.0064,  0.0089],
        [ 0.0051,  0.0077,  0.0128,  0.0179],
        [ 0.0077,  0.0115,  0.0192,  0.0268],
        [ 0.0102,  0.0153,  0.0255,  0.0358],
        [ 0.0128,  0.0192,  0.0319,  0.0447],
        [ 0.0153,  0.0230,  0.0383,  0.0536],
        [ 0.0179,  0.0268,  0.0447,  0.0626]], dtype=torch.float64,
       requires_grad=True)
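
A few sanity checks on these numbers (my analysis, not part of the original gist):

  • With W2 = 0, the first prediction is the zero vector, so the initial loss is mean(y2**2) = (4 + 9 + 25 + 49) / 4 = 21.75, exactly the step-0 value.
  • The gradient of the loss is (2/4) * outer(x2, x2 @ W2 - y2), so the first SGD update sets W2[i, j] = lr * (2/4) * x2[i] * y2[j]; for example, W2[0, 0] = 0.0005 * 0.5 * (-16) * 2 = -0.0080, matching the printed matrix.
  • With a single training sample, each step scales the residual x2 @ W2 - y2 by 1 - lr * (2/4) * sum(x2**2) = 1 - 0.0005 * 0.5 * 1376 = 0.656, so the loss shrinks by 0.656**2 ≈ 0.4303 per step: 21.75 * 0.4303 ≈ 9.3598, then 4.0279, as printed.

In the limit, gradient descent from the zero matrix should converge to the minimum-norm solution outer(x2, y2) / sum(x2**2), which a short check confirms:

import torch

x2 = torch.arange(-16, 16, 2, dtype=torch.float64)
y2 = torch.tensor([2.0, 3.0, 5.0, 7.0], dtype=torch.float64)
# minimum-norm W satisfying x2 @ W == y2 (assumption: the limit of the loop above)
W_star = torch.outer(x2, y2) / x2.dot(x2)
print(torch.allclose(x2 @ W_star, y2))  # True
print(W_star[0, 0].item())              # ~ -0.02326; compare W2[0, 0] drifting toward it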
