Bung's debugging session
import arraymancer
import sequtils
import random
import strformat

# https://d2l.ai/d2l-en.pdf, section 3.2 "Linear Regression Implementation from Scratch" (English version).
iterator data_iter[T](batch_size: Natural, features: Tensor[T], labels: Tensor[T]): (Tensor[T], Tensor[T]) =
  let num_examples = features.shape[0]
  var indices = toSeq(countup(0, num_examples - 1))
  random.shuffle(indices)
  for i in countup(0, num_examples - 1, batch_size):
    let j = indices[i ..< min(i + batch_size, num_examples)]
    # gather batch_size rows per step; the original `take` is not an
    # Arraymancer proc, `index_select` does the row gathering
    yield (features.index_select(0, j), labels.index_select(0, j))
proc linreg[T](X: Tensor[T], w: Tensor[T], b: T): Tensor[T] =
  # linear regression
  return X * w +. b
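# Note: `linreg` expects `w` as a column vector of shape [in_features, 1],
# while the autograd `linear(w, b)` used in the model below takes `w` as
# [out_features, in_features] (the shape created further down) and handles
# the transpose itself.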
# proc squared_loss[T](y_hat: Tensor[T], y: Tensor[T]): Tensor[T] =
#   # loss function
#   return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2  # `**` is Python syntax, see the Nim sketch below
# proc sgd(params, lr, batch_size) =
#   # mini-batch stochastic gradient descent, optimization algorithm
#   for param in params:
#     param[:] = param - lr * param.grad / batch_size  # `param[:]` is Python syntax, see the Nim sketch below
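# A minimal Nim sketch of the two commented-out helpers above, assuming
# Arraymancer's `Variable` exposes public `value` and `grad` tensors as in
# the autograd examples. This mirrors the book's manual training path; the
# `network` DSL further down is what this file actually runs.
proc squared_loss[T](y_hat, y: Tensor[T]): Tensor[T] =
  # elementwise loss: (y_hat - y)^2 / 2
  let d = y_hat - y.reshape(y_hat.shape)
  result = T(0.5) * (d *. d)

proc sgd[T](params: seq[Variable[Tensor[T]]], lr: T, batch_size: int) =
  # in-place mini-batch SGD step: param -= lr * param.grad / batch_size
  for p in params:
    p.value -= (lr / T(batch_size)) * p.grad
    p.grad = p.grad.zeros_like  # reset the gradient for the next backprop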
let batch_size = 10
let num_inputs = 2
let num_examples = 1000
let true_w = [2.0'f32, -3.4'f32] # weights
let true_b = 4.2'f32 # bias
let features = randomNormalTensor[float32]([num_examples, num_inputs], std = 1.0'f32)
var labels = true_w[0] * features[_, 0] + true_w[1] * features[_, 1] +. true_b
# noise
labels += randomNormalTensor[float32]([num_examples, 1], std = 0.01'f32)
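# Quick sanity check of `data_iter` (a sketch; the shapes follow from the
# data built above: features [1000, 2], labels [1000, 1]):
block:
  for (bx, by) in data_iter(batch_size, features, labels):
    doAssert bx.shape[0] == batch_size and by.shape[0] == batch_size
    echo "first batch shapes: ", bx.shape, " ", by.shape
    break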
let ctx = newContext Tensor[float32]

# Create random Tensors to hold inputs and outputs, and wrap them in Variables.
let
  x = ctx.variable(features)
  y = labels
  batch = num_examples
  in_features = num_inputs
  out_features = 1

# When not initializing via the network DSL you need requires_grad = true
var w = ctx.variable(
  randomNormalTensor[float32]([out_features, in_features], std = 0.01'f32),
  requires_grad = true)
var b = ctx.variable(
  zeros[float32]([1, out_features]),
  requires_grad = true) # shape [1, out_features]; the shape check is right but its error message is wrong
# ##################################################################
# Define the model.
network ctx, OneLayersNet:
  # layers:
  forward x:
    x.linear(w, b)

let
  model = ctx.init(OneLayersNet)
  optim = model.optimizerSGD(learning_rate = 0.03'f32)
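# For contrast, the book's manual mini-batch path would look roughly like
# this (left commented out so it does not perturb the DSL training below;
# it reuses `data_iter` and the `sgd` sketch above, both otherwise unused):
#
# for epoch in 0 ..< 3:
#   for (bx, by) in data_iter(batch_size, features, labels):
#     let l = ctx.variable(bx).linear(w, b).mse_loss(by)
#     l.backprop()
#     sgd(@[w, b], 0.03'f32, batch_size)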
# ##################################################################
# Training
for t in 0 ..< 3:
  let
    y_pred = model.forward(x)
    loss = y_pred.mse_loss(y)
  echo &"Epoch {t}: loss {loss.value[0]}"
  loss.backprop()
  optim.update()

echo w.value
echo b.value
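# Compare the learned parameters with the generating ones (exact values
# vary with the RNG seed; after the three epochs above they should be close):
echo "w error: ", abs(true_w[0] - w.value[0, 0]), " ", abs(true_w[1] - w.value[0, 1])
echo "b error: ", abs(true_b - b.value[0, 0])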
# epoch 1, loss 0.040643
# epoch 2, loss 0.000157
# epoch 3, loss 0.000050
# After training, the learned parameters should be close to the true
# parameters used to generate the training set. Reference output from the
# book's MXNet version:
# In [13]: true_w, w
# Out[13]: ([2, -3.4], [[ 1.9999995]
#                       [-3.3997662]] <NDArray 2x1 @cpu(0)>)
# In [14]: true_b, b
# Out[14]: (4.2, [4.1996264]
#                <NDArray 1 @cpu(0)>)