Bung's debugging session
import arraymancer
import sequtils
import random
import strformat

# https://d2l.ai/d2l-en.pdf, section 3.2 "Linear Regression Implementation from Scratch" (English version).
iterator data_iter[T](batch_size: Natural, features: Tensor[T], labels: Tensor[T]): (Tensor[T], Tensor[T]) =
  let num_examples = features.shape[0]
  var indices = toSeq(countup(0, num_examples - 1))
  random.shuffle(indices)
  for i in countup(0, num_examples - 1, batch_size):
    let j = indices[i ..< min(i + batch_size, num_examples)]
    # gather batch_size rows per step; the original `take` is not an
    # Arraymancer proc, `index_select` does the row gathering
    yield (features.index_select(0, j), labels.index_select(0, j))
proc linreg[T](X: Tensor[T], w: Tensor[T], b: T): Tensor[T] =
  # linear regression
  return X * w +. b
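# Note: `linreg` expects `w` as a column vector of shape [in_features, 1],
# while the autograd `linear(w, b)` used in the model below takes `w` as
# [out_features, in_features] (the shape created further down) and handles
# the transpose itself.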
# proc squared_loss[T](y_hat: Tensor[T], y: Tensor[T]): Tensor[T] =
#   # loss function
#   return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2  # `**` is Python syntax, see the Nim sketch below
# proc sgd(params, lr, batch_size) =
#   # mini-batch stochastic gradient descent, optimization algorithm
#   for param in params:
#     param[:] = param - lr * param.grad / batch_size  # `param[:]` is Python syntax, see the Nim sketch below
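# A minimal Nim sketch of the two commented-out helpers above, assuming
# Arraymancer's `Variable` exposes public `value` and `grad` tensors as in
# the autograd examples. This mirrors the book's manual training path; the
# `network` DSL further down is what this file actually runs.
proc squared_loss[T](y_hat, y: Tensor[T]): Tensor[T] =
  # elementwise loss: (y_hat - y)^2 / 2
  let d = y_hat - y.reshape(y_hat.shape)
  result = T(0.5) * (d *. d)

proc sgd[T](params: seq[Variable[Tensor[T]]], lr: T, batch_size: int) =
  # in-place mini-batch SGD step: param -= lr * param.grad / batch_size
  for p in params:
    p.value -= (lr / T(batch_size)) * p.grad
    p.grad = p.grad.zeros_like  # reset the gradient for the next backprop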
let batch_size = 10
let num_inputs = 2
let num_examples = 1000
let true_w = [2.0'f32, -3.4'f32] # weights
let true_b = 4.2'f32 # bias
let features = randomNormalTensor[float32]([num_examples, num_inputs], std = 1.0'f32)
var labels = true_w[0] * features[_, 0] + true_w[1] * features[_, 1] +. true_b
# noise
labels += randomNormalTensor[float32]([num_examples, 1], std = 0.01'f32)
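# Quick sanity check of `data_iter` (a sketch; the shapes follow from the
# data built above: features [1000, 2], labels [1000, 1]):
block:
  for (bx, by) in data_iter(batch_size, features, labels):
    doAssert bx.shape[0] == batch_size and by.shape[0] == batch_size
    echo "first batch shapes: ", bx.shape, " ", by.shape
    break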
let ctx = newContext Tensor[float32]

# Create random Tensors to hold inputs and outputs, and wrap them in Variables.
let
  x = ctx.variable(features)
  y = labels
  batch = num_examples
  in_features = num_inputs
  out_features = 1

# When not initializing via the network DSL you need requires_grad = true
var w = ctx.variable(
  randomNormalTensor[float32]([out_features, in_features], std = 0.01'f32),
  requires_grad = true)
var b = ctx.variable(
  zeros[float32]([1, out_features]),
  requires_grad = true) # shape [1, out_features]; the shape check is right but its error message is wrong
# ##################################################################
# Define the model.
network ctx, OneLayersNet:
  # layers:
  forward x:
    x.linear(w, b)

let
  model = ctx.init(OneLayersNet)
  optim = model.optimizerSGD(learning_rate = 0.03'f32)
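# For contrast, the book's manual mini-batch path would look roughly like
# this (left commented out so it does not perturb the DSL training below;
# it reuses `data_iter` and the `sgd` sketch above, both otherwise unused):
#
# for epoch in 0 ..< 3:
#   for (bx, by) in data_iter(batch_size, features, labels):
#     let l = ctx.variable(bx).linear(w, b).mse_loss(by)
#     l.backprop()
#     sgd(@[w, b], 0.03'f32, batch_size)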
# ##################################################################
# Training
for t in 0 ..< 3:
  let
    y_pred = model.forward(x)
    loss = y_pred.mse_loss(y)
  echo &"Epoch {t}: loss {loss.value[0]}"
  loss.backprop()
  optim.update()

echo w.value
echo b.value
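# Compare the learned parameters with the generating ones (exact values
# vary with the RNG seed; after the three epochs above they should be close):
echo "w error: ", abs(true_w[0] - w.value[0, 0]), " ", abs(true_w[1] - w.value[0, 1])
echo "b error: ", abs(true_b - b.value[0, 0])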
# epoch 1, loss 0.040643
# epoch 2, loss 0.000157
# epoch 3, loss 0.000050
# After training, the learned parameters should be close to the true
# parameters used to generate the training set. Reference output from the
# book's MXNet version:
# In [13]: true_w, w
# Out[13]: ([2, -3.4], [[ 1.9999995]
#                       [-3.3997662]] <NDArray 2x1 @cpu(0)>)
# In [14]: true_b, b
# Out[14]: (4.2, [4.1996264]
#                <NDArray 1 @cpu(0)>)