Skip to content

Instantly share code, notes, and snippets.

@umbra-scientia
Created August 31, 2020 21:08
Show Gist options
  • Save umbra-scientia/153e94f218ad2b6a8616c219cbaef804 to your computer and use it in GitHub Desktop.
Save umbra-scientia/153e94f218ad2b6a8616c219cbaef804 to your computer and use it in GitHub Desktop.
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
// Returns the difference of two rand() draws scaled by RAND_MAX, which puts
// the result in the closed range [-1, 1].
float RandomFloat() {
    // Both draws stay in one expression so their relative evaluation order is
    // left to the compiler, exactly as before.
    const int difference = rand() - rand();
    return static_cast<float>(difference) / static_cast<float>(RAND_MAX);
}
// Fixed-size vector of `dim` floats with element-wise arithmetic helpers.
template<int dim>
struct Vector {
    float elem[dim];

    // Default construction zeroes every component.
    Vector() {
        for (int i = 0; i < dim; i++) elem[i] = 0.0f;
    }

    // Broadcasts `v` into every component. Deliberately not `explicit`:
    // expressions such as `vec * scalar` rely on this implicit conversion.
    Vector(float v) {
        for (int i = 0; i < dim; i++) elem[i] = v;
    }

    // Unchecked component access.
    float& operator[](int x) { return elem[x]; }

    // Read-only overload so components of a const Vector can be indexed too.
    const float& operator[](int x) const { return elem[x]; }

    // Overwrites every component with a fresh RandomFloat() draw.
    void Initialize() {
        for (int i = 0; i < dim; i++) elem[i] = RandomFloat();
    }

    // Rescales the vector so that its Euclidean length becomes `len`.
    // An all-zero vector has no direction to preserve; it is left untouched
    // instead of being turned into NaNs by a division by zero.
    void Normalize(float len = 1.0) {
        float sqsum = 0.0f;
        for (int i = 0; i < dim; i++) sqsum += elem[i] * elem[i];
        if (sqsum <= 0.0f) return;
        const float scale = len / sqrtf(sqsum);
        for (int i = 0; i < dim; i++) elem[i] *= scale;
    }

    // In-place element-wise operators. `&=` keeps the per-component minimum
    // and `|=` the per-component maximum.
    void operator+=(const Vector& other) { for (int i = 0; i < dim; i++) elem[i] += other.elem[i]; }
    void operator-=(const Vector& other) { for (int i = 0; i < dim; i++) elem[i] -= other.elem[i]; }
    void operator*=(const Vector& other) { for (int i = 0; i < dim; i++) elem[i] *= other.elem[i]; }
    void operator/=(const Vector& other) { for (int i = 0; i < dim; i++) elem[i] /= other.elem[i]; }
    void operator&=(const Vector& other) { for (int i = 0; i < dim; i++) elem[i] = fminf(elem[i], other.elem[i]); }
    void operator|=(const Vector& other) { for (int i = 0; i < dim; i++) elem[i] = fmaxf(elem[i], other.elem[i]); }

    // Binary element-wise operators, implemented on top of the in-place ones.
    //
    // NOTE: each of these evaluates `other OP *this`, i.e. with the operands
    // swapped. That is invisible for the commutative operators, but `a - b`
    // yields b - a and `a / b` yields b / a. The training loop in main()
    // relies on this ordering for the sign of its error term, so the ordering
    // is preserved here rather than silently changed.
    Vector operator+(Vector other) const { other += *this; return other; }
    Vector operator-(Vector other) const { other -= *this; return other; }
    Vector operator*(Vector other) const { other *= *this; return other; }
    Vector operator/(Vector other) const { other /= *this; return other; }
    Vector operator&(Vector other) const { other &= *this; return other; }
    Vector operator|(Vector other) const { other |= *this; return other; }

    // Returns a copy where each component x is replaced by max(x, x * leak).
    Vector relu(float leak = 0.0) const {
        Vector out;
        for (int i = 0; i < dim; i++) out.elem[i] = fmaxf(elem[i], elem[i] * leak);
        return out;
    }

    // Returns a copy with every component clamped to [-cap, cap].
    Vector clip(float cap = 2.0) const {
        Vector out;
        for (int i = 0; i < dim; i++) out.elem[i] = fmaxf(fminf(elem[i], cap), -cap);
        return out;
    }

    // Prints the components as "[+a +b ...]" followed by a newline.
    void Print() const {
        printf("[");
        for (int i = 0; i < dim; i++) {
            if (i) printf(" ");
            printf("%+f", elem[i]);
        }
        printf("]\n");
    }
};
// Abstract interface for a layer mapping a Vector<in_dim> to a Vector<out_dim>.
template<int out_dim, int in_dim> struct Layer {
    // Virtual destructor so a derived layer can be destroyed safely through a
    // pointer or reference to Layer.
    virtual ~Layer() = default;

    // Sets the layer's parameters (if any) to their starting values.
    virtual void Initialize() = 0;

    // Forward pass: writes the layer's output for `in` into `out`.
    virtual void Transform(Vector<out_dim>& out, const Vector<in_dim>& in) = 0;

    // Resets any accumulated parameter gradients.
    virtual void ClearGrad() = 0;

    // Backward pass: `gradient_in` is the gradient arriving at the layer's
    // output and `value_in` the input that was fed to Transform(). Returns the
    // gradient with respect to that input.
    virtual Vector<in_dim> AccumGrad(const Vector<out_dim>& gradient_in, const Vector<in_dim>& value_in) = 0;

    // Updates the layer's parameters from the accumulated gradients.
    virtual void ApplyGrad(float learn_rate) = 0;
};
// Affine layer computing out = row[in_dim] + sum_i row[i] * in[i].
// row[0 .. in_dim-1] hold one weight vector per input component and
// row[in_dim] is the bias; grad mirrors that layout for the gradients.
template<int out_dim, int in_dim> struct Matrix : public Layer<out_dim, in_dim> {
    Vector<out_dim> row[in_dim+1];
    Vector<out_dim> grad[in_dim+1];

    // Unchecked access to row `x` (index in_dim is the bias row).
    Vector<out_dim>& operator[](int x) { return row[x]; }

    // Fills every row (bias included) with random values and rescales each
    // row to length 1 / sqrt(in_dim + 1).
    void Initialize() {
        const float scale = static_cast<float>(1.0 / sqrt(in_dim + 1.0));
        for (int i = 0; i < in_dim + 1; i++) {
            row[i].Initialize();
            row[i].Normalize(scale);
        }
    }

    // Resets the weights to the identity map with a zero bias. For non-square
    // shapes only the leading min(out_dim, in_dim) diagonal entries are set.
    void Identity() {
        for (int i = 0; i < in_dim + 1; i++) row[i] = Vector<out_dim>();
        const int diag = (in_dim < out_dim) ? in_dim : out_dim;
        // Stop before `diag`: index `diag` itself is either past the end of a
        // row's components or lands on the bias row.
        for (int i = 0; i < diag; i++) {
            row[i].elem[i] = 1.0f;
        }
    }

    // Forward pass. `out` starts from the bias row and accumulates each
    // weight row scaled by the matching input component.
    void Transform(Vector<out_dim>& out, const Vector<in_dim>& in) {
        out = row[in_dim];
        for (int i = 0; i < in_dim; i++) {
            out += row[i] * in.elem[i];
        }
    }

    // Zeroes all accumulated gradients.
    void ClearGrad() {
        for (int i = 0; i < in_dim + 1; i++) grad[i] = Vector<out_dim>();
    }

    // Backward pass. Adds this sample's contribution to `grad` and returns
    // the gradient with respect to the layer input.
    Vector<in_dim> AccumGrad(const Vector<out_dim>& gradient_in, const Vector<in_dim>& value_in) {
        Vector<in_dim> gradient_out;
        grad[in_dim] += gradient_in;
        for (int j = 0; j < in_dim; j++) {
            grad[j] += gradient_in * value_in.elem[j];
            // Transform() gives d out[i] / d in[j] = row[j].elem[i], so the
            // input gradient is the dot product of row[j] with gradient_in.
            float acc = 0.0f;
            for (int i = 0; i < out_dim; i++) {
                acc += row[j].elem[i] * gradient_in.elem[i];
            }
            gradient_out.elem[j] = acc;
        }
        return gradient_out;
    }

    // Adds learn_rate * grad to every row (bias included).
    void ApplyGrad(float learn_rate) {
        for (int i = 0; i < in_dim + 1; i++) {
            row[i] += grad[i] * learn_rate;
        }
    }

    // Divides all gradients by max(threshold, largest |component|) and
    // multiplies them by `scale`. With the defaults this caps the largest
    // magnitude at 1 and leaves smaller gradients unchanged.
    void ClipGrad(float scale = 1.0, float threshold = 1.0) {
        float peak = threshold;
        for (int i = 0; i < in_dim + 1; i++) {
            for (int j = 0; j < out_dim; j++) {
                const float magnitude = fabsf(grad[i].elem[j]);
                if (magnitude > peak) {
                    peak = magnitude;
                }
            }
        }
        // Nothing sensible to divide by.
        if (peak <= 0) return;
        const float factor = scale / peak;
        for (int i = 0; i < in_dim + 1; i++) {
            for (int j = 0; j < out_dim; j++) {
                grad[i].elem[j] *= factor;
            }
        }
    }

    // Shrinks every row (bias included) by the fraction lr * 1e-4.
    void Regularize(float lr) {
        for (int i = 0; i < in_dim + 1; i++) {
            row[i] -= row[i] * lr * 1e-4;
        }
    }
};
// Parameter-free rectifier layer: passes positive components through and
// replaces everything else with zero.
template<int dim> struct ReLU : public Layer<dim, dim> {
    // No parameters to initialise.
    void Initialize() {}

    // Forward pass: out[i] = in[i] if in[i] > 0, otherwise 0.
    void Transform(Vector<dim>& out, const Vector<dim>& in) {
        // Components are read through `.elem` because `in` is const.
        for (int i = 0; i < dim; i++) {
            out.elem[i] = (in.elem[i] > 0.0f) ? in.elem[i] : 0.0f;
        }
    }

    // No gradients are stored.
    void ClearGrad() {}

    // Backward pass: forwards gradient_in[i] where the layer input was
    // positive and blocks it elsewhere. The test is strictly `> 0`, matching
    // Transform(), so an input of exactly zero yields zero in both directions.
    Vector<dim> AccumGrad(const Vector<dim>& gradient_in, const Vector<dim>& value_in) {
        Vector<dim> gradient_out;
        for (int i = 0; i < dim; i++) {
            gradient_out.elem[i] = (value_in.elem[i] > 0.0f) ? gradient_in.elem[i] : 0.0f;
        }
        return gradient_out;
    }

    // No parameters to update.
    void ApplyGrad(float learn_rate) {}
};
template<int N> void PrintVecs(const Vector<N> vec[], int n) {
for(int i=0;i<n;i++) vec[i].Print();
}
// Demo: trains two stacked affine layers to reproduce a random input vector,
// printing inputs, outputs, gradients and weights after every step.
int main() {
    const int kDim = 5;          // width of every vector in the demo
    const int kRows = kDim + 1;  // a Matrix holds kDim weight rows plus one bias row
    const float lr = 0.05f;

    Matrix<kDim, kDim> test1, test2;
    // test1 is overwritten by Identity() below, but its Initialize() call
    // still consumes rand() draws and therefore affects test2's weights.
    test1.Initialize();
    test2.Initialize();
    test1.Identity();

    for (int i = 0; i < 9001; i++) {
        Vector<kDim> in1, in2, out;
        in1.Initialize();
        test1.Regularize(lr);
        test2.Regularize(lr);

        // Forward pass through both layers.
        test1.Transform(in2, in1);
        test2.Transform(out, in2);
        printf("in1=");in1.Print();
        printf("out=");out.Print();

        test1.ClearGrad();
        test2.ClearGrad();

        // Vector::operator- evaluates `other - *this`, so this is in1 - out:
        // the direction that moves `out` towards the target `in1` when
        // ApplyGrad() adds the scaled gradient to the weights.
        auto grad2 = out - in1;

        // Backward pass, last layer first.
        auto grad1 = test2.AccumGrad(grad2, in2);
        test2.ClipGrad();
        // Called for its effect on test1.grad; the gradient with respect to
        // in1 that it returns is not needed.
        test1.AccumGrad(grad1, in1);
        test1.ClipGrad();

        printf("test1.grad (%d)\n", i);
        PrintVecs(test1.grad, kRows);
        printf("test2.grad (%d)\n", i);
        PrintVecs(test2.grad, kRows);

        test1.ApplyGrad(lr);
        test2.ApplyGrad(lr);

        printf("test1.row (%d)\n", i);
        PrintVecs(test1.row, kRows);
        printf("test2.row (%d)\n", i);
        PrintVecs(test2.row, kRows);

        // Pause 100 ms between steps so the output can be followed.
        usleep(100000);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment