RPROP in C
/*
James William Fletcher (github.com/mrbid) & Test_User (notabug.org/Test_User)
April 2024
A sort of RPROP in C
Train network to output TRAIN_MAX when 333 is input and TRAIN_MIN otherwise
Check the last sign of the unit: if both signs are the same, move in the
direction of the last sign; if the signs differ, don't update the weight,
just update the sign. Increment in 1's only; no magnitude. Some networks
accumulate magnitude the longer you move in the same direction with no
change of sign, but an int8 would explode too fast.
Problem:
- Multiplying two sint8 values quickly overflows.
- On x86, integers wrap when they overflow; casting and branching are needed to prevent this.
- ? or https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html
- https://stackoverflow.com/questions/38625857/using-the-builtin-function-builtin-add-overflow-p-in-gcc
This system would require no cyclic learning rate, no skip connections, maybe dropout.
sint8 range: -128 to 127
gcc rprop.c -Ofast -ggdb3 -o rprop
valgrind -s --track-origins=yes --leak-check=full --show-leak-kinds=all ./rprop
*/
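/*
Worked example of the sign-agreement rule above (illustrative numbers only):
  step 1: gradient sign +1, stored sign  0 -> signs differ: store +1, weight unchanged
  step 2: gradient sign +1, stored sign +1 -> signs agree:  weight += 1
  step 3: gradient sign -1, stored sign +1 -> signs differ: store -1, weight unchanged
  step 4: gradient sign -1, stored sign -1 -> signs agree:  weight -= 1
*/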
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#define uint unsigned int
#define sint8 int8_t // signed int8
#define UNITS 16
#define EPOCHS 1000000
#define TRAIN_MAX 64
#define TRAIN_MIN -64
#define ReLU_LEAK 0
sint8 cap_multiply(sint8 x, sint8 y) { // saturating sint8 multiply
sint8 res;
if (__builtin_mul_overflow(x, y, &res)) {
// on overflow, saturate toward the sign of the true product
x = x > 0 ? 1 : -1;
y = y > 0 ? 1 : -1;
return x*y > 0 ? 127 : -128;
}
return res;
}
sint8 cap_add(sint8 x, sint8 y) { // saturating sint8 add
sint8 res;
if (__builtin_add_overflow(x, y, &res)) {
return x > 0 ? 127 : -128; // overflow can only occur toward x's sign
}
return res;
}
sint8 cap_sub(sint8 x, sint8 y) { // saturating sint8 subtract
sint8 res;
if (__builtin_sub_overflow(x, y, &res)) {
return x >= 0 ? 127 : -128; // x - y can only overflow toward x's sign
}
return res;
}
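/*
A minimal sanity check for the saturating helpers above; a sketch, assuming
the intended semantics are "clamp to the sint8 limits instead of wrapping".
Not called anywhere by default; call cap_sanity() at the top of main() to run it.
*/
#include <assert.h>
static void cap_sanity(void)
{
assert(cap_multiply(100, 2)  ==  127); // 200 clamps to sint8 max
assert(cap_multiply(-100, 2) == -128); // -200 clamps to sint8 min
assert(cap_add(120, 120)     ==  127); // 240 clamps to sint8 max
assert(cap_add(-120, -120)   == -128); // -240 clamps to sint8 min
assert(cap_sub(120, -120)    ==  127); // 240 clamps to sint8 max
assert(cap_sub(-120, 120)    == -128); // -240 clamps to sint8 min
assert(cap_multiply(10, 10)  ==  100); // in range: exact result
}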
typedef struct
{
// weights (units, weights)
sint8 l1w[UNITS][3];
sint8 l2w[UNITS][UNITS];
sint8 l3w[UNITS][UNITS];
sint8 l4w[1][UNITS];
// bias' (units, bias)
sint8 l1b[UNITS][1];
sint8 l2b[UNITS][1];
sint8 l3b[UNITS][1];
sint8 l4b;
} network;
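// parameter count at UNITS=16: weights 16*3 + 16*16 + 16*16 + 16 = 576,
// biases 16 + 16 + 16 + 1 = 49, for 625 sint8 parameters (625 bytes) total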
network net; // the global network instance
void dumpWeights()
{
puts("l1w:");
for(int i = 0; i < UNITS; i++)
for(int j = 0; j < 3; j++)
printf("%i ", net.l1w[i][j]);
printf("\n\n");
puts("l2w:");
for(int i = 0; i < UNITS; i++)
for(int j = 0; j < UNITS; j++)
printf("%i ", net.l2w[i][j]);
printf("\n\n");
puts("l3w:");
for(int i = 0; i < UNITS; i++)
for(int j = 0; j < UNITS; j++)
printf("%i ", net.l3w[i][j]);
printf("\n\n");
puts("l4w:");
for(int i = 0; i < UNITS; i++)
printf("%i ", net.l3w[0][i]);
printf("\n\n");
}
void layerStat()
{
int min=999, max=-999, avg=0, avgd=0;
for(int i = 0; i < UNITS; i++)
{
for(int j = 0; j < 3; j++)
{
if(net.l1w[i][j] > max){max = net.l1w[i][j];}
if(net.l1w[i][j] < min){min = net.l1w[i][j];}
avg += net.l1w[i][j];
avgd++;
}
}
printf("l1w: %i %i %i [%i]\n", min, avg/avgd, max, avg); // min, mean, max, [sum]
min=999, max=-999, avg=0, avgd=0;
for(int i = 0; i < UNITS; i++)
{
for(int j = 0; j < UNITS; j++)
{
if(net.l2w[i][j] > max){max = net.l2w[i][j];}
if(net.l2w[i][j] < min){min = net.l2w[i][j];}
avg += net.l2w[i][j];
avgd++;
}
}
printf("l2w: %i %i %i [%i]\n", min, avg/avgd, max, avg);
min=999, max=-999, avg=0, avgd=0;
for(int i = 0; i < UNITS; i++)
{
for(int j = 0; j < UNITS; j++)
{
if(net.l3w[i][j] > max){max = net.l3w[i][j];}
if(net.l3w[i][j] < min){min = net.l3w[i][j];}
avg += net.l3w[i][j];
avgd++;
}
}
printf("l3w: %i %i %i [%i]\n", min, avg/avgd, max, avg);
min=999, max=-999, avg=0, avgd=0;
for(int i = 0; i < UNITS; i++)
{
if(net.l4w[0][i] > max){max = net.l4w[0][i];}
if(net.l4w[0][i] < min){min = net.l4w[0][i];}
avg += net.l4w[0][i];
avgd++;
}
printf("l4w: %i %i %i [%i]\n", min, avg/avgd, max, avg);
}
sint8 ReLU(sint8 s) // ReLU with leaky hyper-parameter
{
return s < ReLU_LEAK ? ReLU_LEAK : s;
}
sint8 ReLU_D(sint8 s) // ReLU derivative
{
return s > 0 ? 1 : 0;
}
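// example with ReLU_LEAK == 0: ReLU(-5) == 0 and ReLU(7) == 7, while
// ReLU_D(-5) == 0 (gradient blocked) and ReLU_D(7) == 1 (gradient passes)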
sint8 RPROP(signed int input, signed int error) // RPROP optimiser
{
return (input * error) < 0 ? -1 : 1;
}
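// RPROP() only ever returns a step direction, never a magnitude, e.g.:
// RPROP(5, 3) == 1, RPROP(5, -3) == -1, RPROP(0, 3) == 1
// (a zero product counts as positive)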
sint8 srnd8() // random signed int8 in [-128,126]
{
return (sint8)((rand()%255)-128);
}
sint8 srnd8_weight() // random signed int8 weight
{
sint8 r=0;
while(r == 0){r = srnd8();}
return r;
}
int main()
{
// seed rand
srand(777);
// random weights
for(uint i = 0; i < UNITS; i++)
for(uint j = 0; j < 3; j++) {net.l1w[i][j] = srnd8_weight(); net.l1b[i][0] = 0;}
for(uint i = 0; i < UNITS; i++)
for(uint j = 0; j < UNITS; j++) {net.l2w[i][j] = srnd8_weight(); net.l2b[i][0] = 0;}
for(uint i = 0; i < UNITS; i++)
for(uint j = 0; j < UNITS; j++) {net.l3w[i][j] = srnd8_weight(); net.l3b[i][0] = 0;}
for(uint i = 0; i < UNITS; i++) {net.l4w[0][i] = srnd8_weight();}
net.l4b = 0;
//dumpWeights();
// training switch
unsigned char ts = 0;
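// ts == 0 trains the (3,3,3) input toward TRAIN_MAX; ts == 1 trains a random
// non-(3,3,3) input toward TRAIN_MIN; flipped at the end of every epoch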
// sign tracking for each weight and bias for backprop
sint8 l1ws[UNITS][3] = {0};
sint8 l2ws[UNITS][UNITS] = {0};
sint8 l3ws[UNITS][UNITS] = {0};
sint8 l4ws[1][UNITS] = {0};
sint8 l1bs[UNITS][1] = {0};
sint8 l2bs[UNITS][1] = {0};
sint8 l3bs[UNITS][1] = {0};
sint8 l4bs = 0;
// train
for(uint e = 0; e < EPOCHS; e++)
{
// forward pass input gen
sint8 input[3];
if(ts == 0)
{
input[0] = 3;
input[1] = 3;
input[2] = 3;
}
else
{
do
{
input[0] = srnd8();
input[1] = srnd8();
input[2] = srnd8();
}
while(input[0] == 3 && input[1] == 3 && input[2] == 3);
}
// ---
// --------
// ---
// forward pass
sint8 l1o[UNITS];
for(uint i = 0; i < UNITS; i++)
l1o[i] = ReLU(cap_add(cap_add(cap_multiply(input[0], net.l1w[i][0]), cap_multiply(input[1], net.l1w[i][1])), cap_add(cap_multiply(input[2], net.l1w[i][2]), net.l1b[i][0])));
sint8 l2o[UNITS];
for(uint i = 0; i < UNITS; i++)
{
sint8 wa = 0;
for(uint j = 0; j < UNITS; j++)
wa = cap_add(wa, cap_multiply(l1o[j], net.l2w[i][j]));
l2o[i] = ReLU(cap_add(wa, net.l2b[i][0]));
}
sint8 l3o[UNITS];
for(uint i = 0; i < UNITS; i++)
{
sint8 wa = 0;
for(uint j = 0; j < UNITS; j++)
wa = cap_add(wa, cap_multiply(l2o[j], net.l3w[i][j]));
l3o[i] = ReLU(cap_add(wa, net.l3b[i][0]));
}
sint8 l4o = 0;
for(uint i = 0; i < UNITS; i++)
l4o = cap_add(l4o, cap_multiply(l3o[i], net.l4w[0][i]));
l4o = cap_add(l4o, net.l4b);
// calc slope of loss
sint8 loss;
if(ts == 0)
loss = cap_sub(TRAIN_MAX, l4o);
else
loss = cap_sub(TRAIN_MIN, l4o);
if(ts == 0){layerStat();}
if(ts == 0){printf("[%u] [%i] 333: %i (%i)\n", e, ts, l4o, loss);}
else {printf("[%u] [%i] not: %i (%i)\n", e, ts, l4o, loss);}
// ---
// --------
// ---
// backpass calc error/gradient buffers
signed int ler; // storing cumulative error per layer
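// note: every unit in the lower layer receives the same cumulative scalar
// error, scaled only by its own ReLU derivative, rather than a per-connection
// error term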
// ---
// get cumulative error of single output unit (layer4)
ler = 0;
for(uint i = 0; i < UNITS; i++){ler += net.l4w[0][i] * loss;}
ler += net.l4b * loss;
// set unit error of lower layer3 units
signed int l3e[UNITS];
for(uint i = 0; i < UNITS; i++){l3e[i] = ReLU_D(l3o[i]) * ler;}
// ---
// get cumulative error of layer3 units
ler = 0;
for(uint i = 0; i < UNITS; i++)
{
for(uint j = 0; j < UNITS; j++)
ler += net.l3w[i][j] * l3e[i];
ler += net.l3b[i][0] * l3e[i];
}
// set each unit error of lower layer2 units
signed int l2e[UNITS];
for(uint i = 0; i < UNITS; i++){l2e[i] = ReLU_D(l2o[i]) * ler;}
// ---
// get cumulative error of layer2 units
ler = 0;
for(uint i = 0; i < UNITS; i++)
{
for(uint j = 0; j < UNITS; j++)
ler += net.l2w[i][j] * l2e[i];
ler += net.l2b[i][0] * l2e[i];
}
// set unit error of lower layer1 units
signed int l1e[UNITS];
for(uint i = 0; i < UNITS; i++){l1e[i] = ReLU_D(l1o[i]) * ler;}
// ---
// --------
// ---
// forwardpass nudge weights based on error/gradient sign
for(uint i = 0; i < UNITS; i++) // layer1
{
for(uint j = 0; j < 3; j++)
{
// nudge weight by sign direction
sint8 ns = RPROP(input[j], l1e[i]);
if(ns != l1ws[i][j]){l1ws[i][j] = ns;} // if different update sign
else{net.l1w[i][j] = cap_add(net.l1w[i][j], ns);} // if same move in direction of gradient
// nudge bias by sign direction
ns = RPROP(1, l1e[i]);
if(ns != l1bs[i][0]){l1bs[i][0] = ns;} // if different update sign
else{net.l1b[i][0] = cap_add(net.l1b[i][0], ns);} // if same move in direction of gradient
}
}
for(uint i = 0; i < UNITS; i++) // layer2
{
for(uint j = 0; j < UNITS; j++)
{
// nudge weight by sign direction
sint8 ns = RPROP(l1o[j], l2e[i]);
if(ns != l2ws[i][j]){l2ws[i][j] = ns;} // if different update sign
else{net.l2w[i][j] = cap_add(net.l2w[i][j], ns);} // if same move in direction of gradient
// nudge bias by sign direction
ns = RPROP(1, l2e[i]);
if(ns != l2bs[i][0]){l2bs[i][0] = ns;} // if different update sign
else{net.l2b[i][0] = cap_add(net.l2b[i][0], ns);} // if same move in direction of gradient
}
}
for(uint i = 0; i < UNITS; i++) // layer3
{
for(uint j = 0; j < UNITS; j++)
{
// nudge weight by sign direction
sint8 ns = RPROP(l2o[j], l3e[i]);
if(ns != l3ws[i][j]){l3ws[i][j] = ns;} // if different update sign
else{net.l3w[i][j] = cap_add(net.l3w[i][j], ns);} // if same move in direction of gradient
// nudge bias by sign direction
ns = RPROP(1, l3e[i]);
if(ns != l3bs[i][0]){l3bs[i][0] = ns;} // if different update sign
else{net.l3b[i][0] = cap_add(net.l3b[i][0], ns);} // if same move in direction of gradient
}
}
for(uint i = 0; i < UNITS; i++) // layer4 (output)
{
// nudge weight by sign direction
sint8 ns = RPROP(l3o[i], loss);
if(ns != l4ws[0][i]){l4ws[0][i] = ns;} // if different update sign
else{net.l4w[0][i] = cap_add(net.l4w[0][i], ns);} // if same move in direction of gradient
// nudge bias by sign direction
ns = RPROP(1, loss);
if(ns != l4bs){l4bs = ns;} // if different update sign
else{net.l4b = cap_add(net.l4b, ns);} // if same move in direction of gradient
}
// ---
// --------
// ---
// Auto kill if it stops optimising after a number of epochs
static sint8 laslos = 0, laslosc = 0;
if(ts == 1)
{
if(loss == laslos){laslosc++;}
else
{
laslos = loss;
laslosc = 0;
}
if(laslosc > 9){return 0;}
}
// input flip switch
ts = 1 - ts;
if(ts == 0){puts("");}
}
// done
return 0;
}