Skip to content

Instantly share code, notes, and snippets.

@dyigitpolat
Last active June 8, 2017 04:44
Show Gist options
  • Save dyigitpolat/0348f4a581d35c77b41b9ef730444bff to your computer and use it in GitHub Desktop.
#include "stdio.h"
#include "stdlib.h"
#include "math.h"
#include "time.h"
#define N 9 /* total weight count: 6 connection weights (w[0..5]) + 3 biases (w[6..8]) */
double in1, in2; /* current network inputs; set before every forward pass */
double w[N]; /* weight vector shared by all forward/backward routines */
int train_loops = 100000; /* gradient-descent iterations performed per gate */
double activation( double sum)
{
return (tanh(sum) + 1.0) / 2.0;
//ReLU Activation -not used right now.
if( sum > 0)
return sum;
else
return 0;
}
/* Input node A: the forward pass simply exposes the first network input. */
double a_out()
{
    return in1;
}
/* Input node B: the forward pass simply exposes the second network input. */
double b_out()
{
    return in2;
}
/* Hidden node C: weighted sum of both inputs plus bias w[6], then squashed. */
double c_out()
{
    double sum = w[0] * a_out() + w[2] * b_out() + w[6];
    return activation(sum);
}
/* Hidden node D: weighted sum of both inputs plus bias w[7], then squashed. */
double d_out()
{
    double sum = w[1] * a_out() + w[3] * b_out() + w[7];
    return activation(sum);
}
/* Output node: combines both hidden activations (w[4], w[5]) plus bias w[8]. */
double y()
{
    double sum = w[4] * c_out() + w[5] * d_out() + w[8];
    return activation(sum);
}
/* Euclidean (L2) norm of an N-element vector. */
double magnitude( const double* vector)
{
    double sum_sq = 0.0;
    for (int i = 0; i < N; i++)
    {
        sum_sq += vector[i] * vector[i];
    }
    return sqrt(sum_sq);
}
/*
 * Rescale vector to unit length, writing the result into out (out may alias
 * vector). A zero vector has no direction, so out is zero-filled instead.
 */
void normalize( const double* vector, double* out)
{
    double mag = magnitude(vector);
    for (int i = 0; i < N; i++)
    {
        out[i] = (mag == 0.0) ? 0.0 : vector[i] / mag;
    }
}
/* Element-wise scalar multiply: c = a * b. */
void scale( double* c, const double* a, double b)
{
    for (int i = 0; i < N; i++)
    {
        c[i] = b * a[i];
    }
}
/* Element-wise difference: c = a - b. */
void subtract( double* c, const double* a, const double* b)
{
    for (int i = 0; i < N; i++)
    {
        c[i] = a[i] - b[i];
    }
}
void add( double* c, const double* a, const double* b) //c = a + b (comment fixed: original said "a - b", a copy-paste slip)
{
int i;
for( i = 0; i < N; i++)
{
c[i] = a[i] + b[i];
}
}
/* Vector assignment: a = b, element by element. */
void copy( double* a, const double* b)
{
    for (int i = 0; i < N; i++)
    {
        a[i] = b[i];
    }
}
/* Dump every component of an N-vector as "index: value", then a blank line. */
void print( double* vec)
{
    for (int i = 0; i < N; i++)
    {
        printf( "%d: %f \n", i, vec[i]);
    }
    printf( "\n");
}
/*
 * Randomize every weight to a magnitude in [0.2, 0.24), keeping it away
 * from zero; a separate coin flip picks the sign. The first rand() call
 * chooses the sign, the second the magnitude (same order as before, so the
 * pseudo-random sequence consumed is identical).
 */
void init_network()
{
    for (int i = 0; i < N; i++)
    {
        double sign = (rand() % 2) ? 1.0 : -1.0;
        w[i] = sign * (((rand() % 10000) * 0.0002) * 0.02 + 0.2);
    }
    printf( "network initialized. \n");
}
/*
 * Squared-error loss between the network output y() and target, with a
 * tolerance band: a squared error <= 0.1 counts as zero loss, so training
 * stops pushing once the output is "close enough".
 *
 * Fix: the original evaluated y() up to three times (three full forward
 * passes) per call; the output is now computed once.
 */
double compute_loss( double target)
{
    double err = y() - target;
    double sq_err = err * err;
    return (sq_err > 0.1) ? sq_err : 0.0;
}
//TODO: analytic backpropagation gradient -- not yet implemented.
/* Intentionally empty stub; the program routes gradient computation through
 * compute_gradient(), which delegates to compute_numerical_gradient(). */
void compute_backprop_gradient( double* gradient, double target)
{
}
/*
 * Hand-derived gradient assuming ReLU-style gating (a weight contributes
 * only while it is positive). Only the six connection weights receive a
 * gradient; the three bias entries are now explicitly zeroed -- the
 * original left gradient[6..8] uninitialized, and the scale() call below
 * reads all N entries, which was a read of uninitialized memory (UB).
 * The final sign flip points the vector downhill for squared error.
 *
 * NOTE(review): gradient[1] uses w[4] even though w[1] feeds the output
 * through d_out()/w[5]; the derivation looks suspect but is preserved
 * unchanged (this function is not called by the current training path).
 */
void compute_ReLU_gradient( double* gradient, double target)
{
    gradient[0] = (w[0] > 0 ? in1*w[4] : 0);
    gradient[1] = (w[1] > 0 ? in1*w[4] : 0);
    gradient[2] = (w[2] > 0 ? in2*w[5] : 0);
    gradient[3] = (w[3] > 0 ? in2*w[5] : 0);
    gradient[4] = (w[4] > 0 ? in1*w[0] + in1*w[1] : 0);
    gradient[5] = (w[5] > 0 ? in2*w[2] + in2*w[3] : 0);
    gradient[6] = 0.0; /* bias gradients not derived; keep them defined */
    gradient[7] = 0.0;
    gradient[8] = 0.0;
    if( y() - target > 0)
        scale( gradient, gradient, -1.0);
}
/*
 * Forward-difference numerical gradient of the loss w.r.t. each weight:
 * gradient[i] ~= (L(w + d*e_i) - L(w)) / d.
 *
 * Fixes: (1) the original never divided by the step d, so it produced
 * d * dL/dw rather than the derivative -- the direction is identical, and
 * since train() normalizes the (positively scaled) vector the training
 * trajectory is unaffected, but values are now a true derivative estimate;
 * (2) the two full temp-vector copies per component are replaced by saving
 * and restoring the single perturbed weight.
 */
void compute_numerical_gradient( double* gradient, double target)
{
    const double d = 0.00000001; /* finite-difference step size */
    double base_loss = compute_loss( target);
    for (int i = 0; i < N; i++)
    {
        double saved = w[i];   /* perturb exactly one weight at a time */
        w[i] += d;
        gradient[i] = (compute_loss( target) - base_loss) / d;
        w[i] = saved;          /* restore before touching the next weight */
    }
}
/* Gradient dispatch point: currently delegates to the numerical estimate
 * (compute_backprop_gradient is an empty stub; compute_ReLU_gradient is
 * unused by the training loop). */
void compute_gradient( double* gradient, double target)
{
compute_numerical_gradient( gradient, target);
}
/* Train the global weight vector w to mimic one two-input logic gate.
 * gate selects the truth table: 0=AND, 1=OR, 2=XOR, 3=NAND, 4=NOR, 5=XNOR
 * (anything else falls back to AND). Each of train_loops iterations draws
 * a random input pair, computes the gradient of the loss, and steps the
 * weights against it.
 *
 * NOTE(review): normalize() is applied AFTER scale(), so the update vector
 * always has length 1 (or 0) and learning_rate has no effect on step size.
 * Swapping the two calls would honor learning_rate, but with lr = 1e-8 the
 * total movement over 100000 steps would be ~1e-3 and training would never
 * converge -- the unit-step behavior appears to be what makes this work, so
 * the order is left untouched. */
void train( int gate)
{
double gradient[N];
double learning_rate;
int i;
learning_rate = 0.00000001;
for( i = 0; i < train_loops; i++)
{
/* one random sample: bit 0 -> input a, bit 1 -> input b */
int r = rand();
int a = r % 2;
int b = ( r >> 1) % 2;
int c = 0;
switch( gate)
{
case 0: c = (a&b); break;
case 1: c = (a|b); break;
case 2: c = (a^b); break;
case 3: c = (~(a&b) & 0x0001); break;
case 4: c = (~(a|b) & 0x0001); break;
case 5: c = (~(a^b) & 0x0001); break;
default: c = (a&b); break;
}
in1 = a;
in2 = b;
double target = c;
compute_gradient( gradient, target);
scale( gradient, gradient, learning_rate);
normalize( gradient, gradient); /* forces the step to unit length (see note above) */
subtract( w, w, gradient); /* descend: w -= step */
}
}
/*
 * Print the network's truth table: for each of the four input combinations,
 * show the binarized output (threshold 0.5) and the raw continuous output.
 *
 * Fix: the original called y() twice per row (once for the threshold test,
 * once inside printf), running a redundant forward pass; the output is now
 * computed once per row. Printed bytes are unchanged.
 */
void test_logic()
{
    for (int i = 0; i < 4; ++i)
    {
        int a = i % 2;
        int b = ( i >> 1) % 2;
        in1 = a;
        in2 = b;
        double out = y();               /* single forward pass per row */
        int c = (out > 0.5) ? 1 : 0;
        printf( "%d %d: %d ( Y = %f) \n", a, b, c, out);
    }
}
/*
 * Entry point: show the untrained network once, then for each of the six
 * logic gates reinitialize, train, and print weights before/after along
 * with the resulting truth table.
 */
int main( int argc, char** argv)
{
    static const char* gate_names[6] =
        { "AND", "OR", "XOR", "NAND", "NOR", "XNOR" };

    srand(time(NULL));
    printf( "\n");

    init_network();
    test_logic();
    printf( "untrained test complete. \n\n\n");

    for (int gate = 0; gate < 6; gate++)
    {
        init_network();      /* fresh random weights for every gate */
        print(w);
        train( gate);
        printf( "trained %s gate. \n", gate_names[gate]);
        print(w);
        test_logic();
        printf( "test complete. \n\n");
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment