@espiritusanti
Created October 2, 2019 02:22
//
// main.cpp
// NeuralNetwork
//
// Created by Santiago Becerra on 9/15/19.
// Copyright © 2019 Santiago Becerra. All rights reserved.
//
//
#include <iostream>
#include <list>
#include <cstdlib>
#include <math.h>
// Simple network that can learn XOR
// Features: sigmoid activation function, stochastic gradient descent, and mean squared error loss
// Potential improvements:
//   Different activation functions
//   Batch training
//   Different error functions
//   Arbitrary number of hidden layers
//   Read training and test data from a file
//   Add visualization of training
//   Add recurrence? (maybe that should be a separate project)
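// For reference, a summary of the math the training loop below implements:
//   forward:  hiddenLayer[j] = sigmoid(hiddenLayerBias[j] + sum_k input[k] * hiddenWeights[k][j])
//             outputLayer[j] = sigmoid(outputLayerBias[j] + sum_k hiddenLayer[k] * outputWeights[k][j])
//   deltas:   deltaOutput[j] = (target[j] - outputLayer[j]) * dSigmoid(outputLayer[j])
//             deltaHidden[j] = (sum_k deltaOutput[k] * outputWeights[j][k]) * dSigmoid(hiddenLayer[j])
//   updates:  weight += lr * upstream_activation * delta;  bias += lr * delta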
double sigmoid(double x) { return 1 / (1 + exp(-x)); }
// Note: expects the sigmoid output y = sigmoid(x), so y * (1 - y) is the derivative at x
double dSigmoid(double y) { return y * (1 - y); }
double init_weight() { return ((double)rand())/((double)RAND_MAX); }
// Shuffle an int array in place; used to randomize the order of training examples each epoch
void shuffle(int *array, size_t n)
{
    if (n > 1)
    {
        for (size_t i = 0; i < n - 1; i++)
        {
            size_t j = i + rand() / (RAND_MAX / (n - i) + 1);
            int t = array[j];
            array[j] = array[i];
            array[i] = t;
        }
    }
}
int main(int argc, const char * argv[]) {
    static const int numInputs = 2;
    static const int numHiddenNodes = 2;
    static const int numOutputs = 1;

    const double lr = 0.1f;

    double hiddenLayer[numHiddenNodes];
    double outputLayer[numOutputs];

    double hiddenLayerBias[numHiddenNodes];
    double outputLayerBias[numOutputs];

    double hiddenWeights[numInputs][numHiddenNodes];
    double outputWeights[numHiddenNodes][numOutputs];

    static const int numTrainingSets = 4;
    double training_inputs[numTrainingSets][numInputs] = { {0.0f,0.0f},{1.0f,0.0f},{0.0f,1.0f},{1.0f,1.0f} };
    double training_outputs[numTrainingSets][numOutputs] = { {0.0f},{1.0f},{1.0f},{0.0f} };
    // Initialize all weights and biases to random values in [0, 1)
    for (int i=0; i<numInputs; i++) {
        for (int j=0; j<numHiddenNodes; j++) {
            hiddenWeights[i][j] = init_weight();
        }
    }
    for (int i=0; i<numHiddenNodes; i++) {
        hiddenLayerBias[i] = init_weight();
        for (int j=0; j<numOutputs; j++) {
            outputWeights[i][j] = init_weight();
        }
    }
    for (int i=0; i<numOutputs; i++) {
        outputLayerBias[i] = init_weight();
    }

    int trainingSetOrder[] = {0,1,2,3};
    for (int n=0; n < 10000; n++) {
        // Present the training examples in a fresh random order each epoch (stochastic gradient descent)
        shuffle(trainingSetOrder, numTrainingSets);
        for (int x=0; x<numTrainingSets; x++) {
            int i = trainingSetOrder[x];

            // Forward pass
            for (int j=0; j<numHiddenNodes; j++) {
                double activation = hiddenLayerBias[j];
                for (int k=0; k<numInputs; k++) {
                    activation += training_inputs[i][k]*hiddenWeights[k][j];
                }
                hiddenLayer[j] = sigmoid(activation);
            }
            for (int j=0; j<numOutputs; j++) {
                double activation = outputLayerBias[j];
                for (int k=0; k<numHiddenNodes; k++) {
                    activation += hiddenLayer[k]*outputWeights[k][j];
                }
                outputLayer[j] = sigmoid(activation);
            }

            std::cout << "Input:" << training_inputs[i][0] << " " << training_inputs[i][1]
                      << " Output:" << outputLayer[0]
                      << " Expected Output: " << training_outputs[i][0] << "\n";

            // Backprop
            // Output-layer delta: (target - output) scaled by the sigmoid derivative at the output
            double deltaOutput[numOutputs];
            for (int j=0; j<numOutputs; j++) {
                double errorOutput = (training_outputs[i][j] - outputLayer[j]);
                deltaOutput[j] = errorOutput * dSigmoid(outputLayer[j]);
            }

            // Hidden-layer delta: output error propagated back through the output weights
            double deltaHidden[numHiddenNodes];
            for (int j=0; j<numHiddenNodes; j++) {
                double errorHidden = 0.0f;
                for (int k=0; k<numOutputs; k++) {
                    errorHidden += deltaOutput[k]*outputWeights[j][k];
                }
                deltaHidden[j] = errorHidden * dSigmoid(hiddenLayer[j]);
            }

            // Apply the updates: weight += lr * upstream activation * delta, bias += lr * delta
            for (int j=0; j<numOutputs; j++) {
                outputLayerBias[j] += deltaOutput[j]*lr;
                for (int k=0; k<numHiddenNodes; k++) {
                    outputWeights[k][j] += hiddenLayer[k]*deltaOutput[j]*lr;
                }
            }
            for (int j=0; j<numHiddenNodes; j++) {
                hiddenLayerBias[j] += deltaHidden[j]*lr;
                for (int k=0; k<numInputs; k++) {
                    hiddenWeights[k][j] += training_inputs[i][k]*deltaHidden[j]*lr;
                }
            }
        }
    }
    // Print the final weights and biases
    std::cout << "Final Hidden Weights\n[ ";
    for (int j=0; j<numHiddenNodes; j++) {
        std::cout << "[ ";
        for (int k=0; k<numInputs; k++) {
            std::cout << hiddenWeights[k][j] << " ";
        }
        std::cout << "] ";
    }
    std::cout << "]\n";

    std::cout << "Final Hidden Biases\n[ ";
    for (int j=0; j<numHiddenNodes; j++) {
        std::cout << hiddenLayerBias[j] << " ";
    }
    std::cout << "]\n";

    std::cout << "Final Output Weights\n";
    for (int j=0; j<numOutputs; j++) {
        std::cout << "[ ";
        for (int k=0; k<numHiddenNodes; k++) {
            std::cout << outputWeights[k][j] << " ";
        }
        std::cout << "]\n";
    }

    std::cout << "Final Output Biases\n[ ";
    for (int j=0; j<numOutputs; j++) {
        std::cout << outputLayerBias[j] << " ";
    }
    std::cout << "]\n";

    return 0;
}
@zoliweiss

Hi, is this correct?
double dSigmoid(double x) { return x * (1 - x); }
The derivative of the sigmoid should be sigmoid(x) * (1 - sigmoid(x))

@RayT76

RayT76 commented Jun 13, 2020

Hi, is this correct?
double dSigmoid(double x) { return x * (1 - x); }
The derivative of the sigmoid should be sigmoid(x) * (1 - sigmoid(x))

Hi, I believe both you and the code are correct. The sigmoid function is applied to each node during the forward pass of the training cycle, and that sigmoid output is what gets passed to dSigmoid during backprop. Since dSigmoid receives y = sigmoid(x) rather than x, returning y * (1 - y) gives the correct derivative.
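
A quick way to check this numerically (a minimal standalone sketch, not part of the gist): apply dSigmoid to the sigmoid output and compare it against a central finite-difference estimate of the derivative; the two values agree.

#include <iostream>
#include <cmath>

double sigmoid(double x) { return 1 / (1 + exp(-x)); }
// Same convention as the gist: takes the sigmoid output y = sigmoid(x)
double dSigmoid(double y) { return y * (1 - y); }

int main() {
    double x = 0.7;
    double h = 1e-6;
    double analytic = dSigmoid(sigmoid(x));                        // y * (1 - y)
    double numeric  = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h); // central difference
    std::cout << "analytic: " << analytic << "  numeric: " << numeric << "\n";
    return 0;
}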
