class Node
  # sigmoid and derivative of sigmoid. Might be useful
  def self.sigmoid(num)
    1.0 / (1 + Math.exp(-num))
  end

  def self.sigderiv(num)
    Math.exp(-num) / ((Math.exp(-num) + 1)**2)
  end
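
  # Note: sigderiv is algebraically identical to the more common form
  # sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)); both give, e.g.,
  #   Node.sigderiv(1.0)  # => 0.19661193324148185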

  # fan_in is a list of [node, weight] pairs representing the fan-in
  # to the node. For input-layer nodes, a raw Float input value stands
  # in place of a node.
  attr_accessor :fan_in, :id

  # Weighted sum of the fan-in: net = sum over connections of (value * weight)
  def input
    sum = 0
    @fan_in.each do |elem|
      if elem[0].is_a?(Float)   # input layer: raw value
        sum += elem[0] * elem[1]
      else                      # later layers: upstream node
        sum += elem[0].output * elem[1]
      end
    end
    sum
  end

  def output
    Node.sigmoid(input)
  end

  def initialize(fan_in, id)
    @fan_in = fan_in
    @id = id
  end
end
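
# Quick sanity check (illustrative; not part of the original gist): a node
# fed two raw inputs of 1.0 through unit weights has net input 2.0, so its
# output is sigmoid(2.0):
#   demo = Node.new([[1.0, 1.0], [1.0, 1.0]], -1)
#   demo.input   # => 2.0
#   demo.output  # => 0.8807970779778823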

# structure:
#
#   in0    n00
#          n01    n10    n20 --> out
#          n02    n11
#   in1    n03
#
# array with references to all nodes
@layers = []

# create first layer
@layers[0] = []
# creates 4 nodes, gives them sequential IDs, and initializes their inputs
# with dummy values (these will be overwritten)
4.times do |i|
  #                      input 1     input 2
  @layers[0] << Node.new([[0.0, 1.0], [0.0, 1.0]], i)
end

# second layer
@layers[1] = []
2.times do |i|
  i += 4 # to keep the IDs sequential
  tmp = []
  @layers[0].each do |j|
    tmp << [j, 1.0]
  end
  @layers[1] << Node.new(tmp, i)
end

# output node
@layers[2] = []
tmp = []
@layers[1].each do |i|
  tmp << [i, 1.0]
end
@layers[2] << Node.new(tmp, 6)

# At this point the network is set up. Calling Node#output on the final
# node (@layers[2][0].output) computes the output of the whole network.

# Wrap the request for output in an aesthetically pleasing interface:
def process(x0, x1)
  # set all the input-layer nodes accordingly
  f0 = x0.to_f
  f1 = x1.to_f
  @layers[0].each do |node|
    # hard-coded since this NN is small
    node.fan_in[0][0] = f0
    node.fan_in[1][0] = f1
  end
  @layers[2][0].output
end
##TEST
##puts process(1.0,1.0)
##puts "Sigmoid(1): #{Node.sigmoid(1)}"
##puts "out: #{Node.sigmoid(2 * Node.sigmoid( 4 * Node.sigmoid(2)))}"

# Iterates the training algorithm one time.
# The actual algorithm used differs slightly from the one stated in the
# paper, for ease of implementation. It is an equivalent form.
def train(x0, x1, y)
  # total error in the output signal
  delta = y.to_f - process(x0, x1)
  # puts "delta: #{delta}"

  # Hard-coded, as it's a small network and this makes the intention
  # explicit: propagate a share of the error back to each node in
  # proportion to the relative weighting of its connections.
  deltas = []
  deltas[4] = @layers[2][0].fan_in[0][1] * delta
  deltas[5] = @layers[2][0].fan_in[1][1] * delta
  deltas[0] = @layers[1][0].fan_in[0][1] * deltas[4] +
              @layers[1][1].fan_in[0][1] * deltas[5]
  deltas[1] = @layers[1][0].fan_in[1][1] * deltas[4] +
              @layers[1][1].fan_in[1][1] * deltas[5]
  deltas[2] = @layers[1][0].fan_in[2][1] * deltas[4] +
              @layers[1][1].fan_in[2][1] * deltas[5]
  deltas[3] = @layers[1][0].fan_in[3][1] * deltas[4] +
              @layers[1][1].fan_in[3][1] * deltas[5]
  # 6.times do |i|
  #   puts "deltas[#{i}]: #{deltas[i]}"
  # end

  # adjust node 0's weights: w_i += delta * input_i * sigderiv(net)
  node = @layers[0][0]
  node.fan_in[0][1] += deltas[0] * x0 * Node.sigderiv(node.input)
  node.fan_in[1][1] += deltas[0] * x1 * Node.sigderiv(node.input)
  # node 1
  node = @layers[0][1]
  node.fan_in[0][1] += deltas[1] * x0 * Node.sigderiv(node.input)
  node.fan_in[1][1] += deltas[1] * x1 * Node.sigderiv(node.input)
  # node 2
  node = @layers[0][2]
  node.fan_in[0][1] += deltas[2] * x0 * Node.sigderiv(node.input)
  node.fan_in[1][1] += deltas[2] * x1 * Node.sigderiv(node.input)
  # node 3
  node = @layers[0][3]
  node.fan_in[0][1] += deltas[3] * x0 * Node.sigderiv(node.input)
  node.fan_in[1][1] += deltas[3] * x1 * Node.sigderiv(node.input)

  # middle layer
  # node 4
  node = @layers[1][0]
  node.fan_in[0][1] += deltas[4] * node.fan_in[0][0].output * Node.sigderiv(node.input)
  node.fan_in[1][1] += deltas[4] * node.fan_in[1][0].output * Node.sigderiv(node.input)
  node.fan_in[2][1] += deltas[4] * node.fan_in[2][0].output * Node.sigderiv(node.input)
  node.fan_in[3][1] += deltas[4] * node.fan_in[3][0].output * Node.sigderiv(node.input)
  # node 5
  node = @layers[1][1]
  node.fan_in[0][1] += deltas[5] * node.fan_in[0][0].output * Node.sigderiv(node.input)
  node.fan_in[1][1] += deltas[5] * node.fan_in[1][0].output * Node.sigderiv(node.input)
  node.fan_in[2][1] += deltas[5] * node.fan_in[2][0].output * Node.sigderiv(node.input)
  node.fan_in[3][1] += deltas[5] * node.fan_in[3][0].output * Node.sigderiv(node.input)

  # output layer
  # node 6
  node = @layers[2][0]
  node.fan_in[0][1] += delta * node.fan_in[0][0].output * Node.sigderiv(node.input)
  node.fan_in[1][1] += delta * node.fan_in[1][0].output * Node.sigderiv(node.input)

  # after all that typing, all the weights _should_ be updated
end
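
# Every hard-coded update above instantiates the same rule,
#   w_i <- w_i + node_delta * x_i * sigderiv(net),
# where x_i is the value arriving on connection i. A loop-based equivalent
# (a sketch, not part of the original gist) could look like:
#
#   def update_weights(node, node_delta)
#     node.fan_in.each_with_index do |(src, _weight), i|
#       x = src.is_a?(Float) ? src : src.output
#       node.fan_in[i][1] += node_delta * x * Node.sigderiv(node.input)
#     end
#   end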

XOR = [[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0]]

# train on randomly chosen rows of the XOR truth table
100000.times do |i|
  puts "#{i}th iteration" if i % 50 == 0
  k = rand(4)
  train(XOR[k][0], XOR[k][1], XOR[k][2])
end
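
# After training, the learned mapping can be inspected (illustrative; not
# part of the original gist):
#   XOR.each do |x0, x1, y|
#     puts "#{x0} XOR #{x1} -> #{process(x0, x1)} (target #{y})"
#   end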