Created December 14, 2014 08:06
Stacked Denoising Autoencoders.go
package main

import (
    "fmt"
    "math"
    "math/rand"
)

// uniform samples uniformly from [min, max).
func uniform(min float64, max float64) float64 {
    return rand.Float64()*(max-min) + min
}

// binomial counts successes in n Bernoulli trials with success probability p.
func binomial(n int, p float64) int {
    if p < 0 || p > 1 { return 0 }

    c := 0
    var r float64

    for i := 0; i < n; i++ {
        r = rand.Float64()
        if r < p { c++ }
    }

    return c
}

// sigmoid is the logistic activation function.
func sigmoid(x float64) float64 {
    return 1.0 / (1.0 + math.Exp(-x))
}

type SdA struct {
    N                  int
    n_ins              int
    hidden_layer_sizes []int
    n_outs             int
    n_layers           int
    sigmoid_layers     []HiddenLayer
    dA_layers          []dA
    log_layer          LogisticRegression
}

type HiddenLayer struct {
    N     int
    n_in  int
    n_out int
    W     [][]float64
    b     []float64
}

type dA struct {
    N         int
    n_visible int
    n_hidden  int
    W         [][]float64
    hbias     []float64
    vbias     []float64
}

type LogisticRegression struct {
    N     int
    n_in  int
    n_out int
    W     [][]float64
    b     []float64
}

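// Architecture note: an SdA stacks n_layers HiddenLayer/dA pairs followed by a
// LogisticRegression output layer. Each dA (denoising autoencoder) shares its
// weight matrix W and hidden bias with the HiddenLayer at the same depth (the
// slices point at the same backing arrays), so layer-wise pretraining of the
// autoencoders directly initializes the stack used for classification.
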
// SdA
func SdA__construct(this *SdA, N int, n_ins int, hidden_layer_sizes []int, n_outs int, n_layers int) {
    var input_size int

    this.N = N
    this.n_ins = n_ins
    this.hidden_layer_sizes = hidden_layer_sizes
    this.n_outs = n_outs
    this.n_layers = n_layers

    this.sigmoid_layers = make([]HiddenLayer, n_layers)
    this.dA_layers = make([]dA, n_layers)

    // construct multi-layer
    for i := 0; i < n_layers; i++ {
        if i == 0 {
            input_size = n_ins
        } else {
            input_size = hidden_layer_sizes[i-1]
        }

        // construct sigmoid_layer
        HiddenLayer__construct(&(this.sigmoid_layers[i]), N, input_size, hidden_layer_sizes[i], nil, nil)

        // construct dA_layer, sharing W and b with the sigmoid_layer above
        dA__construct(&(this.dA_layers[i]), N, input_size, hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, nil)
    }

    // layer for output using LogisticRegression
    LogisticRegression__construct(&(this.log_layer), N, hidden_layer_sizes[n_layers-1], n_outs)
}

func SdA_pretrain(this *SdA, train_X [][]int, lr float64, corruption_level float64, epochs int) {
    var (
        layer_input           []int
        prev_layer_input_size int
        prev_layer_input      []int
    )

    for i := 0; i < this.n_layers; i++ { // layer-wise
        for epoch := 0; epoch < epochs; epoch++ { // training epochs
            for n := 0; n < this.N; n++ { // input x1...xN
                // layer input
                for l := 0; l <= i; l++ {
                    if l == 0 {
                        layer_input = make([]int, this.n_ins)
                        for j := 0; j < this.n_ins; j++ { layer_input[j] = train_X[n][j] }
                    } else {
                        if l == 1 {
                            prev_layer_input_size = this.n_ins
                        } else {
                            prev_layer_input_size = this.hidden_layer_sizes[l-2]
                        }

                        prev_layer_input = make([]int, prev_layer_input_size)
                        for j := 0; j < prev_layer_input_size; j++ { prev_layer_input[j] = layer_input[j] }

                        layer_input = make([]int, this.hidden_layer_sizes[l-1])
                        HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[l-1]), prev_layer_input, layer_input)
                    }
                }

                dA_train(&(this.dA_layers[i]), layer_input, lr, corruption_level)
            }
        }
    }
}

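// Pretraining note: for layer i, the inner l-loop forwards each training
// vector through the already-pretrained layers 0..i-1, sampling binary hidden
// states at each step, and the resulting code becomes the "visible" input on
// which dA_layers[i] is trained. This is the greedy layer-wise scheme used by
// stacked denoising autoencoders.
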
func SdA_finetune(this *SdA, train_X [][]int, train_Y [][]int, lr float64, epochs int) {
    var (
        layer_input      []int
        prev_layer_input []int
    )

    for epoch := 0; epoch < epochs; epoch++ {
        for n := 0; n < this.N; n++ { // input x1...xN
            // layer input
            for i := 0; i < this.n_layers; i++ {
                if i == 0 {
                    prev_layer_input = make([]int, this.n_ins)
                    for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = train_X[n][j] }
                } else {
                    prev_layer_input = make([]int, this.hidden_layer_sizes[i-1])
                    for j := 0; j < this.hidden_layer_sizes[i-1]; j++ { prev_layer_input[j] = layer_input[j] }
                }

                layer_input = make([]int, this.hidden_layer_sizes[i])
                HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[i]), prev_layer_input, layer_input)
            }

            LogisticRegression_train(&(this.log_layer), layer_input, train_Y[n], lr)
        }
        // lr *= 0.95
    }
}

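// Finetuning note: in this implementation only the logistic regression output
// layer receives gradient updates during finetuning; the pretrained hidden
// weights are used for (stochastic) forward propagation but are not adjusted.
// A full SdA would backpropagate through the whole stack at this stage.
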
func SdA_predict(this *SdA, x []int, y []float64) {
    var (
        layer_input []float64
    )

    prev_layer_input := make([]float64, this.n_ins)
    for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = float64(x[j]) }

    // layer activation
    for i := 0; i < this.n_layers; i++ {
        layer_input = make([]float64, this.sigmoid_layers[i].n_out)

        for k := 0; k < this.sigmoid_layers[i].n_out; k++ {
            linear_output := 0.0

            for j := 0; j < this.sigmoid_layers[i].n_in; j++ {
                linear_output += this.sigmoid_layers[i].W[k][j] * prev_layer_input[j]
            }
            linear_output += this.sigmoid_layers[i].b[k]
            layer_input[k] = sigmoid(linear_output)
        }

        if i < this.n_layers-1 {
            prev_layer_input = make([]float64, this.sigmoid_layers[i].n_out)

            for j := 0; j < this.sigmoid_layers[i].n_out; j++ {
                prev_layer_input[j] = layer_input[j]
            }
        }
    }

    for i := 0; i < this.log_layer.n_out; i++ {
        y[i] = 0

        for j := 0; j < this.log_layer.n_in; j++ {
            y[i] += this.log_layer.W[i][j] * layer_input[j]
        }
        y[i] += this.log_layer.b[i]
    }

    LogisticRegression_softmax(&(this.log_layer), y)
}

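// Prediction note: unlike training, prediction propagates the real-valued
// sigmoid activations (the expected hidden values) rather than binary samples,
// so SdA_predict is deterministic given fixed weights.
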
// HiddenLayer
func HiddenLayer__construct(this *HiddenLayer, N int, n_in int, n_out int, W [][]float64, b []float64) {
    a := 1.0 / float64(n_in)

    this.N = N
    this.n_in = n_in
    this.n_out = n_out

    if W == nil {
        this.W = make([][]float64, n_out)
        for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) }

        for i := 0; i < n_out; i++ {
            for j := 0; j < n_in; j++ {
                this.W[i][j] = uniform(-a, a)
            }
        }
    } else {
        this.W = W
    }

    if b == nil {
        this.b = make([]float64, n_out)
    } else {
        this.b = b
    }
}

func HiddenLayer_output(this *HiddenLayer, input []int, w []float64, b float64) float64 {
    linear_output := 0.0
    for j := 0; j < this.n_in; j++ {
        linear_output += w[j] * float64(input[j])
    }
    linear_output += b
    return sigmoid(linear_output)
}

func HiddenLayer_sample_h_given_v(this *HiddenLayer, input []int, sample []int) {
    for i := 0; i < this.n_out; i++ {
        sample[i] = binomial(1, HiddenLayer_output(this, input, this.W[i], this.b[i]))
    }
}

// dA
func dA__construct(this *dA, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) {
    a := 1.0 / float64(n_visible)

    this.N = N
    this.n_visible = n_visible
    this.n_hidden = n_hidden

    if W == nil {
        this.W = make([][]float64, n_hidden)
        for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) }

        for i := 0; i < n_hidden; i++ {
            for j := 0; j < n_visible; j++ {
                this.W[i][j] = uniform(-a, a)
            }
        }
    } else {
        this.W = W
    }

    if hbias == nil {
        this.hbias = make([]float64, n_hidden)
    } else {
        this.hbias = hbias
    }

    if vbias == nil {
        this.vbias = make([]float64, n_visible)
    } else {
        this.vbias = vbias
    }
}

func dA_get_corrupted_input(this *dA, x []int, tilde_x []int, p float64) {
    for i := 0; i < this.n_visible; i++ {
        if x[i] == 0 {
            tilde_x[i] = 0
        } else {
            tilde_x[i] = binomial(1, p)
        }
    }
}

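// Corruption note: this is masking noise. p is the keep probability
// (1 - corruption_level), so each active input bit survives with probability p
// and is zeroed otherwise; bits that are already 0 are left untouched.
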
// Encode
func dA_get_hidden_values(this *dA, x []int, y []float64) {
    for i := 0; i < this.n_hidden; i++ {
        y[i] = 0
        for j := 0; j < this.n_visible; j++ {
            y[i] += this.W[i][j] * float64(x[j])
        }
        y[i] += this.hbias[i]
        y[i] = sigmoid(y[i])
    }
}

// Decode (uses the transpose of W, i.e. tied weights)
func dA_get_reconstructed_input(this *dA, y []float64, z []float64) {
    for i := 0; i < this.n_visible; i++ {
        z[i] = 0
        for j := 0; j < this.n_hidden; j++ {
            z[i] += this.W[j][i] * y[j]
        }
        z[i] += this.vbias[i]
        z[i] = sigmoid(z[i])
    }
}

func dA_train(this *dA, x []int, lr float64, corruption_level float64) {
    tilde_x := make([]int, this.n_visible)
    y := make([]float64, this.n_hidden)
    z := make([]float64, this.n_visible)

    L_vbias := make([]float64, this.n_visible)
    L_hbias := make([]float64, this.n_hidden)

    p := 1 - corruption_level

    dA_get_corrupted_input(this, x, tilde_x, p)
    dA_get_hidden_values(this, tilde_x, y)
    dA_get_reconstructed_input(this, y, z)

    // vbias
    for i := 0; i < this.n_visible; i++ {
        L_vbias[i] = float64(x[i]) - z[i]
        this.vbias[i] += lr * L_vbias[i] / float64(this.N)
    }

    // hbias
    for i := 0; i < this.n_hidden; i++ {
        L_hbias[i] = 0

        for j := 0; j < this.n_visible; j++ {
            L_hbias[i] += this.W[i][j] * L_vbias[j]
        }
        L_hbias[i] *= y[i] * (1 - y[i])
        this.hbias[i] += lr * L_hbias[i] / float64(this.N)
    }

    // W
    for i := 0; i < this.n_hidden; i++ {
        for j := 0; j < this.n_visible; j++ {
            this.W[i][j] += lr * (L_hbias[i]*float64(tilde_x[j]) + L_vbias[j]*y[i]) / float64(this.N)
        }
    }
}

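// Gradient note: these updates follow from the cross-entropy reconstruction
// loss with tied weights. With y = sigmoid(W*tilde_x + hbias) and
// z = sigmoid(W^T*y + vbias), the error terms are
//   L_vbias[i] = x[i] - z[i]                                    (output delta)
//   L_hbias[i] = (sum_j W[i][j]*L_vbias[j]) * y[i]*(1 - y[i])   (backpropagated delta)
// and W receives both the decoder gradient L_vbias[j]*y[i] and the encoder
// gradient L_hbias[i]*tilde_x[j], since the same matrix plays both roles.
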
func dA_reconstruct(this *dA, x []int, z []float64) {
    y := make([]float64, this.n_hidden)

    dA_get_hidden_values(this, x, y)
    dA_get_reconstructed_input(this, y, z)
}

// LogisticRegression
func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) {
    this.N = N
    this.n_in = n_in
    this.n_out = n_out

    this.W = make([][]float64, n_out)
    for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) }
    this.b = make([]float64, n_out)
}

func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) {
    p_y_given_x := make([]float64, this.n_out)
    dy := make([]float64, this.n_out)

    for i := 0; i < this.n_out; i++ {
        p_y_given_x[i] = 0
        for j := 0; j < this.n_in; j++ {
            p_y_given_x[i] += this.W[i][j] * float64(x[j])
        }
        p_y_given_x[i] += this.b[i]
    }
    LogisticRegression_softmax(this, p_y_given_x)

    for i := 0; i < this.n_out; i++ {
        dy[i] = float64(y[i]) - p_y_given_x[i]

        for j := 0; j < this.n_in; j++ {
            this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N)
        }
        this.b[i] += lr * dy[i] / float64(this.N)
    }
}

func LogisticRegression_softmax(this *LogisticRegression, x []float64) {
    var (
        max float64
        sum float64
    )

    for i := 0; i < this.n_out; i++ { if max < x[i] { max = x[i] } }
    for i := 0; i < this.n_out; i++ {
        x[i] = math.Exp(x[i] - max)
        sum += x[i]
    }
    for i := 0; i < this.n_out; i++ { x[i] /= sum }
}

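// Stability note: subtracting max (here the larger of 0 and the largest logit)
// before exponentiating leaves the result unchanged, since
// softmax(x) == softmax(x - c) for any constant c, while keeping every
// argument to math.Exp non-positive and thus safe from overflow.
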
func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) {
    for i := 0; i < this.n_out; i++ {
        y[i] = 0
        for j := 0; j < this.n_in; j++ {
            y[i] += this.W[i][j] * float64(x[j])
        }
        y[i] += this.b[i]
    }

    LogisticRegression_softmax(this, y)
}

func test_SdA() {
    rand.Seed(0)

    pretrain_lr := 0.1
    corruption_level := 0.3
    pretraining_epochs := 1000
    finetune_lr := 0.1
    finetune_epochs := 500

    train_N := 10
    test_N := 4
    n_ins := 28
    n_outs := 2

    hidden_layer_sizes := []int{15, 15}
    n_layers := len(hidden_layer_sizes)

    // training data
    train_X := [][]int{
        {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
        {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
        {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
        {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
        {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
    }

    train_Y := [][]int{
        {1, 0},
        {1, 0},
        {1, 0},
        {1, 0},
        {1, 0},
        {0, 1},
        {0, 1},
        {0, 1},
        {0, 1},
        {0, 1},
    }

    // construct SdA
    var sda SdA
    SdA__construct(&sda, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers)

    // pretrain
    SdA_pretrain(&sda, train_X, pretrain_lr, corruption_level, pretraining_epochs)

    // finetune
    SdA_finetune(&sda, train_X, train_Y, finetune_lr, finetune_epochs)

    // test data
    test_X := [][]int{
        {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
        {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
    }

    test_Y := make([][]float64, test_N)
    for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_outs) }

    // test
    for i := 0; i < test_N; i++ {
        SdA_predict(&sda, test_X[i], test_Y[i])
        for j := 0; j < n_outs; j++ {
            fmt.Printf("%.5f ", test_Y[i][j])
        }
        fmt.Printf("\n")
    }
}

func main() {
    test_SdA()
}

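// A minimal usage sketch (not part of the original gist): once an SdA has been
// constructed, pretrained, and finetuned as in test_SdA above, SdA_predict can
// score any fresh 28-bit input. The input vector below is hypothetical; it
// reuses the left-half-active pattern of the first training class.
func example_predict(sda *SdA) {
    x := []int{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
    y := make([]float64, 2) // class-membership probabilities after softmax
    SdA_predict(sda, x, y)
    fmt.Printf("p(class0)=%.5f p(class1)=%.5f\n", y[0], y[1])
}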