Skip to content

Instantly share code, notes, and snippets.

@Theoistic
Created September 14, 2021 04:26
Show Gist options
  • Save Theoistic/a557eee57483a3d2883f1bc9b9513847 to your computer and use it in GitHub Desktop.
Save Theoistic/a557eee57483a3d2883f1bc9b9513847 to your computer and use it in GitHub Desktop.
Bring people back from the dead ;)
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Runtime.Serialization.Formatters.Binary;
using System.Text.RegularExpressions;
using System.Threading;
namespace AfterLife
{
class Program
{
static AttentionSeq2Seq ss;
static Thread mainThread;
static void Main(string[] args)
{
if(args[0] == "train")
{
Train(args[1], int.Parse(args[2]));
} else
{
Chat(args[1]);
}
}
static (List<List<string>> input, List<List<string>> output) GetTrainingData(string file)
{
CultureInfo.CurrentCulture = new CultureInfo("fo-FO");
string json = File.ReadAllText(file);
Dialog dialog = Newtonsoft.Json.JsonConvert.DeserializeObject<Dialog>(json, new JsonSerializerSettings
{
MetadataPropertyHandling = MetadataPropertyHandling.Ignore,
MissingMemberHandling = MissingMemberHandling.Ignore,
Culture = new CultureInfo("fo-FO")
});
// clean data
dialog.Messages = dialog.Messages.Where(x => x.Type == "Generic" && !string.IsNullOrEmpty(x.Content)).OrderBy(x => x.TimestampMs).ToList();
List<(string, string)> d = new List<(string, string)>();
for (int i = 0; i < dialog.Messages.Count - 1; i++)
{
if (dialog.Messages[i].SenderName != dialog.Messages[i + 1].SenderName)
{
d.Add((Regex.Match(dialog.Messages[i].Content, @"^[a-z \w+ 0-9]+").Value, Regex.Match(dialog.Messages[i + 1].Content, @"^[a-z \w+ 0-9]+").Value));
i++;
}
}
List<List<string>> input = new List<List<string>>();
List<List<string>> output = new List<List<string>>();
for (int i = 0; i < d.Count; i++)
{
input.Add(d[i].Item1.ToLower().Trim().Split(' ').ToList());
output.Add(d[i].Item2.ToLower().Trim().Split(' ').ToList());
}
return (input, output);
}
static void Train(string file, int epoc)
{
var trainingData = GetTrainingData(file);
ss = new AttentionSeq2Seq(64, 32, 1, trainingData.input, trainingData.output, true);
ss.IterationDone += (sndr, e) =>
{
var progress = (e as CostEventArg);
Console.WriteLine($"{progress.Iteration} - Cost: {progress.Cost} - {DateTime.Now.ToLongTimeString()}");
};
ss.Train(epoc);
ss.Save();
}
static void Chat(string file)
{
var trainingData = GetTrainingData(file);
ss = new AttentionSeq2Seq(64, 32, 1, trainingData.input, trainingData.output, true);
ss.Load();
while(true)
{
string prompt = Console.ReadLine();
var response = ss.Predict(prompt.ToLower().Trim().Split(' ').ToList());
var responseText = "> " + string.Join(" ", response);
Console.WriteLine(responseText);
}
}
}
public class Participant
{
[JsonProperty("name")]
public string Name { get; set; }
}
public class Message
{
[JsonProperty("sender_name")]
public string SenderName { get; set; }
[JsonProperty("timestamp_ms")]
public long TimestampMs { get; set; }
[JsonProperty("content")]
public string Content { get; set; }
[JsonProperty("type")]
public string Type { get; set; }
[JsonProperty("is_unsent")]
public bool IsUnsent { get; set; }
}
public class Dialog
{
[JsonProperty("participants")]
public List<Participant> Participants { get; set; }
[JsonProperty("messages")]
public List<Message> Messages { get; set; }
}
#region Seq2Seq
public class AttentionSeq2Seq
{
public event EventHandler IterationDone;
public int max_word = 100; // max length of generated sentences
public Dictionary<string, int> wordToIndex = new Dictionary<string, int>();
public Dictionary<int, string> indexToWord = new Dictionary<int, string>();
public List<string> vocab = new List<string>();
public List<List<string>> InputSequences;
public List<List<string>> OutputSequences;
public int hidden_size;
public int word_size;
// optimization hyperparameters
public double regc = 0.000001; // L2 regularization strength
public double learning_rate = 0.001; // learning rate
public double clipval = 5.0; // clip gradients at this value
public Optimizer solver;
public WeightMatrix Embedding;
public Encoder encoder;
public Encoder ReversEncoder;
public AttentionDecoder decoder;
public bool UseDropout { get; set; }
//Output Layer Weights
public WeightMatrix Whd { get; set; }
public WeightMatrix bd { get; set; }
public int Depth { get; set; }
public AttentionSeq2Seq(int inputSize, int hiddenSize, int depth, List<List<string>> input, List<List<string>> output, bool useDropout)
{
this.InputSequences = input;
this.OutputSequences = output;
this.Depth = depth;
// list of sizes of hidden layers
word_size = inputSize; // size of word embeddings.
this.hidden_size = hiddenSize;
solver = new Optimizer();
OneHotEncoding(input, output);
this.Whd = new WeightMatrix(hidden_size, vocab.Count + 2, true);
this.bd = new WeightMatrix(1, vocab.Count + 2, 0);
Embedding = new WeightMatrix(vocab.Count + 2, word_size, true);
encoder = new Encoder(hidden_size, word_size, depth);
ReversEncoder = new Encoder(hidden_size, word_size, depth);
decoder = new AttentionDecoder(hidden_size, word_size, depth);
}
private void OneHotEncoding(List<List<string>> _input, List<List<string>> _output)
{
// count up all words
Dictionary<string, int> d = new Dictionary<string, int>();
wordToIndex = new Dictionary<string, int>();
indexToWord = new Dictionary<int, string>();
vocab = new List<string>();
for (int j = 0, n2 = _input.Count; j < n2; j++)
{
var item = _input[j];
for (int i = 0, n = item.Count; i < n; i++)
{
var txti = item[i];
if (d.Keys.Contains(txti)) { d[txti] += 1; }
else { d.Add(txti, 1); }
}
var item2 = _output[j];
for (int i = 0, n = item2.Count; i < n; i++)
{
var txti = item2[i];
if (d.Keys.Contains(txti)) { d[txti] += 1; }
else { d.Add(txti, 1); }
}
}
// NOTE: start at one because we will have START and END tokens!
// that is, START token will be index 0 in model word vectors
// and END token will be index 0 in the next word softmax
var q = 2;
foreach (var ch in d)
{
if (ch.Value >= 1)
{
// add word to vocab
wordToIndex[ch.Key] = q;
indexToWord[q] = ch.Key;
vocab.Add(ch.Key);
q++;
}
}
}
public void Train(int trainingEpoch)
{
for (int ep = 0; ep < trainingEpoch; ep++)
{
Random r = new Random();
for (int itr = 0; itr < InputSequences.Count; itr++)
{
// sample sentence from data
List<string> OutputSentence;
ComputeGraph g;
double cost;
List<WeightMatrix> encoded = new List<WeightMatrix>();
Encode(r, out OutputSentence, out g, out cost, encoded);
cost = DecodeOutput(OutputSentence, g, cost, encoded);
g.backward();
UpdateParameters();
Reset();
if (IterationDone != null)
{
IterationDone(this, new CostEventArg()
{
Cost = cost / OutputSentence.Count
,
Iteration = ep
});
}
}
}
}
private void Encode(Random r, out List<string> OutputSentence, out ComputeGraph g, out double cost, List<WeightMatrix> encoded)
{
var sentIndex = r.Next(0, InputSequences.Count);
var inputSentence = InputSequences[sentIndex];
var reversSentence = InputSequences[sentIndex].ToList();
reversSentence.Reverse();
OutputSentence = OutputSequences[sentIndex];
g = new ComputeGraph();
cost = 0.0;
for (int i = 0; i < inputSentence.Count; i++)
{
int ix_source = wordToIndex[inputSentence[i]];
int ix_source2 = wordToIndex[reversSentence[i]];
var x = g.PeekRow(Embedding, ix_source);
var eOutput = encoder.Encode(x, g);
var x2 = g.PeekRow(Embedding, ix_source2);
var eOutput2 = ReversEncoder.Encode(x2, g);
encoded.Add(g.concatColumns(eOutput, eOutput2));
}
//if (UseDropout)
//{
// encoded = g.Dropout(encoded, 0.2);
//}
}
private double DecodeOutput(List<string> OutputSentence, ComputeGraph g, double cost, List<WeightMatrix> encoded)
{
int ix_input = 1;
for (int i = 0; i < OutputSentence.Count + 1; i++)
{
int ix_target = 0;
if (i == OutputSentence.Count)
{
ix_target = 0;
}
else
{
ix_target = wordToIndex[OutputSentence[i]];
}
var x = g.PeekRow(Embedding, ix_input);
var eOutput = decoder.Decode(x, encoded, g);
if (UseDropout)
{
eOutput = g.Dropout(eOutput, 0.2);
}
var o = g.add(
g.mul(eOutput, this.Whd), this.bd);
if (UseDropout)
{
o = g.Dropout(o, 0.2);
}
var probs = g.SoftmaxWithCrossEntropy(o);
cost += -Math.Log(probs.Weight[ix_target]);
o.Gradient = probs.Weight;
o.Gradient[ix_target] -= 1;
ix_input = ix_target;
}
return cost;
}
private void UpdateParameters()
{
var model = encoder.getParams();
model.AddRange(decoder.getParams());
model.AddRange(ReversEncoder.getParams());
model.Add(Embedding);
model.Add(Whd);
model.Add(bd);
solver.setp(model, learning_rate, regc, clipval);
}
private void Reset()
{
encoder.Reset();
ReversEncoder.Reset();
decoder.Reset();
}
public List<string> Predict(List<string> inputSeq)
{
ReversEncoder.Reset();
encoder.Reset();
decoder.Reset();
List<string> result = new List<string>();
var G2 = new ComputeGraph(false);
List<string> revseq = inputSeq.ToList();
revseq.Reverse();
List<WeightMatrix> encoded = new List<WeightMatrix>();
for (int i = 0; i < inputSeq.Count; i++)
{
int ix = wordToIndex[inputSeq[i]];
int ix2 = wordToIndex[revseq[i]];
var x2 = G2.PeekRow(Embedding, ix);
var o = encoder.Encode(x2, G2);
var x3 = G2.PeekRow(Embedding, ix2);
var eOutput2 = ReversEncoder.Encode(x3, G2);
var d = G2.concatColumns(o, eOutput2);
encoded.Add(d);
}
//if (UseDropout)
//{
// for (int i = 0; i < encoded.Weight.Length; i++)
// {
// encoded.Weight[i] *= 0.2;
// }
//}
var ix_input = 1;
while (true)
{
var x = G2.PeekRow(Embedding, ix_input);
var eOutput = decoder.Decode(x, encoded, G2);
if (UseDropout)
{
for (int i = 0; i < eOutput.Weight.Length; i++)
{
eOutput.Weight[i] *= 0.2;
}
}
var o = G2.add(
G2.mul(eOutput, this.Whd), this.bd);
if (UseDropout)
{
for (int i = 0; i < o.Weight.Length; i++)
{
o.Weight[i] *= 0.2;
}
}
var probs = G2.SoftmaxWithCrossEntropy(o);
var maxv = probs.Weight[0];
var maxi = 0;
for (int i = 1; i < probs.Weight.Length; i++)
{
if (probs.Weight[i] > maxv)
{
maxv = probs.Weight[i];
maxi = i;
}
}
var pred = maxi;
if (pred == 0) break; // END token predicted, break out
if (result.Count > max_word) { break; } // something is wrong
var letter2 = indexToWord[pred];
result.Add(letter2);
ix_input = pred;
}
return result;
}
public void Save()
{
ModelAttentionData tosave = new ModelAttentionData();
tosave.bd = this.bd;
tosave.clipval = this.clipval;
tosave.decoder = this.decoder;
tosave.Depth = this.Depth;
tosave.encoder = this.encoder;
tosave.hidden_sizes = this.hidden_size;
tosave.learning_rate = this.learning_rate;
tosave.letter_size = this.word_size;
tosave.max_chars_gen = this.max_word;
tosave.regc = this.regc;
tosave.ReversEncoder = this.ReversEncoder;
tosave.UseDropout = this.UseDropout;
tosave.Whd = this.Whd;
tosave.Wil = this.Embedding;
BinaryFormatter bf = new BinaryFormatter();
FileStream fs = new FileStream("Model.bin", FileMode.OpenOrCreate, FileAccess.Write);
bf.Serialize(fs, tosave);
fs.Close();
fs.Dispose();
}
public void Load()
{
ModelAttentionData tosave = new ModelAttentionData();
BinaryFormatter bf = new BinaryFormatter();
FileStream fs = new FileStream("Model.bin", FileMode.Open, FileAccess.Read);
tosave = bf.Deserialize(fs) as ModelAttentionData;
fs.Close();
fs.Dispose();
this.bd = tosave.bd;
this.clipval = tosave.clipval;
this.decoder = tosave.decoder;
this.Depth = tosave.Depth;
this.encoder = tosave.encoder;
this.hidden_size = tosave.hidden_sizes;
this.learning_rate = tosave.learning_rate;
this.word_size = tosave.letter_size;
this.max_word = 100;
this.regc = tosave.regc;
this.ReversEncoder = tosave.ReversEncoder;
this.UseDropout = tosave.UseDropout;
this.Whd = tosave.Whd;
this.Embedding = tosave.Wil;
}
}
public class Optimizer
{
public double decay_rate = 0.999;
public double smooth_eps = 1e-8;
List<WeightMatrix> step_cache = new List<WeightMatrix>();
public void setp(List<WeightMatrix> model, double step_size, double regc, double clipval)
{
var num_clipped = 0;
var num_tot = 0;
foreach (var k in model)
{
if (k == null)
{
continue;
}
var m = k; // mat ref
var s = k.Cash;
for (int i = 0, n = m.Weight.Length; i < n; i++)
{
// rmsprop adaptive learning rate
var mdwi = m.Gradient[i];
s[i] = s[i] * this.decay_rate + (1.0 - this.decay_rate)
* mdwi * mdwi;
// gradient clip
if (mdwi > clipval)
{
mdwi = clipval;
num_clipped++;
}
if (mdwi < -clipval)
{
mdwi = -clipval;
num_clipped++;
}
num_tot++;
// update (and regularize)
m.Weight[i] += -step_size *
mdwi / Math.Sqrt(s[i] + this.smooth_eps) -
regc * m.Weight[i];
m.Gradient[i] = 0; // reset gradients for next iteration
}
}
}
}
public class CostEventArg : EventArgs
{
public double Cost { get; set; }
public int Iteration { get; set; }
}
[Serializable]
public class LSTMCell
{
public WeightMatrix Wix { get; set; }
public WeightMatrix Wih { get; set; }
public WeightMatrix bi { get; set; }
public WeightMatrix Wfx { get; set; }
public WeightMatrix Wfh { get; set; }
public WeightMatrix bf { get; set; }
public WeightMatrix Wox { get; set; }
public WeightMatrix Woh { get; set; }
public WeightMatrix bo { get; set; }
public WeightMatrix Wcx { get; set; }
public WeightMatrix Wch { get; set; }
public WeightMatrix bc { get; set; }
public WeightMatrix ht { get; set; }
public WeightMatrix ct { get; set; }
public int hdim { get; set; }
public int dim { get; set; }
public LSTMCell(int hdim, int dim)
{
this.Wix = new WeightMatrix(dim, hdim, true);
this.Wih = new WeightMatrix(hdim, hdim, true);
this.bi = new WeightMatrix(1, hdim, 0);
this.Wfx = new WeightMatrix(dim, hdim, true);
this.Wfh = new WeightMatrix(hdim, hdim, true);
this.bf = new WeightMatrix(1, hdim, 0);
this.Wox = new WeightMatrix(dim, hdim, true);
this.Woh = new WeightMatrix(hdim, hdim, true);
this.bo = new WeightMatrix(1, hdim, 0);
this.Wcx = new WeightMatrix(dim, hdim, true);
this.Wch = new WeightMatrix(hdim, hdim, true);
this.bc = new WeightMatrix(1, hdim, 0);
this.ht = new WeightMatrix(1, hdim, 0);
this.ct = new WeightMatrix(1, hdim, 0);
this.hdim = hdim;
this.dim = dim;
}
public WeightMatrix Step(WeightMatrix input, ComputeGraph innerGraph)
{
var hidden_prev = ht;
var cell_prev = ct;
var cell = this;
var h0 = innerGraph.mul(input, cell.Wix);
var h1 = innerGraph.mul(hidden_prev, cell.Wih);
var input_gate = innerGraph.sigmoid(
innerGraph.add(
innerGraph.add(h0, h1),
cell.bi
)
);
var h2 = innerGraph.mul(input, cell.Wfx);
var h3 = innerGraph.mul(hidden_prev, cell.Wfh);
var forget_gate = innerGraph.sigmoid(
innerGraph.add(
innerGraph.add(h2, h3),
cell.bf
)
);
var h4 = innerGraph.mul(input, cell.Wox);
var h5 = innerGraph.mul(hidden_prev, cell.Woh);
var output_gate = innerGraph.sigmoid(
innerGraph.add(
innerGraph.add(h4, h5),
cell.bo
)
);
var h6 = innerGraph.mul(input, cell.Wcx);
var h7 = innerGraph.mul(hidden_prev, cell.Wch);
var cell_write = innerGraph.tanh(
innerGraph.add(
innerGraph.add(h6, h7),
cell.bc
)
);
// compute new cell activation
var retain_cell = innerGraph.eltmul(forget_gate, cell_prev); // what do we keep from cell
var write_cell = innerGraph.eltmul(input_gate, cell_write); // what do we write to cell
var cell_d = innerGraph.add(retain_cell, write_cell); // new cell contents
// compute hidden state as gated, saturated cell activations
var hidden_d = innerGraph.eltmul(output_gate, innerGraph.tanh(cell_d));
this.ht = hidden_d;
this.ct = cell_d;
return ht;
}
public virtual List<WeightMatrix> getParams()
{
List<WeightMatrix> response = new List<WeightMatrix>();
response.Add(this.bc);
response.Add(this.bf);
response.Add(this.bi);
response.Add(this.bo);
response.Add(this.Wch);
response.Add(this.Wcx);
response.Add(this.Wfh);
response.Add(this.Wfx);
response.Add(this.Wih);
response.Add(this.Wix);
response.Add(this.Woh);
response.Add(this.Wox);
return response;
}
public void Reset()
{
ht = new WeightMatrix(1, hdim, 0);
ct = new WeightMatrix(1, hdim, 0);
}
}
[Serializable]
public class AttentionUnit
{
public WeightMatrix V { get; set; }
public WeightMatrix Ua { get; set; }
public WeightMatrix bUa { get; set; }
public WeightMatrix Wa { get; set; }
public WeightMatrix bWa { get; set; }
public int MaxIndex { get; set; }
public int batchSize { get; set; }
public AttentionUnit()
{
this.batchSize = 1;
}
public AttentionUnit(int size)
{
this.batchSize = 1;
this.Ua = new WeightMatrix((size * 2), size, true);
this.Wa = new WeightMatrix(size, size, true);
this.bUa = new WeightMatrix(1, size, 0);
this.bWa = new WeightMatrix(1, size, 0);
this.V = new WeightMatrix(size, 1, true);
}
public WeightMatrix Perform(List<WeightMatrix> input, WeightMatrix state, ComputeGraph g)
{
WeightMatrix context;
List<WeightMatrix> atten = new List<WeightMatrix>();
foreach (var h_j in input)
{
var uh = g.add(g.mul(h_j, Ua), bUa);
var wc = g.add(g.mul(state, Wa), bWa);
var gg = g.tanh(g.add(uh, wc));
var aa = g.mul(gg, V);
atten.Add(aa);
}
var res = g.Softmax(atten);
var cmax = res[0].Weight[0];
int maxAtt = 0;
for (int i = 1; i < res.Count; i++)
{
if (res[i].Weight[0] > cmax)
{
cmax = res[i].Weight[0];
maxAtt = i;
}
}
this.MaxIndex = maxAtt;
context = g.scalemul(input[0], res[0]);
for (int hj = 1; hj < input.Count; hj++)
{
context = g.add(context, g.scalemul(input[hj], res[hj]));
}
return context;
}
public WeightMatrix Perform(WeightMatrix input, WeightMatrix state, ComputeGraph g)
{
WeightMatrix context;
List<WeightMatrix> atten = new List<WeightMatrix>();
var stateRepeat = g.RepeatRows(state, input.Rows);
var baiseInput = new WeightMatrix(input.Rows, 1, 1);
var inputb = g.concatColumns(input, baiseInput);
var uh = g.mul(inputb, Ua);
baiseInput = new WeightMatrix(stateRepeat.Rows, 1, 1);
stateRepeat = g.concatColumns(stateRepeat, baiseInput);
var wc = g.mul(stateRepeat, Wa);
var gg = g.tanh(g.add(uh, wc));
var aa = g.mul(gg, V);
var res = g.Softmax(aa);
var weighted = g.weightRows(input, res); ;
context = g.sumColumns(weighted);
return context;
}
public virtual List<WeightMatrix> getParams()
{
List<WeightMatrix> response = new List<WeightMatrix>();
response.Add(this.Ua);
response.Add(this.Wa);
response.Add(this.bUa);
response.Add(this.bWa);
response.Add(this.V);
return response;
}
}
[Serializable]
public class LSTMAttentionDecoderCell : LSTMCell
{
public WeightMatrix WiC { get; set; }
public WeightMatrix WfC { get; set; }
public WeightMatrix WoC { get; set; }
public WeightMatrix WcC { get; set; }
public LSTMAttentionDecoderCell(int hdim, int dim)
: base(hdim, dim)
{
int contextSize = hdim * 2;
this.WiC = new WeightMatrix(contextSize, hdim, true);
this.WfC = new WeightMatrix(contextSize, hdim, true);
this.WoC = new WeightMatrix(contextSize, hdim, true);
this.WcC = new WeightMatrix(contextSize, hdim, true);
}
public WeightMatrix Step(WeightMatrix context, WeightMatrix input, ComputeGraph innerGraph)
{
var hidden_prev = ht;
var cell_prev = ct;
var cell = this;
var h0 = innerGraph.mul(input, cell.Wix);
var h1 = innerGraph.mul(hidden_prev, cell.Wih);
var h11 = innerGraph.mul(context, cell.WiC);
var input_gate = innerGraph.sigmoid(
innerGraph.add(innerGraph.add(innerGraph.add(h0, h1), h11), cell.bi));
var h2 = innerGraph.mul(input, cell.Wfx);
var h3 = innerGraph.mul(hidden_prev, cell.Wfh);
var h33 = innerGraph.mul(context, cell.WfC);
var forget_gate = innerGraph.sigmoid(
innerGraph.add(innerGraph.add(innerGraph.add(h3, h2), h33),
cell.bf
)
);
var h4 = innerGraph.mul(input, cell.Wox);
var h5 = innerGraph.mul(hidden_prev, cell.Woh);
var h55 = innerGraph.mul(context, cell.WoC);
var output_gate = innerGraph.sigmoid(
innerGraph.add(
innerGraph.add(innerGraph.add(h5, h4), h55),
cell.bo
)
);
var h6 = innerGraph.mul(input, cell.Wcx);
var h7 = innerGraph.mul(hidden_prev, cell.Wch);
var h77 = innerGraph.mul(context, cell.WcC);
var cell_write = innerGraph.tanh(
innerGraph.add(
innerGraph.add(innerGraph.add(h7, h6), h77),
cell.bc
)
);
// compute new cell activation
var retain_cell = innerGraph.eltmul(forget_gate, cell_prev); // what do we keep from cell
var write_cell = innerGraph.eltmul(input_gate, cell_write); // what do we write to cell
var cell_d = innerGraph.add(retain_cell, write_cell); // new cell contents
// compute hidden state as gated, saturated cell activations
var hidden_d = innerGraph.eltmul(output_gate, innerGraph.tanh(cell_d));
this.ht = hidden_d;
this.ct = cell_d;
return ht;
}
public override List<WeightMatrix> getParams()
{
List<WeightMatrix> response = new List<WeightMatrix>();
response.AddRange(base.getParams());
response.Add(this.WiC);
response.Add(this.WfC);
response.Add(this.WoC);
response.Add(this.WcC);
return response;
}
}
[Serializable]
public class AttentionDecoder
{
public List<LSTMAttentionDecoderCell> decoders = new List<LSTMAttentionDecoderCell>();
public int hdim { get; set; }
public int dim { get; set; }
public int depth { get; set; }
public AttentionUnit Attention { get; set; }
public AttentionDecoder(int hdim, int dim, int depth)
{
decoders.Add(new LSTMAttentionDecoderCell(hdim, dim));
for (int i = 1; i < depth; i++)
{
decoders.Add(new LSTMAttentionDecoderCell(hdim, hdim));
}
Attention = new AttentionUnit(hdim);
this.hdim = hdim;
this.dim = dim;
this.depth = depth;
}
public void Reset()
{
foreach (var item in decoders)
{
item.Reset();
}
}
public WeightMatrix Decode(WeightMatrix input, List<WeightMatrix> encoderOutput, ComputeGraph g)
{
var V = input;
var lastStatus = this.decoders.FirstOrDefault().ct;
var context = Attention.Perform(encoderOutput, lastStatus, g);
foreach (var encoder in decoders)
{
var e = encoder.Step(context, V, g);
V = e;
}
return V;
}
public WeightMatrix Decode(WeightMatrix input, WeightMatrix encoderOutput, ComputeGraph g)
{
var V = input;
var lastStatus = this.decoders.FirstOrDefault().ct;
var context = Attention.Perform(encoderOutput, lastStatus, g);
foreach (var encoder in decoders)
{
var e = encoder.Step(context, V, g);
V = e;
}
return V;
}
public List<WeightMatrix> getParams()
{
List<WeightMatrix> response = new List<WeightMatrix>();
foreach (var item in decoders)
{
response.AddRange(item.getParams());
}
response.AddRange(Attention.getParams());
return response;
}
}
[Serializable]
public class Encoder
{
public List<LSTMCell> encoders = new List<LSTMCell>();
public int hdim { get; set; }
public int dim { get; set; }
public int depth { get; set; }
public Encoder(int hdim, int dim, int depth)
{
encoders.Add(new LSTMCell(hdim, dim));
//for (int i = 1; i < depth; i++)
//{
// encoders.Add(new LSTMCell(hdim, hdim));
//}
this.hdim = hdim;
this.dim = dim;
this.depth = depth;
}
public void Reset()
{
foreach (var item in encoders)
{
item.Reset();
}
}
public WeightMatrix Encode(WeightMatrix V, ComputeGraph g)
{
foreach (var encoder in encoders)
{
var e = encoder.Step(V, g);
V = e;
}
return V;
}
public List<WeightMatrix> Encode2(WeightMatrix V, ComputeGraph g)
{
List<WeightMatrix> res = new List<WeightMatrix>();
foreach (var encoder in encoders)
{
var e = encoder.Step(V, g);
V = e;
res.Add(e);
}
return res;
}
public List<WeightMatrix> getParams()
{
List<WeightMatrix> response = new List<WeightMatrix>();
foreach (var item in encoders)
{
response.AddRange(item.getParams());
}
return response;
}
}
[Serializable]
public class ModelAttentionData
{
public int max_chars_gen = 100; // max length of generated sentences
public int hidden_sizes;
public int letter_size;
// optimization
public double regc = 0.000001; // L2 regularization strength
public double learning_rate = 0.01; // learning rate
public double clipval = 5.0; // clip gradients at this value
public WeightMatrix Wil;
public Encoder encoder;
public Encoder ReversEncoder;
public AttentionDecoder decoder;
public bool UseDropout { get; set; }
//Output Layer Weights
public WeightMatrix Whd { get; set; }
public WeightMatrix bd { get; set; }
public int Depth { get; set; }
}
public class ComputeGraph
{
List<Action> backprop = new List<Action>();
public bool needs_backprop { get; set; }
public ComputeGraph(bool needBack = true)
{
this.needs_backprop = needBack;
}
public WeightMatrix tanh(WeightMatrix m)
{
// tanh nonlinearity
var res = new WeightMatrix(m.Rows, m.Columns, 0);
var n = m.Weight.Length;
for (var i = 0; i < n; i++)
{
res.Weight[i] = Math.Tanh(m.Weight[i]);
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (var i = 0; i < n; i++)
{
// grad for z = tanh(x) is (1 - z^2)
var mwi = res.Weight[i];
m.Gradient[i] += (1.0 - mwi * mwi) * res.Gradient[i];
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix mergeSY(List<WeightMatrix> vols)
{
WeightMatrix merged = new WeightMatrix(vols.Count, vols[0].Columns, 0);
for (int j = 0; j < vols.Count; j++)
{
var item = vols[j];
int n = item.Columns;
for (int i = 0; i < n; i++)
{
merged.Set(j, i, item.Weight[i]);
//merged.Set_Grad(j, i, 0, item.DW[i]);
}
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (int j = 0; j < vols.Count; j++)
{
var item = vols[j];
int n = item.Columns;
for (int i = 0; i < n; i++)
{
item.Gradient[i] += merged.Get_Grad(j, i);
}
}
};
this.backprop.Add(backward);
}
return merged;
}
public List<WeightMatrix> splitSY(WeightMatrix merged)
{
List<WeightMatrix> vols = new List<WeightMatrix>();
for (int j = 0; j < merged.Rows; j++)
{
var item = new WeightMatrix(1, merged.Columns, 0);
int n = merged.Columns;
for (int i = 0; i < n; i++)
{
item.Weight[i] = merged.Get(j, i);
//item.DW[i] = merged.Get_Grad(j, i, 0);
}
vols.Add(item);
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (int j = 0; j < vols.Count; j++)
{
var item = vols[j];
int n = item.Columns;
for (int i = 0; i < n; i++)
{
merged.Add_Grad(j, i, item.Gradient[i]);
}
}
};
this.backprop.Add(backward);
}
return vols;
}
public WeightMatrix concatRows(List<WeightMatrix> m1)
{
var res = new WeightMatrix(m1.Count, m1[0].Columns, 0);
for (var i = 0; i < m1.Count; i++)
{
for (int j = 0; j < m1[i].Columns; j++)
{
var el = m1[i].Get(0, j);
res.Set(i, j, el);
//res.Set_Grad(i, j, 0, m1.Get_Grad(i,j,0));
}
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (var i = 0; i < m1.Count; i++)
{
for (int j = 0; j < m1[i].Columns; j++)
{
var el = res.Get_Grad(i, j);
m1[i].Add_Grad(0, j, el);
}
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix RepeatRows(WeightMatrix m1, int rows)
{
var res = new WeightMatrix(rows, m1.Columns, 0);
for (var i = 0; i < rows; i++)
{
for (int j = 0; j < m1.Columns; j++)
{
var el = m1.Get(0, j);
res.Set(i, j, el);
// res.Set_Grad(i, j,m1.Get_Grad(i,j));
}
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (var i = 0; i < rows; i++)
{
for (int j = 0; j < m1.Columns; j++)
{
var el = res.Get_Grad(i, j);
m1.Add_Grad(0, j, el);
}
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix concatRows(WeightMatrix m1, WeightMatrix m2)
{
int sx = 1;
int sy = 1;
sx = m1.Rows + m2.Rows;
sy = m1.Columns;
var res = new WeightMatrix(sx, sy, 0);
var n = m1.Weight.Length;
for (var i = 0; i < m1.Rows; i++)
{
for (int j = 0; j < m1.Columns; j++)
{
var el = m1.Get(i, j);
res.Set(i, j, el);
//res.Set_Grad(i, j, 0, m1.Get_Grad(i,j,0));
}
}
for (var i = m1.Rows; i < m2.Rows + m1.Rows; i++)
{
for (int j = 0; j < m2.Columns; j++)
{
var el = m2.Get(i - m1.Rows, j);
res.Set(i, j, el);
//res.Set_Grad(i, j, 0, m2.Get_Grad(i - m1.SX, j, 0));
}
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (var i = 0; i < m1.Rows; i++)
{
for (int j = 0; j < m1.Columns; j++)
{
var el = res.Get_Grad(i, j);
m1.Add_Grad(i, j, el);
}
}
for (var i = m1.Rows; i < m2.Rows + m1.Rows; i++)
{
for (int j = 0; j < m2.Columns; j++)
{
var el = res.Get_Grad(i, j);
m2.Add_Grad(i - m1.Rows, j, el);
}
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix concatColumns(WeightMatrix m1, WeightMatrix m2)
{
int sx = 1;
int sy = 1;
sy = m1.Columns + m2.Columns;
sx = m1.Rows;
var res = new WeightMatrix(sx, sy, 0);
var n = m1.Weight.Length;
for (var i = 0; i < m1.Rows; i++)
{
for (int j = 0; j < m1.Columns; j++)
{
var el = m1.Get(i, j);
res.Set(i, j, el);
//res.Set_Grad(i, j, 0, m1.Get_Grad(i, j, 0));
}
}
for (var i = 0; i < m2.Rows; i++)
{
for (int j = m1.Columns; j < m2.Columns + m1.Columns; j++)
{
var el = m2.Get(i, j - m1.Columns);
res.Set(i, j, el);
//res.Set_Grad(i, j, 0, m2.Get_Grad(i, j - m1.SY, 0));
}
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (var i = 0; i < m1.Rows; i++)
{
for (int j = 0; j < m1.Columns; j++)
{
var el = res.Get_Grad(i, j);
m1.Add_Grad(i, j, el);
}
}
for (var i = 0; i < m2.Rows; i++)
{
for (int j = m1.Columns; j < m2.Columns + m1.Columns; j++)
{
var el = res.Get_Grad(i, j);
m2.Add_Grad(i, j - m1.Columns, el);
}
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix rowPluck(WeightMatrix m, int ix)
{
var d = m.Columns;
var res = new WeightMatrix(d, 1, 0);
for (int i = 0, n = d; i < n; i++) { res.Weight[i] = m.Weight[d * ix + i]; } // copy over the data
if (this.needs_backprop)
{
Action backward = () =>
{
for (int i = 0, n = d; i < n; i++) { m.Gradient[d * ix + i] += res.Gradient[i]; }
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix PeekRow(WeightMatrix m, int ix)
{
var d = m.Columns;
var res = new WeightMatrix(1, d, 0);
for (int i = 0, n = d; i < n; i++) { res.Weight[i] = m.Weight[d * ix + i]; } // copy over the data
if (this.needs_backprop)
{
Action backward = () =>
{
for (int i = 0, n = d; i < n; i++) { m.Gradient[d * ix + i] += res.Gradient[i]; }
};
this.backprop.Add(backward);
}
return res;
}
private double sig(double x)
{
// helper function for computing sigmoid
return 1.0 / (1 + Math.Exp(-x));
}
public WeightMatrix sigmoid(WeightMatrix m)
{
// sigmoid nonlinearity
WeightMatrix res = new WeightMatrix(m.Rows, m.Columns, 0);
var n = m.Weight.Length;
for (var i = 0; i < n; i++)
{
res.Weight[i] = sig(m.Weight[i]);
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (var i = 0; i < n; i++)
{
// grad for z = tanh(x) is (1 - z^2)
var mwi = res.Weight[i];
m.Gradient[i] += mwi * (1.0 - mwi) * res.Gradient[i];
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix relu(WeightMatrix m)
{
var res = new WeightMatrix(m.Rows, m.Columns, 0);
var n = m.Weight.Length;
for (var i = 0; i < n; i++)
{
res.Weight[i] = Math.Max(0, m.Weight[i]); // relu
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (var i = 0; i < n; i++)
{
m.Gradient[i] += m.Weight[i] > 0 ? res.Gradient[i] : 0.0;
}
};
this.backprop.Add(backward);
}
return res;
}
Random ra = new Random();
public WeightMatrix Dropout(WeightMatrix V, double drop_prob)
{
var res = new WeightMatrix(V.Rows, V.Columns, 0);
var N = V.Weight.Length;
bool[] dropped = new bool[V.Rows * V.Columns];
var V2 = V.Clone();
for (var i = 0; i < N; i++)
{
if (ra.NextDouble() < drop_prob) { V2.Weight[i] = 0; dropped[i] = true; } // drop!
else { dropped[i] = false; }
}
res = V2;
if (this.needs_backprop)
{
Action backward = () =>
{
var chain_grad = res;
V.Gradient = new double[N]; // zero out gradient wrt data
for (var i = 0; i < N; i++)
{
if (!(dropped[i]))
{
V.Gradient[i] += chain_grad.Gradient[i]; // copy over the gradient
}
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix Dropout(WeightMatrix V, bool[] droppedMask)
{
var res = new WeightMatrix(V.Rows, V.Columns, 0);
var N = V.Weight.Length;
var V2 = V.Clone();
for (var i = 0; i < N; i++)
{
if (droppedMask[i]) { V2.Weight[i] = 0; } // drop!
}
res = V2;
if (this.needs_backprop)
{
Action backward = () =>
{
var chain_grad = res;
V.Gradient = new double[N]; // zero out gradient wrt data
for (var i = 0; i < N; i++)
{
if (!(droppedMask[i]))
{
V.Gradient[i] += chain_grad.Gradient[i]; // copy over the gradient
}
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix mul2(WeightMatrix m1, WeightMatrix m2)
{
var res = mulParalel(m1, m2);
if (this.needs_backprop)
{
Action backward = () =>
{
DmulParalel(m1, m2, res);
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix mul(WeightMatrix m1, WeightMatrix m2)
{
var n = m1.Rows;
var d = m2.Columns;
var res = new WeightMatrix(n, d, 0);
for (var i = 0; i < m1.Rows; i++)
{ // loop over rows of m1
for (var j = 0; j < m2.Columns; j++)
{ // loop over cols of m2
var dot = 0.0;
for (var k = 0; k < m1.Columns; k++)
{ // dot product loop
dot += m1.Weight[m1.Columns * i + k] * m2.Weight[m2.Columns * k + j];
}
res.Weight[d * i + j] = dot;
}
}
// var res = mulParalel(m1, m2);
if (this.needs_backprop)
{
Action backward = () =>
{
for (var i = 0; i < m1.Rows; i++)
{ // loop over rows of m1
for (var j = 0; j < m2.Columns; j++)
{ // loop over cols of m2
for (var k = 0; k < m1.Columns; k++)
{ // dot product loop
var b = res.Gradient[d * i + j];
m1.Gradient[m1.Columns * i + k] += m2.Weight[m2.Columns * k + j] * b;
m2.Gradient[m2.Columns * k + j] += m1.Weight[m1.Columns * i + k] * b;
}
}
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix mulParalel(WeightMatrix m1, WeightMatrix m2)
{
var n = m1.Rows;
var d = m2.Columns;
var res = new WeightMatrix(n, d, 0);
var source = Enumerable.Range(0, n);
var pquery = from num in source.AsParallel()
select num;
pquery.ForAll((e) => MulKernel(m1, m2, d, res, e));
return res;
}
private static void MulKernel(WeightMatrix m1, WeightMatrix m2, int d, WeightMatrix res, int i)
{
for (var j = 0; j < m2.Columns; j++)
{ // loop over cols of m2
var dot = 0.0;
for (var k = 0; k < m1.Columns; k++)
{ // dot product loop
dot += m1.Weight[m1.Columns * i + k] * m2.Weight[m2.Columns * k + j];
}
res.Weight[d * i + j] = dot;
}
}
public void DmulParalel(WeightMatrix m1, WeightMatrix m2, WeightMatrix res)
{
var d = m2.Columns;
var source = Enumerable.Range(0, m1.Rows);
var pquery = from num in source.AsParallel()
select num;
pquery.ForAll((e) => DmulKernel(m1, m2, res, d, e));
}
private static void DmulKernel(WeightMatrix m1, WeightMatrix m2, WeightMatrix res, int d, int i)
{
for (var j = 0; j < m2.Columns; j++)
{ // loop over cols of m2
for (var k = 0; k < m1.Columns; k++)
{ // dot product loop
var b = res.Gradient[d * i + j];
m1.Gradient[m1.Columns * i + k] += m2.Weight[m2.Columns * k + j] * b;
m2.Gradient[m2.Columns * k + j] += m1.Weight[m1.Columns * i + k] * b;
}
}
}
public WeightMatrix add(WeightMatrix m1, WeightMatrix m2)
{
var res = new WeightMatrix(m1.Rows, m1.Columns, 0);
for (int i = 0, n = m1.Weight.Length; i < n; i++)
{
res.Weight[i] = m1.Weight[i] + m2.Weight[i];
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (int i = 0, n = m1.Weight.Length; i < n; i++)
{
m1.Gradient[i] += res.Gradient[i];
m2.Gradient[i] += res.Gradient[i];
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix eladd(WeightMatrix m1, WeightMatrix m2)
{
var res = new WeightMatrix(m1.Rows, m1.Columns, 0);
for (int i = 0, n = m1.Weight.Length; i < n; i++)
{
res.Weight[i] = m1.Weight[i] + m2.Weight[0];
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (int i = 0, n = m1.Weight.Length; i < n; i++)
{
m1.Gradient[i] += res.Gradient[i];
m2.Gradient[0] += res.Gradient[i];
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix eltmul(WeightMatrix m1, WeightMatrix m2)
{
var res = new WeightMatrix(m1.Rows, m1.Columns, 0);
for (int i = 0, n = m1.Weight.Length; i < n; i++)
{
res.Weight[i] = m1.Weight[i] * m2.Weight[i];
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (int i = 0, n = m1.Weight.Length; i < n; i++)
{
m1.Gradient[i] += m2.Weight[i] * res.Gradient[i];
m2.Gradient[i] += m1.Weight[i] * res.Gradient[i];
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix scalemul(WeightMatrix m1, WeightMatrix m2)
{
var res = new WeightMatrix(m1.Rows, m1.Columns, 0);
for (int i = 0, n = m1.Weight.Length; i < n; i++)
{
res.Weight[i] = m1.Weight[i] * m2.Weight[0];
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (int i = 0, n = m1.Weight.Length; i < n; i++)
{
m1.Gradient[i] += m2.Weight[0] * res.Gradient[i];
m2.Gradient[0] += m1.Weight[i] * res.Gradient[i];
}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix SoftmaxWithCrossEntropy(WeightMatrix m)
{
var res = new WeightMatrix(m.Rows, m.Columns, 0); // probability volume
var maxval = -999999.0;
for (int i = 0, n = m.Weight.Length; i < n; i++)
{
if (m.Weight[i] > maxval) maxval = m.Weight[i];
}
var s = 0.0;
for (int i = 0, n = m.Weight.Length; i < n; i++)
{
res.Weight[i] = Math.Exp(m.Weight[i] - maxval);
s += res.Weight[i];
}
for (int i = 0, n = m.Weight.Length; i < n; i++) { res.Weight[i] /= s; }
// no backward pass here needed
// since we will use the computed probabilities outside
// to set gradients directly on m
return res;
}
public WeightMatrix Softmax(WeightMatrix m)
{
var res = new WeightMatrix(m.Rows, m.Columns, 0); // probability volume
var maxval = -999999.0;
for (int i = 0, n = m.Weight.Length; i < n; i++)
{
if (m.Weight[i] > maxval) maxval = m.Weight[i];
}
var s = 0.0;
for (int i = 0, n = m.Weight.Length; i < n; i++)
{
res.Weight[i] = Math.Exp(m.Weight[i] - maxval);
s += res.Weight[i];
}
for (int i = 0, n = m.Weight.Length; i < n; i++) { res.Weight[i] /= s; }
if (this.needs_backprop)
{
Action backward = () =>
{
double ss = 0.0;
for (int ix = 0; ix < m.Weight.Length; ix++)
{
m.Gradient[ix] = res.Gradient[ix] * res.Weight[ix];
ss += res.Gradient[ix] * res.Weight[ix];
}
for (int ix = 0; ix < m.Weight.Length; ix++)
{
m.Gradient[ix] -= ss * res.Weight[ix];
}
//for (int i = 0; i < m.Count; i++)
//{
// m[i].Gradient[0] = res[i].Gradient[0] * res[i].Weight[0];
// ss += res[i].Gradient[0] * res[i].Weight[0];
//}
//for (int i = 0; i < m.Count; i++)
//{
// m[i].Gradient[0] -= ss * res[i].Weight[0];
//}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix weightRows(WeightMatrix m1, WeightMatrix weightRow)
{
var res = new WeightMatrix(m1.Rows, m1.Columns, 0);
for (int x = 0; x < m1.Rows; x++)
{
for (int y = 0; y < m1.Columns; y++)
{
var ix = ((m1.Columns * x) + y);
res.Weight[ix] = m1.Weight[ix] * weightRow.Weight[x];
// res.Add(x, y, m1.Get(x, y) * weightRow.Weight[x]);
}
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (int x = 0; x < m1.Rows; x++)
{
for (int y = 0; y < m1.Columns; y++)
{
var ix = ((m1.Columns * x) + y);
m1.Gradient[ix] += weightRow.Weight[x] * res.Gradient[ix];
weightRow.Gradient[x] += m1.Weight[ix] * res.Gradient[ix];
}
}
//for (int i = 0, n = m1.Weight.Length; i < n; i++)
//{
// m1.Gradient[i] += weightRow.Weight[0] * res.Gradient[i];
// weightRow.Gradient[0] += m1.Weight[i] * res.Gradient[i];
//}
};
this.backprop.Add(backward);
}
return res;
}
public WeightMatrix sumColumns(WeightMatrix m1)
{
var res = new WeightMatrix(1, m1.Columns, 0);
for (int x = 0; x < m1.Rows; x++)
{
for (int y = 0; y < m1.Columns; y++)
{
//var ix = ((m1.Columns * x) + y);
//res.Weight[y] += m1.Weight[ix] ;
// res.Add(x, y, m1.Get(x, y) * weightRow.Weight[x]);
res.Add(0, y, m1.Get(x, y));
}
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (int x = 0; x < m1.Rows; x++)
{
for (int y = 0; y < m1.Columns; y++)
{
//var ix = ((m1.Columns * x) + y);
//m1.Gradient[ix] += res.Gradient[y];
m1.Add_Grad(x, y, res.Get_Grad(0, y));
}
}
//for (int i = 0, n = m1.Weight.Length; i < n; i++)
//{
// m1.Gradient[i] += weightRow.Weight[0] * res.Gradient[i];
// weightRow.Gradient[0] += m1.Weight[i] * res.Gradient[i];
//}
};
this.backprop.Add(backward);
}
return res;
}
public List<WeightMatrix> Softmax(List<WeightMatrix> m)
{
var res = new List<WeightMatrix>(); // probability volume
var maxval = -999999.0;
for (int i = 0, n = m.Count; i < n; i++)
{
if (m[i].Weight[0] > maxval) maxval = m[i].Weight[0];
res.Add(new WeightMatrix(m[i].Rows, m[i].Columns, 0));
}
var s = 0.0;
for (int i = 0, n = m.Count; i < n; i++)
{
res[i].Weight[0] = Math.Exp(m[i].Weight[0] - maxval);
s += res[i].Weight[0];
}
for (int i = 0, n = m.Count; i < n; i++) { res[i].Weight[0] /= s; }
if (this.needs_backprop)
{
Action backward = () =>
{
double ss = 0.0;
for (int i = 0; i < m.Count; i++)
{
m[i].Gradient[0] += res[i].Gradient[0] * res[i].Weight[0];
ss += res[i].Gradient[0] * res[i].Weight[0];
}
for (int i = 0; i < m.Count; i++)
{
m[i].Gradient[0] -= ss * res[i].Weight[0];
}
};
this.backprop.Add(backward);
}
return res;
}
public void backward()
{
for (var i = this.backprop.Count - 1; i >= 0; i--)
{
this.backprop[i](); // tick!
}
}
public WeightMatrix repeatSX(WeightMatrix m, int p)
{
var res = new WeightMatrix(p, m.Columns, 0);
for (int i = 0; i < p; i++)
{
for (int j = 0; j < m.Columns; j++)
{
res.Set(i, j, m.Get(0, j));
//res.Set_Grad(i, j, 0, m.Get_Grad(0, j, 0));
}
}
if (this.needs_backprop)
{
Action backward = () =>
{
for (int i = 0; i < p; i++)
{
for (int j = 0; j < m.Columns; j++)
{
m.Add_Grad(0, j, res.Get_Grad(i, j));
}
}
};
this.backprop.Add(backward);
}
return res;
}
}
public static class Matrix
{
public static void MultiplyScalar(double[][] A, double c)
{
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
A[i][j] *= c;
}
}
}
public static void SubtractMatrix(double[][] A, double[][] c)
{
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
A[i][j] -= c[i][j];
}
}
}
public static double[][] CreateCSRMatrix(double[] data, double[] rows, double[] columns, int rowcount)
{
double[][] csr = MatrixCreate(rowcount, (int)columns.Max() + 1);
for (int i = 0; i < data.Length; i++)
{
int row = (int)rows[i];
if (row == 10)
{
row = 0;
}
csr[row][(int)columns[i]] = data[i];
}
return csr;
}
public static double[][] CreateCSRMatrix(double[] data, double[] rows, double[] columns)
{
double[][] csr = MatrixCreate((int)rows.Max() + 1, (int)columns.Max() + 1);
for (int i = 0; i < data.Length; i++)
{
int row = (int)rows[i];
if (row == 10)
{
row = 0;
}
csr[row][(int)columns[i]] = data[i];
}
return csr;
}
public static double[] RangeArray(double m)
{
double[] csr = new double[(int)m];
for (int i = 0; i < m; i++)
{
csr[i] = i;
}
return csr;
}
public static double[] Ones(double m)
{
double[] csr = new double[(int)m];
for (int i = 0; i < m; i++)
{
csr[i] = 1;
}
return csr;
}
public static double[][] Korn(double[][] A, double[][] B)
{
double[][] rotate = Matrix.MatrixCreate(A.Length * B.Length, A[0].Length * B[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
var element = A[i][j];
var newInd = i * B.Length;
var newIndj = j * B[0].Length;
for (int ii = newInd; ii < B.Length + newInd; ii++)
{
for (int jj = newIndj; jj < B[0].Length + newIndj; jj++)
{
rotate[ii][jj] = A[i][j] * B[ii - newInd][jj - newIndj];
}
}
}
}
return rotate;
}
public static double[] Scale(this double[] arr, double min, double max)
{
double m = (max - min) / (arr.Max() - arr.Min());
double c = min - arr.Min() * m;
var newarr = new double[arr.Length];
for (int i = 0; i < newarr.Length; i++)
newarr[i] = m * arr[i] + c;
return newarr;
}
public static double[][] Whitening(double[][] X, double epsilon)
{
var avg = X.Mean(1);
X = X.Subtract(avg.repmat(X.Length).Transpose());
//sigma = x * x' / size(x, 2);
var sigma = X.Multiply(X.Transpose()).Divide(X[0].Length);
//[U,S,V] = svd(sigma);
SingularValueDecomposition svd = new SingularValueDecomposition(sigma);
var U = svd.getU();
var S = svd.getS();
var xRot = U.Transpose().Multiply(X);
//xPCAwhite = diag(1./sqrt(diag(S) + epsilon)) * U' * x;
var xPCAwhite = (1.0d).Divide(S.Diagonal().Add(epsilon).Sqrt()).Diagonal().Multiply(U.Transpose()).Multiply(X);
var xZCAwhite2 = U.Multiply((1.0d).Divide(S.Diagonal().Add(epsilon).Sqrt()).Diagonal().Multiply(U.Transpose()));
var xZCAwhite = xZCAwhite2.Multiply(X);
return xZCAwhite;
}
public static double[][] PCA(double[][] X, int epsilon)
{
var avg = X.Mean(1);
X = X.Subtract(avg.repmat(X.Length).Transpose());
//sigma = x * x' / size(x, 2);
var sigma = X.Multiply(X.Transpose()).Divide(X[0].Length);
//[U,S,V] = svd(sigma);
SingularValueDecomposition svd = new SingularValueDecomposition(sigma);
var U = svd.getU();
var S = svd.getS();
List<int> indcies = new List<int>();
for (int i = 0; i < 60; i++)
{
indcies.Add(i);
}
var xRot = U.GetColumns(indcies.ToArray()).Transpose().GetColumns(indcies.ToArray()).Multiply(X);
//xPCAwhite = diag(1./sqrt(diag(S) + epsilon)) * U' * x;
// var xPCAwhite = (1.0d).Divide(S.Diagonal().Add(epsilon).Sqrt()).Diagonal().Multiply(U.Transpose()).Multiply(X);
// var xZCAwhite2 = U.Multiply((1.0d).Divide(S.Diagonal().Add(epsilon).Sqrt()).Diagonal().Multiply(U.Transpose()));
// var xZCAwhite = xZCAwhite2.Multiply(X);
return xRot.Transpose();
}
private static double[][] repmat(this double[] b1, int m)
{
double[][] r = new double[m][];
for (int i = 0; i < m; i++)
{
r[i] = b1;
}
return r.Transpose();
}
public static double[][] Diagonal(this double[] A)
{
double[][] means = new double[A.Length][];
for (int i = 0; i < A.Length; i++)
{
means[i] = new double[A.Length];
means[i][i] = A[i];
}
return means;
}
public static double[][] Reshape(this double[] A, int x, int y)
{
double[][] means = MatrixCreate(x, y);
for (int i = 0; i < x; i++)
{
for (int j = 0; j < y; j++)
{
means[i][j] = A[i * x + j];
}
}
return means;
}
public static double[] Diagonal(this double[][] A)
{
double[] means = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
means[i] = A[i][i];
}
return means;
}
public static double Max(this double[][] A)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = A[i].Max();
}
return C.Max();
}
public static double[][] Sqrt(this double[][] A)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = Math.Sqrt(A[i][j]);
}
}
return C;
}
public static double[][] Abs(this double[][] A)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = Math.Abs(A[i][j]);
}
}
return C;
}
public static double[] Sqrt(this double[] A)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = Math.Sqrt(A[i]);
}
return C;
}
public static double Variance(this double[] A)
{
double average = A.Average();
double sumOfSquaresOfDifferences = A.Select(val => (val - average) * (val - average)).Sum();
double sd = Math.Sqrt(sumOfSquaresOfDifferences / A.Length);
return sd * sd;
}
public static double[] Mean(this double[][] A)
{
double[] means = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
means[i] += A[i][j];
}
}
for (int i = 0; i < means.Length; i++)
{
means[i] /= A.Length;
}
return means;
}
public static double[] Mean(this double[][] A, int dim)
{
if (dim == 0)
{
return A.Mean();
}
else
{
double[] means = new double[A[0].Length];
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
means[j] += A[i][j];
}
}
for (int i = 0; i < means.Length; i++)
{
means[i] /= A[0].Length;
}
return means;
}
}
public static double[] ElementwisePower(this double[] A, int pow)
{
double[] res = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
res[i] = Math.Pow(A[i], pow);
}
return res;
}
public static double Mean(this double[] A)
{
double average = A.Average();
return average;
}
public static double StandardDeviation(this double[] A)
{
double average = A.Average();
double sumOfSquaresOfDifferences = A.Select(val => (val - average) * (val - average)).Sum();
double sd = Math.Sqrt(sumOfSquaresOfDifferences / A.Length);
return sd;
}
public static double StandardDeviation(this double[] A, double average)
{
double sumOfSquaresOfDifferences = A.Select(val => (val - average) * (val - average)).Sum();
double sd = Math.Sqrt(sumOfSquaresOfDifferences / A.Length);
return sd;
}
public static double[] Sum(this double[][] A, int c)
{
double[] C;
if (c == 0)
{
C = new double[A[0].Length];
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[j] += A[i][j];
}
}
return C;
}
else
{
C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i] += A[i][j];
}
}
return C;
}
}
public static double[] Sum(this double[][] A)
{
double[] C;
C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i] += A[i][j];
}
}
return C;
}
public static double[][] Log(this double[][] A)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = Math.Log(A[i][j]);
}
}
return C;
}
public static double[][] Exp(this double[][] A)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = Math.Exp(A[i][j]);
}
}
return C;
}
public static double[][] Divide(this double c, double[][] A)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = c / A[i][j];
}
}
return C;
}
public static double[][] Subtract(this double c, double[][] A)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = c - A[i][j];
}
}
return C;
}
public static double[] Divide(this double c, double[] A)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = c / A[i];
}
return C;
}
public static double[][] ElementwiseDivide(this double[][] A, double[] b, int dimension = 0, bool inPlace = false)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
if (dimension == 0)
{
C[i][j] = A[i][j] / b[i];
}
else
{
C[i][j] = A[i][j] / b[j];
}
}
}
return C;
}
public static double[][] ElementwiseMultiply(this double[][] A, double[][] c)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = A[i][j] * c[i][j];
}
}
return C;
}
public static double[][] ElementwiseDivide(this double[][] A, double[][] c)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = A[i][j] / c[i][j];
}
}
return C;
}
public static double[][] Add(this double[][] A, double c)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = A[i][j] + c;
}
}
return C;
}
public static double[][] Divide(this double[][] A, double c)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = A[i][j] / c;
}
}
return C;
}
public static double[][] Subtract(this double[][] A, double[][] c)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = A[i][j] - c[i][j];
}
}
return C;
}
public static double[][] Subtract(this double[][] A, double c)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = A[i][j] - c;
}
}
return C;
}
public static double[][] Subtract(this double[][] A, double[] c)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
if (c.Length == A[0].Length)
{
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = A[i][j] - c[j];
}
}
}
else
{
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = A[i][j] - c[i];
}
}
}
return C;
}
public static double[][] Add(this double[][] A, double[] c)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = A[i][j] + c[j];
}
}
return C;
}
public static double[] Subtract(this double[] A, double c)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = A[i] - c;
}
return C;
}
public static double[][] Transpose(this double[][] A)
{
double[][] C = MatrixCreate(A[0].Length, A.Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[j][i] = A[i][j];
}
}
return C;
}
public static double[] Subtract(this double c, double[] A)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = A[i] - c;
}
return C;
}
public static double[] Subtract(this double[] A, double[] c)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = A[i] - c[i];
}
return C;
}
public static double[] Add(this double[] A, double[] c)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = A[i] + c[i];
}
return C;
}
public static double[] Add(this double[] A, double c)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = A[i] + c;
}
return C;
}
public static double[] Divide(this double[] A, double c)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = A[i] / c;
}
return C;
}
public static double[] ElementwiseDivide(this double[] A, double[] c)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = A[i] / c[i];
}
return C;
}
public static double[][] Multiply(this double[][] A, double[][] B)
{
double[][] C = MatrixCreate(A.Length, B[0].Length);
var source = Enumerable.Range(0, A.Length);
var pquery = from num in source.AsParallel()
select num;
pquery.ForAll((e) => MultiplyKernel(A, B, C, e));
return C;
}
public static double[][] Multiply(this double[][] A, double c)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = A[i][j] * c;
}
}
return C;
}
public static double[][] Multiply(this double c, double[][] A)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = A[i][j] * c;
}
}
return C;
}
public static double[] Multiply(this double c, double[] A)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = A[i] * c;
}
return C;
}
public static double[][] Pow(this double[][] A, int d)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = Math.Pow(A[i][j], d);
}
}
return C;
}
public static double[] Pow(this double[] A, int d)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = Math.Pow(A[i], d);
}
return C;
}
public static double[] Multiply(this double[] A, double c)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = A[i] * c;
}
return C;
}
static void MultiplyKernel(double[][] A, double[][] B, double[][] C, int i)
{
double[] iRowA = A[i];
double[] iRowC = C[i];
for (int k = 0; k < A[0].Length; k++)
{
double[] kRowB = B[k];
double ikA = iRowA[k];
for (int j = 0; j < B[0].Length; j++)
{
iRowC[j] += ikA * kRowB[j];
}
}
}
public static double[][] Add(this double[][] A, double[][] c)
{
double[][] C = MatrixCreate(A.Length, A[0].Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
C[i][j] = A[i][j] + c[i][j];
}
}
return C;
}
private static Random rand = new Random(); //reuse this if you are generating many
public static double[][] RandomN(int rows, int cols, double min, double max)
{
// do error checking here
double[][] result = new double[rows][];
for (int i = 0; i < rows; ++i)
{
result[i] = new double[cols];
for (int j = 0; j < cols; j++)
{
double mean = 0;
double stdDev = Math.Pow(10, -4);
double u1 = rand.NextDouble(); //these are uniform(0,1) random doubles
double u2 = rand.NextDouble();
double randStdNormal = Math.Sqrt(-2.0 * Math.Log(u1)) *
Math.Sin(2.0 * Math.PI * u2); //random normal(0,1)
double randNormal =
mean + stdDev * randStdNormal; //random normal(mean,stdDev^2)
result[i][j] = randNormal;
}
}
return result;
}
public static double[][] MatrixCreate(int rows, int cols)
{
// do error checking here
double[][] result = new double[rows][];
for (int i = 0; i < rows; ++i)
result[i] = new double[cols];
return result;
}
static Random rng = new Random(3);
public static double[][] Random(int rows, int cols, double min, double max)
{
// do error checking here
double[][] result = new double[rows][];
for (int i = 0; i < rows; ++i)
{
result[i] = new double[cols];
for (int j = 0; j < cols; j++)
{
result[i][j] = min + (rng.NextDouble() * (max - min));
}
}
return result;
}
public static double Norm2(this double[][] A)
{
double norm = 0.0;
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < A[0].Length; j++)
{
norm += A[i][j] * A[i][j];
}
}
return (double)Math.Sqrt(norm);
}
public static double Norm2(this double[] A)
{
double norm = 0.0;
for (int i = 0; i < A.Length; i++)
{
norm += A[i] * A[i];
}
return (double)Math.Sqrt(norm);
}
public static double[][] GetColumns(this double[][] A, int[] c)
{
double[][] C = MatrixCreate(A.Length, c.Length);
for (int i = 0; i < A.Length; i++)
{
for (int j = 0; j < c.Length; j++)
{
C[i][j] = A[i][c[j]];
}
}
return C;
}
public static double[] GetColumn(this double[][] A, int c)
{
double[] C = new double[A.Length];
for (int i = 0; i < A.Length; i++)
{
C[i] = A[i][c];
}
return C;
}
public static double[] GetRow(this double[][] A, int c)
{
double[] C = new double[A[0].Length];
for (int i = 0; i < C.Length; i++)
{
C[i] = A[c][i];
}
return C;
}
public static double[] Abs(this double[] A)
{
double[] C = new double[A.Length];
for (int i = 0; i < C.Length; i++)
{
C[i] = Math.Abs(A[i]);
}
return C;
}
public static double[] Ones(int n)
{
double[] res = new double[n];
for (int i = 0; i < n; i++)
{
res[i] = 1;
}
return res;
}
public static double[][] Ones(int n, int m)
{
double[][] res = MatrixCreate(n, m);
for (int i = 0; i < n; i++)
{
for (int j = 0; j < m; j++)
{
res[i][j] = 1;
}
}
return res;
}
public static double[] Zeros(int n)
{
double[] res = new double[n];
for (int i = 0; i < n; i++)
{
res[i] = 0;
}
return res;
}
public static double[][] Zeros(int n, int m)
{
double[][] res = MatrixCreate(n, m);
//for (int i = 0; i < n; i++)
//{
// for (int j = 0; j < m; j++)
// {
// res[i][j] = 0;
// }
//}
return res;
}
public static double Get(this double[][] A, int x, int y)
{
return A[x][y];
}
public static void Set(this double[][] A, int x, int y, double val)
{
A[x][y] = val;
}
public static double[][] Rot90(this double[][] start)
{
double[][] rotate = Matrix.MatrixCreate(start[0].Length, start.Length);
for (int i = 0; i < start.Length; i++)
{
int ith = start[0].Length - 1;
for (int j = 0; j < start[0].Length; j++)
{
rotate[ith][i] = start[i][j];
ith--;
}
}
return rotate;
}
public static double[][] Rot90(this double[][] start, int num)
{
double[][] res = start;
for (int i = 0; i < num; i++)
{
res = Rot90(res);
}
return res;
}
public static double[][] Copy(this double[][] copy)
{
double[][] res = MatrixCreate(copy.Length, copy[0].Length);
for (int i = 0; i < res.Length; i++)
{
for (int j = 0; j < res[0].Length; j++)
{
res[i][j] = copy[i][j];
}
}
return res;
}
public static double[][] ConcatColumnVector(this double[][] first, double[][] second)
{
double[][] res = MatrixCreate(1, first[0].Length + second[0].Length);
for (int i = 0; i < first[0].Length; i++)
{
res[0][i] = first[0][i];
}
for (int i = first[0].Length; i < second[0].Length; i++)
{
res[0][i] = second[0][i - first[0].Length];
}
return res;
}
public static Tuple<double[][], double[][]> SplitColumnVector(this double[][] res, int firstlen)
{
double[][] first = MatrixCreate(1, firstlen);
double[][] second = MatrixCreate(1, res[0].Length - firstlen);
for (int i = 0; i < first[0].Length; i++)
{
first[0][i] = res[0][i];
}
for (int i = first[0].Length; i < second[0].Length; i++)
{
second[0][i - first[0].Length] = res[0][i];
}
return new Tuple<double[][], double[][]>(first, second);
}
}
[Serializable]
public class WeightMatrix
{
public int Rows { get; set; }
public int Columns { get; set; }
public double[] Weight { get; set; }
public double[] Gradient { get; set; }
public double[] Cash { get; set; }
public WeightMatrix()
{
}
public WeightMatrix(double[] weights)
{
this.Rows = weights.Length;
this.Columns = 1;
this.Weight = new double[this.Rows];
this.Gradient = new double[this.Rows];
this.Cash = new double[this.Rows];
this.Weight = weights;
}
public WeightMatrix(int rows, int columns, bool normal = false)
{
this.Rows = rows;
this.Columns = columns;
var n = rows * columns;
this.Weight = new double[n];
this.Gradient = new double[n];
this.Cash = new double[n];
var scale = Math.Sqrt(1.0 / (rows * columns));
if (normal)
{
scale = 0.08;
}
for (int i = 0; i < n; i++)
{
this.Weight[i] = RandomGenerator.NormalRandom(0.0, scale);
}
}
public WeightMatrix(int rows, int columns, double c)
{
this.Rows = rows;
this.Columns = columns;
var n = rows * columns;
this.Weight = new double[n];
this.Gradient = new double[n];
this.Cash = new double[n];
for (int i = 0; i < n; i++)
{
this.Weight[i] = c;
}
}
public override string ToString()
{
return "{" + Rows.ToString() + "," + Columns.ToString() + "}";
}
public double Get(int x, int y)
{
var ix = ((this.Columns * x) + y);
return this.Weight[ix];
}
public void Set(int x, int y, double v)
{
var ix = ((this.Columns * x) + y);
this.Weight[ix] = v;
}
public void Add(int x, int y, double v)
{
var ix = ((this.Columns * x) + y);
this.Weight[ix] += v;
}
public double Get_Grad(int x, int y)
{
var ix = ((this.Columns * x) + y);
return this.Gradient[ix];
}
public void Set_Grad(int x, int y, double v)
{
var ix = ((this.Columns * x) + y);
this.Gradient[ix] = v;
}
public void Add_Grad(int x, int y, double v)
{
var ix = ((this.Columns * x) + y);
this.Gradient[ix] += v;
}
public WeightMatrix CloneAndZero()
{
return new WeightMatrix(this.Rows, this.Columns, 0.0);
}
public WeightMatrix Clone()
{
var v = new WeightMatrix(this.Rows, this.Columns, 0.0);
var n = this.Weight.Length;
for (int i = 0; i < n; i++)
{
v.Weight[i] = this.Weight[i];
}
return v;
}
}
[Serializable]
public static class RandomGenerator
{
public static bool Return_V { get; set; }
public static double V_Val { get; set; }
private static Random random = new Random(3);
public static double GaussRandom()
{
if (Return_V)
{
Return_V = false;
return V_Val;
}
var u = 2 * random.NextDouble() - 1;
var v = 2 * random.NextDouble() - 1;
var r = (u * u) + (v * v);
if (r == 0 || r > 1) return GaussRandom();
var c = Math.Sqrt(-2 * Math.Log(r) / r);
V_Val = v * c;
Return_V = true;
return u * c;
}
public static double FloatRandom(double a, double b)
{
return random.NextDouble() * (b - a) + a;
}
public static double IntegarRandom(double a, double b)
{
return Math.Floor(random.NextDouble() * (b - a) + a);
}
public static double NormalRandom(double mu, double std)
{
return mu + GaussRandom() * std;
}
}
public class SingularValueDecomposition
{
/* ------------------------
Class variables
* ------------------------ */
/** Arrays for internal storage of U and V.
@serial internal storage of U.
@serial internal storage of V.
*/
private double[][] U, V;
/** Array for internal storage of singular values.
@serial internal storage of singular values.
*/
private double[] s;
/** Row and column dimensions.
@serial row dimension.
@serial column dimension.
*/
private int m, n;
/* ------------------------
Constructor
* ------------------------ */
/** Construct the singular value decomposition
@param A Rectangular matrix
@return Structure to access U, S and V.
*/
public SingularValueDecomposition(double[][] Arg)
{
// Derived from LINPACK code.
// Initialize.
double[][] A = Arg;
m = Arg.Length;
n = Arg[0].Length;
int nu = Math.Max(m, n);
s = new double[Math.Min(m + 1, n)];
U = new double[m][];
for (int i = 0; i < m; i++)
{
U[i] = new double[nu];
}
V = new double[n][];
for (int i = 0; i < n; i++)
{
V[i] = new double[n];
}
double[] e = new double[n];
double[] work = new double[m];
Boolean wantu = true;
Boolean wantv = true;
// Reduce A to bidiagonal form, storing the diagonal elements
// in s and the super-diagonal elements in e.
int nct = Math.Min(m - 1, n);
int nrt = Math.Max(0, Math.Min(n - 2, m));
for (int k = 0; k < Math.Max(nct, nrt); k++)
{
if (k < nct)
{
// Compute the transformation for the k-th column and
// place the k-th diagonal in s[k].
// Compute 2-norm of k-th column without under/overflow.
s[k] = 0;
for (int i = k; i < m; i++)
{
s[k] = MathTools.hypot(s[k], A[i][k]);
}
if (s[k] != 0.0)
{
if (A[k][k] < 0.0)
{
s[k] = -s[k];
}
for (int i = k; i < m; i++)
{
A[i][k] /= s[k];
}
A[k][k] += 1.0;
}
s[k] = -s[k];
}
for (int j = k + 1; j < n; j++)
{
if ((k < nct) & (s[k] != 0.0))
{
// Apply the transformation.
double t = 0;
for (int i = k; i < m; i++)
{
t += A[i][k] * A[i][j];
}
t = -t / A[k][k];
for (int i = k; i < m; i++)
{
A[i][j] += t * A[i][k];
}
}
// Place the k-th row of A into e for the
// subsequent calculation of the row transformation.
e[j] = A[k][j];
}
if (wantu & (k < nct))
{
// Place the transformation in U for subsequent back
// multiplication.
for (int i = k; i < m; i++)
{
U[i][k] = A[i][k];
}
}
if (k < nrt)
{
// Compute the k-th row transformation and place the
// k-th super-diagonal in e[k].
// Compute 2-norm without under/overflow.
e[k] = 0;
for (int i = k + 1; i < n; i++)
{
e[k] = MathTools.hypot(e[k], e[i]);
}
if (e[k] != 0.0)
{
if (e[k + 1] < 0.0)
{
e[k] = -e[k];
}
for (int i = k + 1; i < n; i++)
{
e[i] /= e[k];
}
e[k + 1] += 1.0;
}
e[k] = -e[k];
if ((k + 1 < m) & (e[k] != 0.0))
{
// Apply the transformation.
for (int i = k + 1; i < m; i++)
{
work[i] = 0.0;
}
for (int j = k + 1; j < n; j++)
{
for (int i = k + 1; i < m; i++)
{
work[i] += e[j] * A[i][j];
}
}
for (int j = k + 1; j < n; j++)
{
double t = -e[j] / e[k + 1];
for (int i = k + 1; i < m; i++)
{
A[i][j] += t * work[i];
}
}
}
if (wantv)
{
// Place the transformation in V for subsequent
// back multiplication.
for (int i = k + 1; i < n; i++)
{
V[i][k] = e[i];
}
}
}
}
// Set up the final bidiagonal matrix or order p.
int p = Math.Min(n, m + 1);
if (nct < n)
{
s[nct] = A[nct][nct];
}
if (m < p)
{
s[p - 1] = 0.0;
}
if (nrt + 1 < p)
{
e[nrt] = A[nrt][p - 1];
}
e[p - 1] = 0.0;
// If required, generate U.
if (wantu)
{
for (int j = nct; j < nu; j++)
{
for (int i = 0; i < m; i++)
{
U[i][j] = 0.0;
}
U[j][j] = 1.0;
}
for (int k = nct - 1; k >= 0; k--)
{
if (s[k] != 0.0)
{
for (int j = k + 1; j < nu; j++)
{
double t = 0;
for (int i = k; i < m; i++)
{
t += U[i][k] * U[i][j];
}
t = -t / U[k][k];
for (int i = k; i < m; i++)
{
U[i][j] += t * U[i][k];
}
}
for (int i = k; i < m; i++)
{
U[i][k] = -U[i][k];
}
U[k][k] = 1.0 + U[k][k];
for (int i = 0; i < k - 1; i++)
{
U[i][k] = 0.0;
}
}
else
{
for (int i = 0; i < m; i++)
{
U[i][k] = 0.0;
}
U[k][k] = 1.0;
}
}
}
// If required, generate V.
if (wantv)
{
for (int k = n - 1; k >= 0; k--)
{
if ((k < nrt) & (e[k] != 0.0))
{
for (int j = k + 1; j < nu; j++)
{
double t = 0;
for (int i = k + 1; i < n; i++)
{
t += V[i][k] * V[i][j];
}
t = -t / V[k + 1][k];
for (int i = k + 1; i < n; i++)
{
V[i][j] += t * V[i][k];
}
}
}
for (int i = 0; i < n; i++)
{
V[i][k] = 0.0;
}
V[k][k] = 1.0;
}
}
// Main iteration loop for the singular values.
int pp = p - 1;
int iter = 0;
double eps = Math.Pow(2.0, -52.0);
while (p > 0)
{
int k, kase;
// Here is where a test for too many iterations would go.
// This section of the program inspects for
// negligible elements in the s and e arrays. On
// completion the variables kase and k are set as follows.
// kase = 1 if s(p) and e[k-1] are negligible and k<p
// kase = 2 if s(k) is negligible and k<p
// kase = 3 if e[k-1] is negligible, k<p, and
// s(k), ..., s(p) are not negligible (qr step).
// kase = 4 if e(p-1) is negligible (convergence).
for (k = p - 2; k >= -1; k--)
{
if (k == -1)
{
break;
}
if (Math.Abs(e[k]) <= eps * (Math.Abs(s[k]) + Math.Abs(s[k + 1])))
{
e[k] = 0.0;
break;
}
}
if (k == p - 2)
{
kase = 4;
}
else
{
int ks;
for (ks = p - 1; ks >= k; ks--)
{
if (ks == k)
{
break;
}
double t = (ks != p ? Math.Abs(e[ks]) : 0d) +
(ks != k + 1 ? Math.Abs(e[ks - 1]) : 0d);
if (Math.Abs(s[ks]) <= eps * t)
{
s[ks] = 0.0;
break;
}
}
if (ks == k)
{
kase = 3;
}
else if (ks == p - 1)
{
kase = 1;
}
else
{
kase = 2;
k = ks;
}
}
k++;
// Perform the task indicated by kase.
switch (kase)
{
// Deflate negligible s(p).
case 1:
{
double f = e[p - 2];
e[p - 2] = 0.0;
for (int j = p - 2; j >= k; j--)
{
double t = MathTools.hypot(s[j], f);
double cs = s[j] / t;
double sn = f / t;
s[j] = t;
if (j != k)
{
f = -sn * e[j - 1];
e[j - 1] = cs * e[j - 1];
}
if (wantv)
{
for (int i = 0; i < n; i++)
{
t = cs * V[i][j] + sn * V[i][p - 1];
V[i][p - 1] = -sn * V[i][j] + cs * V[i][p - 1];
V[i][j] = t;
}
}
}
}
break;
// Split at negligible s(k).
case 2:
{
double f = e[k - 1];
e[k - 1] = 0.0;
for (int j = k; j < p; j++)
{
double t = MathTools.hypot(s[j], f);
double cs = s[j] / t;
double sn = f / t;
s[j] = t;
f = -sn * e[j];
e[j] = cs * e[j];
if (wantu)
{
for (int i = 0; i < m; i++)
{
t = cs * U[i][j] + sn * U[i][k - 1];
U[i][k - 1] = -sn * U[i][j] + cs * U[i][k - 1];
U[i][j] = t;
}
}
}
}
break;
// Perform one qr step.
case 3:
{
// Calculate the shift.
double scale = Math.Max(Math.Max(Math.Max(Math.Max(
Math.Abs(s[p - 1]), Math.Abs(s[p - 2])), Math.Abs(e[p - 2])),
Math.Abs(s[k])), Math.Abs(e[k]));
double sp = s[p - 1] / scale;
double spm1 = s[p - 2] / scale;
double epm1 = e[p - 2] / scale;
double sk = s[k] / scale;
double ek = e[k] / scale;
double b = ((spm1 + sp) * (spm1 - sp) + epm1 * epm1) / 2.0;
double c = (sp * epm1) * (sp * epm1);
double shift = 0.0;
if ((b != 0.0) | (c != 0.0))
{
shift = Math.Sqrt(b * b + c);
if (b < 0.0)
{
shift = -shift;
}
shift = c / (b + shift);
}
double f = (sk + sp) * (sk - sp) + shift;
double g = sk * ek;
// Chase zeros.
for (int j = k; j < p - 1; j++)
{
double t = MathTools.hypot(f, g);
double cs = f / t;
double sn = g / t;
if (j != k)
{
e[j - 1] = t;
}
f = cs * s[j] + sn * e[j];
e[j] = cs * e[j] - sn * s[j];
g = sn * s[j + 1];
s[j + 1] = cs * s[j + 1];
if (wantv)
{
for (int i = 0; i < n; i++)
{
t = cs * V[i][j] + sn * V[i][j + 1];
V[i][j + 1] = -sn * V[i][j] + cs * V[i][j + 1];
V[i][j] = t;
}
}
t = MathTools.hypot(f, g);
cs = f / t;
sn = g / t;
s[j] = t;
f = cs * e[j] + sn * s[j + 1];
s[j + 1] = -sn * e[j] + cs * s[j + 1];
g = sn * e[j + 1];
e[j + 1] = cs * e[j + 1];
if (wantu && (j < m - 1))
{
for (int i = 0; i < m; i++)
{
t = cs * U[i][j] + sn * U[i][j + 1];
U[i][j + 1] = -sn * U[i][j] + cs * U[i][j + 1];
U[i][j] = t;
}
}
}
e[p - 2] = f;
iter = iter + 1;
}
break;
// Convergence.
case 4:
{
// Make the singular values positive.
if (s[k] <= 0.0)
{
s[k] = (s[k] < 0.0 ? -s[k] : 0.0);
if (wantv)
{
for (int i = 0; i <= pp; i++)
{
V[i][k] = -V[i][k];
}
}
}
// Order the singular values.
while (k < pp)
{
if (s[k] >= s[k + 1])
{
break;
}
double t = s[k];
s[k] = s[k + 1];
s[k + 1] = t;
if (wantv && (k < n - 1))
{
for (int i = 0; i < n; i++)
{
t = V[i][k + 1]; V[i][k + 1] = V[i][k]; V[i][k] = t;
}
}
if (wantu && (k < m - 1))
{
for (int i = 0; i < m; i++)
{
t = U[i][k + 1]; U[i][k + 1] = U[i][k]; U[i][k] = t;
}
}
k++;
}
iter = 0;
p--;
}
break;
}
}
}
/* ------------------------
Public Methods
* ------------------------ */
/** Return the left singular vectors
@return U
*/
public double[][] getU()
{
return U;
}
/** Return the right singular vectors
@return V
*/
public double[][] getV()
{
return V;
}
/** Return the one-dimensional array of singular values
@return diagonal of S.
*/
public double[] getSingularValues()
{
return s;
}
/** Return the diagonal matrix of singular values
@return S
*/
public double[][] getS()
{
double[][] X = new double[n][];
for (int i = 0; i < n; i++)
{
X[i] = new double[n];
}
double[][] S = X;
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
S[i][j] = 0.0;
}
S[i][i] = this.s[i];
}
return X;
}
public double[][] getDiagonal()
{
double[][] X = new double[n][];
for (int i = 0; i < n; i++)
{
X[i] = new double[n];
}
double[][] S = X;
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
S[i][j] = 0.0;
}
S[i][i] = this.s[i];
}
return X;
}
/** Two norm
@return max(S)
*/
public double norm2()
{
return s[0];
}
/** Two norm condition number
@return max(S)/min(S)
*/
public double cond()
{
return s[0] / s[Math.Min(m, n) - 1];
}
/** Effective numerical matrix rank
@return Number of nonnegligible singular values.
*/
public int rank()
{
double eps = Math.Pow(2.0, -52.0);
double tol = Math.Max(m, n) * s[0] * eps;
int r = 0;
for (int i = 0; i < s.Length; i++)
{
if (s[i] > tol)
{
r++;
}
}
return r;
}
}
public class EigenvalueDecomposition
{
/* ------------------------
Class variables
* ------------------------ */
/** Row and column dimension (square matrix).
@serial matrix dimension.
*/
private int n;
/** Symmetry flag.
@serial internal symmetry flag.
*/
private Boolean issymmetric;
/** Arrays for internal storage of eigenvalues.
@serial internal storage of eigenvalues.
*/
private double[] d, e;
/** Array for internal storage of eigenvectors.
@serial internal storage of eigenvectors.
*/
private double[][] V;
/** Array for internal storage of nonsymmetric Hessenberg form.
@serial internal storage of nonsymmetric Hessenberg form.
*/
private double[][] H;
/** Working storage for nonsymmetric algorithm.
@serial working storage for nonsymmetric algorithm.
*/
private double[] ort;
/* ------------------------
Private Methods
* ------------------------ */
// Symmetric Householder reduction to tridiagonal form.
private void tred2()
{
// This is derived from the Algol procedures tred2 by
// Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
// Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
for (int j = 0; j < n; j++)
{
d[j] = V[n - 1][j];
}
// Householder reduction to tridiagonal form.
for (int i = n - 1; i > 0; i--)
{
// Scale to avoid under/overflow.
double scale = 0.0;
double h = 0.0;
for (int k = 0; k < i; k++)
{
scale = scale + Math.Abs(d[k]);
}
if (scale == 0.0)
{
e[i] = d[i - 1];
for (int j = 0; j < i; j++)
{
d[j] = V[i - 1][j];
V[i][j] = 0.0;
V[j][i] = 0.0;
}
}
else
{
// Generate Householder vector.
for (int k = 0; k < i; k++)
{
d[k] /= scale;
h += d[k] * d[k];
}
double f = d[i - 1];
double g = Math.Sqrt(h);
if (f > 0)
{
g = -g;
}
e[i] = scale * g;
h = h - f * g;
d[i - 1] = f - g;
for (int j = 0; j < i; j++)
{
e[j] = 0.0;
}
// Apply similarity transformation to remaining columns.
for (int j = 0; j < i; j++)
{
f = d[j];
V[j][i] = f;
g = e[j] + V[j][j] * f;
for (int k = j + 1; k <= i - 1; k++)
{
g += V[k][j] * d[k];
e[k] += V[k][j] * f;
}
e[j] = g;
}
f = 0.0;
for (int j = 0; j < i; j++)
{
e[j] /= h;
f += e[j] * d[j];
}
double hh = f / (h + h);
for (int j = 0; j < i; j++)
{
e[j] -= hh * d[j];
}
for (int j = 0; j < i; j++)
{
f = d[j];
g = e[j];
for (int k = j; k <= i - 1; k++)
{
V[k][j] -= (f * e[k] + g * d[k]);
}
d[j] = V[i - 1][j];
V[i][j] = 0.0;
}
}
d[i] = h;
}
// Accumulate transformations.
for (int i = 0; i < n - 1; i++)
{
V[n - 1][i] = V[i][i];
V[i][i] = 1.0;
double h = d[i + 1];
if (h != 0.0)
{
for (int k = 0; k <= i; k++)
{
d[k] = V[k][i + 1] / h;
}
for (int j = 0; j <= i; j++)
{
double g = 0.0;
for (int k = 0; k <= i; k++)
{
g += V[k][i + 1] * V[k][j];
}
for (int k = 0; k <= i; k++)
{
V[k][j] -= g * d[k];
}
}
}
for (int k = 0; k <= i; k++)
{
V[k][i + 1] = 0.0;
}
}
for (int j = 0; j < n; j++)
{
d[j] = V[n - 1][j];
V[n - 1][j] = 0.0;
}
V[n - 1][n - 1] = 1.0;
e[0] = 0.0;
}
// Symmetric tridiagonal QL algorithm.
private void tql2()
{
// This is derived from the Algol procedures tql2, by
// Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
// Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
for (int i = 1; i < n; i++)
{
e[i - 1] = e[i];
}
e[n - 1] = 0.0;
double f = 0.0;
double tst1 = 0.0;
double eps = Math.Pow(2.0, -52.0);
for (int l = 0; l < n; l++)
{
// Find small subdiagonal element
tst1 = Math.Max(tst1, Math.Abs(d[l]) + Math.Abs(e[l]));
int m = l;
while (m < n)
{
if (Math.Abs(e[m]) <= eps * tst1)
{
break;
}
m++;
}
// If m == l, d[l] is an eigenvalue,
// otherwise, iterate.
if (m > l)
{
int iter = 0;
do
{
iter = iter + 1; // (Could check iteration count here.)
// Compute implicit shift
double g = d[l];
double p = (d[l + 1] - g) / (2.0 * e[l]);
double r = MathTools.hypot(p, 1.0);
if (p < 0)
{
r = -r;
}
d[l] = e[l] / (p + r);
d[l + 1] = e[l] * (p + r);
double dl1 = d[l + 1];
double h = g - d[l];
for (int i = l + 2; i < n; i++)
{
d[i] -= h;
}
f = f + h;
// Implicit QL transformation.
p = d[m];
double c = 1.0;
double c2 = c;
double c3 = c;
double el1 = e[l + 1];
double s = 0.0;
double s2 = 0.0;
for (int i = m - 1; i >= l; i--)
{
c3 = c2;
c2 = c;
s2 = s;
g = c * e[i];
h = c * p;
r = MathTools.hypot(p, e[i]);
e[i + 1] = s * r;
s = e[i] / r;
c = p / r;
p = c * d[i] - s * g;
d[i + 1] = h + s * (c * g + s * d[i]);
// Accumulate transformation.
for (int k = 0; k < n; k++)
{
h = V[k][i + 1];
V[k][i + 1] = s * V[k][i] + c * h;
V[k][i] = c * V[k][i] - s * h;
}
}
p = -s * s2 * c3 * el1 * e[l] / dl1;
e[l] = s * p;
d[l] = c * p;
// Check for convergence.
} while (Math.Abs(e[l]) > eps * tst1);
}
d[l] = d[l] + f;
e[l] = 0.0;
}
// Sort eigenvalues and corresponding vectors.
for (int i = 0; i < n - 1; i++)
{
int k = i;
double p = d[i];
for (int j = i + 1; j < n; j++)
{
if (d[j] < p)
{
k = j;
p = d[j];
}
}
if (k != i)
{
d[k] = d[i];
d[i] = p;
for (int j = 0; j < n; j++)
{
p = V[j][i];
V[j][i] = V[j][k];
V[j][k] = p;
}
}
}
}
// Nonsymmetric reduction to Hessenberg form.
private void orthes()
{
// This is derived from the Algol procedures orthes and ortran,
// by Martin and Wilkinson, Handbook for Auto. Comp.,
// Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutines in EISPACK.
int low = 0;
int high = n - 1;
for (int m = low + 1; m <= high - 1; m++)
{
// Scale column.
double scale = 0.0;
for (int i = m; i <= high; i++)
{
scale = scale + Math.Abs(H[i][m - 1]);
}
if (scale != 0.0)
{
// Compute Householder transformation.
double h = 0.0;
for (int i = high; i >= m; i--)
{
ort[i] = H[i][m - 1] / scale;
h += ort[i] * ort[i];
}
double g = Math.Sqrt(h);
if (ort[m] > 0)
{
g = -g;
}
h = h - ort[m] * g;
ort[m] = ort[m] - g;
// Apply Householder similarity transformation
// H = (I-u*u'/h)*H*(I-u*u')/h)
for (int j = m; j < n; j++)
{
double f = 0.0;
for (int i = high; i >= m; i--)
{
f += ort[i] * H[i][j];
}
f = f / h;
for (int i = m; i <= high; i++)
{
H[i][j] -= f * ort[i];
}
}
for (int i = 0; i <= high; i++)
{
double f = 0.0;
for (int j = high; j >= m; j--)
{
f += ort[j] * H[i][j];
}
f = f / h;
for (int j = m; j <= high; j++)
{
H[i][j] -= f * ort[j];
}
}
ort[m] = scale * ort[m];
H[m][m - 1] = scale * g;
}
}
// Accumulate transformations (Algol's ortran).
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
V[i][j] = (i == j ? 1.0 : 0.0);
}
}
for (int m = high - 1; m >= low + 1; m--)
{
if (H[m][m - 1] != 0.0)
{
for (int i = m + 1; i <= high; i++)
{
ort[i] = H[i][m - 1];
}
for (int j = m; j <= high; j++)
{
double g = 0.0;
for (int i = m; i <= high; i++)
{
g += ort[i] * V[i][j];
}
// Double division avoids possible underflow
g = (g / ort[m]) / H[m][m - 1];
for (int i = m; i <= high; i++)
{
V[i][j] += g * ort[i];
}
}
}
}
}
// Complex scalar division.
private double cdivr, cdivi;
private void cdiv(double xr, double xi, double yr, double yi)
{
double r, d;
if (Math.Abs(yr) > Math.Abs(yi))
{
r = yi / yr;
d = yr + r * yi;
cdivr = (xr + r * xi) / d;
cdivi = (xi - r * xr) / d;
}
else
{
r = yr / yi;
d = yi + r * yr;
cdivr = (r * xr + xi) / d;
cdivi = (r * xi - xr) / d;
}
}
// Nonsymmetric reduction from Hessenberg to real Schur form.
private void hqr2()
{
// This is derived from the Algol procedure hqr2,
// by Martin and Wilkinson, Handbook for Auto. Comp.,
// Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
// Initialize
int nn = this.n;
int n = nn - 1;
int low = 0;
int high = nn - 1;
double eps = Math.Pow(2.0, -52.0);
double exshift = 0.0;
double p = 0, q = 0, r = 0, s = 0, z = 0, t, w, x, y;
// Store roots isolated by balanc and compute matrix norm
double norm = 0.0;
for (int i = 0; i < nn; i++)
{
if (i < low | i > high)
{
d[i] = H[i][i];
e[i] = 0.0;
}
for (int j = Math.Max(i - 1, 0); j < nn; j++)
{
norm = norm + Math.Abs(H[i][j]);
}
}
// Outer loop over eigenvalue index
int iter = 0;
while (n >= low)
{
// Look for single small sub-diagonal element
int l = n;
while (l > low)
{
s = Math.Abs(H[l - 1][l - 1]) + Math.Abs(H[l][l]);
if (s == 0.0)
{
s = norm;
}
if (Math.Abs(H[l][l - 1]) < eps * s)
{
break;
}
l--;
}
// Check for convergence
// One root found
if (l == n)
{
H[n][n] = H[n][n] + exshift;
d[n] = H[n][n];
e[n] = 0.0;
n--;
iter = 0;
// Two roots found
}
else if (l == n - 1)
{
w = H[n][n - 1] * H[n - 1][n];
p = (H[n - 1][n - 1] - H[n][n]) / 2.0;
q = p * p + w;
z = Math.Sqrt(Math.Abs(q));
H[n][n] = H[n][n] + exshift;
H[n - 1][n - 1] = H[n - 1][n - 1] + exshift;
x = H[n][n];
// Real pair
if (q >= 0)
{
if (p >= 0)
{
z = p + z;
}
else
{
z = p - z;
}
d[n - 1] = x + z;
d[n] = d[n - 1];
if (z != 0.0)
{
d[n] = x - w / z;
}
e[n - 1] = 0.0;
e[n] = 0.0;
x = H[n][n - 1];
s = Math.Abs(x) + Math.Abs(z);
p = x / s;
q = z / s;
r = Math.Sqrt(p * p + q * q);
p = p / r;
q = q / r;
// Row modification
for (int j = n - 1; j < nn; j++)
{
z = H[n - 1][j];
H[n - 1][j] = q * z + p * H[n][j];
H[n][j] = q * H[n][j] - p * z;
}
// Column modification
for (int i = 0; i <= n; i++)
{
z = H[i][n - 1];
H[i][n - 1] = q * z + p * H[i][n];
H[i][n] = q * H[i][n] - p * z;
}
// Accumulate transformations
for (int i = low; i <= high; i++)
{
z = V[i][n - 1];
V[i][n - 1] = q * z + p * V[i][n];
V[i][n] = q * V[i][n] - p * z;
}
// Complex pair
}
else
{
d[n - 1] = x + p;
d[n] = x + p;
e[n - 1] = z;
e[n] = -z;
}
n = n - 2;
iter = 0;
// No convergence yet
}
else
{
// Form shift
x = H[n][n];
y = 0.0;
w = 0.0;
if (l < n)
{
y = H[n - 1][n - 1];
w = H[n][n - 1] * H[n - 1][n];
}
// Wilkinson's original ad hoc shift
if (iter == 10)
{
exshift += x;
for (int i = low; i <= n; i++)
{
H[i][i] -= x;
}
s = Math.Abs(H[n][n - 1]) + Math.Abs(H[n - 1][n - 2]);
x = y = 0.75 * s;
w = -0.4375 * s * s;
}
// MATLAB's new ad hoc shift
if (iter == 30)
{
s = (y - x) / 2.0;
s = s * s + w;
if (s > 0)
{
s = Math.Sqrt(s);
if (y < x)
{
s = -s;
}
s = x - w / ((y - x) / 2.0 + s);
for (int i = low; i <= n; i++)
{
H[i][i] -= s;
}
exshift += s;
x = y = w = 0.964;
}
}
iter = iter + 1; // (Could check iteration count here.)
// Look for two consecutive small sub-diagonal elements
int m = n - 2;
while (m >= l)
{
z = H[m][m];
r = x - z;
s = y - z;
p = (r * s - w) / H[m + 1][m] + H[m][m + 1];
q = H[m + 1][m + 1] - z - r - s;
r = H[m + 2][m + 1];
s = Math.Abs(p) + Math.Abs(q) + Math.Abs(r);
p = p / s;
q = q / s;
r = r / s;
if (m == l)
{
break;
}
if (Math.Abs(H[m][m - 1]) * (Math.Abs(q) + Math.Abs(r)) <
eps * (Math.Abs(p) * (Math.Abs(H[m - 1][m - 1]) + Math.Abs(z) +
Math.Abs(H[m + 1][m + 1]))))
{
break;
}
m--;
}
for (int i = m + 2; i <= n; i++)
{
H[i][i - 2] = 0.0;
if (i > m + 2)
{
H[i][i - 3] = 0.0;
}
}
// Double QR step involving rows l:n and columns m:n
for (int k = m; k <= n - 1; k++)
{
Boolean notlast = (k != n - 1);
if (k != m)
{
p = H[k][k - 1];
q = H[k + 1][k - 1];
r = (notlast ? H[k + 2][k - 1] : 0.0);
x = Math.Abs(p) + Math.Abs(q) + Math.Abs(r);
if (x != 0.0)
{
p = p / x;
q = q / x;
r = r / x;
}
}
if (x == 0.0)
{
break;
}
s = Math.Sqrt(p * p + q * q + r * r);
if (p < 0)
{
s = -s;
}
if (s != 0)
{
if (k != m)
{
H[k][k - 1] = -s * x;
}
else if (l != m)
{
H[k][k - 1] = -H[k][k - 1];
}
p = p + s;
x = p / s;
y = q / s;
z = r / s;
q = q / p;
r = r / p;
// Row modification
for (int j = k; j < nn; j++)
{
p = H[k][j] + q * H[k + 1][j];
if (notlast)
{
p = p + r * H[k + 2][j];
H[k + 2][j] = H[k + 2][j] - p * z;
}
H[k][j] = H[k][j] - p * x;
H[k + 1][j] = H[k + 1][j] - p * y;
}
// Column modification
for (int i = 0; i <= Math.Min(n, k + 3); i++)
{
p = x * H[i][k] + y * H[i][k + 1];
if (notlast)
{
p = p + z * H[i][k + 2];
H[i][k + 2] = H[i][k + 2] - p * r;
}
H[i][k] = H[i][k] - p;
H[i][k + 1] = H[i][k + 1] - p * q;
}
// Accumulate transformations
for (int i = low; i <= high; i++)
{
p = x * V[i][k] + y * V[i][k + 1];
if (notlast)
{
p = p + z * V[i][k + 2];
V[i][k + 2] = V[i][k + 2] - p * r;
}
V[i][k] = V[i][k] - p;
V[i][k + 1] = V[i][k + 1] - p * q;
}
} // (s != 0)
} // k loop
} // check convergence
} // while (n >= low)
// Backsubstitute to find vectors of upper triangular form
if (norm == 0.0)
{
return;
}
for (n = nn - 1; n >= 0; n--)
{
p = d[n];
q = e[n];
// Real vector
if (q == 0)
{
int l = n;
H[n][n] = 1.0;
for (int i = n - 1; i >= 0; i--)
{
w = H[i][i] - p;
r = 0.0;
for (int j = l; j <= n; j++)
{
r = r + H[i][j] * H[j][n];
}
if (e[i] < 0.0)
{
z = w;
s = r;
}
else
{
l = i;
if (e[i] == 0.0)
{
if (w != 0.0)
{
H[i][n] = -r / w;
}
else
{
H[i][n] = -r / (eps * norm);
}
// Solve real equations
}
else
{
x = H[i][i + 1];
y = H[i + 1][i];
q = (d[i] - p) * (d[i] - p) + e[i] * e[i];
t = (x * s - z * r) / q;
H[i][n] = t;
if (Math.Abs(x) > Math.Abs(z))
{
H[i + 1][n] = (-r - w * t) / x;
}
else
{
H[i + 1][n] = (-s - y * t) / z;
}
}
// Overflow control
t = Math.Abs(H[i][n]);
if ((eps * t) * t > 1)
{
for (int j = i; j <= n; j++)
{
H[j][n] = H[j][n] / t;
}
}
}
}
// Complex vector
}
else if (q < 0)
{
int l = n - 1;
// Last vector component imaginary so matrix is triangular
if (Math.Abs(H[n][n - 1]) > Math.Abs(H[n - 1][n]))
{
H[n - 1][n - 1] = q / H[n][n - 1];
H[n - 1][n] = -(H[n][n] - p) / H[n][n - 1];
}
else
{
cdiv(0.0, -H[n - 1][n], H[n - 1][n - 1] - p, q);
H[n - 1][n - 1] = cdivr;
H[n - 1][n] = cdivi;
}
H[n][n - 1] = 0.0;
H[n][n] = 1.0;
for (int i = n - 2; i >= 0; i--)
{
double ra, sa, vr, vi;
ra = 0.0;
sa = 0.0;
for (int j = l; j <= n; j++)
{
ra = ra + H[i][j] * H[j][n - 1];
sa = sa + H[i][j] * H[j][n];
}
w = H[i][i] - p;
if (e[i] < 0.0)
{
z = w;
r = ra;
s = sa;
}
else
{
l = i;
if (e[i] == 0)
{
cdiv(-ra, -sa, w, q);
H[i][n - 1] = cdivr;
H[i][n] = cdivi;
}
else
{
// Solve complex equations
x = H[i][i + 1];
y = H[i + 1][i];
vr = (d[i] - p) * (d[i] - p) + e[i] * e[i] - q * q;
vi = (d[i] - p) * 2.0 * q;
if (vr == 0.0 & vi == 0.0)
{
vr = eps * norm * (Math.Abs(w) + Math.Abs(q) +
Math.Abs(x) + Math.Abs(y) + Math.Abs(z));
}
cdiv(x * r - z * ra + q * sa, x * s - z * sa - q * ra, vr, vi);
H[i][n - 1] = cdivr;
H[i][n] = cdivi;
if (Math.Abs(x) > (Math.Abs(z) + Math.Abs(q)))
{
H[i + 1][n - 1] = (-ra - w * H[i][n - 1] + q * H[i][n]) / x;
H[i + 1][n] = (-sa - w * H[i][n] - q * H[i][n - 1]) / x;
}
else
{
cdiv(-r - y * H[i][n - 1], -s - y * H[i][n], z, q);
H[i + 1][n - 1] = cdivr;
H[i + 1][n] = cdivi;
}
}
// Overflow control
t = Math.Max(Math.Abs(H[i][n - 1]), Math.Abs(H[i][n]));
if ((eps * t) * t > 1)
{
for (int j = i; j <= n; j++)
{
H[j][n - 1] = H[j][n - 1] / t;
H[j][n] = H[j][n] / t;
}
}
}
}
}
}
// Vectors of isolated roots
for (int i = 0; i < nn; i++)
{
if (i < low | i > high)
{
for (int j = i; j < nn; j++)
{
V[i][j] = H[i][j];
}
}
}
// Back transformation to get eigenvectors of original matrix
for (int j = nn - 1; j >= low; j--)
{
for (int i = low; i <= high; i++)
{
z = 0.0;
for (int k = low; k <= Math.Min(j, high); k++)
{
z = z + V[i][k] * H[k][j];
}
V[i][j] = z;
}
}
}
/* ------------------------
Constructor
* ------------------------ */
/** Check for symmetry, then construct the eigenvalue decomposition
@param A Square matrix
@return Structure to access D and V.
*/
public EigenvalueDecomposition(double[][] Arg)
{
double[][] A = Arg;
n = Arg[0].Length;
V = new double[n][];
for (int i = 0; i < n; i++)
{
V[i] = new double[n];
}
d = new double[n];
e = new double[n];
issymmetric = true;
for (int j = 0; (j < n) & issymmetric; j++)
{
for (int i = 0; (i < n) & issymmetric; i++)
{
issymmetric = (A[i][j] == A[j][i]);
}
}
if (issymmetric)
{
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
V[i][j] = A[i][j];
}
}
// Tridiagonalize.
tred2();
// Diagonalize.
tql2();
}
else
{
H = new double[n][];
for (int i = 0; i < n; i++)
{
H[i] = new double[n];
}
ort = new double[n];
for (int j = 0; j < n; j++)
{
for (int i = 0; i < n; i++)
{
H[i][j] = A[i][j];
}
}
// Reduce to Hessenberg form.
orthes();
// Reduce Hessenberg to real Schur form.
hqr2();
}
}
/* ------------------------
Public Methods
* ------------------------ */
/** Return the eigenvector matrix
@return V
*/
public double[][] getV()
{
return V;
}
/** Return the real parts of the eigenvalues
@return real(diag(D))
*/
public double[] getRealEigenvalues()
{
return d;
}
/** Return the imaginary parts of the eigenvalues
@return imag(diag(D))
*/
public double[] getImagEigenvalues()
{
return e;
}
/** Return the block diagonal eigenvalue matrix
@return D
*/
public double[][] getD()
{
double[][] X = new double[n][];//[](n, n);
for (int i = 0; i < n; i++)
{
X[i] = new double[n];
}
double[][] D = X;
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
D[i][j] = 0.0;
}
D[i][i] = d[i];
if (e[i] > 0)
{
D[i][i + 1] = e[i];
}
else if (e[i] < 0)
{
D[i][i - 1] = e[i];
}
}
return X;
}
}
public static class MathTools
{
/** sqrt(a^2 + b^2) without under/overflow. **/
public static double hypot(double a, double b)
{
double r;
if (Math.Abs(a) > Math.Abs(b))
{
r = b / a;
r = Math.Abs(a) * Math.Sqrt(1 + r * r);
}
else if (b != 0)
{
r = a / b;
r = Math.Abs(b) * Math.Sqrt(1 + r * r);
}
else
{
r = 0.0;
}
return r;
}
}
#endregion
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment