Skip to content

Instantly share code, notes, and snippets.

@tdcosta100
Last active August 21, 2019 15:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tdcosta100/3cd0c3298c072815568edbf9d74166b5 to your computer and use it in GitHub Desktop.
Save tdcosta100/3cd0c3298c072815568edbf9d74166b5 to your computer and use it in GitHub Desktop.
Needleman-Wunsch algorithm
using System;
using System.Collections.Generic;
using System.Linq;
namespace Algorithms
{
public class NeedlemanWunsch
{
private enum CorrespondenceType
{
None = 0,
Substitution = 1,
Deletion = 2,
Insertion = 3
}
public static void AlignSequences<TSeq1, TSeq2>(TSeq1[] sequence1, TSeq2[] sequence2, TSeq1 gapValueSequence1, TSeq2 gapValueSequence2, Func<TSeq1, TSeq2, bool> sequenceComparer, out List<TSeq1> sequence1Alignment, out List<TSeq2> sequence2Alignment)
{
var mismatchCost = -1;
var insertionDeletionCost = -1;
var matchCost = 1;
var costMatrix = new int[sequence1.Length + 1, sequence2.Length + 1];
var correspondenceTypeMatrix = new CorrespondenceType[sequence1.Length + 1, sequence2.Length + 1];
for (int i = 0; i <= sequence1.Length; i++)
{
costMatrix[i, 0] = i * insertionDeletionCost;
correspondenceTypeMatrix[i, 0] = CorrespondenceType.None;
}
for (int j = 0; j <= sequence2.Length; j++)
{
costMatrix[0, j] = j * insertionDeletionCost;
correspondenceTypeMatrix[0, j] = CorrespondenceType.None;
}
for (int i = 1; i < sequence1.Length + 1; i++)
{
for (int j = 1; j < sequence2.Length + 1; j++)
{
var costCorrespondenceType =
new[]
{
(
cost: costMatrix[i - 1, j - 1] + (sequenceComparer(sequence1[i - 1], sequence2[j - 1]) ? matchCost : mismatchCost),
correspondenceType: CorrespondenceType.Substitution
),
(
cost: costMatrix[i - 1, j] + insertionDeletionCost,
correspondenceType: CorrespondenceType.Deletion
),
(
cost: costMatrix[i, j - 1] + insertionDeletionCost,
correspondenceType: CorrespondenceType.Insertion
)
}
.OrderByDescending(c => c.cost)
.ThenBy(c => (int)c.correspondenceType)
.First();
costMatrix[i, j] = costCorrespondenceType.cost;
correspondenceTypeMatrix[i, j] = costCorrespondenceType.correspondenceType;
}
}
var iResult = sequence1.Length;
var jResult = sequence2.Length;
sequence1Alignment = new List<TSeq1>();
sequence2Alignment = new List<TSeq2>();
while (iResult > 0 || jResult > 0)
{
switch (correspondenceTypeMatrix[iResult, jResult])
{
case CorrespondenceType.None:
if (iResult > 0)
{
sequence1Alignment.Insert(0, sequence1[--iResult]);
sequence2Alignment.Insert(0, gapValueSequence2);
}
else if (jResult > 0)
{
sequence1Alignment.Insert(0, gapValueSequence1);
sequence2Alignment.Insert(0, sequence2[--jResult]);
}
break;
case CorrespondenceType.Substitution:
if (iResult > 0 && jResult > 0)
{
sequence1Alignment.Insert(0, sequence1[--iResult]);
sequence2Alignment.Insert(0, sequence2[--jResult]);
}
break;
case CorrespondenceType.Deletion:
if (iResult > 0)
{
sequence1Alignment.Insert(0, sequence1[--iResult]);
sequence2Alignment.Insert(0, gapValueSequence2);
}
break;
case CorrespondenceType.Insertion:
if (jResult > 0)
{
sequence1Alignment.Insert(0, gapValueSequence1);
sequence2Alignment.Insert(0, sequence2[--jResult]);
}
break;
default:
break;
}
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment