Created
October 5, 2010 19:30
-
-
Save NOLFXceptMe/612170 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<algorithm> | |
#include<iostream> | |
#include<string> | |
#include<utility> | |
#include<cmath> | |
using namespace std; | |
string BlockSeqAlignment(string seqA, string seqB); | |
pair<string, string> NeedlemanWunsch(string seqA, string seqB); | |
int calcScoreLinear(string seqA, string seqB); | |
const int A = 0, G = 1, C = 2, T = 3; | |
size_t convToIndex['T'+1], d, S[4][4]; | |
char convFromIndex[4]; | |
int main() | |
{ | |
convToIndex['A'] = A, convFromIndex[0] = 'A'; | |
convToIndex['G'] = G, convFromIndex[1] = 'G'; | |
convToIndex['C'] = C, convFromIndex[2] = 'C'; | |
convToIndex['T'] = T, convFromIndex[3] = 'T'; | |
string seqA, seqB, seqAlignment; | |
cout<<"Enter the similarity matrix: "<<endl; | |
for(int i=0; i<4; ++i) | |
for(int j=0; j<4; ++j) | |
cin>>S[i][j]; | |
cout<<"Enter the gap penalty: "<<endl; | |
cin>>d; | |
cout<<"Enter the sequence A: "<<endl; | |
cin>>seqA; | |
cout<<"Enter the sequence B: "<<endl; | |
cin>>seqB; | |
seqAlignment = BlockSeqAlignment(seqA, seqB); | |
cout<<"The alignment is: "<<seqAlignment; | |
return 0; | |
} | |
void calcIntToStr(string IntToStrArray[], size_t p, size_t t) | |
{ | |
string str; | |
size_t k; | |
for(size_t i=0; i<p; ++i){ | |
str = ""; | |
k = i; | |
for(size_t j=0; j<t; ++j){ | |
str = convFromIndex[k%4] + str; | |
k = k/4; | |
} | |
IntToStrArray[i] = str; | |
} | |
} | |
size_t calcStrToInt(string str) | |
{ | |
size_t value = 0; | |
for(size_t i=0; i<str.length(); ++i){ | |
value = value*4 + convToIndex[(size_t)str[i]]; | |
} | |
return value; | |
} | |
string BlockSeqAlignment(string seqA, string seqB) | |
{ | |
size_t n = seqB.length(); | |
size_t t = (size_t)(log2(n)/4); | |
size_t a = n/t; | |
string alignmentA = "", alignmentB = "", seqAlignment; | |
pair<string, string> subAlign; | |
int params[3]; | |
size_t p = (size_t)pow(4, t); | |
string aux(t, '-'); | |
int SS, SDiag, SUp, SLeft; | |
int **Beta = new int*[p], **Score = new int*[a]; | |
string *IntToStrArray = new string[p]; | |
for(size_t i=0; i<=a; ++i) | |
Beta[i] = new int[p], Score[i] = new int[a]; | |
calcIntToStr(IntToStrArray, p, t); | |
for(size_t i=0; i<p; ++i){ | |
for(size_t j=0; j<p; ++j){ | |
Beta[i][j] = calcScoreLinear(IntToStrArray[i], IntToStrArray[j]); | |
} | |
} | |
for(size_t i=0; i<=a; ++i) | |
Score[i][0] = i*t*d; | |
for(size_t j=0; j<=a; ++j) | |
Score[0][j] = j*t*d; | |
/* Compute scores */ | |
for(size_t i=1; i<=a; ++i){ | |
for(size_t j=1; j<=a; ++j){ | |
params[0] = Score[i-1][j] + t*d; | |
params[1] = Score[i][j-1] + t*d; | |
params[2] = Score[i-1][j-1] + Beta[calcStrToInt(seqA.substr((i-1)*t, t))][calcStrToInt(seqB.substr((j-1)*t, t))]; | |
Score[i][j] = *(max_element(params, params+3)); | |
} | |
} | |
size_t i=a, j=a; | |
/* Calculate the sequences */ | |
while(i>0 && j>0){ | |
SS = Score[i][j]; | |
SDiag = Score[i-1][j-1]; | |
SUp = Score[i-1][j]; | |
SLeft = Score[i][j-1]; | |
if(SS == SDiag + Beta[calcStrToInt(seqA.substr((i-1)*t, t))][calcStrToInt(seqB.substr((j-1)*t, t))]){ | |
subAlign = NeedlemanWunsch(seqA.substr((i-1)*t, t), seqB.substr((j-1)*t, t)); | |
alignmentA = subAlign.first + alignmentA; | |
alignmentB = subAlign.second + alignmentB; | |
--i, --j; | |
} else if(SS == SUp + t*d){ | |
alignmentA = seqA.substr((i-1)*t, t) + alignmentA; | |
alignmentB = aux + alignmentB; | |
--i; | |
} else if(SS == SLeft + t*d){ | |
alignmentA = aux + alignmentA; | |
alignmentB = seqB.substr((j-1)*t, t) + alignmentB; | |
--j; | |
} else { | |
} | |
} | |
while(i>0){ | |
alignmentA = seqA.substr((i-1)*t, t) + alignmentA; | |
alignmentB = aux + alignmentB; | |
--i; | |
} | |
while(j>0){ | |
alignmentA = aux + alignmentA; | |
alignmentB = seqB.substr((j-1)*t, t) + alignmentB; | |
--j; | |
} | |
//cout<<"Overall score: "<<Score[a][b]<<endl; | |
return "\n"+alignmentA+"\n"+alignmentB+"\n"; | |
} | |
/* Calculate partial alignment using Needleman-Wunsch */ | |
pair<string, string> NeedlemanWunsch(string seqA, string seqB) | |
{ | |
size_t m = seqA.length(); | |
size_t n = seqB.length(); | |
int params[3]; | |
string alignmentA = "", alignmentB = ""; | |
int **F = new int*[m+1]; | |
for(size_t i=0; i<=m; ++i) | |
F[i] = new int[n+1]; | |
for(size_t i=0; i<=m; ++i) | |
F[i][0] = d*i; | |
for(size_t j=0; j<=n; ++j) | |
F[0][j] = d*j; | |
//Compute Fij | |
for(size_t i=1; i<=m; ++i) | |
for(size_t j=1; j<=n; ++j){ | |
params[0] = F[i-1][j-1] + S[convToIndex[(size_t) seqA[i-1]]][convToIndex[(size_t) seqB[j-1]]]; | |
params[1] = F[i-1][j] + d; | |
params[2] = F[i][j-1] + d; | |
F[i][j] = *(max_element(params, params+3)); | |
} | |
//Done computing Fij | |
int i=m, j=n; | |
int Score, ScoreDiag, ScoreUp, ScoreLeft; | |
while(i>0 && j>0){ | |
Score = F[i][j]; | |
ScoreDiag = F[i-1][j-1]; | |
ScoreUp = F[i][j-1]; | |
ScoreLeft= F[i-1][j]; | |
if(Score == ScoreDiag + S[convToIndex[(size_t) seqA[i-1]]][convToIndex[(size_t) seqB[j-1]]]){ | |
alignmentA = seqA[i-1]+ alignmentA; | |
alignmentB = seqB[j-1] + alignmentB; | |
i--, j--; | |
} else if(Score == ScoreLeft + d) { | |
alignmentA = seqA[i-1] + alignmentA; | |
alignmentB = "-" + alignmentB; | |
i--; | |
} else if(Score == ScoreUp + d) { | |
alignmentA = "-" + alignmentA; | |
alignmentB = seqB[j-1] + alignmentB; | |
j--; | |
} else { | |
} | |
} | |
while(i>0){ | |
alignmentA = seqA[i-1] + alignmentA; | |
alignmentB = "-" + alignmentB; | |
i--; | |
} | |
while(j>0){ | |
alignmentA = "-" + alignmentA; | |
alignmentB = seqB[j-1] + alignmentB; | |
j--; | |
} | |
return make_pair(alignmentA, alignmentB); | |
} | |
/* Calculate score using linear memory space */ | |
int calcScoreLinear(string seqA, string seqB) | |
{ | |
size_t m = seqA.length(); | |
size_t n = seqB.length(); | |
int params[3]; | |
string alignmentA = "", alignmentB = "", seqAlignment; | |
int auxF; | |
int *F = new int[n+1]; | |
for(size_t j=0; j<=n; ++j) | |
F[j] = d*j; | |
//Compute Fj | |
for(size_t i=1; i<=m; ++i){ //ith iteration | |
auxF = F[0]; | |
F[0] = d*i; | |
for(size_t j=1; j<=n; ++j){ | |
params[0] = auxF + S[convToIndex[(size_t) seqA[i-1]]][convToIndex[(size_t) seqB[j-1]]]; | |
params[1] = F[j] + d; | |
params[2] = F[j-1] + d; | |
auxF = F[j]; | |
F[j] = *(max_element(params, params+3)); | |
} | |
} | |
//Done computing Fj | |
return F[n]; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Input for the above program:
10 -1 -3 -4
-1 7 -5 -3
-3 -5 9 0
-4 -3 0 8
-5
AGACTAGTTACACGTT
CGAGACGTCCCACGTT