Created
February 17, 2024 12:41
-
-
Save piplus2/a93e1c8c82b4c80ba274ea5c61c3df30 to your computer and use it in GitHub Desktop.
Needleman Wunsch sequence alignmnet with affine gap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*-coding:utf-8 -*- | |
""" | |
@File : needleman-wunsch.py | |
@Author : Paolo Inglese | |
@Version : 1.0 | |
@Contact : p.inglese14@imperial.ac.uk | |
@Desc : Needleman-Wunsch algorithm for global sequence alignment with affine gap penalties | |
""" | |
from math import inf | |
def needleman_wunsch(seq1, seq2, match=1, mismatch=-1, gap_open_penalty=-5, gap_extend_penalty=-2): | |
m, n = len(seq1), len(seq2) | |
# Initialise matrices | |
M = [[0] * (n + 1) for _ in range(m + 1)] | |
Ix = [[0] * (n + 1) for _ in range(m + 1)] | |
Iy = [[0] * (n + 1) for _ in range(m + 1)] | |
# Initialise first row and column of matrices | |
for i in range(1, m + 1): | |
Ix[i][0] = gap_open_penalty + gap_extend_penalty * (i - 1) | |
M[i][0] = gap_open_penalty + gap_extend_penalty * (i - 1) | |
for j in range(1, n + 1): | |
Iy[i][j] = -inf | |
for j in range(1, n + 1): | |
for i in range(1, m + 1): | |
Ix[i][j] = -inf | |
Iy[0][j] = gap_open_penalty + gap_extend_penalty * (j - 1) | |
M[0][j] = gap_open_penalty + gap_extend_penalty * (j - 1) | |
# Fill in matrices | |
for i in range(1, m + 1): | |
for j in range(1, n + 1): | |
if seq1[i - 1] == seq2[j - 1]: | |
score = match | |
else: | |
score = mismatch | |
Ix[i][j] = max(M[i - 1][j] + gap_open_penalty + gap_extend_penalty, Ix[i - 1][j] + gap_extend_penalty) | |
Iy[i][j] = max(M[i][j - 1] + gap_open_penalty + gap_extend_penalty, Iy[i][j - 1] + gap_extend_penalty) | |
M[i][j] = max(M[i - 1][j - 1] + score, Ix[i][j], Iy[i][j]) | |
# Traceback | |
align1, align2 = "", "" | |
i, j = m, n | |
while i > 0 or j > 0: | |
current_score = M[i][j] | |
diag_score = M[i - 1][j - 1] | |
if ( | |
i > 0 | |
and j > 0 | |
and current_score | |
== diag_score + (seq1[i - 1] == seq2[j - 1]) * match + (seq1[i - 1] != seq2[j - 1]) * mismatch | |
): | |
align1 = seq1[i - 1] + align1 | |
align2 = seq2[j - 1] + align2 | |
i -= 1 | |
j -= 1 | |
elif i > 0 and current_score == Ix[i][j]: | |
align1 = seq1[i - 1] + align1 | |
align2 = "-" + align2 | |
i -= 1 | |
else: | |
align1 = "-" + align1 | |
align2 = seq2[j - 1] + align2 | |
j -= 1 | |
return align1, align2 | |
if __name__ == "__main__": | |
seq1 = "GAAAAAAAT" | |
seq2 = "GTAAAT" | |
align1, align2 = needleman_wunsch(seq1, seq2) | |
print("Sequence 1:", align1) | |
print("Sequence 2:", align2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment