Created
March 31, 2024 05:44
-
-
Save bfpill/0ec6c686f2fabaf19c0edc0b27c69c69 to your computer and use it in GitHub Desktop.
Smith Waterman affine local sequence alignment on DNA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def align(s1, s2, gap_penalty, match_score = 1, mismatch_penalty = -1): | |
m, n = len(s1), len(s2) | |
H = [[0] * (n + 1) for _ in range(m + 1)] | |
max_score = 0 | |
max_pos = (0, 0) | |
for i in range(1, len(s1) + 1): | |
for j in range(1, len(s2) + 1): | |
match = H[i-1][j-1] + (match_score if s1[i-1] == s2[j-1] | |
else mismatch_penalty) | |
delete_gap_length = 0 | |
insert_gap_length = 0 | |
k = i - 1 | |
# trace backwards up and down and count | |
while k > 0 and H[k][j] == H[k-1][j] + gap_penalty(1): | |
delete_gap_length += 1 | |
k -= 1 | |
k = j - 1 | |
while k > 0 and H[i][k] == H[i][k-1] + gap_penalty(1): | |
insert_gap_length += 1 | |
k -= 1 | |
delete = H[i-1][j] - gap_penalty(delete_gap_length + 1) | |
insert = H[i][j-1] - gap_penalty(insert_gap_length + 1) | |
score = max(match, insert, delete, 0) | |
if score > max_score: | |
max_score = score | |
max_pos = (i, j) | |
H[i][j] = score | |
align1 = "" | |
align2 = "" | |
i, j = max_pos | |
for row in H: | |
print(row) | |
while H[i][j] != 0: | |
if H[i][j] == H[i-1][j-1] + (match_score if s1[i-1] == s2[j-1] | |
else mismatch_penalty): | |
align1 = s1[i-1] + align1 | |
align2 = s2[j-1] + align2 | |
i -= 1 | |
j -= 1 | |
# | |
elif H[i][j] == H[i-1][j] + gap_penalty(1): | |
align1 = s1[i-1] + align1 | |
align2 = "-" + align2 | |
i -= 1 | |
else: | |
align1 = "-" + align1 | |
align2 = s2[j-1] + align2 | |
j -= 1 | |
print(align1) | |
print(align2) | |
s2 = "ATGCATGCATCGATGCTAGCTAGCATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGGTCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCG" | |
s2 = "ATGCATGCATCGATGCTAGCTAGCATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGGTCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCG" | |
def affine_gap_penalty(gap_length, extension_penalty=1, opening_penalty=2): | |
return opening_penalty + (extension_penalty * gap_length) | |
align(s1[::-1], s2, affine_gap_penalty) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment