Last active
October 14, 2021 14:52
-
-
Save ggirelli/5bf8a9a1bf51d90eb07aa6596b6c6ecc to your computer and use it in GitHub Desktop.
Align strings (brute force)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Tuple | |
BEST_ALIGNMENT = Tuple[int, slice, slice] | |
def best_alignment_visualized(seq1: str, seq2: str, verbose: bool = True) -> BEST_ALIGNMENT: | |
best_match: BEST_ALIGNMENT = (0, slice(0), slice(0)) | |
if verbose: | |
print(f"A: {seq1}\nB: {seq2}\n") | |
for i in range(1, len(seq1)+1): | |
A = slice(len(seq1)-i, min(len(seq1), len(seq1)-i+len(seq2)), 1) | |
B = slice(0, min(len(seq2), i), 1) | |
match_string = "" | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
match_string += "|" | |
else: | |
match_string += " " | |
if verbose: | |
spacer = ' '*(len(seq1)-i+len(seq2)-1) | |
print(f"Matches: {match_count}") | |
print(f"1: {' '*(len(seq2)-1)}{seq1}") | |
print(f"A: {spacer}{seq1[A]} {A}") | |
print(f"M: {spacer}{match_string}") | |
print(f"B: {spacer}{seq2[B]} {B}") | |
print(f"2: {spacer}{seq2}\n") | |
if match_count > best_match[0]: | |
best_match = (match_count, A, B) | |
for i in range(1, len(seq2))[::-1]: | |
A = slice(0, min(len(seq1), i), 1) | |
B = slice(len(seq2)-i, min(len(seq2), len(seq2)-i+len(seq1)), 1) | |
match_string = "" | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
match_string += "|" | |
else: | |
match_string += " " | |
if verbose: | |
spacer = ' '*(len(seq2)-1) | |
print(f"Matches: {match_count}") | |
print(f"1: {' '*(len(seq2)-1)}{seq1}") | |
print(f"A: {spacer}{seq1[A]} {A}") | |
print(f"M: {spacer}{match_string}") | |
print(f"B: {spacer}{seq2[B]} {B}") | |
print(f"2: {' '*(i-1)}{seq2}\n") | |
if match_count > best_match[0]: | |
best_match = (match_count, A, B) | |
return best_match | |
def best_alignment(seq1: str, seq2: str) -> BEST_ALIGNMENT: | |
best_match: BEST_ALIGNMENT = (0, slice(0), slice(0)) | |
for i in range(1, len(seq1)+1): | |
A = slice(len(seq1)-i, min(len(seq1), len(seq1)-i+len(seq2)), 1) | |
B = slice(0, min(len(seq2), i), 1) | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
if match_count > best_match[0]: | |
best_match = (match_count, A, B) | |
for i in range(1, len(seq2))[::-1]: | |
A = slice(0, min(len(seq1), i), 1) | |
B = slice(len(seq2)-i, min(len(seq2), len(seq2)-i+len(seq1)), 1) | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
if match_count > best_match[0]: | |
best_match = (match_count, A, B) | |
return best_match |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
from typing import DefaultDict, Tuple | |
def count_alignment_visualized(seq1: str, seq2: str, verbose: bool = True) -> DefaultDict[int, int]: | |
alignment_counts: DefaultDict[int, int] = defaultdict(lambda: 0) | |
if verbose: | |
print(f"A: {seq1}\nB: {seq2}\n") | |
for i in range(1, len(seq1)+1): | |
A = slice(len(seq1)-i, min(len(seq1), len(seq1)-i+len(seq2)), 1) | |
B = slice(0, min(len(seq2), i), 1) | |
match_string = "" | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
match_string += "|" | |
else: | |
match_string += " " | |
if verbose: | |
spacer = ' '*(len(seq1)-i+len(seq2)-1) | |
print(f"Matches: {match_count}") | |
print(f"1: {' '*(len(seq2)-1)}{seq1}") | |
print(f"A: {spacer}{seq1[A]} {A}") | |
print(f"M: {spacer}{match_string}") | |
print(f"B: {spacer}{seq2[B]} {B}") | |
print(f"2: {spacer}{seq2}\n") | |
alignment_counts[match_count] += 1 | |
for i in range(1, len(seq2))[::-1]: | |
A = slice(0, min(len(seq1), i), 1) | |
B = slice(len(seq2)-i, min(len(seq2), len(seq2)-i+len(seq1)), 1) | |
match_string = "" | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
match_string += "|" | |
else: | |
match_string += " " | |
if verbose: | |
spacer = ' '*(len(seq2)-1) | |
print(f"Matches: {match_count}") | |
print(f"1: {' '*(len(seq2)-1)}{seq1}") | |
print(f"A: {spacer}{seq1[A]} {A}") | |
print(f"M: {spacer}{match_string}") | |
print(f"B: {spacer}{seq2[B]} {B}") | |
print(f"2: {' '*(i-1)}{seq2}\n") | |
alignment_counts[match_count] += 1 | |
return alignment_counts | |
def count_alignment(seq1: str, seq2: str) -> DefaultDict[int, int]: | |
alignment_counts: DefaultDict[int, int] = defaultdict(lambda: 0) | |
for i in range(1, len(seq1)+1): | |
A = slice(len(seq1)-i, min(len(seq1), len(seq1)-i+len(seq2)), 1) | |
B = slice(0, min(len(seq2), i), 1) | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
alignment_counts[match_count] += 1 | |
for i in range(1, len(seq2))[::-1]: | |
A = slice(0, min(len(seq1), i), 1) | |
B = slice(len(seq2)-i, min(len(seq2), len(seq2)-i+len(seq1)), 1) | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
alignment_counts[match_count] += 1 | |
return alignment_counts |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment