Skip to content

Instantly share code, notes, and snippets.

@ggirelli
Last active October 14, 2021 14:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ggirelli/5bf8a9a1bf51d90eb07aa6596b6c6ecc to your computer and use it in GitHub Desktop.
Save ggirelli/5bf8a9a1bf51d90eb07aa6596b6c6ecc to your computer and use it in GitHub Desktop.
Align strings (brute force)
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
from typing import Tuple
BEST_ALIGNMENT = Tuple[int, slice, slice]
def best_alignment_visualized(seq1: str, seq2: str, verbose: bool = True) -> BEST_ALIGNMENT:
best_match: BEST_ALIGNMENT = (0, slice(0), slice(0))
if verbose:
print(f"A: {seq1}\nB: {seq2}\n")
for i in range(1, len(seq1)+1):
A = slice(len(seq1)-i, min(len(seq1), len(seq1)-i+len(seq2)), 1)
B = slice(0, min(len(seq2), i), 1)
match_string = ""
match_count = 0
for j in range(len(seq1[A])):
if seq1[A][j] == seq2[B][j]:
match_count += 1
match_string += "|"
else:
match_string += " "
if verbose:
spacer = ' '*(len(seq1)-i+len(seq2)-1)
print(f"Matches: {match_count}")
print(f"1: {' '*(len(seq2)-1)}{seq1}")
print(f"A: {spacer}{seq1[A]} {A}")
print(f"M: {spacer}{match_string}")
print(f"B: {spacer}{seq2[B]} {B}")
print(f"2: {spacer}{seq2}\n")
if match_count > best_match[0]:
best_match = (match_count, A, B)
for i in range(1, len(seq2))[::-1]:
A = slice(0, min(len(seq1), i), 1)
B = slice(len(seq2)-i, min(len(seq2), len(seq2)-i+len(seq1)), 1)
match_string = ""
match_count = 0
for j in range(len(seq1[A])):
if seq1[A][j] == seq2[B][j]:
match_count += 1
match_string += "|"
else:
match_string += " "
if verbose:
spacer = ' '*(len(seq2)-1)
print(f"Matches: {match_count}")
print(f"1: {' '*(len(seq2)-1)}{seq1}")
print(f"A: {spacer}{seq1[A]} {A}")
print(f"M: {spacer}{match_string}")
print(f"B: {spacer}{seq2[B]} {B}")
print(f"2: {' '*(i-1)}{seq2}\n")
if match_count > best_match[0]:
best_match = (match_count, A, B)
return best_match
def best_alignment(seq1: str, seq2: str) -> BEST_ALIGNMENT:
best_match: BEST_ALIGNMENT = (0, slice(0), slice(0))
for i in range(1, len(seq1)+1):
A = slice(len(seq1)-i, min(len(seq1), len(seq1)-i+len(seq2)), 1)
B = slice(0, min(len(seq2), i), 1)
match_count = 0
for j in range(len(seq1[A])):
if seq1[A][j] == seq2[B][j]:
match_count += 1
if match_count > best_match[0]:
best_match = (match_count, A, B)
for i in range(1, len(seq2))[::-1]:
A = slice(0, min(len(seq1), i), 1)
B = slice(len(seq2)-i, min(len(seq2), len(seq2)-i+len(seq1)), 1)
match_count = 0
for j in range(len(seq1[A])):
if seq1[A][j] == seq2[B][j]:
match_count += 1
if match_count > best_match[0]:
best_match = (match_count, A, B)
return best_match
from collections import defaultdict
from typing import DefaultDict, Tuple
def count_alignment_visualized(seq1: str, seq2: str, verbose: bool = True) -> DefaultDict[int, int]:
alignment_counts: DefaultDict[int, int] = defaultdict(lambda: 0)
if verbose:
print(f"A: {seq1}\nB: {seq2}\n")
for i in range(1, len(seq1)+1):
A = slice(len(seq1)-i, min(len(seq1), len(seq1)-i+len(seq2)), 1)
B = slice(0, min(len(seq2), i), 1)
match_string = ""
match_count = 0
for j in range(len(seq1[A])):
if seq1[A][j] == seq2[B][j]:
match_count += 1
match_string += "|"
else:
match_string += " "
if verbose:
spacer = ' '*(len(seq1)-i+len(seq2)-1)
print(f"Matches: {match_count}")
print(f"1: {' '*(len(seq2)-1)}{seq1}")
print(f"A: {spacer}{seq1[A]} {A}")
print(f"M: {spacer}{match_string}")
print(f"B: {spacer}{seq2[B]} {B}")
print(f"2: {spacer}{seq2}\n")
alignment_counts[match_count] += 1
for i in range(1, len(seq2))[::-1]:
A = slice(0, min(len(seq1), i), 1)
B = slice(len(seq2)-i, min(len(seq2), len(seq2)-i+len(seq1)), 1)
match_string = ""
match_count = 0
for j in range(len(seq1[A])):
if seq1[A][j] == seq2[B][j]:
match_count += 1
match_string += "|"
else:
match_string += " "
if verbose:
spacer = ' '*(len(seq2)-1)
print(f"Matches: {match_count}")
print(f"1: {' '*(len(seq2)-1)}{seq1}")
print(f"A: {spacer}{seq1[A]} {A}")
print(f"M: {spacer}{match_string}")
print(f"B: {spacer}{seq2[B]} {B}")
print(f"2: {' '*(i-1)}{seq2}\n")
alignment_counts[match_count] += 1
return alignment_counts
def count_alignment(seq1: str, seq2: str) -> DefaultDict[int, int]:
alignment_counts: DefaultDict[int, int] = defaultdict(lambda: 0)
for i in range(1, len(seq1)+1):
A = slice(len(seq1)-i, min(len(seq1), len(seq1)-i+len(seq2)), 1)
B = slice(0, min(len(seq2), i), 1)
match_count = 0
for j in range(len(seq1[A])):
if seq1[A][j] == seq2[B][j]:
match_count += 1
alignment_counts[match_count] += 1
for i in range(1, len(seq2))[::-1]:
A = slice(0, min(len(seq1), i), 1)
B = slice(len(seq2)-i, min(len(seq2), len(seq2)-i+len(seq1)), 1)
match_count = 0
for j in range(len(seq1[A])):
if seq1[A][j] == seq2[B][j]:
match_count += 1
alignment_counts[match_count] += 1
return alignment_counts
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment