Last active
October 14, 2021 14:52
-
-
Save ggirelli/5bf8a9a1bf51d90eb07aa6596b6c6ecc to your computer and use it in GitHub Desktop.
Align strings (brute force)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from best_alignment import *\n", | |
"from typing import Tuple" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"A: GCGCGACCTAGCCCGTACTGT\n", | |
"B: GAGTTGACC\n", | |
"\n", | |
"Matches: 0\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: T slice(20, 21, 1)\n", | |
"M: \n", | |
"B: G slice(0, 1, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GT slice(19, 21, 1)\n", | |
"M: | \n", | |
"B: GA slice(0, 2, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 0\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: TGT slice(18, 21, 1)\n", | |
"M: \n", | |
"B: GAG slice(0, 3, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: CTGT slice(17, 21, 1)\n", | |
"M: ||\n", | |
"B: GAGT slice(0, 4, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: ACTGT slice(16, 21, 1)\n", | |
"M: |\n", | |
"B: GAGTT slice(0, 5, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: TACTGT slice(15, 21, 1)\n", | |
"M: | | \n", | |
"B: GAGTTG slice(0, 6, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 3\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GTACTGT slice(14, 21, 1)\n", | |
"M: | || \n", | |
"B: GAGTTGA slice(0, 7, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 0\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: CGTACTGT slice(13, 21, 1)\n", | |
"M: \n", | |
"B: GAGTTGAC slice(0, 8, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: CCGTACTGT slice(12, 21, 1)\n", | |
"M: || \n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: CCCGTACTG slice(11, 20, 1)\n", | |
"M: | \n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 3\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GCCCGTACT slice(10, 19, 1)\n", | |
"M: | || \n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: AGCCCGTAC slice(9, 18, 1)\n", | |
"M: | |\n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: TAGCCCGTA slice(8, 17, 1)\n", | |
"M: || \n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 0\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: CTAGCCCGT slice(7, 16, 1)\n", | |
"M: \n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: CCTAGCCCG slice(6, 15, 1)\n", | |
"M: | \n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 4\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: ACCTAGCCC slice(5, 14, 1)\n", | |
"M: | | ||\n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 5\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GACCTAGCC slice(4, 13, 1)\n", | |
"M: || | ||\n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: CGACCTAGC slice(3, 12, 1)\n", | |
"M: | |\n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GCGACCTAG slice(2, 11, 1)\n", | |
"M: | | \n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 0\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: CGCGACCTA slice(1, 10, 1)\n", | |
"M: \n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 3\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GCGCGACCT slice(0, 9, 1)\n", | |
"M: | | | \n", | |
"B: GAGTTGACC slice(0, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 4\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GCGCGACC slice(0, 8, 1)\n", | |
"M: ||||\n", | |
"B: AGTTGACC slice(1, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GCGCGAC slice(0, 7, 1)\n", | |
"M: | |\n", | |
"B: GTTGACC slice(2, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GCGCGA slice(0, 6, 1)\n", | |
"M: | \n", | |
"B: TTGACC slice(3, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GCGCG slice(0, 5, 1)\n", | |
"M: | \n", | |
"B: TGACC slice(4, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GCGC slice(0, 4, 1)\n", | |
"M: | |\n", | |
"B: GACC slice(5, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GCG slice(0, 3, 1)\n", | |
"M: | \n", | |
"B: ACC slice(6, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: GC slice(0, 2, 1)\n", | |
"M: |\n", | |
"B: CC slice(7, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n", | |
"Matches: 0\n", | |
"1: GCGCGACCTAGCCCGTACTGT\n", | |
"A: G slice(0, 1, 1)\n", | |
"M: \n", | |
"B: C slice(8, 9, 1)\n", | |
"2: GAGTTGACC\n", | |
"\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"(5, slice(4, 13, 1), slice(0, 9, 1))" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"best_alignment_visualized('GCGCGACCTAGCCCGTACTGT', 'GAGTTGACC')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"A: GAGTTGACC\n", | |
"B: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 0\n", | |
"1: GAGTTGACC\n", | |
"A: C slice(8, 9, 1)\n", | |
"M: \n", | |
"B: G slice(0, 1, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GAGTTGACC\n", | |
"A: CC slice(7, 9, 1)\n", | |
"M: |\n", | |
"B: GC slice(0, 2, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GAGTTGACC\n", | |
"A: ACC slice(6, 9, 1)\n", | |
"M: | \n", | |
"B: GCG slice(0, 3, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GAGTTGACC\n", | |
"A: GACC slice(5, 9, 1)\n", | |
"M: | |\n", | |
"B: GCGC slice(0, 4, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GAGTTGACC\n", | |
"A: TGACC slice(4, 9, 1)\n", | |
"M: | \n", | |
"B: GCGCG slice(0, 5, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GAGTTGACC\n", | |
"A: TTGACC slice(3, 9, 1)\n", | |
"M: | \n", | |
"B: GCGCGA slice(0, 6, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GAGTTGACC\n", | |
"A: GTTGACC slice(2, 9, 1)\n", | |
"M: | |\n", | |
"B: GCGCGAC slice(0, 7, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 4\n", | |
"1: GAGTTGACC\n", | |
"A: AGTTGACC slice(1, 9, 1)\n", | |
"M: ||||\n", | |
"B: GCGCGACC slice(0, 8, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 3\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: | | | \n", | |
"B: GCGCGACCT slice(0, 9, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 0\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: \n", | |
"B: CGCGACCTA slice(1, 10, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: | | \n", | |
"B: GCGACCTAG slice(2, 11, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: | |\n", | |
"B: CGACCTAGC slice(3, 12, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 5\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: || | ||\n", | |
"B: GACCTAGCC slice(4, 13, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 4\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: | | ||\n", | |
"B: ACCTAGCCC slice(5, 14, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: | \n", | |
"B: CCTAGCCCG slice(6, 15, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 0\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: \n", | |
"B: CTAGCCCGT slice(7, 16, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: || \n", | |
"B: TAGCCCGTA slice(8, 17, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: | |\n", | |
"B: AGCCCGTAC slice(9, 18, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 3\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: | || \n", | |
"B: GCCCGTACT slice(10, 19, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: | \n", | |
"B: CCCGTACTG slice(11, 20, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGACC slice(0, 9, 1)\n", | |
"M: || \n", | |
"B: CCGTACTGT slice(12, 21, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 0\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGAC slice(0, 8, 1)\n", | |
"M: \n", | |
"B: CGTACTGT slice(13, 21, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 3\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTGA slice(0, 7, 1)\n", | |
"M: | || \n", | |
"B: GTACTGT slice(14, 21, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTTG slice(0, 6, 1)\n", | |
"M: | | \n", | |
"B: TACTGT slice(15, 21, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GAGTTGACC\n", | |
"A: GAGTT slice(0, 5, 1)\n", | |
"M: |\n", | |
"B: ACTGT slice(16, 21, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 2\n", | |
"1: GAGTTGACC\n", | |
"A: GAGT slice(0, 4, 1)\n", | |
"M: ||\n", | |
"B: CTGT slice(17, 21, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 0\n", | |
"1: GAGTTGACC\n", | |
"A: GAG slice(0, 3, 1)\n", | |
"M: \n", | |
"B: TGT slice(18, 21, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 1\n", | |
"1: GAGTTGACC\n", | |
"A: GA slice(0, 2, 1)\n", | |
"M: | \n", | |
"B: GT slice(19, 21, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n", | |
"Matches: 0\n", | |
"1: GAGTTGACC\n", | |
"A: G slice(0, 1, 1)\n", | |
"M: \n", | |
"B: T slice(20, 21, 1)\n", | |
"2: GCGCGACCTAGCCCGTACTGT\n", | |
"\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"(5, slice(0, 9, 1), slice(4, 13, 1))" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"best_alignment_visualized('GAGTTGACC', 'GCGCGACCTAGCCCGTACTGT')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(5, slice(0, 9, 1), slice(4, 13, 1))" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"best_alignment('GAGTTGACC', 'GCGCGACCTAGCCCGTACTGT')" | |
] | |
} | |
], | |
"metadata": { | |
"interpreter": { | |
"hash": "767d51c1340bd893661ea55ea3124f6de3c7a262a8b4abca0554b478b1e2ff90" | |
}, | |
"kernelspec": { | |
"display_name": "Python 3.8.10 64-bit", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Tuple | |
BEST_ALIGNMENT = Tuple[int, slice, slice] | |
def best_alignment_visualized(seq1: str, seq2: str, verbose: bool = True) -> BEST_ALIGNMENT: | |
best_match: BEST_ALIGNMENT = (0, slice(0), slice(0)) | |
if verbose: | |
print(f"A: {seq1}\nB: {seq2}\n") | |
for i in range(1, len(seq1)+1): | |
A = slice(len(seq1)-i, min(len(seq1), len(seq1)-i+len(seq2)), 1) | |
B = slice(0, min(len(seq2), i), 1) | |
match_string = "" | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
match_string += "|" | |
else: | |
match_string += " " | |
if verbose: | |
spacer = ' '*(len(seq1)-i+len(seq2)-1) | |
print(f"Matches: {match_count}") | |
print(f"1: {' '*(len(seq2)-1)}{seq1}") | |
print(f"A: {spacer}{seq1[A]} {A}") | |
print(f"M: {spacer}{match_string}") | |
print(f"B: {spacer}{seq2[B]} {B}") | |
print(f"2: {spacer}{seq2}\n") | |
if match_count > best_match[0]: | |
best_match = (match_count, A, B) | |
for i in range(1, len(seq2))[::-1]: | |
A = slice(0, min(len(seq1), i), 1) | |
B = slice(len(seq2)-i, min(len(seq2), len(seq2)-i+len(seq1)), 1) | |
match_string = "" | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
match_string += "|" | |
else: | |
match_string += " " | |
if verbose: | |
spacer = ' '*(len(seq2)-1) | |
print(f"Matches: {match_count}") | |
print(f"1: {' '*(len(seq2)-1)}{seq1}") | |
print(f"A: {spacer}{seq1[A]} {A}") | |
print(f"M: {spacer}{match_string}") | |
print(f"B: {spacer}{seq2[B]} {B}") | |
print(f"2: {' '*(i-1)}{seq2}\n") | |
if match_count > best_match[0]: | |
best_match = (match_count, A, B) | |
return best_match | |
def best_alignment(seq1: str, seq2: str) -> BEST_ALIGNMENT: | |
best_match: BEST_ALIGNMENT = (0, slice(0), slice(0)) | |
for i in range(1, len(seq1)+1): | |
A = slice(len(seq1)-i, min(len(seq1), len(seq1)-i+len(seq2)), 1) | |
B = slice(0, min(len(seq2), i), 1) | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
if match_count > best_match[0]: | |
best_match = (match_count, A, B) | |
for i in range(1, len(seq2))[::-1]: | |
A = slice(0, min(len(seq1), i), 1) | |
B = slice(len(seq2)-i, min(len(seq2), len(seq2)-i+len(seq1)), 1) | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
if match_count > best_match[0]: | |
best_match = (match_count, A, B) | |
return best_match |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
from typing import DefaultDict, Tuple | |
def count_alignment_visualized(seq1: str, seq2: str, verbose: bool = True) -> DefaultDict[int, int]: | |
alignment_counts: DefaultDict[int, int] = defaultdict(lambda: 0) | |
if verbose: | |
print(f"A: {seq1}\nB: {seq2}\n") | |
for i in range(1, len(seq1)+1): | |
A = slice(len(seq1)-i, min(len(seq1), len(seq1)-i+len(seq2)), 1) | |
B = slice(0, min(len(seq2), i), 1) | |
match_string = "" | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
match_string += "|" | |
else: | |
match_string += " " | |
if verbose: | |
spacer = ' '*(len(seq1)-i+len(seq2)-1) | |
print(f"Matches: {match_count}") | |
print(f"1: {' '*(len(seq2)-1)}{seq1}") | |
print(f"A: {spacer}{seq1[A]} {A}") | |
print(f"M: {spacer}{match_string}") | |
print(f"B: {spacer}{seq2[B]} {B}") | |
print(f"2: {spacer}{seq2}\n") | |
alignment_counts[match_count] += 1 | |
for i in range(1, len(seq2))[::-1]: | |
A = slice(0, min(len(seq1), i), 1) | |
B = slice(len(seq2)-i, min(len(seq2), len(seq2)-i+len(seq1)), 1) | |
match_string = "" | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
match_string += "|" | |
else: | |
match_string += " " | |
if verbose: | |
spacer = ' '*(len(seq2)-1) | |
print(f"Matches: {match_count}") | |
print(f"1: {' '*(len(seq2)-1)}{seq1}") | |
print(f"A: {spacer}{seq1[A]} {A}") | |
print(f"M: {spacer}{match_string}") | |
print(f"B: {spacer}{seq2[B]} {B}") | |
print(f"2: {' '*(i-1)}{seq2}\n") | |
alignment_counts[match_count] += 1 | |
return alignment_counts | |
def count_alignment(seq1: str, seq2: str) -> DefaultDict[int, int]: | |
alignment_counts: DefaultDict[int, int] = defaultdict(lambda: 0) | |
for i in range(1, len(seq1)+1): | |
A = slice(len(seq1)-i, min(len(seq1), len(seq1)-i+len(seq2)), 1) | |
B = slice(0, min(len(seq2), i), 1) | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
alignment_counts[match_count] += 1 | |
for i in range(1, len(seq2))[::-1]: | |
A = slice(0, min(len(seq1), i), 1) | |
B = slice(len(seq2)-i, min(len(seq2), len(seq2)-i+len(seq1)), 1) | |
match_count = 0 | |
for j in range(len(seq1[A])): | |
if seq1[A][j] == seq2[B][j]: | |
match_count += 1 | |
alignment_counts[match_count] += 1 | |
return alignment_counts |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment