Created
February 9, 2022 18:57
-
-
Save nimaa77/8a5f55b5bbbfeca9c95ea74c00909713 to your computer and use it in GitHub Desktop.
find motifs in DNA sequences
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# DNA sequences (upper-case A/C/G/T strings) that are scanned for approximate
# occurrences of a user-supplied motif.
input_strings = [
    "AATATCCTGTATAGATATACCTTGCCCTGCACCTAATTCTTCGAGCATCGATAGACGTGCAACTCGTTCATAGGCGCCTGAGGACAAGTCTTCAATCTGATCACTTCTCTCAGGCGTGTAAAAGCGCCTTTAGCTTGCAATGCCGAAGACCTAGGTCCTTAGATAGAAGTGGGGTAACCACACAATAGAAGTGAGTGTACTCTGAAATCGCTCCTTCCAGTAGGTGGCCGTGCATAAGGTTGGTGTCGACGCGAGTGAATTCGCATAAAAACCGTCAGCAATACTTAAGGCTAATATGAGCGATGTAGCAGGCGAGACTGATGTCGAACCGAGAGGCAGACTCTTAGTCTCTTAGTAACAGGAAGAAGCTAATTCAGCTCGGTCTGTAGAGCAAGGTCGC",
    "CACCAGCATTTTATAAGCATTTGCCTGCCTCCACTGAAACCCCTGTTTTCTCTGTAGGTCATAATATTACTAACATCAGTCAACTGCCGTTACCTGTCGCCACATATAGTGATGATTTGGGCAAGGGGCGGTGTTCGCTTATGTGGTGTATCGGCGCTTTCACAATTCAAGATGCGTGTGAACCTGCGCAACACTGTGACGAAGGACGACCGTGTCCTTTCCTGTGTCTCTGGGTAAGTTCGATCCGCCAGATAAAAGGTTACGTCAGAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGGGCGATAAGAGCATCCAGCTTTATATGCACTTTGACGGACTATACTATCTTCGGCGTTGACATGGAAGCCGAGATGTTCAGTCCAATTGTCCCTATG",
    "TGCGTCCTCGTATAGAATCCACTCCCGGGCGTAGCCTGATAATGTTATTGCACTATACTGCGATTACTCATCTCGGTATCAAACTAACCCAGACCGGCAACAGGTGAGTAGGACCGCCTGCGCGAGCGTCTGAGGTAAGGAATACTGGACAGCGGCCTGTTAGCAGCCGAGTCAATTGTGTCCTCGAGCTGCCCGTTCTGGAGCGCTTATCGCTTCTTACCATTAGAATCTTAGTCTCAACTACTTTGATCATATTAGTCTAGCTTACAACGCGCTATTGCGATGCTCACGCTGACAAAATTTCGAAAGTAGCCATTCCATGCGGCCCGGCGCTGAAGTATTGACCTGGGAGCTTGCAATGCGGATGACCTAGGTCCATAGATAGCACTGATTTCGGGTT",
    "CCAGTCGGTGGTACAGGTCTACGCGGCTCACGAGTGGGCCCAGGTCAGCCGAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGTGGGCCAGTTATTGTAGGCAACGAGACACCCAGCCTATATAGAACAACACGCATGTTATACCCCTACAAGTTCCTGTAGAAACCAGAGTCCTTGCTCATCTGACCTTACCAACGATCAGGGCGGTCCCCTCACTAGGGGCAAGTGAATCTAGGAGAAACGTCAGCAGCACGGCAGCTTAACCAGCATCCGCCCATCGTTAACGTGTTTCAGGGGAGTAAGAAACTAAATCCGGATCAGTGTGTGACGGCTAGCATAGATATTTCGCTCGCGCAGACGAGCTGGGTTGCGGGGGTCAGGTGTTTAGGCTTGCCTA",
    "ATGCGAATGATGGTCCACGTGGGGCGCTAGATCTGCGGGGGAAACTTCCAATTCCTGTTGTAACATTGGAAATGATATAGAGGAAGAGTTATATCACTCACTGAGCCGTATGAGGTGGTGAATGAACTAGTACTGGCTCGACCGCGGGGCGTGGATGACTCCTCTGGCACCCCAGATGTGGCCGTCAAATACCCAGACCTCTATGATTTTGAGGATTGGACAGAGTCTGTAACTTAGTCTTAGGCCCCTGCTGGGGCGCATTCTTAAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGGGACTTCCGACCTATCGTTTGAATTATAAGCACTGTTGGGCTTTAAACCTATAAGGTATGCAATCGCTCATATTGAAAAGAACGGAGCGATTCCCACTA",
    "CCCTCCTATTGCCTACGGCAAAGCACGACCAATCGTGAAGGGGCACGTTTACATTGCCCATCGGACTGGTGTTATTGACGGTCAGTTGATAAGCGTTGCATAGAGGCCCTGCCCAATCGTGTTTCGTGTCGGGGGTTGGTGAGGGTGCCTAGTGGGAATCTTTACTGGAGTCCCATTATCGAGCACCTATTGGTTGCGCCAAGCCTAGTTGGGTACGACAGGTTATCATATCTTACACGGCCTTGTGCGACACAAAAAGTGTAACTCGCGGTGAGCCAGAACAACGCTCCGGGCATCGGCCCAGCATTACGTAATTTGAAATTACTCGCCAATACTATGTAGATAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGTCGCTTGGCGACATCCAAATC",
    "ATAGTCACCTTGGCCGTATGGATATAATATGACACATGCATAGGTACAGTGGGGATATGCGATTCATCTGGGTTGTAGTCTCGAAGGTGCTAGTAACTTACGAAATATCCTATTTTGATCTTTGGGTAACATGGAGATCATCTACGGGCCCGCTTTTACCAAGTGAAGCGTGCTGTAGCACACTCACAAAGTCGCGCTAGCATGTCCAGGTGTAGATGAACGATAAGTCACCCTTGACTGTGGCAAACGAGGCCAGTGGTAACGTTTGCCACGGGAAACCTATGGGAGGACGACTAGTCGATTCTACTTAAATTAGGTTTGAATCTCCCTACCCACAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGCAGCGTGATCTCAAGTCCACCCCGTTATC",
    "TGGTAACCCTCCTATCTTTGTGACTAAACTTGTCTCTATAAGGAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGTAAACACTTGTAAGCGCAGACCCCCGTCCCGAAAACTCCTTTTTTCGGATAGCTCTAGTAAAGCAGTTCTTCAGTAGTTGGTGGTCACCATGCGTTGTACTAAATGTTAGCTATCACAAACCCACACTTACGATTACCGGGCAGTGGTGGTGGCCGACCAATCACTGCGCGTCCGTGTGCATTGCCCGTGATGGACGGCATCAACATGCACATCAGTATGATAGGGTTAGGCTGAATACTGTAAGTAATAAGCGTGAAATTGGTGGCTGTAGGCAATCTAAAGAGCTCTTACGTGTTACTTCTACGTATCAAAGAATAGACA",
    "GAGGAGTAGCTCTCAGCAAGTCCCCTTGCCTAATGCGTCGTCAACGTTCAGCGGCCATAAGAAGCTTCGATCCCATCTCTGAAGAAATTACCGCTCTCGTATGCCCTACAACGAGAAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGTGATTGTTATCTTGCGTTAAGTGTTGTCAGTAGAAGTGATTCATACCAATCTAATGAGGTCACACCCCCTAACAAGCTGATGTCGTAATTTTACACATTGGTGTCAAGCTGATCAAACTTATAAGATAGGTACGCAGTCATTGGCTGATAGCGCTTACAATGTGCGGAACGGAGGCTGGCTAGTGGCTGCTTCAGCCCCTACAGGCCGCAGCTGGTAAAATGCGTTCTCTAGTGACACAGTCAAACGCG",
    "GATATAACTCACACATACGTGTCTTTCAAGGACTCCTTCCGGATCGACCTGATGATGGAACCATCCCTTTACTATAAAGAAGACAGGGCGGTGGACCGGGAGTGGTGATGAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGCATAAGATTTGGTCCTGATTTGCGATATAGAGAAGTCAGTTAGGTCATTTGTTAAATACGATAGATCGGTCACGATCTCGTTGTAGCTGCTTCCTGAGGCCAAAATCGTAAAACTTTTTACCGGGTCGCGAGCCCGGCCAGAACCGAAAGAGTCTTAGTAGCTCACTCCCCCTGAATTCACACATCCAATGCCATATTGCCAGGACGGTACAAGTTCTGTGGCTGACCTGTATCCCTGACGGTAGAGAATAATAG",
    "CGGATTAGGTTCCGTCCCCAAGGAGGTGTCAACCGAGGGCGGCTATCCTTTTAACGAGCCACTGACCCGTGAGAATCCGTCGGGCCATATCCACGTGTCCGATGTTGCTCCCGGTTTATTCTCGGAACCGGAACGCAATCGTAGCGTAGACAGCTTGCAATGCCGATGACCTAGGTCGTTAGATAGTCATATGCGTCAAGTTAAACGATGGGACCTTACGTGCGGAATCTTCTCGGCTCACACGGAGAGACGGTCACTTCGGGCGTAAGTATTGGCCTCCTTTTCCACCTTCCGAACGTCATATATGTTCACACCAATATATCGAATCTCATAAGCAAACCCAGGCGCCGCTAGATGTAGTCCCACATCACTACCTGGATCCAGGCCCGAGAGTGACTCC",
    "TAAAGCCTACCTGAATCTCTATAAGTGGTTGAAAAGAAGTCATTTCCGCCTCAAAAGGCATCTCGAATTATCTCCGGCACGAAAACTACGTCACAGTCAGGTCTTCACATTCCAGTTCGCATAACCAGACTGATTCGCAGACTAACAAACTACCACTTAGAAAGCGTCAAAGTTTCATCTCCGGCTCGGTGATAGACTGGGGCGGTGCGCGACTTTTACGCAGTCGGGGGTTCTATCAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGGTGCAGGGTCCGGGGGATCGTGCTTTCTCCTTGGCTGCCGCTCATGATGATGTGATAAAGTGGAATAACTAGTATGTTTGCAATTTCGACTTGGTTGTCCAACTGTTTGCAACTGATTAAGAGTACGT",
    "TATACCTGAGAGATAAATCCCGATCCCGAGTAACGAAGGATACATTGGTTACTCCCAATATTACGGCTGAGGAGGTGAATTGCTTTGCCATGCTCGCGACAAACAGTGTTATACTGATACATCTGTGCCAGTTGATTTCGGTCATATAACAAGGGAGTTGTAACCACGGTGAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGGATTTGCTGTAGAGCGCGCTAATAGACGCTGCCGACTTGCAGTTCAGGCGCCCAGAAATTATTTGGACGGTTATAAGAACAAACATGTACGCGCCGTGGCGAGCTCGTTGTTAGTGGGCTGGTCGCGTCCGGTTTAACTTAAAGACGGGAACGACGATGGGAAGAATAGTCTTCATCTACACGTCAACTTCACG",
    "CGCGGACAGACCCATGGGTATTTATCGGCCCGAAGCTTGCAATGCCGATGAGCTAGGTCCTTAGATAGTAGTAATGTCAAGAAGCATTGAAAACAGACTAGCGAAAGTTTAGCCGGCGGAGGGGCGCGCGATACCTCCCCTTCCATCGCCATTCCATCCAACTGGTTGACTGCTTTGCGGCACCACGCTACAGTATATTCTGGGTCGAGCTTCCGCGGCTTTCCGCTTGAACACTAAGCTCCTCACGAGAGTCGCGATTAAAGCCTTAGCACGTCCCTGATCCTTTACGCTATTTGGCAGACAGAATTTGTGTACCTTCCAGTTCTACCCAACGTGGCAAAAGCATAAGGAAGTTATTGATGAGTGTTGGATCATGGACCACCGGTTAGCCAGCGAGCTT",
    "ACGTCCGCATTGTTTAGTGCCAACCCTTCTGGTAGACCGGTTGGACGGTTGGCATAAGCCTCCAGTGTAATCGTAACGTCGATGGGTATAGTAGGCCTTAATTCCTGTTAGTAACCCACCTGGGTGAACGGACTAAATCATTTGAGCGGCACGTTGCTCCACGCAAAACGGCGTATATACAGGTAATTCAGCCGTAATTCAGGCGGGTATGGGTTATATAGGTAGTTATGGAGCGTCTTAATACCGTACTATTGTCCCTGTGAGTACTAGCTCATAAGGATCCTTACACAGGTCTCTTACCTGCCAAGGTAAATAGCGTACGGCACCCCCCTACACCTTCTACATGTAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGGATCATAATTACCCATGC",
    "TATAGTGTATTAATTGTCAGCTTAGTAATGAGGCTTCGCGGGGAACGAAGACAATATGCGACAAAATGCCTATCCGTGCCTTTCGCCGGATTGATTGGGTCACTTTCGACGCACGATGACCTTATGAATAATGAGTTCCCGACGGACACGGGCTTCTTTGCAGGCTACGAGACTTGGCCCTGGCGAACCGTAGTAATCTGCCGACGGGGCTTCGATAGTGCCAAATCGGTTTCATCCTGGGTCATAACGACTACATCGCTAGTCTCTCTACCCGCAGTACGAAAGAAGAGAACGTCGGGAGCTTGGTGACGGTTAGCTTGCAATGCCGATGACCTAGGTCGTTAGAAAGCTCTGCAGCCTGAAGGTACAGCCCCTCAGTACATTTTTAACAGCGAGATAG",
    "CCAACTAACTGACGCGGGTGCCTTCAGAGGTCCATTGCGTATCCTAGACAATTAGCTTGCAATGCCGATGACCAAGGTCCTTAGATAGGCTTGTGTGCCCTCGAACCAATATTCTCGAGTTTTTCCCCCGCGATCACTGTGCGGGTGTGAATGTAGAAAGAATAGAACTTATTCCCTCAGAGGACTAGACTTCTTCCGAATATGGCCCACGTGCAGCTAACCGCGCCCTGCTTGGGGCGCGCCCCACAGGTTATCTCGAAAGGGGGCTTAAGTGCCCTCTCGCCGATCCCCTTGCCTCATATGCAAGAAATCGACACCTTACAATTATACAACCGCGCGGTGCTTTGATGCGCCACCACTGGTTAGTGCGGGTTGAAGCAATCTTCGGTTTTTCCGCCTG",
    "GACGTGTAGTCTTCATTTCTCCATGCATCCTGTGATAAAGCTTGCAAAGCCGAAGACCTAGGTCCTTAGATAGCAACAACGCGCGCTGAACAACAAGGGTAAAGCTTTTCCTACGGTAGGATAGGACTAGTTATAATTAAGCAAGAATTAGGACGGGGCCTCATCCTTCTTTGTGTATAAATTTGCCATAGTATTGAGTCTAATAGAGTAGTGAGGTGAGGACGTTATAGCGAAGCCATACCTCATGCGTGTCACTTTCGTCTCTCCGTAGTTTTAAGATTTGTGAGTTAAGCGCTATATCATGAACAGAGCTTAGGTCATAGACTACCTCGTTGGGACCCCTATCTGCTATTTCGTCTCCTAGGTTGCGCAGGGGATGCTAGTGTTCTTCCCCCTGCCG",
    "CCGTTCTCAGGAACCTGATCGCGCTCAGGGCCTTAAACTGTGAGTTTATAGTAGACACATAACTGCTAGCCGGAAGTTAAACGCGTTAAACAAGTGATCCGTCTTCTTACTTAAATAGCCGGTGAATGTTGGAAGGACGTAAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGGCCCTATCTAACGGATACGAACTAAGACCAACAAGTATTGAAACCGGTGAGCTGGTTGTCTGCAGGTACCTATGGCACTCTACCGACTAGTCACGCACGCAATGAGCATCGTCCATGTTTGAACGTCGACTGTCGTGCTATAATTACAGTCTGACACTATATTAACTGCTCCCACGTACGCTAAAATCCATCCGTACTGCAATAAATAAGACTGTAGTGGATGA",
    "CTTTCGACAGATATACTTGGATGAGGCGGAGGCAGACTGGACACGGTTAGCAGCCAAGATCCGAAGACGTGCCATGCCCACTCCACCTATTCGTATACCTACGACACCATGGCCCAGCCTTCGAAGCAGGATGTCTCGGTAGGGACCACCAGTTCTCGCTCGCTGGCCCTGTCGCGAGGAATACTCGAGTGGTGGGCAATCAGTGGTACGGCGGTCGGCCGTTACCTCGATAATTTCACTGGTATTCCGGCTTGGGTCGATGCTTGCGGAACAAGCAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGGGGCACGCCTATTCCTGATTAGGTTAAGTGCACGGTGCTAGCAGAGGGACAAGTCGTTTAGCCAAGGTAATGCTGGTCTTTCGTGCGTT",
    "TCGCTTTTCTAAATCCGCTAGTGAAGGTTCTCCCTAAACAGTAAATCGACCCAATAGGTTCAAACAGGTAGGTGACGATCCCTATGCAAAATACTCCGAGGCAAGCCGATTATTAGGCTCAAGCGCGTAGGTAGCCATATCCCGGAGGACCATCGAACATCAGGTACGGGAACTGGATTGTGATTTTTAGTGGCCGGTTTGTTCATTGCCACGTTAACAATCGATCCAATTGTTCGAGATATGGCATGTCGCTAGGCACTGACCGTATATGCTAGTAATGCAAGCGTTTAATGCCTTCCATCGTTATAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGTTGTCATCGCGAGTTAAGTTGACGGAAGAGATTAGATCTCCTGAGGAGCCTTAAGCGT",
    "ACTTGAGACCAGATGCATCAACAATACAGCCGATGTCAACTAAAAACAGCGTTACTTGCTCCAATTGTGTTTGTGCTATTTGGGATTGACCCAACCAGTTGGTGATACAAAACGGAATCCTGAGAAATCGTATTGGGTCCAACCTGAGAAGTCCCAACACGTATTAGTTCTGGAAAGGGAATGCCTTGTTTTTAGCACGATAACAGGCACGCTCTGCAGGCTCGCCAAGACACCGATAATTGAGCTGGGCACCAACGAGGCGGAACTTCGTGCGAGCGAAAGATGAGTGTTTGCGGGTATGGGGGAATTATTAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGAAATTTGTAGCGTGGCGGGCTGGGGGGCGGTTATACCAGAGTCGTGGGGCTGC",
    "GTGCAACTGGTGCATCTCAGCCTGCAGCGACTGATTGTTAAAGAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGAACGTGCACGACACCTGACGTTGGTTCGGGTGATTTGGGCCAACGCGGTGTCGATGCTCTTTGCAAAGCGCCCAGTGGCGCCCACGAGTCGAACGTATGTAATTCTACCGATGCGCGACCGAAGCTCTAATCTGAAACCTACTATCCTAATAGGGCATGTTAACTAGTTACGCCCCGTAATCGGAAGTGGGCCACCCATGATATACGGTCCCGTGAAGAACGTCTAGGTATTTACTAGGTACCAGTGCAGTGAACATCTCGCTTTGCGCGAGAGAGGTCGCAGGCCGACACCAATCAATTTTATAACATTCCCCAGCTGGGG",
    "ACACTTCGAACCGTTGCCGCGATAAACACAGCTTCACTTAATTCACGTGCACTCACGTTCAAATGCACCGGAACTACAACTCCTTTTTTAACACCACCATGCTTGGGAGGTCCAGTTCACATTAACCGTTCAGGGGAAAACTGCACCGATTAAACTCAGTAACTTAGCATGCGCTCTAGTGGAGTAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGTGAAGAACCGGGGCTAGGCATCTAATCGGTTCTACGGTGCGTCTGGATGATTGTTAGAGAGGGACCTCCTACGGACTCTTCTTGCAGGTTACCGCTAACCTCACCCGTGCCCTCCAGGCTTGAAGCCGTCGCATGTAGGAGGACGAGCCAGATGAATTATACTTTACTATCTGACCTAGT",
    "AGACAGGGCACTAATAGGGTGAAATGCGACTCATCAGGTAGACGCCCGATACCGTCCGTCCTATAGCGATCCACAAACCCGGCCCATGCGAGTCGAGATTATCGCAATATTATCTTTCGAGCTAGGAGGTGAAGGTTAGCCGGATCCCGACATTGAATATGCTAGACTAACCAAGCAAACCAGACTGCAACGACTCTCCTTCGTCGCTTAGTAGTGAATATTGGTTTATCCGGTTTTTCCCCGGAGATAATCGCCGTTACATCTGCAACGTGACCCCCCCCTATTTCACCCAGTGTACCACCTATTGAGCGCGTGGACTGCCCCAAAATTAGGGGCTTGGCGGCAAGCTTGGAATGCCGATGACCTAGGTCCTTAGATAGCACTGAAGCCCCCTGTTTGT",
    "CGGCACATTATTCATTCACAGGTGAGCATGGATGCCACCTCTAGAAAGTGTACCATGATTTTCAACCGTCGCGGTACTTTGATTGAACATCTGCGGGTTGTGGATCCAACATAGTACTTTGCGGGTCGAAACCATCGTCACAGGCATCACCTATAAATCACGCCTTGCAGACTCCCTGACAGAAAAACTCCTATATCGAATGATGCGCGTAAGTTTCTCCCCCATACTCTGGGGTGGATGATAACGTATACACACGAGCTAGCAATGCCGATGACCTAGGTCCTTAGATAGCCCTTAAACGCAAAAACCAACACAGACTTAGGCCGTACTTATACATACCCCCCAATGGCAATGGCATGTGTCGTCCCACTGAGCTGTTAGGTGGACTGGTGACTCAGTT",
    "GTAATTGGAACTAGGGAGGTTAGACTGGATTTTCTCTGGTATTCTCGTCAAAACGGACTCACGCGCTAGTCAGGCCAACACTGAACCGATGAGTAAATGGAGCCTCTCCACTGTCTTACCTGGGACAGTTTCATCCTGCGTGTTGGCGTGCTACGACTCGGGACGGGCGAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGTCGTCCTGTGCAGGGTTTTCGCGGTGATATGCGGCGTCTGATCTAGCGGGGTAGACCAGTCTGACTCTTGAGATAGGCGCCAGTCAGGTAATGCGGTAACGGGGCTTGTAGCAATAAGAATGACCGTTCTCCGTCAAGACTTCTGTTTTATCGGCCTTTACCTCACAGATGCTGCAACAGAGGCGGTGCTAGGACT",
    "AGTGGAAGTCATCCAACTATTACTGGGTGATCGTCAAGAACGCTTACTGAGTCAACGCTGGGAGTGTAGTACTCGGTCAGTCTGGCATTGTCACTCGACGGCGTTGATGAGGGTATTTTACTCTTACATACCAGTGCTATAACGTGTAGCTCCACTATCCTCAGTTCGGAGAAGTTGACTCGTTACCTCTGTCAAGCATCAGTGCGGACCGTCGGTGGAAATATTGCGCTCTGCCCGTGCGCATGAGGACAGGCCCGCTACCCAAAAGTTTATGACTCCTCTCGTTACTCTCAGAGCGCATTCGGTATCCTCGTTCTAAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGGATGAATTTCTATTGTCCGCTCCCATATAGCGTAAGCTGGGCTCGTT",
    "ACGAAGCCCGAGACCCGACTCAGAACTTCTACTATGATAGGCGGTCAGCATGGAATGCCGATGACCTAGGTCCATAGATAGCGTGTGGTCCAGGAGGATGCAGTCCTAGGTGCGTCGACTGAAATTGGGAATTCTTCATCATCTGCAATGAGCATAATCGGCGCGCATACATTGCGATCGCCTTCAGAGTACCAAGAGGTACGTGTTAAATGGTGCTCCTTACGCTGCGGCTGTATCGCCGAGCGTACCGTCGCAGTAGGGGCGCATACAAGGGGGAATTGAGTACCACCGACAACGGCATACTACCTCAATTTTTTATGAATTGGCCCTACGGTGCTGCATCACAGTGTACACAGGATAGAGCGTGGACATAACAGTGACCCAAGTCAGAACATCCCGG",
    "TACAACCATTATAATACACTATAGTGAGGCTACTCGATGTCGAGTAGGTTGTGATGAGTTTCCCTTTGGGTAATTGATCACCAAAAACCGGAACGTTCCAGCTCAGTCAGGCATAGGACCCAAGTGTTTAGACGAAGTCACTTTCCCCCAAGCAAGAGCGAGCGGAATAGTAACCTTGGAGGTTGCAATGCCGATGACCTAGGTCCTTAGATAGGGGCGCGCTCTCGCTCGCTGTGCGACTTCGGACTTAACGGATTGGTGGTCGGTGGGGCAGGTCGGCCCCGTTTAGGCATTTTGAGAGTGTGGTGTCGGGTGACGTGCATACACATAGCGGCCCCACTGAAGACCTACTATGTCATAACAAGAACAGACTATTCGTTGAGAGGCTAATTCTTTTATT",
    "TTAATCGAGAGGTTTGAGAACCTGGCTGAAGCATGCAAAGCCGATGACCTAGGTCCTTAGATAGCGATTTAAGTGACCCTACTCCAGACTCTAAAGAGCCTTTGTCAGATACAGTATTTCTCAGTAGCCATTACTTTCCCACCGGAAGGAGTTTGGGCGACCTGTACCCGGGAAAAGTGAGTATTGATGGTCCGTCGGGTATAGCTGCAGTCGGCATAGGCAAGCAACGTACTCGCAAGAGCAGAGCATGAAGAGAATGTGTGCATGGTACGCCCCTCTATCTAAGTACATGGGCCGTTAACGAGACTCCCGACGGACTTCACTACAATTTTCGAAGGGTGCATGACGTATAAACGGGCTGCATACGGAGGCCGAGAGTATCACACAAACGTCCTCATCC",
    "TTGGCCCCCGAGCGATACGCCCCCCATGGTTTCTGGGTCCGAATAGTCAAAGATTTAGTGCGACAGCAAAGCTGCCTTCTAATCTGGGGATTGATGGTGCTATCGTAGGAAAGGAAAAAAATCAATGGGGAGTTGGAATTTTATCAGACTAGTCGTGCACCCTATGCGGCGCCCCCCAAGTGGGACATGTGAGCCGCCAAGAAGAGTGTCAGCCAAGCCTGTGTGCATGTATCCAGTTTATAGCAATTGCCCTAGAGGGCACGGATGGTCAAGTCCAGAGGGACATGTCTATCCTCCTCTGCTTCAGCTTGCAATGCCGATGAGGTAGGTCCTTAGATAGTAGGTTTTTAATAACCGAGTAGGCGGCGCACAAATACACTGCCAGCAAGCGAGTAGTCAT",
    "ATAGGGAACCGCACGTAACAGACGAGACCCAACACCGGTCGGCCGTCCTGTGATCAAAGCACGGATATCCCTTTCGCGGAATGAATATTCGTAGACTTAAACTTCATGGGTTGTTTCCGTATGCCACCTGACGATCTACTTGAGTTGTTACAGCGGATTGATAATTGGTAAAGCCATGTAACTGGGCTGGTGGTGCTGTAACTAGTGGAAGACCAATATGTAAGCACAATTACTACAATCATAATCAGTGAATAGTGTTGTGAGCTCCGTAGCACCAGCATGCAATGCCGATGACGTAGGTCCTTAGATAGAGCCATCCATACTAAGGGGTGAAGAGATAAAATCACTTTTAAACAACACTACAGGAGCTAAGCTACAGTCGAACAGGCTTGCCCGAAGT",
    "GGCAACCCGAACATTGGATCTGGCCGATAGAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGGACAGTTCAATCCCTCCTCCTATACATTGCGGGACCAGGAAACTTTGCGATGGATGCTTTTTCCTTTGGGGTGTATCGGTACGGTGTCCAATAATCATTCGCTTCTCTCAGAGTTTTGAGGTACTTGCCCGCGTAGATCATCAGAGGACTTCCATATGCGGAGTCGTTGGCATTACTTCGATTTAATTCTCTAATTGAAGTGTAACAATGCTAGAGCGGCACCGACCTATTGAAAAGGATTATGGCTACATCAATCGGCACCAGCCGGGGTATAATTTTGAGAAGCTGTAGTGCTTGGCGGTCGCTATCACTAGAGAAAAGACGGATGTTGGATC",
    "TCCTCCGTGACGGCCCGCCCTGATCACGGCTTGCCTAGCGTGGCTTCGTAACGATTATGCATGCGATGCCGGAGGCGCCTTGCTTCCGAGTGCCGTAAGCTACTCTCTCGTGTAACTTTTAACGTCACTTGATCCGCTGCAGTAAAAAAAAAGGCTACTAGACGGAGGAATACATACACTCTTCCAGAATAACGTACGTTCTCCAGATCTCAGGTGCGACTTCGAGTATAAATCGAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGGCTACCCAGCGCACACATATCAGAGGAGCTAGCGCACTCATGATTTCTGAATTTAGGTTTCGGCGACGATGATCTTAGAGAACTTCCTATGAAATACTTGCCTAGCGAGCCATCGTCTTATAGCCGTTTC",
    "GTAAATGACGAAACCTTTGACTCGCGGCTGACTTGTAATGCCACAGAAACGCTGAAGGATCAGAAGAGGCTCCCCTCACCGTCGAAAGCTCCTACTCTGCCGAATAGCTATTAGTTTTACCGACTCTCTGAGCGTGTGTTAAAGCACCCAGCTATATACAGGTGCACGCGCACTGAAAATAGACGCTAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGCCGATGTCAGCTCGCCAGCTACCGTGCGGTCAGCCTTAGGTCTAGGAACTCTGAATGGACAAGACTTGCAATTGGAGGGTCAAGCGACGTAGACCGGGGCGTGCTCTGATAACGTGGCTCATAGCCGTTGACTGTTGATACCTCGTTTTAGGCGCAGGTTAATGCTCAAACATTACAG",
    "TTGTTTTCTATACTGCGAAGGTAGTTCAACTCTTTCTTAACGCGGATTATCTCAGTTTTAACATTAAAAGGATCCACAAAATGAGCTCAAGATAAGTGTTAAAATGGTTTGTTATAAAACCATAGCTTGCAATGGCGATGACCTAGGACCTTAGATAGTGCTCTTCCCTTCCCGGACTAGTATCGTGCGCGCTAGCAGACGTAGCTGGTCCCTCTGACACTAGTCCCTGGTAGAGAAAAATCCTGGAGGCCCACGGATCACTTCCAAAAGATCCTGGAAAATAGTAATTGGCTCGGTATGTTCCATAAAGGAGATGAGCGCCGAGCAAACAGAGAATCTATATGACCGGCCGGGTCATTCCATACCTGTAATTTATCGATGGTAGCGGTCCTGATAGGTT",
    "ACAAAAAACCGTATTGTAGGCGGAATGGATGGACGATCAGAAGCCCTGTGAATTGTGCTGGTAAAGTTCCATGATTGACGCACGGCCATGTACTGGAGGGTGGTTAAGCTACACAAAATTGATTCTTGCCCTCTCGTACTTCGTCGAACTAATGGGCGACTACATTCATGGAGTCCACCCCAATGACATGCGGGGTCATCTTGTAAATCGCTGTCGGTCCTAAATGTGCACATGACGTCTTCCTGTCGTGCACCCAGACTCCCAGTGGATTAATGGCGAAGAGTGTGACTCTCAAACCACTGTGTCCGTGCGTCTCTAGAAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGAATTACCCGCTCCCTAGGGTCTTAGAGGCTGCGGAAACGCTTAAC",
    "GCATTCCCTGGTCCGACAACTGGCACGTATTAAGTTTGCGATACCCCCCAGAAATTAGCCGTTGCGCAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGCGGCTCCTCCGCGGCTTAACTCTTAAGAGTTTGAACGACACACTTGTCGGTACCGCAAATCTAGCTCGCTAAATTGCGGTGGTGCTCCGACCATGCGAGCTGTCGAGACATAGTCTGCTCCATATAGATTAATAGCCTATAACTGCAATGTCCAGGTCGTAACCCGCTACTAACGCTGGCCTCAATGCACTGACGGTGTGACGGGAAGCAGGCCCTCTGCTACGGGTGATGTCCGTCGCAGGATGCAATCCGAGCGTCGGATATAGAGATGACAAGGAAGTAAACGTTGCGACCGCAA",
    "TATAAGCAGGCGTCTGTGCAAATCAAGGAACATATGGTCGCAAGAGCTACCGTGAGTCCCTTCTGTCCAATAAAATAGCGCGATCTGTGTCGCAACAACATTGTGTGTGAGCATTGGAGACGTAATAAATCCGGCCCTAAGATCTCGTGCCGGGGACCGCGGTGTTAAGGTTTCTGACGCAGTCGTGGTAGGGCCCTCCTACGGTAGTTGATTAGCTTGCAATGCCGATGACCAAGGTCCTTAGATAGCTTTACGATTTTGGGACCGCGCTGTGAACTCGGTTTATATCGTCCGTTCTTACGGAGCCGCAATCCCATAGTTGCTAGAACTCTTTAGACCCACCAGGCGCCCCCCTTTCAGGGTTAACCAACTGATAGTTCTACACTTCGCAAATACGCCG",
    "TAGTTCAAGTACCCTGTGAAACTGTGTATTTACACAGATTTCTACTTCAAAGAGTGCAGTATCAAGGTTTACCGTGGGGATACCCGATGGAATCAGCTTTTGGTTTCTTCGCTCCCGGGGCTAGCCTGTTGGGTTTTACGCGTCTCTTTTCGTGTGCTGAAAAGCGAGGGTAAGGGGTGAGGAAGTGGTTGTTGGTCTCGCAGACTGAGGACTACACATGCCGATCCAGACATGCCATCGTCGACATTCCTACTACTGGTGCCAGATCGGATGCTCTACCCTGTACCGTCCATACTCTGCGTTGACACTACCTCTTCTATGCGTCTATAGTAGCTTGCAATGCCGATGACCTAGGTCCTTAGAAAGCTTGCTGTTCGACCGTACGATTTCGTGGATTGTT",
    "GTATTTGTAAACCTTTGATCAGGTTCTCCGGTTCCATCCGGGAAAGCCCGCAACGGTACAAGCGCTGGAAAGTTTGCAATGAAACATCATCGGGGCCCAAAGTCGAGGTCGCTATAGCAAATCGTCGTGCGGCTCTAGCTAACTAGTGAGATTTAGGCCGGGGTCGTAACCTGCCATCAGAATTCTGATATTCACGGATTGCTGACGCTCAGTGCCGCCCCAAAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGCTAGGTCTGACAATCAAAAGACATGTGAATGACACCGGTAGCAAAAGAAAACATTAACTTTTCAATCGGCTCGATAACTGGATCGCTACGCATAATTTTAAAGCCGGACAGGTGCCATACGAAGATCGTAATTCTATGCGTA",
    "GCGGACTCATCGCACCATGTATTCAGAACAGTGGAGGACGAATGGGTAGTGTCCCGTATTGCTTGCGGTCCTTGTCACGACCCGGCTGTAGCTCTGGCACCTGCACTCGCCACGCCTTGATGACCATATCTTAACGATATTTGTACGAGAACCGTCGAGTGACGAACTCAATTTTTGGTAATGATTAGGACGACGCCAACCCGGTACTCAGGGTCTAGTGTTCTAGGATCATATATACGGGCTTCCACTTCCATGGTCAGAACCCTAGAAACATATCGTCCAACATATCCTAGGAAGGGGAAATTTATGATGAACGCAGTTATCATTGACGTGGGAAGGAGCTTGGAATGCCGATGACCTAGGTCGTTAGATAGGCACACCAGAATTTCCGCGGGGGGAA",
    "TGTGAGCATGCAATGCCGATGACGAAGGTCCTTAGATAGAGATGCGACCCCTACCTTAGGAATGACTCGTTTAGGTGCTTTTCCGATTATTGTTATGTGCAAATGTGGCGCCGTGATACCACCAACACGGAGCTGGGCGGCCTAACGCTTTGGCGTTATGGACCGATGTAAAACTCATGAGGCAACGGTAAGAAGTACGTTGCGGTAACATTTTAAGGGTTTCCCAACCTAGGGCCCTTAAATGGTGGGATACATCTCTGTCGCGTGTGGTAAACCTCAAGTCCCTAGAGCGACCACATATCTGACGGTGCTGTAAGTACATCCGCGTGATTAAGTTCGATCAAAAACAACGGACTTGGTAACCACGAATAGATACACTAAACGGATATTACGACCATGA",
    "TGGACTACCGGATTAGTCCAGTCTCAGATCTTATCGTGCATTAGGCCTGCACATGAGTTTGTACATATAGGATAAGTACCGCGAGCTGTTTACACTCTGTGACGCTCAGAGCGAGTGACGAGACCTGACCGGGTCAGGGTCTTAGCCCTAGGGCCCAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGATCAAGTAAGGGTCACTGCTCGTGGCCATTTTCCCCATACAGACCGCTATATATGGCGCGTGTAAGGGTCTTTTAATAAGAGTGGTCAGCGTCCGTTAAAAACTGTGCCAACGTCAGTGCCCTATCGAGTCTTATTACACGTATGACTGCCTGATGCGACCTGAACAACGTCGTTTGTGTGTTCGCCCAAGGAGCAGTTCTCCTTGCAG",
    "AGAGATGAATTTCAAATAGAAAATACAGAGCATGCCCAGGCGGATCTAGCCCTGAATAAAGAATACAATATCAATCTTATACGCGGAACTGTTTATTTTTACGTCGCAGTCTGAATAGGCGAAAATCTGTTTTCTGGCTTTAATGCCAGACCTTCTAAATCAGGGCATTAAAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGGAACAGAATGGTGTACGGCACAACAACGTCCAACCCCATTCAGGCTCAATTGTGATACTCAGGTATGAAGTACTTACACATCGCACCAGTTTACGTGTCGAAACATGTTTTCCGCAAGAGTACCATCCGCTTAGGATGCGCGGGATTCGTCCGCTTAGGCTCGGAGTGGTGAACAAACCGATCGGCCATACGAG",
    "AACAGTACTATGGGGCCGGATACTATAACGGCGAATGAATTGAAAGTAGGACGTTAAGTCGCACAGTAGTCACACAAGATCCGGGGGAAAATATTCAGAGCCTAAAGGTGAAACGCCTGGTTGACGGCGTTCGAGCCGGTTGGTTGCTATGGCCCCGACGATATTAAACATAGATGCTTCAGGCAAGTCGTAATTACTAGTAACTCAAGAACGGATACTGAGGTTTACACGCCGCCAGCTGTCTCAGCAGTACCCGCGGGATCCTGCTCCGGGATTTTCGGCTCGATCATAATGAGGCCGGTCCAGTGTCATTCGCCTTCCGGGTACTCCTTCTGTACCATCAGGGGACATGAAGCTTGCAATGCGGATGACCTAGGTCCTTAGATAGTGATACGGCCGG",
    "CACGCCGCGCTGGGATCAAAGGGGCCTTCAGTAAGTAGTACCTGACATTCCCCTTTTACTACGAAGCTTGCAATGCCGATGACCTAGGTCCTTAGATAGGGTCAATTCTATGCTGCTGGACTTAACATTGAGTATGATGTTTTAATCAAACCGCAGCGTCTGTTCCCAGGAGGCGCTTGCTGGCTAGGTTCACGAAGAACGGGAATCGTTCGGAGCTCTGAGGATTCCTTCGTGCGCCTCTTGTTAGAGCTATCGGAATTAAATTTAGCGGGTCTCTGCCAACCCTTATCATAACAAGTAACAAGGCCACTTGAATCCAGCACATTTGAATTGATTGGATGTTGCGATAACATTCTCATTTGAGTGCAGGGACAGTCACGGCGTGTCCTGGGCCGCGAGT",
    "TATTCGCTACTCGAGAGTAAGCTTCAGTTTAGCACAGCACACCAGCGATGTGATTCCAAGCGCGAATTTCTAGCTTCGAGTTGTCCCCTGTAACGAGTGTTGAGGTACGTCTGTATTAACACATCTGGCGTTGCGCGCCATGGACATGAAAGCTTGCAATGCCGATGACGTAGGTCGTTAGATAGTGAACTACCATGCGTACAAGCACATGACTAAAACTCTATGTGTATTATCGTGTAGCCGCTGGGTGCTCGTCGTCAGCGTTACGTTATCGACACCTGTGAAAGGCAAACGAGTTGCGGGACCTCGCCTAGTACAAAATCAATGGCCTTGTGGGGAAGCGCAGGCCCAAACATCCCTCAACGGCCATAGTGCGGCTTTGTTTCATATGAGCACTCTG",
    "GATACAGTCGCGTACATTGGGTTTATAAGCCAGCTGGAGTGGTCTGAAGATCTAGTTAAAAAGGGATCGACAGCTTGCAATGCCGATGAGCTAGGTCCTTAGAAAGGTTGTGTCAGGACACTGTATAATTAGAGCTGAGGCGCAGGAGTTATTTAGATATATGTGACGCCGGTGACGCCGCCACAGATTCAATTAAGAAGCTTCGGTGTCGGTAGAATGTCTGCCCATTATCGGCCGAGCGCCTCGGAGCTCCATTTATCCGGACTTCCTGTGTGCCTGCTGGATGCCACCAGTCAGAGCTGGGTCGATGTTTTCCGTAACGGTCCAATGTTAAAGCTTACCTGCCTGGCCGAGTGTCAAAGACGTAAAAAGCAGAGATTGAATACGCTTCGCTTTTCCT",
]
def edit_distance(string1, string2):
    """Return the number of positions at which two strings differ.

    Despite the name, this is a position-by-position mismatch count
    (Hamming distance): insertions and deletions are not considered, so
    both strings must have the same length.

    Args:
        string1: First string.
        string2: Second string, the same length as ``string1``.

    Returns:
        The count of indices ``i`` where ``string1[i] != string2[i]``.

    Raises:
        ValueError: If the two strings differ in length.
    """
    if len(string1) != len(string2):
        # ValueError is a subclass of Exception, so existing
        # ``except Exception`` handlers keep working.
        raise ValueError(
            "Strings must be the same length "
            "(string 1: {}, string 2: {})".format(len(string1), len(string2))
        )
    # Each mismatching pair contributes True (== 1) to the sum.
    return sum(a != b for a, b in zip(string1, string2))
# Largest number of mismatching positions a window may have and still be
# reported as an occurrence of the motif.
maximum_distance = 4
def _best_motif_match(sequence, motif, max_distance):
    """Return ``(index, distance, window)`` for the closest window, or ``None``.

    Every window of ``len(motif)`` characters in ``sequence`` is compared
    with ``motif`` using ``edit_distance``. Among windows whose distance is
    at most ``max_distance``, the one with the smallest distance wins; on a
    tie the left-most window is kept. ``index`` is 0-based.
    """
    width = len(motif)
    best = None
    # "+ 1" so the final window (ending on the last character) is included.
    # If the motif is longer than the sequence the range is empty.
    for start in range(len(sequence) - width + 1):
        window = sequence[start:start + width]
        distance = edit_distance(motif, window)
        if distance > max_distance:
            continue
        if best is None or distance < best[1]:
            best = (start, distance, window)
            if distance == 0:
                # An exact match cannot be improved upon.
                break
    return best


def find_motifs_from_input(motif, strings=None, max_distance=None):
    """Print, for each sequence, the closest approximate occurrence of ``motif``.

    For every sequence one line is printed: either the 1-based sequence
    number, the 1-based start index, the mismatch count and the matching
    substring, or a "no motif found" notice when no window is within the
    allowed distance.

    Args:
        motif: The pattern to look for.
        strings: Sequences to scan. Defaults to the module-level
            ``input_strings``.
        max_distance: Maximum number of mismatches tolerated. Defaults to
            the module-level ``maximum_distance``.

    Returns:
        None. Results are written to standard output.
    """
    if strings is None:
        strings = input_strings
    if max_distance is None:
        max_distance = maximum_distance

    for number, sequence in enumerate(strings, start=1):
        match = _best_motif_match(sequence, motif, max_distance)
        # NOTE: the misspelled "stinrg" label is kept as-is so the printed
        # output format stays unchanged for anything that consumes it.
        if match is None:
            print("\nstinrg: ", number, " no motif found")
        else:
            index, distance, window = match
            print("\nstinrg: ", number, " index: ", index + 1, " distance: ", distance, " motif in input: ", window)
# Prompt only when executed as a script, so that importing this module does
# not block waiting on standard input.
if __name__ == "__main__":
    # Strip stray whitespace and upper-case the entry: the sequences in
    # input_strings are upper-case, and comparison is case-sensitive.
    val = input("Enter the motif: ").strip().upper()
    find_motifs_from_input(val)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment