Created
May 18, 2022 02:53
-
-
Save dasunsucharith/2afba3b692fde983ad5bb63276f2ceab to your computer and use it in GitHub Desktop.
CS50 2022 pset6 DNA problem solution
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import sys | |
def main():
    """Print the name of the person whose STR counts match a DNA sequence.

    Expects two command-line arguments: the path of a CSV database
    (sys.argv[1]) and the path of a text file holding the DNA sequence
    (sys.argv[2]). The CSV is read with csv.DictReader; every header column
    other than "name" is treated as an STR whose longest consecutive run in
    the sequence is computed with longest_match(). The first row whose
    values equal those run lengths for every STR column is reported;
    otherwise "No Match" is printed.

    Raises:
        SystemExit: with status 1 when the argument count is wrong.
    """
    # Check for command-line usage
    if len(sys.argv) != 3:
        print("Usage: python dna.py data.csv sequence.txt")
        # sys.exit with a non-zero status so callers can detect the misuse;
        # the bare exit() helper comes from the site module and reports
        # success (status 0).
        sys.exit(1)

    # Read database file into a variable
    with open(sys.argv[1], newline="") as database_file:
        reader = csv.DictReader(database_file)
        # Take the STR names from the header rather than from the first data
        # row, so a database with a header but no rows does not crash, and
        # the "name" column is never searched for in the DNA sequence.
        str_names = [
            field for field in (reader.fieldnames or []) if field != "name"
        ]
        database = list(reader)

    # Read DNA sequence file into a variable
    with open(sys.argv[2]) as sequence_file:
        sequence = sequence_file.read()

    # Find longest match of each STR in DNA sequence
    matches = {name: longest_match(sequence, name) for name in str_names}

    # Check database for matching profiles
    # NOTE(review): the problem specification may expect "No match" with a
    # lower-case "m" -- confirm before changing the emitted text.
    suspect = "No Match"
    for person in database:
        # CSV values are strings, so compare against the stringified count.
        if all(person[name] == str(count) for name, count in matches.items()):
            suspect = person["name"]
            break

    print(suspect)
    return
def longest_match(sequence, subsequence):
    """Return the length of the longest run of subsequence in sequence.

    A "run" is the number of back-to-back, non-overlapping copies of
    subsequence starting at some position in sequence.

    Args:
        sequence: The string to search.
        subsequence: The string whose consecutive repeats are counted.

    Returns:
        The largest number of consecutive repeats found, or 0 when
        subsequence is empty or never occurs.
    """
    subsequence_length = len(subsequence)

    # An empty subsequence matches at every position without consuming any
    # characters, so the scan below would never terminate; define its
    # longest run as 0 instead.
    if subsequence_length == 0:
        return 0

    longest_run = 0

    # Only start positions that leave room for at least one full copy of
    # subsequence can begin a run (the range is empty if sequence is shorter).
    for i in range(len(sequence) - subsequence_length + 1):
        # Count consecutive copies of subsequence starting at position i
        count = 0
        while sequence.startswith(subsequence, i + count * subsequence_length):
            count += 1

        # Update most consecutive matches found
        longest_run = max(longest_run, count)

    # After checking for runs at each character in sequence, return longest run found
    return longest_run
# Run the program only when executed as a script, so importing this module
# (e.g. to reuse longest_match) does not trigger argument parsing or exit.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment