Last active
October 28, 2024 14:33
-
-
Save JAS-Norway/5abb1b7826ffb20141f1cbf76da50913 to your computer and use it in GitHub Desktop.
Please give me all the feedback you can think of.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from time import time | |
# Averages around 0.9 seconds. | |
# Updated program where I tried to use the tips reddit gave me. | |
def count_overlap(path1: str, path2: str) -> int:
    """
    Count the DNA strings that occur in both input files.

    Each of the two files contains a number of DNA strings, and a file is
    guaranteed never to repeat a string, so every shared string adds
    exactly 1 to the result.
    Returns the number of strings the two files have in common.
    """
    lines1: list[str] = read(path1)
    lines2: list[str] = read(path2)
    unique1: set[str] = list_to_set(lines1)
    unique2: set[str] = list_to_set(lines2)
    shared: set[str] = unique1.intersection(unique2)
    return len(shared)
def read(path: str) -> list[str]:
    """
    Read a UTF-8 text file and return its lines without line terminators.

    The trailing newline is removed from every line so that the last line
    of a file compares equal to the same string elsewhere, whether or not
    the file ends with a newline.
    """
    with open(path, "r", encoding="utf-8") as reader:
        # readlines() would keep the "\n", which makes "TATT" (last line,
        # no newline) and "TATT\n" look like two different strings.
        return [line.rstrip("\n") for line in reader]
def list_to_set(lst: list[str]) -> set[str]:
    """
    Return a set containing every string in lst.

    Duplicate entries collapse into a single element.
    """
    # The set constructor does the whole conversion; no manual loop needed.
    return set(lst)
def main() -> None:
    """Run the sample test once and print the elapsed time in seconds."""
    # perf_counter is monotonic and high-resolution; time() follows the wall
    # clock, which can be adjusted mid-run and distort the measurement.
    from time import perf_counter

    start = perf_counter()
    test_count_overlap_sample()
    elapsed = perf_counter() - start
    print(elapsed)  # Averages around 0.9 seconds.
def test_count_overlap_sample():
    """Check count_overlap on the small sample files and on the large id files."""
    print('Tests count_overlap... ', end='')
    sample_overlap = count_overlap('sample1.txt', 'sample2.txt')
    assert sample_overlap == 2
    # Efficiency check: this takes a long time with the wrong solution.
    large_overlap = count_overlap('id1.txt', 'id2.txt')
    assert large_overlap == 100001
    print('OK')
# Run the timed test only when this file is executed directly as a script.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from time import time | |
# Averages around 1.2 seconds. | |
def count_overlap(path1: str, path2: str) -> int:
    """
    Count the DNA strings that occur in both input files.

    Each of the two files contains a number of DNA strings, and a file is
    guaranteed never to repeat a string, so every shared string adds
    exactly 1 to the result.
    Returns the number of strings the two files have in common.
    """
    # Strings of the first file become dict keys, giving fast membership tests.
    known: dict[str, int] = list_to_dict(read(path1))
    candidates: list[str] = read(path2)
    return sum(1 for entry in candidates if entry in known)
def read(path: str) -> list[str]:
    """
    Read a UTF-8 text file and return its lines without line terminators.

    The trailing newline is removed from every line so that the last line
    of a file compares equal to the same string elsewhere, whether or not
    the file ends with a newline.
    """
    with open(path, "r", encoding="utf-8") as reader:
        # readlines() would keep the "\n", which makes "TATT" (last line,
        # no newline) and "TATT\n" look like two different strings.
        return [line.rstrip("\n") for line in reader]
def list_to_dict(lst: list[str]) -> dict[str, int]:
    """
    Map every string in lst to its index in the list.

    If a string occurs more than once, the index of its last occurrence
    is the one that is kept.
    """
    # A comprehension avoids building a throwaway one-item dict per element,
    # which the previous update({v: i}) call did on every iteration.
    return {value: index for index, value in enumerate(lst)}
def main() -> None:
    """Run the sample test once and print the elapsed time in seconds."""
    # perf_counter is monotonic and high-resolution; time() follows the wall
    # clock, which can be adjusted mid-run and distort the measurement.
    from time import perf_counter

    start = perf_counter()
    test_count_overlap_sample()
    elapsed = perf_counter() - start
    print(elapsed)  # Averages around 1.2 seconds.
def test_count_overlap_sample():
    """Check count_overlap on the small sample files and on the large id files."""
    print('Tests count_overlap... ', end='')
    sample_overlap = count_overlap('sample1.txt', 'sample2.txt')
    assert sample_overlap == 2
    # Efficiency check: this takes a long time with the wrong solution.
    large_overlap = count_overlap('id1.txt', 'id2.txt')
    assert large_overlap == 100001
    print('OK')
# Run the timed test only when this file is executed directly as a script.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from time import time | |
# Averages around 1.75 seconds. | |
def count_overlap(path1: str, path2: str) -> int:
    """
    Count the DNA strings that occur in both input files.

    Each of the two files contains a number of DNA strings, and a file is
    guaranteed never to repeat a string, so every shared string adds
    exactly 1 to the result.
    Returns the number of strings the two files have in common as an int.
    """
    first: dict[str, int] = read(path1)
    second: dict[str, int] = read(path2)
    # A key present in both maps appears only once in the merged map, so the
    # overlap is the combined size minus the size of the merged map.
    merged: dict[str, int] = first | second
    return len(first) + len(second) - len(merged)
def read(path: str) -> dict[str, int]:
    """
    Load the lines of a UTF-8 text file and turn them into a map
    by passing them to list_to_dict.
    """
    with open(path, "r", encoding="utf-8") as handle:
        lines: list[str] = list(handle)
    return list_to_dict(lines)
def list_to_dict(lst: list[str]) -> dict[str, int]:
    """
    Map every string in lst, stripped of surrounding whitespace, to its index.

    If two entries are equal after stripping, the index of the last one
    is the one that is kept.
    """
    # A comprehension avoids building a throwaway one-item dict per element,
    # which the previous update({...}) call did on every iteration.
    return {value.strip(): index for index, value in enumerate(lst)}
def main() -> None:
    """Run the sample test once and print the elapsed time in seconds."""
    # perf_counter is monotonic and high-resolution; time() follows the wall
    # clock, which can be adjusted mid-run and distort the measurement.
    from time import perf_counter

    start = perf_counter()
    test_count_overlap_sample()
    elapsed = perf_counter() - start
    print(elapsed)  # Averages around 1.75 seconds.
def test_count_overlap_sample():
    """Check count_overlap on the small sample files and on the large id files."""
    print('Tests count_overlap... ', end='')
    sample_overlap = count_overlap('sample1.txt', 'sample2.txt')
    assert sample_overlap == 2
    # Efficiency check: this takes a long time with the wrong solution.
    large_overlap = count_overlap('id1.txt', 'id2.txt')
    assert large_overlap == 100001
    print('OK')
# Run the timed test only when this file is executed directly as a script.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
TATT | |
GAGA | |
GAGG |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
AGGA | |
CATT | |
GAGA | |
TATT |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
These are my 2 solutions to the same problem.
Please give me all the feedback you can think of, like:
I don't mind harsh feedback! I want to learn, and to learn you must be humble and able to listen to people who are better than you. Thank you so much if you decide to help! <3
If you guys want to run the scripts for yourself, you will need the id1.txt and id2.txt files. They are kind of big, so I didn't want to attach them here.
They can be found here in my repo if interested!
https://github.com/JAS-Norway/reddit-feeback-genetics.py-