Last active
February 2, 2017 18:04
-
-
Save Swarchal/66646422faca77a163abca2cd6988c7a to your computer and use it in GitHub Desktop.
Rosalind k-mer solutions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3.6 | |
# sensible solution | |
import re | |
import sys | |
import itertools | |
def get_seq(path):
    """Return the sequence stored in a FASTA file as a single string.

    Lines starting with ">" (FASTA headers) are skipped; every other line
    is stripped of surrounding whitespace and concatenated in file order.
    If the file holds several records, their sequences are joined together.

    Args:
        path: Path of the FASTA file to read.

    Returns:
        The concatenated sequence, or "" if there are no sequence lines.
    """
    # The context manager closes the handle; a bare open(...) leaks it.
    with open(path) as handle:
        # str.join avoids the quadratic cost of repeated "+=".
        return "".join(
            line.strip() for line in handle if not line.startswith(">")
        )
def count_kmer(kmer, seq):
    """Count occurrences of kmer in seq, including overlapping ones.

    Args:
        kmer: Literal substring to look for.
        seq: Sequence to search.

    Returns:
        Number of positions in seq at which kmer starts.
    """
    # A zero-width lookahead matches at every start position without
    # consuming characters, so overlapping hits are all counted.
    # re.escape makes kmer match literally even when it contains regex
    # metacharacters (e.g. "." or "*").
    return len(re.findall(f"(?={re.escape(kmer)})", seq))
def kmers(k=4, alphabet="ACGT"):
    """Yield every string of length k over alphabet.

    Strings are produced in the order of itertools.product, i.e.
    lexicographic with respect to the order of the symbols in alphabet
    (plain lexicographic order for the default "ACGT").

    Args:
        k: Length of each generated string.
        alphabet: Symbols to build the strings from, in the desired order.

    Yields:
        Each of the len(alphabet) ** k strings, one at a time.
    """
    for symbols in itertools.product(alphabet, repeat=k):
        yield "".join(symbols)
def main():
    """Print the 4-mer composition of the FASTA file given as argv[1].

    Reads the sequence with get_seq, counts every k-mer produced by
    kmers() (overlaps included) and prints the counts on one line,
    separated by spaces, in the order kmers() yields them.

    Exits with a usage message on stderr if no file argument was given.
    """
    # Fail with a readable message instead of a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <fasta-file>")
    seq = get_seq(sys.argv[1])
    ans = [count_kmer(k, seq) for k in kmers()]
    print(*ans)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3.6 | |
from re import findall | |
from sys import argv | |
from itertools import product | |
# Read the sequence: the first line is dropped (it is assumed to be the
# single FASTA header) and the remaining lines are stripped and joined.
with open(argv[1]) as handle:  # close the file instead of leaking the handle
    seq = "".join(line.strip() for line in handle.readlines()[1:])
# All 4-mers over "ACGT", in lexicographic order.
all_kmers = ["".join(symbols) for symbols in product("ACGT", repeat=4)]
# A zero-width lookahead counts overlapping occurrences of each k-mer.
print(*(len(findall(f"(?={kmer})", seq)) for kmer in all_kmers))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment