# victorsanchezarevalo/challenge.py

## challenge.py
from collections import defaultdict
from pathlib import Path

def find_motifs_from_file(file_path, kmer_length=6, top_n=5):
    """
    Reads a text file with DNA sequences, one per line, and finds the most frequent k-mers.

    Every window of `kmer_length` consecutive characters in each line is counted, and the
    counts are pooled across all lines. Whitespace surrounding a line is ignored, and lines
    shorter than `kmer_length` (including blank lines) contribute nothing.

    Parameters:
    - file_path (str or Path): The path to the text file containing the DNA sequences.
    - kmer_length (int): The length of the k-mers to consider in the motif search. Must be >= 1.
    - top_n (int or None): Maximum number of k-mers to return (default 5). None returns all of them.

    Returns:
    - list: A list of tuples, where each tuple contains a k-mer and its frequency, sorted by
      descending frequency. K-mers with equal frequency keep the order in which they were
      first seen in the file.

    Raises:
    - ValueError: If kmer_length is less than 1 or top_n is negative.
    - FileNotFoundError: If file_path does not exist.
    """
    # A window length below 1 would count empty strings instead of k-mers.
    if kmer_length < 1:
        raise ValueError(f"kmer_length must be at least 1, got {kmer_length!r}.")
    # A negative slice bound would silently drop entries from the end of the ranking.
    if top_n is not None and top_n < 0:
        raise ValueError(f"top_n must be non-negative or None, got {top_n!r}.")

    path = Path(file_path)

    # Check if the file exists
    if not path.exists():
        raise FileNotFoundError(f"The file '{file_path}' was not found.")

    # Count the frequency of each k-mer
    kmer_counts = defaultdict(int)
    for line in path.read_text().splitlines():
        # Strip so stray spaces/tabs around a read never become part of a k-mer.
        seq = line.strip()
        # The range is empty when the read is shorter than kmer_length.
        for start in range(len(seq) - kmer_length + 1):
            kmer_counts[seq[start:start + kmer_length]] += 1

    # sorted() is stable, so ties stay in first-seen (dict insertion) order.
    sorted_kmers = sorted(kmer_counts.items(), key=lambda item: item[1], reverse=True)

    return sorted_kmers[:top_n]


# Script section: rank the 6-mers found in the reads file that sits in the
# current working directory, then report them one per line.
path = Path("./reads.txt")
consensus_candidates = find_motifs_from_file(file_path=path, kmer_length=6)

# Each output line is "<k-mer> <count>", in the order returned by the search.
for kmer, count in consensus_candidates:
    print(f"{kmer} {count}")
	from collections import defaultdict
	from pathlib import Path

	def find_motifs_from_file(file_path, kmer_length=6, top_n=5):
	    """
	    Reads a text file with DNA sequences, one per line, and finds the most frequent k-mers.

	    Every window of `kmer_length` consecutive characters in each line is counted, and the
	    counts are pooled across all lines. Whitespace surrounding a line is ignored, and lines
	    shorter than `kmer_length` (including blank lines) contribute nothing.

	    Parameters:
	    - file_path (str or Path): The path to the text file containing the DNA sequences.
	    - kmer_length (int): The length of the k-mers to consider in the motif search. Must be >= 1.
	    - top_n (int or None): Maximum number of k-mers to return (default 5). None returns all of them.

	    Returns:
	    - list: A list of tuples, where each tuple contains a k-mer and its frequency, sorted by
	      descending frequency. K-mers with equal frequency keep the order in which they were
	      first seen in the file.

	    Raises:
	    - ValueError: If kmer_length is less than 1 or top_n is negative.
	    - FileNotFoundError: If file_path does not exist.
	    """
	    # A window length below 1 would count empty strings instead of k-mers.
	    if kmer_length < 1:
	        raise ValueError(f"kmer_length must be at least 1, got {kmer_length!r}.")
	    # A negative slice bound would silently drop entries from the end of the ranking.
	    if top_n is not None and top_n < 0:
	        raise ValueError(f"top_n must be non-negative or None, got {top_n!r}.")

	    path = Path(file_path)

	    # Check if the file exists
	    if not path.exists():
	        raise FileNotFoundError(f"The file '{file_path}' was not found.")

	    # Count the frequency of each k-mer
	    kmer_counts = defaultdict(int)
	    for line in path.read_text().splitlines():
	        # Strip so stray spaces/tabs around a read never become part of a k-mer.
	        seq = line.strip()
	        # The range is empty when the read is shorter than kmer_length.
	        for start in range(len(seq) - kmer_length + 1):
	            kmer_counts[seq[start:start + kmer_length]] += 1

	    # sorted() is stable, so ties stay in first-seen (dict insertion) order.
	    sorted_kmers = sorted(kmer_counts.items(), key=lambda item: item[1], reverse=True)

	    return sorted_kmers[:top_n]


	# Script section: look up the most frequent 6-mers in the reads file located
	# in the current working directory.
	path = Path("./reads.txt")

	consensus_candidates = find_motifs_from_file(path, kmer_length=6)
	# Print one "<k-mer> <count>" line per candidate, in the order returned.
	for kmer, count in consensus_candidates:
	    print(kmer, count)