Skip to content

Instantly share code, notes, and snippets.

@daviddamilola
Created October 13, 2023 21:46
Show Gist options
  • Save daviddamilola/c0d44e8b5f3c81d54ae7f653339ae0e4 to your computer and use it in GitHub Desktop.
Save daviddamilola/c0d44e8b5f3c81d54ae7f653339ae0e4 to your computer and use it in GitHub Desktop.
# print(loadFile('./Downloads/Vibrio_cholerae.txt', 'CTTGATCAT'))
"""
FindClumps(Text, k, L, t)
    Patterns ← an array of strings of length 0
    n ← |Text|
    for every integer i between 0 and n − L
        Window ← Text(i, L)
        freqMap ← FrequencyTable(Window, k)
        for every key s in freqMap
            if freqMap[s] ≥ t
                append s to Patterns
    remove duplicates from Patterns
    return Patterns
"""
def getFreqMap(text, k):
    """Count every length-k substring of text.

    Slides a window of width k across text one character at a time and
    tallies how often each substring appears (overlapping occurrences are
    counted separately).

    Args:
        text: The string to scan.
        k: Width of the substrings to count.

    Returns:
        A dict mapping each substring to its occurrence count, keyed in order
        of first appearance. Empty when k is larger than len(text).
    """
    counts = {}
    lastStart = len(text) - k
    for start in range(lastStart + 1):
        kmer = text[start:start + k]
        counts[kmer] = counts.get(kmer, 0) + 1
    return counts
def findClumps(genome, k, windowLength, leastFrequency):
    """Find every k-mer that forms a clump somewhere in genome.

    A k-mer forms a clump when at least one window of windowLength
    consecutive characters contains it leastFrequency or more times
    (overlapping occurrences count).

    Instead of recounting each window from scratch, the counts are updated
    incrementally: moving the window one position to the right removes one
    k-mer on the left and adds one on the right.

    Args:
        genome: The string to search.
        k: Length of the substrings (k-mers) to look for.
        windowLength: Width of the sliding window.
        leastFrequency: Minimum number of occurrences inside one window.

    Returns:
        A sorted list of the distinct qualifying k-mers. The list is empty
        when nothing qualifies, when k is not positive, or when no complete
        window or k-mer fits (k > windowLength or windowLength > len(genome)).
    """
    genomeLength = len(genome)
    kmersPerWindow = windowLength - k + 1
    if k <= 0 or kmersPerWindow <= 0 or windowLength > genomeLength:
        return []

    counts = {}
    clumps = set()

    def countKmerAt(start):
        # Counts only ever cross the threshold on an increment, so checking
        # here is enough to catch every window in which a k-mer qualifies.
        kmer = genome[start:start + k]
        counts[kmer] = counts.get(kmer, 0) + 1
        if counts[kmer] >= leastFrequency:
            clumps.add(kmer)

    # Count the k-mers of the first window.
    for start in range(kmersPerWindow):
        countKmerAt(start)

    # Slide the window: forget the k-mer that left, count the one that entered.
    for windowStart in range(1, genomeLength - windowLength + 1):
        leaving = genome[windowStart - 1:windowStart - 1 + k]
        counts[leaving] -= 1
        if counts[leaving] == 0:
            del counts[leaving]
        countKmerAt(windowStart + kmersPerWindow - 1)

    return sorted(clumps)
# Default clump-search settings: k-mer length, window width, and the minimum
# number of occurrences a k-mer needs inside one window.
k, windowLength, leastFrequency = 5, 50, 4
def loadFile(path, pattern):
    """Read a genome from a text file and return the clumps found in it.

    The search uses the module-level k, windowLength and leastFrequency
    settings.

    Args:
        path: Path of the text file holding the genome.
        pattern: Unused; kept so existing callers keep working.

    Returns:
        Whatever findClumps returns for the genome read from path.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file even if reading fails.
    with open(path, 'r') as genomeFile:
        # Strip line breaks and other whitespace so that k-mers spanning a
        # line boundary are not corrupted by embedded newline characters.
        genome = "".join(genomeFile.read().split())
    return findClumps(genome, k, windowLength, leastFrequency)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment