Last active
December 27, 2015 21:39
-
-
Save calizarr/7393194 to your computer and use it in GitHub Desktop.
Find k-mers in (L,t) clumps.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def allBelow(genome, k, L, t):
    """Return the set of k-mers that form an (L, t)-clump in ``genome``.

    A k-mer forms an (L, t)-clump when at least ``t`` of its occurrences
    (overlapping ones included) lie entirely inside some stretch of ``L``
    consecutive characters of ``genome``.

    genome -- string to scan.
    k      -- length of the substrings to examine.
    L      -- length of the window the occurrences must fit into.
    t      -- minimum number of occurrences inside one window; values
              below 1 are treated as 1.

    Prints one progress line for every clump-forming k-mer discovered.
    """
    # Local import: no module-level import of collections is visible here.
    from collections import deque

    needed = max(t, 1)
    results = set()
    recent = {}  # k-mer -> start positions of its `needed` latest occurrences

    # "+ 1" so the k-mer ending on the last character is scanned as well.
    for x in range(len(genome) - k + 1):
        kMer = genome[x:x + k]
        if kMer in results:  # already reported, nothing more to learn
            continue
        window = recent.get(kMer)
        if window is None:
            window = recent[kMer] = deque(maxlen=needed)
        # maxlen discards only the oldest position, so the window slides
        # one occurrence at a time instead of being emptied on a miss.
        window.append(x)
        # The latest `needed` occurrences fit in L characters when the
        # newest one ends (x + k) no later than L past the oldest start.
        if len(window) == needed and x + k <= window[0] + L:
            results.add(kMer)
            del recent[kMer]  # no longer tracked once reported
            print('Found a kmer! kMers at: ' + str(len(results)))
    return results
# Initial algorithm: findKmer collects each k-mer's positions, findClump then filters them for clumps.
def findKmer(genome, k, t):
    """Map every k-mer occurring at least ``t`` times to its start positions.

    genome -- string to scan.
    k      -- length of the substrings to examine.
    t      -- minimum number of (possibly overlapping) occurrences a k-mer
              needs in order to be included.

    Returns a dict ``{k-mer: [start, ...]}`` with each position list in
    ascending order.
    """
    positions = {}
    # One left-to-right pass records every occurrence, overlapping ones
    # included. "+ 1" so the final k-mer of the genome is not skipped.
    for x in range(len(genome) - k + 1):
        positions.setdefault(genome[x:x + k], []).append(x)
    return {kMer: found for kMer, found in positions.items()
            if len(found) >= t}
def patternMatch(pattern, genome):
    """Return the start index of every occurrence of ``pattern`` in ``genome``.

    Overlapping occurrences are included; the indices come back in
    ascending order, and the list is empty when there is no match.
    """
    hits = []
    pos = genome.find(pattern)
    while pos != -1:
        hits.append(pos)
        # Resume one character past the last hit so overlaps are found too.
        pos = genome.find(pattern, pos + 1)
    return hits
def findClump(kMers, L, t):
    """Return the k-mers that have ``t`` occurrences inside one length-L window.

    kMers -- mapping ``{k-mer: [start, ...]}``; each position list is
             assumed to be in ascending order (TODO confirm for callers
             other than findKmer).
    L     -- length of the window the occurrences must fit into.
    t     -- number of occurrences required in one window; values below 1
             are treated as 1 so the index arithmetic never goes negative.

    Returns a set of the qualifying k-mers.
    """
    span = max(t, 1) - 1  # index distance between first and t-th occurrence
    results = set()
    for kmer, indices in kMers.items():
        # The last occurrence must END inside the window, so its start may
        # be at most L - len(kmer) past the first occurrence's start.
        reach = L - len(kmer)
        if any(indices[i + span] - indices[i] <= reach
               for i in range(len(indices) - span)):
            results.add(kmer)
    return results
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment