Skip to content

Instantly share code, notes, and snippets.

@kdm9
Created November 5, 2015 02:15
Show Gist options
  • Save kdm9/f7100e823c16357f3ab3 to your computer and use it in GitHub Desktop.
Save kdm9/f7100e823c16357f3ab3 to your computer and use it in GitHub Desktop.
from __future__ import print_function, division
from frisk.kmerhash import *
import screed
from matplotlib import pyplot as plt
from functools import partial
import multiprocessing as mp
from os import path
def seq_ivom(gen):
kv = build_kmer_vec(1, K)
hash_seq(str(gen), kv)
kf = kmer_freqs(kv)
return ivom(kf)
def window_kli(start, seq, giv, winsz):
winseq = seq[start:start+winsz]
iv = seq_ivom(winseq)
return start, kli(giv, iv)
if __name__ == "__main__":
import sys
K=8
winsz = 5000
n = 10000000
r = screed.open(sys.argv[1])
for x in r:
seq = x.sequence[:n]
break
if path.exists('genome.npy'):
giv = np.load('genome.npy')
print("load genome IVOM")
else:
giv = seq_ivom(seq)
np.save('genome.npy', giv)
print("calculated genome IVOM")
do_window = partial(window_kli, seq=seq, giv=giv, winsz=winsz)
klis = []
pool = mp.Pool()
starts = range(0, len(seq), int(winsz/2))
for window in pool.imap(do_window, starts):
klis.append(window)
if len(klis) % 100 == 0:
print("done", len(klis), "windows")
klis = np.array([k for _, k in sorted(klis)])
#np.save('window_klis.npy', klis)
plt.plot(klis)
plt.savefig('frisk.pdf')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment