Skip to content

Instantly share code, notes, and snippets.

View endrebak's full-sized avatar
🧬
Trying to write a genomic library in Rust

endrebak.ada endrebak

🧬
Trying to write a genomic library in Rust
View GitHub Profile
import copy
tmp_path = '/nethome/jkpickrell/1kG_data/covariance_matrix/'
# tmp_path_EUR = tmp_path
# tmp_path_ASN = '/nethome/jkpickrell/1kG_data/covariance_matrix/ASN/'
# tmp_path_AFR = '/nethome/jkpickrell/1kG_data/covariance_matrix/AFR/'
def return_conf(path):
return {
'partition_root' : path,
39967768 39967768 0.249157622127387
39967768 39967824 0.022333456325143934
39967768 39967950 -0.00434938492491698
39967768 39968210 -0.01366949547831051
39967768 39968231 -0.019882902513906196
39967768 39968596 -0.007456088442714824
39967768 39968633 0.00348960255080374
39967768 39968753 0.0006979205101607479
39967768 39969173 0.0006979205101607479
39967768 39969251 0.18011222422567374
rule compute_kde:
input:
"{prefix}/data/{genome}/HMM_states/{statistic}/correlations_all.gz"
output:
"{prefix}/data/{genome}/HMM_states/{statistic}/cutoff.txt"
run:
f = input[0]
o = output[0]
from scipy.stats import gaussian_kde
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef _getitem(const long [::1] runs, const double [::1] values, const long [::1] run_cumsum, start, end):
cdef:
int i = 0
int arr_length = 100
int nfound = 0
# int foundsum = 0
rule gene_biotypes:
input:
regions = gene_biotype_infiles,
annotation = "{prefix}/data/{genome}/annotation.tsv"
output:
"{prefix}/data/{genome}/{hmm_or_anatomy}_regions/{cutoff}/gene_biotype_counts.tsv"
run:
df = pd.read_table(input.regions, header=0)
df2 = pd.read_csv(input.annotation, header=0, sep=",")
import pyranges_db as db
import pyranges as pr
gr = db.gencode.genes("human") # takes a while to download from ftp
# Wall time: 2min 1s
gr.to_gtf("gencode_human.gtf.gz") # takes a while to gzip and write to disk
# Wall time: 4min 3s
# subset for faster operations
#!/usr/bin/env python3
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True
import sys, math, gzip
import numpy as np
# cimport numpy as cnp
import pandas as pd
from time import time
#!/usr/bin/env python3
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True
import sys, math, gzip
import numpy as np
import pandas as pd
from time import time
from libc.math cimport exp, fabs
#!/usr/bin/env python3
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True
import sys, math, gzip
import numpy as np
import pandas as pd
from time import time
from libc.math cimport exp, fabs
half_window_size = 3
len_haps = 10
# half_window_size += 1
for i in range(0, half_window_size):
for j in range(i):
j1 = i - j