Skip to content

Instantly share code, notes, and snippets.

View endrebak's full-sized avatar
🧬
Trying to write a genomic library in Rust

endrebak.ada endrebak

🧬
Trying to write a genomic library in Rust
View GitHub Profile
➜ head ~/Downloads/CEU/CEU-22-final.txt
Position(bp) Rate(cM/Mb) Map(cM) Filtered
16489239 0.0 0.0 1
16494187 0.0 0.0 1
16504399 0.0 0.0 1
16855618 0.0 0.0 1
16869887 0.0 0.0 1
16872459 0.0 0.0 1
16890307 0.0 0.0 1
16923693 0.0 0.0 1
from scipy.stats import gaussian_kde
import numpy as np
import pandas as pd
# Load the tab-separated table referenced by `f` into a DataFrame.
# NOTE(review): `f` is not defined in the visible snippet — confirm where it is set.
df = pd.read_table(f, sep="\t")
# Take the CorrelationSum column, sorted in ascending order.
values = df.CorrelationSum.sort_values()
# Fit a Gaussian kernel density estimate to those values.
gk = gaussian_kde(values)
def table(region, gwas_minus_region, total_gwas):
pos_cols = "Chromosome Start End".split()
total_region = len(region.drop_duplicates(pos_cols))
tp = region.groupby("Trait").size()
fp = (tp - total_region).abs()
fn = gwas_minus_region.groupby("Trait").size()
tn = ((fn + tp + fp) - total_gwas).abs()
@endrebak
endrebak / nrwas.py
Last active December 11, 2019 12:01
#!/usr/bin/env python
import os
import sys
import pandas as pd
import numpy as np
import pyranges as pr
import argparse
#!/usr/bin/env python2.7
### On SNPsnap
#!/bin/env python
#source /opt/rh/python27/enable
#---> gives Python 2.7.5 (whereas Broad Dotkit python is Python 2.7.1)
import os
import sys
import collections
# Author: denis.engemann@gmail.com
# License: simplified BSD (3 clause)
# Note: code is based on scipy.stats.pearsonr
def ss(a, axis):
    """Return the sum of squares of ``a`` along ``axis``.

    Parameters
    ----------
    a : array
        Values to square elementwise; must support ``a * a``.
    axis : int or None
        Axis passed straight through to ``np.sum``.

    Returns
    -------
    The result of ``np.sum(a * a, axis=axis)``.
    """
    return np.sum(a * a, axis=axis)
def compute_corr(x, y):
x = np.asarray(x)
y = np.asarray(y)
# ctypedef struct ailist_t:
# int64_t nr, mr # Number of regions
# interval_t *interval_list # Regions data
# uint32_t first, last # Record range of intervals
# int nc, lenC[10], idxC[10]
# uint32_t *maxE
# ...
# uint32_t binary_search(interval_t* As, uint32_t idxS, uint32_t idxE, uint32_t qe) nogil
# wget http://big.databio.org/example_data/AIList/AIListTestData.tgz
nrows = 1.5e6
from ncls import NCLS
from ailist import AIList
import numpy as np
import pandas as pd
# Works on very large datasets.
import pandas as pd
# Best-effort: if the optional `mkl` module is available, ask it to use a
# single thread. Any ordinary failure (module missing, call unsupported) is
# ignored so the script still runs without it.
try:
    import mkl

    mkl.set_num_threads(1)
except Exception:
    # Deliberately not a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate.
    pass
{
"global": {
"check_for_updates_on_startup": true,
"show_in_menu_bar": true,
"show_profile_name_in_menu_bar": false
},
"profiles": [
{