Skip to content

Instantly share code, notes, and snippets.

View endrebak's full-sized avatar
🧬
Trying to write a genomic library in Rust

endrebak.ada endrebak

🧬
Trying to write a genomic library in Rust
View GitHub Profile
rule gene_biotypes:
input:
regions = gene_biotype_infiles,
annotation = "{prefix}/data/{genome}/annotation.tsv"
output:
"{prefix}/data/{genome}/{hmm_or_anatomy}_regions/{cutoff}/gene_biotype_counts.tsv"
run:
df = pd.read_table(input.regions, header=0)
df2 = pd.read_csv(input.annotation, header=0, sep=",")
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef _getitem(const long [::1] runs, const double [::1] values, const long [::1] run_cumsum, start, end):
cdef:
int i = 0
int arr_length = 100
int nfound = 0
# int foundsum = 0
rule compute_kde:
input:
"{prefix}/data/{genome}/HMM_states/{statistic}/correlations_all.gz"
output:
"{prefix}/data/{genome}/HMM_states/{statistic}/cutoff.txt"
run:
f = input[0]
o = output[0]
from scipy.stats import gaussian_kde
39967768 39967768 0.249157622127387
39967768 39967824 0.022333456325143934
39967768 39967950 -0.00434938492491698
39967768 39968210 -0.01366949547831051
39967768 39968231 -0.019882902513906196
39967768 39968596 -0.007456088442714824
39967768 39968633 0.00348960255080374
39967768 39968753 0.0006979205101607479
39967768 39969173 0.0006979205101607479
39967768 39969251 0.18011222422567374
import copy
tmp_path = '/nethome/jkpickrell/1kG_data/covariance_matrix/'
# tmp_path_EUR = tmp_path
# tmp_path_ASN = '/nethome/jkpickrell/1kG_data/covariance_matrix/ASN/'
# tmp_path_AFR = '/nethome/jkpickrell/1kG_data/covariance_matrix/AFR/'
def return_conf(path):
return {
'partition_root' : path,
# from . import flat_file_consts as cnst
import sys
import csv
import gzip
import time
import math
import bisect
def get_final_partitions(input_config, name, snp_first, snp_last):
#!/usr/bin/env python3
import ldetect.baselib.flat_file_consts as cnst
import ldetect.baselib.flat_file as flat
import ldetect.baselib.binary_search as binsrch
import sys
import os.path
import math
import bisect
def calc_diag_lean(self, out_fname, out_delim, dynamic_delete=True):
if dynamic_delete == False:
raise Exception('Error: Conversion has been run in lean mode, but with dynamically=False.')
self.dynamic_delete = dynamic_delete
flat.print_log_msg('Start')
#!/usr/bin/env python3
import sys, os, gzip, math
import numpy as np
# calculate Wen/Stephens shrinkage LD estimate
gmapfile = gzip.open(sys.argv[1]) # genetic map
indfile = open(sys.argv[2]) #list of individuals
# NE = 11418.0
NE = float(sys.argv[3])
index1 = 0
index2 = 0
while index1 < len(posin):
pos = posin[index1]
rs = rsin[index1]
if pos == mappos[index2]:
#the 1000 Genomes site was genotyped as part of the map
results.append((rs, pos, mapgpos[index2]))
index1 = index1 + 1