endrebak.ada endrebak

## fisher_exact_gene_biotypes.py
# Snakemake rule that declares gene_biotype_counts.tsv for one
# {hmm_or_anatomy}/{cutoff} combination and loads the two tables it is built from.
# NOTE(review): only the two table loads are visible in this excerpt; nothing
# shown here writes the declared output.
rule gene_biotypes:
    input:
        # `gene_biotype_infiles` is defined outside this excerpt.
        regions = gene_biotype_infiles,
        annotation = "{prefix}/data/{genome}/annotation.tsv"
    output:
        "{prefix}/data/{genome}/{hmm_or_anatomy}_regions/{cutoff}/gene_biotype_counts.tsv"
    run:
        # read_table uses pandas' default tab separator; the first row is the header.
        df = pd.read_table(input.regions, header=0)
        # NOTE(review): the path ends in .tsv but the file is parsed as
        # comma-separated here -- confirm the annotation really is comma-delimited.
        df2 = pd.read_csv(input.annotation, header=0, sep=",")

## getitem_rle.pyx
# Typed lookup over three C-contiguous 1-D const memoryviews (`runs`, `values`,
# `run_cumsum`) -- presumably a run-length encoding, going by the file name.
# `start` and `end` are untyped and therefore arrive as Python objects.
# The directives below switch off bounds checking, negative-index wraparound
# and memoryview-initialisation checks for this function, so every index used
# in the body has to be valid by construction.
# NOTE(review): only the signature and the first local declarations are
# visible in this excerpt.
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef _getitem(const long [::1] runs, const double [::1] values, const long [::1] run_cumsum, start, end):

    cdef:
        int i = 0
        # NOTE(review): 100 looks like an initial buffer capacity -- confirm
        # against the rest of the body.
        int arr_length = 100
        int nfound = 0
        # int foundsum = 0

## compute_kde.py
# Snakemake rule that declares cutoff.txt as derived from the per-statistic
# correlations_all.gz file.
# NOTE(review): only the setup lines are visible in this excerpt; how the
# cutoff is computed (presumably via the Gaussian KDE imported below) is not shown.
rule compute_kde:
    input:
        "{prefix}/data/{genome}/HMM_states/{statistic}/correlations_all.gz"
    output:
        "{prefix}/data/{genome}/HMM_states/{statistic}/cutoff.txt"
    run:
        # Single unnamed input and output, addressed positionally.
        f = input[0]
        o = output[0]

        # Imported inside the run block rather than at the top of the workflow file.
        from scipy.stats import gaussian_kde

## test.txt
39967768 39967768 0.249157622127387
39967768 39967824 0.022333456325143934
39967768 39967950 -0.00434938492491698
39967768 39968210 -0.01366949547831051
39967768 39968231 -0.019882902513906196
39967768 39968596 -0.007456088442714824
39967768 39968633 0.00348960255080374
39967768 39968753 0.0006979205101607479
39967768 39969173 0.0006979205101607479
39967768 39969251 0.18011222422567374

## flat_file_const.py
import copy

# Hard-coded location of the covariance-matrix data.
# NOTE(review): this is an absolute path under one user's home directory, so it
# only resolves on the original machine; how tmp_path is consumed is not
# visible in this excerpt.
tmp_path = '/nethome/jkpickrell/1kG_data/covariance_matrix/'
# Per-population variants, currently disabled:
# tmp_path_EUR = tmp_path
# tmp_path_ASN = '/nethome/jkpickrell/1kG_data/covariance_matrix/ASN/'
# tmp_path_AFR = '/nethome/jkpickrell/1kG_data/covariance_matrix/AFR/'

def return_conf(path):
	return {
		'partition_root' : path,

## flat_file.py
# from . import flat_file_consts as cnst

import sys
import csv
import gzip
import time
import math
import bisect

def get_final_partitions(input_config, name, snp_first, snp_last):

## matrix_to_vector.py
#!/usr/bin/env python3

import ldetect.baselib.flat_file_consts as cnst
import ldetect.baselib.flat_file as flat
import ldetect.baselib.binary_search as binsrch

import sys
import os.path
import math
import bisect

## calc_diag_lean.py

def calc_diag_lean(self, out_fname, out_delim, dynamic_delete=True):
    """Validate and record the ``dynamic_delete`` flag, then log the start of the run.

    Lean mode is only accepted with dynamic deletion enabled, so the call is
    rejected up front otherwise.

    Args:
        out_fname: Output file name (not referenced in the lines visible here).
        out_delim: Output delimiter (not referenced in the lines visible here).
        dynamic_delete: Must be truthy; stored on ``self.dynamic_delete``.

    Raises:
        Exception: If ``dynamic_delete`` is falsy.
    """
    # Reject every falsy value (False, 0, None), not only those equal to False.
    if not dynamic_delete:
        # The message names the actual parameter so the caller can find it.
        raise Exception('Error: Conversion has been run in lean mode, but with dynamic_delete=False.')

    self.dynamic_delete = dynamic_delete

    flat.print_log_msg('Start')

## calc_covariance.py
#!/usr/bin/env python3

import sys, os, gzip, math
import numpy as np

# calculate Wen/Stephens shrinkage LD estimate
# Command-line arguments: argv[1] = gzip-compressed genetic map,
# argv[2] = list of individuals, argv[3] = NE (parsed as float).
# NOTE(review): gzip.open defaults to binary mode, so gmapfile yields bytes --
# confirm the downstream parsing expects that.
# NOTE(review): neither handle is closed in the lines visible here.
gmapfile = gzip.open(sys.argv[1]) # genetic map
indfile = open(sys.argv[2]) #list of individuals
# Previously hard-coded value, kept for reference:
# NE = 11418.0
NE = float(sys.argv[3])

## interpolate_maps.py

# Walk the input sites (posin / rsin, indexed by index1) against the map
# positions (mappos / mapgpos, indexed by index2).
# NOTE(review): only the exact-match branch is visible in this excerpt; nothing
# shown here advances index2 or handles pos != mappos[index2], so the remaining
# branches presumably follow outside this view.
index1 = 0
index2 = 0
while index1 < len(posin):
    pos = posin[index1]
    rs = rsin[index1]
    if pos == mappos[index2]:
        #the 1000 Genomes site was genotyped as part of the map
        # Record mapgpos[index2] for this site, then move on to the next input site.
        results.append((rs, pos, mapgpos[index2]))
        index1 = index1 + 1
	# Snakemake rule that declares gene_biotype_counts.tsv for one
	# {hmm_or_anatomy}/{cutoff} combination and loads the two tables it is built from.
	# Nested indentation restored: the flattened form could not be parsed as a rule.
	# NOTE(review): only the two table loads are visible in this excerpt; nothing
	# shown here writes the declared output.
	rule gene_biotypes:
	    input:
	        # `gene_biotype_infiles` is defined outside this excerpt.
	        regions = gene_biotype_infiles,
	        annotation = "{prefix}/data/{genome}/annotation.tsv"
	    output:
	        "{prefix}/data/{genome}/{hmm_or_anatomy}_regions/{cutoff}/gene_biotype_counts.tsv"
	    run:
	        # read_table uses pandas' default tab separator; the first row is the header.
	        df = pd.read_table(input.regions, header=0)
	        # NOTE(review): the path ends in .tsv but the file is parsed as
	        # comma-separated here -- confirm the annotation really is comma-delimited.
	        df2 = pd.read_csv(input.annotation, header=0, sep=",")
	# Typed lookup over three C-contiguous 1-D const memoryviews (`runs`, `values`,
	# `run_cumsum`); `start` and `end` are untyped and arrive as Python objects.
	# The directives below switch off bounds checking, negative-index wraparound
	# and memoryview-initialisation checks for this function, so every index used
	# in the body has to be valid by construction.
	# Nested indentation restored: the flattened form left the function body and
	# the `cdef:` block at the same level as the signature.
	# NOTE(review): only the signature and the first local declarations are
	# visible in this excerpt.
	@cython.boundscheck(False)
	@cython.wraparound(False)
	@cython.initializedcheck(False)
	cpdef _getitem(const long [::1] runs, const double [::1] values, const long [::1] run_cumsum, start, end):

	    cdef:
	        int i = 0
	        # NOTE(review): 100 looks like an initial buffer capacity -- confirm
	        # against the rest of the body.
	        int arr_length = 100
	        int nfound = 0
	        # int foundsum = 0
	# Snakemake rule that declares cutoff.txt as derived from the per-statistic
	# correlations_all.gz file.
	# Nested indentation restored: the flattened form could not be parsed as a rule.
	# NOTE(review): only the setup lines are visible in this excerpt; how the
	# cutoff is computed (presumably via the Gaussian KDE imported below) is not shown.
	rule compute_kde:
	    input:
	        "{prefix}/data/{genome}/HMM_states/{statistic}/correlations_all.gz"
	    output:
	        "{prefix}/data/{genome}/HMM_states/{statistic}/cutoff.txt"
	    run:
	        # Single unnamed input and output, addressed positionally.
	        f = input[0]
	        o = output[0]

	        # Imported inside the run block rather than at the top of the workflow file.
	        from scipy.stats import gaussian_kde
	39967768 39967768 0.249157622127387
	39967768 39967824 0.022333456325143934
	39967768 39967950 -0.00434938492491698
	39967768 39968210 -0.01366949547831051
	39967768 39968231 -0.019882902513906196
	39967768 39968596 -0.007456088442714824
	39967768 39968633 0.00348960255080374
	39967768 39968753 0.0006979205101607479
	39967768 39969173 0.0006979205101607479
	39967768 39969251 0.18011222422567374
	import copy

	# Hard-coded location of the covariance-matrix data.
	# NOTE(review): this is an absolute path under one user's home directory, so it
	# only resolves on the original machine; how tmp_path is consumed is not
	# visible in this excerpt.
	tmp_path = '/nethome/jkpickrell/1kG_data/covariance_matrix/'
	# Per-population variants, currently disabled:
	# tmp_path_EUR = tmp_path
	# tmp_path_ASN = '/nethome/jkpickrell/1kG_data/covariance_matrix/ASN/'
	# tmp_path_AFR = '/nethome/jkpickrell/1kG_data/covariance_matrix/AFR/'

	def return_conf(path):
	return {
	'partition_root' : path,
	# from . import flat_file_consts as cnst

	import sys
	import csv
	import gzip
	import time
	import math
	import bisect

	def get_final_partitions(input_config, name, snp_first, snp_last):
	#!/usr/bin/env python3

	import ldetect.baselib.flat_file_consts as cnst
	import ldetect.baselib.flat_file as flat
	import ldetect.baselib.binary_search as binsrch

	import sys
	import os.path
	import math
	import bisect

	def calc_diag_lean(self, out_fname, out_delim, dynamic_delete=True):
	    """Validate and record the ``dynamic_delete`` flag, then log the start of the run.

	    Lean mode is only accepted with dynamic deletion enabled, so the call is
	    rejected up front otherwise.

	    Args:
	        out_fname: Output file name (not referenced in the lines visible here).
	        out_delim: Output delimiter (not referenced in the lines visible here).
	        dynamic_delete: Must be truthy; stored on ``self.dynamic_delete``.

	    Raises:
	        Exception: If ``dynamic_delete`` is falsy.
	    """
	    # Reject every falsy value (False, 0, None), not only those equal to False.
	    if not dynamic_delete:
	        # The message names the actual parameter so the caller can find it.
	        raise Exception('Error: Conversion has been run in lean mode, but with dynamic_delete=False.')

	    self.dynamic_delete = dynamic_delete

	    flat.print_log_msg('Start')
	#!/usr/bin/env python3

	import sys, os, gzip, math
	import numpy as np

	# calculate Wen/Stephens shrinkage LD estimate
	# Command-line arguments: argv[1] = gzip-compressed genetic map,
	# argv[2] = list of individuals, argv[3] = NE (parsed as float).
	# NOTE(review): gzip.open defaults to binary mode, so gmapfile yields bytes --
	# confirm the downstream parsing expects that.
	# NOTE(review): neither handle is closed in the lines visible here.
	gmapfile = gzip.open(sys.argv[1]) # genetic map
	indfile = open(sys.argv[2]) #list of individuals
	# Previously hard-coded value, kept for reference:
	# NE = 11418.0
	NE = float(sys.argv[3])

	# Walk the input sites (posin / rsin, indexed by index1) against the map
	# positions (mappos / mapgpos, indexed by index2).
	# Nested indentation restored: in the flattened form the `while` and `if`
	# headers had no indented body, which is a syntax error.
	# NOTE(review): only the exact-match branch is visible in this excerpt; nothing
	# shown here advances index2 or handles pos != mappos[index2], so the remaining
	# branches presumably follow outside this view.
	index1 = 0
	index2 = 0
	while index1 < len(posin):
	    pos = posin[index1]
	    rs = rsin[index1]
	    if pos == mappos[index2]:
	        #the 1000 Genomes site was genotyped as part of the map
	        # Record mapgpos[index2] for this site, then move on to the next input site.
	        results.append((rs, pos, mapgpos[index2]))
	        index1 = index1 + 1