Created
September 16, 2023 15:16
-
-
Save sashagusev/f12efb085432cd21935722723d446a0f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
args = as.numeric(commandArgs(trailingOnly=TRUE)) | |
# N,M,var_direct,var_indirect | |
# code for implementation of REML: | |
# https://github.com/sashagusev/SKK-REML-sim/blob/master/func_reml.R | |
source("reml_func.R") | |
# Num individuals | |
N = 1e3 | |
N = args[1] | |
# Num markers | |
M = 100 | |
M = args[2] | |
# Minor allele frequency | |
maf = 0.5 | |
var_direct = 0.23 | |
var_direct = args[3] | |
var_indirect = 0.1 | |
var_indirect = args[4] | |
# Assortative Mating | |
AM_cor = 0.6 | |
AM_cor = args[5] | |
for ( s in 1:10 ) { | |
params = c(s , N,M,AM_cor,var_direct,var_indirect) | |
# --- | |
# sample effect sizes for direct effect (follow Young et al. and just sample from std norm) | |
betas = rnorm(M,0,1) | |
# sample 0/1 haplotypes | |
mate1_hap_a = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M ) | |
mate1_hap_b = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M ) | |
# sample a random mate | |
mate2_hap_a = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M ) | |
mate2_hap_b = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M ) | |
# compute genetic value for direct phenotype | |
gv_mate1_direct = ( mate1_hap_a + mate1_hap_b ) %*% betas | |
gv_mate2_direct = ( mate2_hap_a + mate2_hap_b ) %*% betas | |
# compute genetic value for indirect phenotype | |
gv_mate1_indirect = ( mate1_hap_a + mate1_hap_b ) %*% betas | |
gv_mate2_indirect = ( mate2_hap_a + mate2_hap_b ) %*% betas | |
pheno_mate1 = sqrt(var_direct) * scale(gv_mate1_direct) + sqrt(var_indirect) * scale(gv_mate1_indirect) | |
pheno_mate2 = sqrt(var_direct) * scale(gv_mate2_direct) + sqrt(var_indirect) * scale(gv_mate2_indirect) | |
# compute residual variance in the un-assorted population | |
resid_var = 1-var(c(pheno_mate1,pheno_mate2)) | |
# add residual variance to sum up to 1 | |
pheno_mate1 = pheno_mate1 + rnorm(N,0,sqrt(resid_var)) | |
pheno_mate2 = pheno_mate2 + rnorm(N,0,sqrt(resid_var)) | |
## --- Assortative Mating until equilibrium | |
for ( gens in 1:20 ) { | |
# compute genetic value for direct phenotype | |
gv_mate1_direct = ( mate1_hap_a + mate1_hap_b ) %*% betas | |
gv_mate2_direct = ( mate2_hap_a + mate2_hap_b ) %*% betas | |
# compute genetic value for indirect phenotype | |
gv_mate1_indirect = ( mate1_hap_a + mate1_hap_b ) %*% betas | |
gv_mate2_indirect = ( mate2_hap_a + mate2_hap_b ) %*% betas | |
pheno_mate1 = sqrt(var_direct) * scale(gv_mate1_direct) + sqrt(var_indirect) * scale(gv_mate1_indirect) + rnorm(N,0,sqrt(resid_var)) | |
pheno_mate2 = sqrt(var_direct) * scale(gv_mate2_direct) + sqrt(var_indirect) * scale(gv_mate2_indirect) + rnorm(N,0,sqrt(resid_var)) | |
# rank the phenotypes and find similar ranks | |
if ( AM_cor != 0 ) { | |
ord1 = order(pheno_mate1) | |
ord2 = order( AM_cor * scale(pheno_mate2) + rnorm(N,0,sqrt(1-AM_cor^2))) | |
} else { | |
ord1 = sample(1:N) | |
ord2 = sample(1:N) | |
} | |
mate1_hap_a = mate1_hap_a[ord1,] | |
mate1_hap_b = mate1_hap_b[ord1,] | |
mate2_hap_a = mate2_hap_a[ord2,] | |
mate2_hap_b = mate2_hap_b[ord2,] | |
pheno_mate1 = pheno_mate1[ord1] | |
pheno_mate2 = pheno_mate2[ord2] | |
# --- mate and generate kids | |
child1_hap_a = mate1_hap_a | |
sampled_vars = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M ) | |
child1_hap_a[ !sampled_vars ] = mate1_hap_b[ !sampled_vars ] | |
child1_hap_b = mate2_hap_a | |
sampled_vars = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M ) | |
child1_hap_b[ !sampled_vars ] = mate2_hap_b[ !sampled_vars ] | |
# --- find new mate and generate kids | |
if ( AM_cor != 0 ) { | |
ord1 = order(pheno_mate1) | |
ord2 = order( AM_cor * scale(pheno_mate2) + rnorm(N,0,sqrt(1-AM_cor^2))) | |
} else { | |
ord1 = sample(1:N) | |
ord2 = sample(1:N) | |
} | |
mate1_hap_a = mate1_hap_a[ord1,] | |
mate1_hap_b = mate1_hap_b[ord1,] | |
mate2_hap_a = mate2_hap_a[ord2,] | |
mate2_hap_b = mate2_hap_b[ord2,] | |
pheno_mate1 = pheno_mate1[ord1] | |
pheno_mate2 = pheno_mate2[ord2] | |
child2_hap_a = mate1_hap_a | |
sampled_vars = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M ) | |
child2_hap_a[ !sampled_vars ] = mate1_hap_b[ !sampled_vars ] | |
child2_hap_b = mate2_hap_a | |
sampled_vars = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M ) | |
child2_hap_b[ !sampled_vars ] = mate2_hap_b[ !sampled_vars ] | |
# update to the next generation | |
mate1_hap_a = child1_hap_a | |
mate1_hap_b = child1_hap_b | |
mate2_hap_a = child2_hap_a | |
mate2_hap_a = child2_hap_b | |
} | |
### ---- | |
# --- generate one child per family | |
child_hap_a = mate1_hap_a | |
sampled_vars_mate1 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M ) | |
child_hap_a[ !sampled_vars_mate1 ] = mate1_hap_b[ !sampled_vars_mate1 ] | |
child_hap_b = mate2_hap_a | |
sampled_vars_mate2 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M ) | |
child_hap_b[ !sampled_vars_mate2 ] = mate2_hap_b[ !sampled_vars_mate2 ] | |
# compute genetic value | |
gv_child_direct = scale( child_hap_a + child_hap_b ) %*% betas | |
gv_child_indirect = scale( mate1_hap_a + mate1_hap_b + mate2_hap_a + mate2_hap_b ) %*% betas | |
pheno_child = sqrt(var_direct) * scale(gv_child_direct) + sqrt(var_indirect) * scale(gv_child_indirect) | |
# add noise to make phenotype have variance 1 | |
pheno_child = pheno_child + rnorm(N,0,sqrt(1-var(pheno_child))) | |
# --- generate a sibling | |
sib_hap_a = mate1_hap_a | |
sampled_vars_mate1 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M ) | |
sib_hap_a[ !sampled_vars_mate1 ] = mate1_hap_b[ !sampled_vars_mate1 ] | |
sib_hap_b = mate2_hap_a | |
sampled_vars_mate2 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M ) | |
sib_hap_b[ !sampled_vars_mate2 ] = mate2_hap_b[ !sampled_vars_mate2 ] | |
# compute genetic value | |
gv_sib_direct = scale( sib_hap_a + sib_hap_b ) %*% betas | |
gv_sib_indirect = scale( mate1_hap_a + mate1_hap_b + mate2_hap_a + mate2_hap_b ) %*% betas | |
pheno_sib = sqrt(var_direct) * scale(gv_sib_direct) + sqrt(var_indirect) * scale(gv_sib_indirect) | |
# add noise to make phenotype have variance 1 | |
pheno_sib = pheno_sib + rnorm(N,0,sqrt(1-var(pheno_sib))) | |
# --- Estimate: | |
# estimate heritability from kids using HE regression | |
x_child = scale( child_hap_a + child_hap_b ) | |
k_child = x_child %*% t(x_child) / ncol(x_child) | |
yprod_child = scale( pheno_child ) %*% t( scale( pheno_child ) ) | |
x_parents = sqrt(2) * scale( mate1_hap_a + mate1_hap_b + mate2_hap_a + mate2_hap_b ) | |
k_par = x_parents %*% t(x_parents) / (2*ncol(x_parents)) | |
k_opar = ( x_child %*% t(x_parents) + x_parents %*% t(x_child) ) / (2*ncol(x_parents)) | |
K = list() | |
K[[1]] = k_child | |
K[[2]] = k_par | |
K[[3]] = k_opar | |
# REML RDR | |
try({ | |
reml = aiML( A = K , y = scale(pheno_child) , Var = c(0.25,0.25,0.25,0.25) , verbose=FALSE ) | |
est = reml$h2 | |
cat( params , "RDR_REML_direct" , est , '\n' , sep='\t') | |
},silent=F) | |
# HE total | |
kvals = k_child[ lower.tri(k_child) ] | |
yvals = yprod_child[ lower.tri(yprod_child) ] | |
# HE RDR | |
kvals_par = k_par[ lower.tri(k_par) ] | |
kvals_opar = k_opar[ lower.tri(k_opar) ] | |
est = lm( yvals ~ kvals + kvals_par + kvals_opar )$coef[2] | |
cat( params , "RDR_HE_direct" , est , '\n' , sep='\t') | |
# HE total | |
est = lm( yvals ~ kvals )$coef[2] | |
cat( params , "HE_total" , est , '\n' , sep='\t') | |
# PGI estimates (assuming true genetic values are known) | |
est = summary(lm(pheno_child ~ scale(gv_child_direct) + scale(gv_child_indirect) ))$coef[2,1]^2 | |
cat( params , "PGI_direct" , est , '\n' , sep='\t') | |
est = summary(lm(pheno_child ~ gv_child_direct))$adj.r.sq | |
cat( params , "PGI_total" , est , '\n' , sep='\t') | |
# sib regression using SNPs (note scale by -1 rather than -0.5) | |
#x_child = scale( child_hap_a + child_hap_b ) | |
#x_sib = scale( sib_hap_a + sib_hap_b ) | |
#k_sib = diag( ( x_child %*% t( x_sib )) / ncol(x_child) ) | |
#y_diff = ( scale(pheno_child) - scale(pheno_sib) )^2 | |
#est_hsq_direct_sibr = -1 * lm( y_diff ~ k_sib )$coef[2] | |
# sib regression using IBD | |
k_sib = apply( (child_hap_a == sib_hap_a) + (child_hap_b == sib_hap_b) , 1 , mean) | |
y_diff = ( scale(pheno_child) - scale(pheno_sib) )^2 | |
est = -0.5 * lm( y_diff ~ k_sib )$coef[2] | |
cat( params , "SIBREG" , est , '\n' , sep='\t') | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment