Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Created September 16, 2023 15:16
Show Gist options
  • Save sashagusev/f12efb085432cd21935722723d446a0f to your computer and use it in GitHub Desktop.
Save sashagusev/f12efb085432cd21935722723d446a0f to your computer and use it in GitHub Desktop.
args = as.numeric(commandArgs(trailingOnly=TRUE))
# N,M,var_direct,var_indirect
# code for implementation of REML:
# https://github.com/sashagusev/SKK-REML-sim/blob/master/func_reml.R
source("reml_func.R")
# Num individuals
N = 1e3
N = args[1]
# Num markers
M = 100
M = args[2]
# Minor allele frequency
maf = 0.5
var_direct = 0.23
var_direct = args[3]
var_indirect = 0.1
var_indirect = args[4]
# Assortative Mating
AM_cor = 0.6
AM_cor = args[5]
for ( s in 1:10 ) {
params = c(s , N,M,AM_cor,var_direct,var_indirect)
# ---
# sample effect sizes for direct effect (follow Young et al. and just sample from std norm)
betas = rnorm(M,0,1)
# sample 0/1 haplotypes
mate1_hap_a = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
mate1_hap_b = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
# sample a random mate
mate2_hap_a = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
mate2_hap_b = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
# compute genetic value for direct phenotype
gv_mate1_direct = ( mate1_hap_a + mate1_hap_b ) %*% betas
gv_mate2_direct = ( mate2_hap_a + mate2_hap_b ) %*% betas
# compute genetic value for indirect phenotype
gv_mate1_indirect = ( mate1_hap_a + mate1_hap_b ) %*% betas
gv_mate2_indirect = ( mate2_hap_a + mate2_hap_b ) %*% betas
pheno_mate1 = sqrt(var_direct) * scale(gv_mate1_direct) + sqrt(var_indirect) * scale(gv_mate1_indirect)
pheno_mate2 = sqrt(var_direct) * scale(gv_mate2_direct) + sqrt(var_indirect) * scale(gv_mate2_indirect)
# compute residual variance in the un-assorted population
resid_var = 1-var(c(pheno_mate1,pheno_mate2))
# add residual variance to sum up to 1
pheno_mate1 = pheno_mate1 + rnorm(N,0,sqrt(resid_var))
pheno_mate2 = pheno_mate2 + rnorm(N,0,sqrt(resid_var))
## --- Assortative Mating until equilibrium
for ( gens in 1:20 ) {
# compute genetic value for direct phenotype
gv_mate1_direct = ( mate1_hap_a + mate1_hap_b ) %*% betas
gv_mate2_direct = ( mate2_hap_a + mate2_hap_b ) %*% betas
# compute genetic value for indirect phenotype
gv_mate1_indirect = ( mate1_hap_a + mate1_hap_b ) %*% betas
gv_mate2_indirect = ( mate2_hap_a + mate2_hap_b ) %*% betas
pheno_mate1 = sqrt(var_direct) * scale(gv_mate1_direct) + sqrt(var_indirect) * scale(gv_mate1_indirect) + rnorm(N,0,sqrt(resid_var))
pheno_mate2 = sqrt(var_direct) * scale(gv_mate2_direct) + sqrt(var_indirect) * scale(gv_mate2_indirect) + rnorm(N,0,sqrt(resid_var))
# rank the phenotypes and find similar ranks
if ( AM_cor != 0 ) {
ord1 = order(pheno_mate1)
ord2 = order( AM_cor * scale(pheno_mate2) + rnorm(N,0,sqrt(1-AM_cor^2)))
} else {
ord1 = sample(1:N)
ord2 = sample(1:N)
}
mate1_hap_a = mate1_hap_a[ord1,]
mate1_hap_b = mate1_hap_b[ord1,]
mate2_hap_a = mate2_hap_a[ord2,]
mate2_hap_b = mate2_hap_b[ord2,]
pheno_mate1 = pheno_mate1[ord1]
pheno_mate2 = pheno_mate2[ord2]
# --- mate and generate kids
child1_hap_a = mate1_hap_a
sampled_vars = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child1_hap_a[ !sampled_vars ] = mate1_hap_b[ !sampled_vars ]
child1_hap_b = mate2_hap_a
sampled_vars = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child1_hap_b[ !sampled_vars ] = mate2_hap_b[ !sampled_vars ]
# --- find new mate and generate kids
if ( AM_cor != 0 ) {
ord1 = order(pheno_mate1)
ord2 = order( AM_cor * scale(pheno_mate2) + rnorm(N,0,sqrt(1-AM_cor^2)))
} else {
ord1 = sample(1:N)
ord2 = sample(1:N)
}
mate1_hap_a = mate1_hap_a[ord1,]
mate1_hap_b = mate1_hap_b[ord1,]
mate2_hap_a = mate2_hap_a[ord2,]
mate2_hap_b = mate2_hap_b[ord2,]
pheno_mate1 = pheno_mate1[ord1]
pheno_mate2 = pheno_mate2[ord2]
child2_hap_a = mate1_hap_a
sampled_vars = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child2_hap_a[ !sampled_vars ] = mate1_hap_b[ !sampled_vars ]
child2_hap_b = mate2_hap_a
sampled_vars = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child2_hap_b[ !sampled_vars ] = mate2_hap_b[ !sampled_vars ]
# update to the next generation
mate1_hap_a = child1_hap_a
mate1_hap_b = child1_hap_b
mate2_hap_a = child2_hap_a
mate2_hap_a = child2_hap_b
}
### ----
# --- generate one child per family
child_hap_a = mate1_hap_a
sampled_vars_mate1 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child_hap_a[ !sampled_vars_mate1 ] = mate1_hap_b[ !sampled_vars_mate1 ]
child_hap_b = mate2_hap_a
sampled_vars_mate2 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child_hap_b[ !sampled_vars_mate2 ] = mate2_hap_b[ !sampled_vars_mate2 ]
# compute genetic value
gv_child_direct = scale( child_hap_a + child_hap_b ) %*% betas
gv_child_indirect = scale( mate1_hap_a + mate1_hap_b + mate2_hap_a + mate2_hap_b ) %*% betas
pheno_child = sqrt(var_direct) * scale(gv_child_direct) + sqrt(var_indirect) * scale(gv_child_indirect)
# add noise to make phenotype have variance 1
pheno_child = pheno_child + rnorm(N,0,sqrt(1-var(pheno_child)))
# --- generate a sibling
sib_hap_a = mate1_hap_a
sampled_vars_mate1 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
sib_hap_a[ !sampled_vars_mate1 ] = mate1_hap_b[ !sampled_vars_mate1 ]
sib_hap_b = mate2_hap_a
sampled_vars_mate2 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
sib_hap_b[ !sampled_vars_mate2 ] = mate2_hap_b[ !sampled_vars_mate2 ]
# compute genetic value
gv_sib_direct = scale( sib_hap_a + sib_hap_b ) %*% betas
gv_sib_indirect = scale( mate1_hap_a + mate1_hap_b + mate2_hap_a + mate2_hap_b ) %*% betas
pheno_sib = sqrt(var_direct) * scale(gv_sib_direct) + sqrt(var_indirect) * scale(gv_sib_indirect)
# add noise to make phenotype have variance 1
pheno_sib = pheno_sib + rnorm(N,0,sqrt(1-var(pheno_sib)))
# --- Estimate:
# estimate heritability from kids using HE regression
x_child = scale( child_hap_a + child_hap_b )
k_child = x_child %*% t(x_child) / ncol(x_child)
yprod_child = scale( pheno_child ) %*% t( scale( pheno_child ) )
x_parents = sqrt(2) * scale( mate1_hap_a + mate1_hap_b + mate2_hap_a + mate2_hap_b )
k_par = x_parents %*% t(x_parents) / (2*ncol(x_parents))
k_opar = ( x_child %*% t(x_parents) + x_parents %*% t(x_child) ) / (2*ncol(x_parents))
K = list()
K[[1]] = k_child
K[[2]] = k_par
K[[3]] = k_opar
# REML RDR
try({
reml = aiML( A = K , y = scale(pheno_child) , Var = c(0.25,0.25,0.25,0.25) , verbose=FALSE )
est = reml$h2
cat( params , "RDR_REML_direct" , est , '\n' , sep='\t')
},silent=F)
# HE total
kvals = k_child[ lower.tri(k_child) ]
yvals = yprod_child[ lower.tri(yprod_child) ]
# HE RDR
kvals_par = k_par[ lower.tri(k_par) ]
kvals_opar = k_opar[ lower.tri(k_opar) ]
est = lm( yvals ~ kvals + kvals_par + kvals_opar )$coef[2]
cat( params , "RDR_HE_direct" , est , '\n' , sep='\t')
# HE total
est = lm( yvals ~ kvals )$coef[2]
cat( params , "HE_total" , est , '\n' , sep='\t')
# PGI estimates (assuming true genetic values are known)
est = summary(lm(pheno_child ~ scale(gv_child_direct) + scale(gv_child_indirect) ))$coef[2,1]^2
cat( params , "PGI_direct" , est , '\n' , sep='\t')
est = summary(lm(pheno_child ~ gv_child_direct))$adj.r.sq
cat( params , "PGI_total" , est , '\n' , sep='\t')
# sib regression using SNPs (note scale by -1 rather than -0.5)
#x_child = scale( child_hap_a + child_hap_b )
#x_sib = scale( sib_hap_a + sib_hap_b )
#k_sib = diag( ( x_child %*% t( x_sib )) / ncol(x_child) )
#y_diff = ( scale(pheno_child) - scale(pheno_sib) )^2
#est_hsq_direct_sibr = -1 * lm( y_diff ~ k_sib )$coef[2]
# sib regression using IBD
k_sib = apply( (child_hap_a == sib_hap_a) + (child_hap_b == sib_hap_b) , 1 , mean)
y_diff = ( scale(pheno_child) - scale(pheno_sib) )^2
est = -0.5 * lm( y_diff ~ k_sib )$coef[2]
cat( params , "SIBREG" , est , '\n' , sep='\t')
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment