Created
May 11, 2023 04:14
-
-
Save sashagusev/a6c3725d02aab599b6e4097a839f53fb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Num SNPS | |
M = 50 | |
# Num POP | |
N_POP = 10000 | |
# Num GWAS | |
N_GWAS = 5000 | |
# Num Trios | |
N_trio = 2000 | |
# GWAS heritability | |
hsq_GWAS = 0.3 | |
# Indirect effect | |
hsq_indirect = 0.1 | |
# Indirect variance explained (scaled up by the total/GWAS heritability) | |
var_indirect = hsq_indirect / hsq_GWAS | |
seeds = 20 | |
par(mfrow=c(2,2)) | |
# for plot titles: | |
MOD.names = list("base"="Standard","base_ascertain"="Ascertain 50% of population at the tails","direct_by_indirect2"="20% of variance explained\nby Direct x Indirect^2 interaction","direct_by_indirect3"="20% of variance explained\nby Direct^2 x Indirect interaction") | |
MOD.clr = list("base"="#000000","base_ascertain"="#66c2a5","direct_by_indirect2"="#fc8d62","direct_by_indirect3"="#8da0cb") | |
for ( MOD in c("base","base_ascertain","direct_by_indirect2","direct_by_indirect3") ) { | |
plot( 0 , 0 , type="n" , xlim=c(0,0.5) , ylim=c(0,0.5) , xlab="True Direct h^2", ylab="Estimated Direct h^2", las=1 , main=MOD.names[[MOD]] ) | |
abline(0,1,lty=3,col="gray") | |
for ( hsq_direct in seq(0.02,0.2,by=0.02) ) { | |
all_est = matrix(NA,nrow=seeds,ncol=4) | |
for ( s in 1:seeds ) { | |
# generate the full population with MAF=10% | |
X = matrix( rbinom(N_POP*M,2,0.1) , nrow=N_POP , ncol=M ) | |
# randomly sample the GWAS | |
i_GWAS = sample(N_POP,N_GWAS) | |
X_GWAS = X[i_GWAS,] | |
# randomly sample some parents | |
i_mat = sample(N_POP,N_trio) | |
i_pat = sample(N_POP,N_trio) | |
X_mat = X[i_mat,] | |
X_pat = X[i_pat,] | |
X_child = matrix(NA,nrow=nrow(X_mat),ncol=ncol(X_mat)) | |
# transmit the homozygous alleles | |
X_child[ X_mat == 0 & X_pat == 0 ] = 0 | |
X_child[ X_mat == 2 & X_pat == 2 ] = 2 | |
X_child[ (X_mat == 2 & X_pat == 0) | (X_mat == 0 & X_pat == 2) ] = 1 | |
# sample the heterozygous alleles | |
keep = (X_mat == 0 & X_pat == 1) | (X_mat == 1 & X_pat == 0) | |
X_child[ keep ] = 0 + rbinom( sum(keep) , 1 , 0.5 ) | |
keep = (X_mat == 2 & X_pat == 1) | (X_mat == 1 & X_pat == 2) | |
X_child[ keep ] = 1 + rbinom( sum(keep) , 1 , 0.5 ) | |
keep = (X_mat == 1 & X_pat == 1) | |
X_child[ keep ] = rbinom( sum(keep) , 1 , 0.5 ) + rbinom( sum(keep) , 1 , 0.5 ) | |
# sample effect sizes | |
# direct effects | |
b_direct = c( rep(0,M/2) , rnorm(M/2) ) | |
# indirect effects | |
b_indirect = b_direct | |
# if we want direct and indirect effects to be different: | |
#b_indirect = c( rnorm(M/2) , rep(0,M/2) ) | |
# in the GWAS, generate phenotypes as a mix of direct + indirect | |
b_mix = (b_direct + b_indirect) | |
gv_GWAS = sqrt(hsq_GWAS) * scale(scale(X_GWAS) %*% b_mix) | |
y_GWAS = gv_GWAS + rnorm(N_GWAS,0,sqrt(1-hsq_GWAS)) | |
# do the same in the parents | |
g_mat = scale(X_mat) %*% b_mix | |
g_pat = scale(X_pat) %*% b_mix | |
y_mat = sqrt(hsq_GWAS) * scale(g_mat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS)) | |
y_pat = sqrt(hsq_GWAS) * scale(g_pat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS)) | |
# --- baseline: generate the kids as a mix of direct SNP effects and indirect parental effects | |
if ( MOD == "base" | MOD == "base_ascertain" ) { | |
g_child = scale(X_child) %*% b_direct | |
y_child = sqrt(hsq_direct) * scale(g_child) + | |
sqrt(var_indirect) * scale(y_mat+y_pat) + rnorm(N_trio,0,sqrt(1-hsq_direct-var_indirect)) | |
} | |
# --- | |
# --- generate with direct*indirect interactions | |
if ( MOD == "direct_by_indirect" ) { | |
var_interaction = 0.2 | |
y_mat = sqrt(hsq_GWAS) * scale(g_mat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS)) | |
y_pat = sqrt(hsq_GWAS) * scale(g_pat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS)) | |
g_child = scale(X_child) %*% b_direct | |
y_child = sqrt(hsq_direct) * scale(g_child) + | |
sqrt(var_indirect) * scale(y_mat+y_pat) + sqrt(var_interaction) * scale( scale(y_mat+y_pat) * scale(g_child) ) + | |
rnorm(N_trio,0,sqrt(1-hsq_direct-var_indirect-var_interaction)) | |
} | |
# --- | |
# --- generate with direct*indirect interactions | |
if ( MOD == "direct_by_indirect2" ) { | |
var_interaction = 0.2 | |
y_mat = sqrt(hsq_GWAS) * scale(g_mat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS)) | |
y_pat = sqrt(hsq_GWAS) * scale(g_pat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS)) | |
g_child = scale(X_child) %*% b_direct | |
y_child = sqrt(hsq_direct) * scale(g_child) + | |
sqrt(var_indirect) * scale(y_mat+y_pat) + sqrt(var_interaction) * scale( scale(g_child) * scale(y_mat+y_pat)^2 ) + | |
rnorm(N_trio,0,sqrt(1-hsq_direct-var_indirect-var_interaction)) | |
} | |
# --- | |
# --- generate with direct*indirect interactions | |
if ( MOD == "direct_by_indirect3" ) { | |
var_interaction = 0.2 | |
y_mat = sqrt(hsq_GWAS) * scale(g_mat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS)) | |
y_pat = sqrt(hsq_GWAS) * scale(g_pat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS)) | |
g_child = scale(X_child) %*% b_direct | |
y_child = sqrt(hsq_direct) * scale(g_child) + | |
sqrt(var_indirect) * scale(y_mat+y_pat) + sqrt(var_interaction) * scale( scale(g_child)^2 * scale(y_mat+y_pat) ) + | |
rnorm(N_trio,0,sqrt(1-hsq_direct-var_indirect-var_interaction)) | |
} | |
# --- | |
# compute the PGIs | |
#bhat_GWAS = t(t(scale(y_GWAS)) %*% scale(X_GWAS) / (N_GWAS - 1)) | |
# estimate the GWAS betas without any environmental noise | |
bhat_GWAS = t(t(scale(gv_GWAS)) %*% scale(X_GWAS) / (N_GWAS - 1)) | |
PGI_mat = scale( scale(X_mat) %*% bhat_GWAS ) | |
PGI_pat = scale( scale(X_pat) %*% bhat_GWAS ) | |
PGI_child = scale( scale(X_child) %*% bhat_GWAS ) | |
# --- ascertainment | |
keep = rep(TRUE,length(y_child)) | |
if ( MOD == "base_ascertain" ) { | |
keep = scale(y_child) < -0.5 | scale(y_child) > 0.5 | |
} | |
# estimate the direct and indirect effects | |
est_total = lm( y_child[keep] ~ PGI_child[keep] )$coef[-1] | |
est_partitioned = lm( y_child[keep] ~ PGI_child[keep] + PGI_mat[keep] + PGI_pat[keep] )$coef[-1] | |
all_est[ s , ] = c(est_total,est_partitioned) | |
} | |
cat( "true direct:" , hsq_direct , '\n' ) | |
cat( "est direct:" , mean(all_est[,2])^2 , '\n' ) | |
cat( "est p1:" , mean(all_est[,3])^2 , '\n' ) | |
cat( "est p2:" , mean(all_est[,4])^2 , '\n' ) | |
arrows( hsq_direct , mean(all_est[,2])^2 - 1.96*sd(all_est[,2]^2)/sqrt(nrow(all_est)) , hsq_direct , mean(all_est[,2])^2 + 1.96*sd(all_est[,2]^2)/sqrt(nrow(all_est)) , len=0 , lwd=4 , col="gray") | |
points( hsq_direct , mean(all_est[,2])^2 , pch=19 , col=MOD.clr[[MOD]]) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment