# Gist by @sashagusev, created May 11, 2023
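# Simulation of trio-based PGI (polygenic index) analyses: a GWAS sample is used to
# estimate SNP weights, PGIs are computed for children and their parents, and the
# child's phenotype is regressed on the child/maternal/paternal PGIs to estimate the
# direct genetic effect under several generative models (with and without phenotype
# ascertainment and direct-by-indirect interactions).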
# Number of SNPs
M = 50
# Population size
N_POP = 10000
# GWAS sample size
N_GWAS = 5000
# Number of trios
N_trio = 2000
# GWAS heritability
hsq_GWAS = 0.3
# Indirect effect
hsq_indirect = 0.1
# Variance of the child phenotype explained by the parental phenotypes, scaled up
# (divided by hsq_GWAS) so that the genetically mediated/indirect component equals hsq_indirect
var_indirect = hsq_indirect / hsq_GWAS
# Number of simulation replicates per setting
seeds = 20
par(mfrow=c(2,2))
# plot titles and colors for each generative model:
MOD.names = list("base"="Standard","base_ascertain"="Ascertain the tails of the population\n(|z| > 0.5)","direct_by_indirect2"="20% of variance explained\nby Direct x Indirect^2 interaction","direct_by_indirect3"="20% of variance explained\nby Direct^2 x Indirect interaction")
MOD.clr = list("base"="#000000","base_ascertain"="#66c2a5","direct_by_indirect2"="#fc8d62","direct_by_indirect3"="#8da0cb")
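# For each generative model: sweep the true direct h^2, run `seeds` replicate
# simulations, and plot the mean estimated direct h^2 (squared child-PGI coefficient
# from the trio regression) with a ~95% CI against the truth.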
for ( MOD in c("base","base_ascertain","direct_by_indirect2","direct_by_indirect3") ) {
plot( 0 , 0 , type="n" , xlim=c(0,0.5) , ylim=c(0,0.5) , xlab="True Direct h^2", ylab="Estimated Direct h^2", las=1 , main=MOD.names[[MOD]] )
abline(0,1,lty=3,col="gray")
for ( hsq_direct in seq(0.02,0.2,by=0.02) ) {
all_est = matrix(NA,nrow=seeds,ncol=4)
for ( s in 1:seeds ) {
# generate the full population with MAF=10%
X = matrix( rbinom(N_POP*M,2,0.1) , nrow=N_POP , ncol=M )
# randomly sample the GWAS
i_GWAS = sample(N_POP,N_GWAS)
X_GWAS = X[i_GWAS,]
# randomly sample some parents
i_mat = sample(N_POP,N_trio)
i_pat = sample(N_POP,N_trio)
X_mat = X[i_mat,]
X_pat = X[i_pat,]
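# construct child genotypes by Mendelian transmission: each parent passes one of
# their two alleles at random to the child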
X_child = matrix(NA,nrow=nrow(X_mat),ncol=ncol(X_mat))
# transmit the homozygous alleles
X_child[ X_mat == 0 & X_pat == 0 ] = 0
X_child[ X_mat == 2 & X_pat == 2 ] = 2
X_child[ (X_mat == 2 & X_pat == 0) | (X_mat == 0 & X_pat == 2) ] = 1
# sample the heterozygous alleles
keep = (X_mat == 0 & X_pat == 1) | (X_mat == 1 & X_pat == 0)
X_child[ keep ] = 0 + rbinom( sum(keep) , 1 , 0.5 )
keep = (X_mat == 2 & X_pat == 1) | (X_mat == 1 & X_pat == 2)
X_child[ keep ] = 1 + rbinom( sum(keep) , 1 , 0.5 )
keep = (X_mat == 1 & X_pat == 1)
X_child[ keep ] = rbinom( sum(keep) , 1 , 0.5 ) + rbinom( sum(keep) , 1 , 0.5 )
# sample effect sizes
# direct effects
b_direct = c( rep(0,M/2) , rnorm(M/2) )
# indirect effects
b_indirect = b_direct
# if we want direct and indirect effects to be different:
#b_indirect = c( rnorm(M/2) , rep(0,M/2) )
# in the GWAS, generate phenotypes as a mix of direct + indirect
b_mix = (b_direct + b_indirect)
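# the GWAS phenotype is generated from b_mix, so the estimated GWAS weights (and the
# PGIs built from them) tag both the direct and the indirect effects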
gv_GWAS = sqrt(hsq_GWAS) * scale(scale(X_GWAS) %*% b_mix)
y_GWAS = gv_GWAS + rnorm(N_GWAS,0,sqrt(1-hsq_GWAS))
# do the same in the parents
g_mat = scale(X_mat) %*% b_mix
g_pat = scale(X_pat) %*% b_mix
y_mat = sqrt(hsq_GWAS) * scale(g_mat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS))
y_pat = sqrt(hsq_GWAS) * scale(g_pat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS))
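# the parental phenotypes y_mat/y_pat provide the indirect (environmental) pathway
# through which parental genotypes influence the child below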
# --- baseline: generate the kids as a mix of direct SNP effects and indirect parental effects
if ( MOD == "base" | MOD == "base_ascertain" ) {
g_child = scale(X_child) %*% b_direct
y_child = sqrt(hsq_direct) * scale(g_child) +
sqrt(var_indirect) * scale(y_mat+y_pat) + rnorm(N_trio,0,sqrt(1-hsq_direct-var_indirect))
}
# ---
# --- generate with a direct x indirect interaction (this model is not included in the MOD loop above)
if ( MOD == "direct_by_indirect" ) {
var_interaction = 0.2
y_mat = sqrt(hsq_GWAS) * scale(g_mat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS))
y_pat = sqrt(hsq_GWAS) * scale(g_pat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS))
g_child = scale(X_child) %*% b_direct
y_child = sqrt(hsq_direct) * scale(g_child) +
sqrt(var_indirect) * scale(y_mat+y_pat) + sqrt(var_interaction) * scale( scale(y_mat+y_pat) * scale(g_child) ) +
rnorm(N_trio,0,sqrt(1-hsq_direct-var_indirect-var_interaction))
}
# ---
# --- generate with a direct x indirect^2 interaction
if ( MOD == "direct_by_indirect2" ) {
var_interaction = 0.2
y_mat = sqrt(hsq_GWAS) * scale(g_mat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS))
y_pat = sqrt(hsq_GWAS) * scale(g_pat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS))
g_child = scale(X_child) %*% b_direct
y_child = sqrt(hsq_direct) * scale(g_child) +
sqrt(var_indirect) * scale(y_mat+y_pat) + sqrt(var_interaction) * scale( scale(g_child) * scale(y_mat+y_pat)^2 ) +
rnorm(N_trio,0,sqrt(1-hsq_direct-var_indirect-var_interaction))
}
# ---
# --- generate with a direct^2 x indirect interaction
if ( MOD == "direct_by_indirect3" ) {
var_interaction = 0.2
y_mat = sqrt(hsq_GWAS) * scale(g_mat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS))
y_pat = sqrt(hsq_GWAS) * scale(g_pat) + rnorm(N_trio,0,sqrt(1-hsq_GWAS))
g_child = scale(X_child) %*% b_direct
y_child = sqrt(hsq_direct) * scale(g_child) +
sqrt(var_indirect) * scale(y_mat+y_pat) + sqrt(var_interaction) * scale( scale(g_child)^2 * scale(y_mat+y_pat) ) +
rnorm(N_trio,0,sqrt(1-hsq_direct-var_indirect-var_interaction))
}
# ---
# compute the PGIs
# alternative: estimate the betas from the observed (noisy) GWAS phenotype
#bhat_GWAS = t(t(scale(y_GWAS)) %*% scale(X_GWAS) / (N_GWAS - 1))
# estimate the GWAS betas without any environmental noise
bhat_GWAS = t(t(scale(gv_GWAS)) %*% scale(X_GWAS) / (N_GWAS - 1))
PGI_mat = scale( scale(X_mat) %*% bhat_GWAS )
PGI_pat = scale( scale(X_pat) %*% bhat_GWAS )
PGI_child = scale( scale(X_child) %*% bhat_GWAS )
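# all PGIs are standardized; because the weights come from the noise-free genetic
# value, the PGIs are close to the true genetic score for b_mix (direct + indirect)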
# --- ascertainment
keep = rep(TRUE,length(y_child))
if ( MOD == "base_ascertain" ) {
keep = scale(y_child) < -0.5 | scale(y_child) > 0.5
}
# estimate the direct and indirect effects
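# trio regression: conditional on the parental PGIs, the child-PGI coefficient
# estimates the direct effect; because all variables are standardized, its square
# is taken below as the estimated direct h^2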
est_total = lm( y_child[keep] ~ PGI_child[keep] )$coef[-1]
est_partitioned = lm( y_child[keep] ~ PGI_child[keep] + PGI_mat[keep] + PGI_pat[keep] )$coef[-1]
all_est[ s , ] = c(est_total,est_partitioned)
}
cat( "true direct:" , hsq_direct , '\n' )
cat( "est direct:" , mean(all_est[,2])^2 , '\n' )
cat( "est p1:" , mean(all_est[,3])^2 , '\n' )
cat( "est p2:" , mean(all_est[,4])^2 , '\n' )
arrows( hsq_direct , mean(all_est[,2])^2 - 1.96*sd(all_est[,2]^2)/sqrt(nrow(all_est)) , hsq_direct , mean(all_est[,2])^2 + 1.96*sd(all_est[,2]^2)/sqrt(nrow(all_est)) , len=0 , lwd=4 , col="gray")
points( hsq_direct , mean(all_est[,2])^2 , pch=19 , col=MOD.clr[[MOD]])
}
}