Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Created June 24, 2023 20:32
Show Gist options
  • Save sashagusev/bb7f6d10a76fb2f8a63a1190fe849f6f to your computer and use it in GitHub Desktop.
Save sashagusev/bb7f6d10a76fb2f8a63a1190fe849f6f to your computer and use it in GitHub Desktop.
library("RColorBrewer")
# function for generating twis
make_all_twins = function(N,rge,rgc,h2g,h2c,h2e,h2gxe=0,h2gxc=0,pop_strat=0,env_strat=0){
# --- generate MZs
# generate MZ twins
gv_MZ1 = scale(rnorm(N))
gv_MZ2 = scale(gv_MZ1)
# generate rGE
ev_MZ1 = scale(rge * gv_MZ1 + sqrt(1 - rge^2) * scale(rnorm(N)))
ev_MZ2 = scale(rge * gv_MZ2 + sqrt(1 - rge^2) * scale(rnorm(N)))
# generate rGC
shared_env = scale(rgc * scale(gv_MZ1+gv_MZ2) + sqrt(1 - rgc^2) * scale(rnorm(N)))
# generate phenotypes
y_MZ1 = (sqrt(h2g) * gv_MZ1 + sqrt(h2e) * ev_MZ1 + sqrt(h2c) * shared_env + sqrt(h2gxe) * scale(gv_MZ1*ev_MZ1) + sqrt(h2gxc) * scale(gv_MZ1*shared_env) )
y_MZ2 = (sqrt(h2g) * gv_MZ2 + sqrt(h2e) * ev_MZ2 + sqrt(h2c) * shared_env + sqrt(h2gxe) * scale(gv_MZ2*ev_MZ2) + sqrt(h2gxc) * scale(gv_MZ2*shared_env) )
# generate DZs
gv_DZ1 = scale(rnorm(N))
gv_DZ2 = scale(sqrt(0.5^2) * scale(gv_DZ1) + sqrt(1-0.5^2) * scale(rnorm(N)))
# give them the rge and the shared environment
# generate rGE
ev_DZ1 = scale(rge * gv_DZ1 + sqrt(1 - rge^2) * scale(rnorm(N)))
ev_DZ2 = scale(rge * gv_DZ2 + sqrt(1 - rge^2) * scale(rnorm(N)))
# generate rGC
shared_env = scale(rgc * scale(gv_DZ1+gv_DZ2) + sqrt(1 - rgc^2) * scale(rnorm(N)))
# generate phenotypes
y_DZ1 = (sqrt(h2g) * gv_DZ1 + sqrt(h2e) * ev_DZ1 + sqrt(h2c) * shared_env + sqrt(h2gxe) * scale(gv_DZ1*ev_DZ1) + sqrt(h2gxc) * scale(gv_DZ1*shared_env) )
y_DZ2 = (sqrt(h2g) * gv_DZ2 + sqrt(h2e) * ev_DZ2 + sqrt(h2c) * shared_env + sqrt(h2gxe) * scale(gv_DZ2*ev_DZ2) + sqrt(h2gxc) * scale(gv_DZ2*shared_env) )
# add population structure (genotype artificially correlated with phenotype)
if ( pop_strat != 0 ) {
y_MZ1 = sqrt(pop_strat) * scale(gv_MZ1+gv_MZ2) + sqrt(1-pop_strat) * scale(y_MZ1)
y_MZ2 = sqrt(pop_strat) * scale(gv_MZ1+gv_MZ2) + sqrt(1-pop_strat) * scale(y_MZ2)
y_DZ1 = sqrt(pop_strat) * scale(gv_DZ1+gv_DZ2) + sqrt(1-pop_strat) * scale(y_DZ1)
y_DZ2 = sqrt(pop_strat) * scale(gv_DZ1+gv_DZ2) + sqrt(1-pop_strat) * scale(y_DZ2)
}
# add environmental stratification
if ( env_strat != 0 ) {
e_strat = scale(rnorm(N))
y_MZ1 = sqrt(env_strat) * scale(e_strat) + sqrt(1-env_strat) * scale(y_MZ1)
y_MZ2 = sqrt(env_strat) * scale(e_strat) + sqrt(1-env_strat) * scale(y_MZ2)
y_DZ1 = sqrt(env_strat) * scale(e_strat) + sqrt(1-env_strat) * scale(y_DZ1)
y_DZ2 = sqrt(env_strat) * scale(e_strat) + sqrt(1-env_strat) * scale(y_DZ2)
}
df = data.frame("y_MZ1"=y_MZ1,"y_MZ2"=y_MZ2,"y_DZ1"=y_DZ1,"y_DZ2"=y_DZ2,"gv_MZ1"=gv_MZ1,"gv_DZ1"=gv_DZ1)
return(df)
}
# unrelated samples
N = 100000
# heritability parameter
h2g = 0.3
standardize = TRUE
# --- GxE Interactions
pdf("hsq_gxe.pdf",width=10,height=5)
par(mfrow=c(1,2))
clr = brewer.pal(4,"Set2")
# storing estimates here:
for ( MODE in c("Twins","Population")) {
all.var = seq(0.1,0.7,by=0.05)
plot( 0 , 0 , type="n" , xlim=range(all.var) , ylim=c(0,1) , las=1 , xlab="GxE Variance", ylab="Heritability est" , main=paste("Gene-Environment Interaction\n(Estimated in ",MODE,")",sep='') )
for ( model in 1:3 ) {
all_est = rep(NA,length(all.var))
for ( i in 1:length(all.var) ) {
if ( model == 1 ) {
est = make_all_twins(N,rge=0,rgc=0,h2g=h2g,h2c=(1-all.var[i]-h2g)/2,h2e=(1-all.var[i]-h2g)/2,h2gxe=all.var[i],h2gxc=0)
} else if ( model == 2 ) {
est = make_all_twins(N,rge=0,rgc=0,h2g=h2g,h2c=(1-all.var[i]-h2g)/2,h2e=(1-all.var[i]-h2g)/2,h2gxe=0,h2gxc=all.var[i])
} else if ( model == 3 ) {
est = make_all_twins(N,rge=0,rgc=-0.5,h2g=h2g,h2c=(1-all.var[i]-h2g)/2,h2e=(1-all.var[i]-h2g)/2,h2gxe=0,h2gxc=all.var[i])
}
if ( standardize ) {
est$y_MZ1 = scale(est$y_MZ1)
est$y_MZ2 = scale(est$y_MZ2)
est$y_DZ1 = scale(est$y_DZ1)
est$y_DZ2 = scale(est$y_DZ2)
}
if ( MODE == "Twins") {
# estimate under the ACE model
rmz = cov(est$y_MZ1,est$y_MZ2)
rdz = cov(est$y_DZ1,est$y_DZ2)
A_est = 2*(rmz-rdz)
all_est[i] = A_est
} else {
# estimate in the population
all_est[i] = cor(c(est$y_MZ1,est$y_DZ1),c(est$gv_MZ1,est$gv_DZ1))^2
}
}
lines( all.var , all_est , col=clr[model] , lwd=3 )
points( all.var, all_est , col=clr[model] , pch=19,cex=0.75)
}
abline(h=h2g,lty=2,lwd=2)
legend("topleft",legend=c("True Additive","GxE_NS","GxE_S (S random)","GxE_S (rG,E_S=-0.5)"),pch=19,col=c("black",clr),bty="n",cex=0.75)
}
dev.off()
# ---G-E Correlations
pdf("hsq_rge.pdf",width=10,height=5)
par(mfrow=c(1,2))
clr = brewer.pal(4,"Paired")
# parameters to scan over
h2g = 0.3
all.rg = seq(-0.9,0.9,by=0.1)
var_high = 0.6
var_low = 0.1
#par(mfrow=c(1,2))
for ( MODE in c("Twins","Population") ) {
plot( 0 , 0 , type="n" , xlim=c(-1,1) , ylim=c(0,1) , las=1 , xlab="G-E Correlation", ylab="Heritability est" , main=paste("Gene-Environment Correlation\n(Estimated in ",MODE,")",sep='') )
for ( model in c(1:4) ) {
all_est = rep(NA,length(all.rg))
for ( i in 1:length(all.rg) ) {
rg = all.rg[i]
if ( model == 1 ) {
est = make_all_twins(N,rge=rg,rgc=0,h2g=h2g,h2c=var_high,h2e=var_low)
} else if ( model == 2 ) {
est = make_all_twins(N,rge=rg,rgc=0,h2g=h2g,h2c=var_low,h2e=var_high)
} else if ( model == 3 ) {
est = make_all_twins(N,rge=0,rgc=rg,h2g=h2g,h2c=var_low,h2e=var_high)
} else if ( model == 4 ) {
est = make_all_twins(N,rge=0,rgc=rg,h2g=h2g,h2c=var_high,h2e=var_low)
}
if ( standardize ) {
est$y_MZ1 = scale(est$y_MZ1)
est$y_MZ2 = scale(est$y_MZ2)
est$y_DZ1 = scale(est$y_DZ1)
est$y_DZ2 = scale(est$y_DZ2)
}
# estimate under the ACE model
if ( MODE == "Twins" ) {
rmz = cov(est$y_MZ1,est$y_MZ2)
rdz = cov(est$y_DZ1,est$y_DZ2)
A_est = 2*(rmz-rdz)
all_est[i] = A_est
} else {
# estimate in the population
all_est[i] = cor(c(est$y_MZ1,est$y_DZ1),c(est$gv_MZ1,est$gv_DZ1))^2
}
}
lines( all.rg , all_est , col=clr[model] , lwd=3 )
points( all.rg, all_est , col=clr[model] , pch=19,cex=0.75)
}
abline(h=h2g,lty=2,lwd=2)
legend("topleft",legend=c("True Additive",paste("rG,E_NS (NS var=",var_low,")",sep=''),paste("rG,E_NS (NS var=",var_high,")",sep=''),paste("rG,E_S (S var=",var_low,")",sep=''),paste("rG,E_S (S var=",var_high,")",sep='')),pch=19,col=c("black",clr),bty="n")
}
dev.off()
# --- Population Stratification
pdf("hsq_strat.pdf",width=10,height=5)
par(mfrow=c(1,2))
clr = brewer.pal(4,"Set2")
# storing estimates here:
#par(mfrow=c(1,2))
for ( MODE in c("Twins","Population")) {
all.var = seq(0.1,0.7,by=0.05)
plot( 0 , 0 , type="n" , xlim=range(all.var) , ylim=c(0,1) , las=1 , xlab="Stratification Variance", ylab="Heritability est" , main=paste("Stratification/Error\n(Estimated in ",MODE,")",sep='') )
for ( model in 1:2 ) {
all_est = rep(NA,length(all.var))
for ( i in 1:length(all.var) ) {
if ( model == 1 ) {
est = make_all_twins(N,rge=0,rgc=0,h2g=h2g,h2c=0,h2e=1-h2g,pop_strat=all.var[i])
} else if ( model == 2 ) {
est = make_all_twins(N,rge=0,rgc=0,h2g=h2g,h2c=0,h2e=1-h2g,env_strat=all.var[i])
}
if ( standardize ) {
est$y_MZ1 = scale(est$y_MZ1)
est$y_MZ2 = scale(est$y_MZ2)
est$y_DZ1 = scale(est$y_DZ1)
est$y_DZ2 = scale(est$y_DZ2)
}
if ( MODE == "Twins") {
# estimate under the ACE model
rmz = cov(est$y_MZ1,est$y_MZ2)
rdz = cov(est$y_DZ1,est$y_DZ2)
A_est = 2*(rmz-rdz)
all_est[i] = A_est
} else {
# estimate in the population
all_est[i] = cor(c(est$y_MZ1,est$y_DZ1),c(est$gv_MZ1,est$gv_DZ1))^2
}
}
lines( all.var , all_est , col=clr[model] , lwd=3 )
points( all.var, all_est , col=clr[model] , pch=19,cex=0.75)
}
abline(h=h2g,lty=2,lwd=2)
legend("topleft",legend=c("True Additive","Population Structure","Measurement Error"),pch=19,col=c("black",clr),bty="n")
}
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment