Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Last active September 6, 2023 17:32
Show Gist options
  • Save sashagusev/5998d81a9f0bcfeef041f0edaaac3c39 to your computer and use it in GitHub Desktop.
Save sashagusev/5998d81a9f0bcfeef041f0edaaac3c39 to your computer and use it in GitHub Desktop.
simple simulation of a phenotype in genetically distant populations
# reproducibility:
set.seed(42)
# number of causal variants:
M = 5e3
# number of individuals
N = 10e3
# heritability
hsq = 0.25
# sample individuals with admixture
admix = TRUE
# allele frequencies computed from the 1000 Gnomes
# can be downloaded here:
# https://www.dropbox.com/scl/fi/7n2ou0pp9m2yp3vbdf4lc/1000G.EUR_AFR.frq?rlkey=pkatcvwfgtvgtr4xrrjlv22b7&dl=0
frq = read.table("~/Downloads/1000G.EUR_AFR.frq",head=T,as.is=T)
# flip frequencies where the alleles don't match
flip = frq[,3] != frq[,6]
frq[flip,7] = 1 - frq[flip,7]
seeds = 20
est = rep(NA,seeds)
cat( "variance explained by group label\n" )
for ( s in 1:seeds ) {
# generate causal variants
effects = rnorm(M,0,1)
# sample some causal variants
M.causal = sample(1:nrow(frq),M)
# simulate some individuals
pop_EUR = matrix(NA,nrow=N,ncol=M)
pop_AFR = matrix(NA,nrow=N,ncol=M)
for ( i in 1:M ) {
pop_EUR[,i] = rbinom(N,2,frq[i,4])
pop_AFR[,i] = rbinom(N,2,frq[i,7])
}
# simulate some heritable phenotypes
gv_EUR = pop_EUR %*% effects
gv_AFR = pop_AFR %*% effects
# simulate african ancestry in african americans (based on GERA estimates)
if ( admix == TRUE ) {
afr_anc = rnorm(N,0.736,0.174)
afr_anc[ afr_anc < 0 ] = 0
afr_anc[ afr_anc > 1 ] = 1
# admix the phenotypes
gv_AFR2 = pop_EUR %*% effects
for ( i in 1:N ) {
gv_AFR[i] = afr_anc[i] * gv_AFR[i] + (1-afr_anc[i]) * gv_AFR2[i]
}
}
gv_both = scale(c(gv_EUR,gv_AFR)) * sqrt(hsq)
env_both = rnorm(N+N,0,sqrt(1-hsq))
y_both = gv_both + env_both
pop_lbl = c(rep(TRUE,N),rep(FALSE,N))
# total heritability check:
# cor(y_both,gv_both)^2
# variance in outcome explained by group label
est[s] = cor(y_both,pop_lbl)^2
cat( "iteration" , s , ":", mean(est,na.rm=T) , "s.e." , sd(est,na.rm=T)/sqrt(sum(!is.na(est))) , '\n' )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment