Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Created October 15, 2023 20:03
Show Gist options
  • Save sashagusev/9fc364f7c9c16f85f0af7bbdd93120ce to your computer and use it in GitHub Desktop.
Save sashagusev/9fc364f7c9c16f85f0af7bbdd93120ce to your computer and use it in GitHub Desktop.
# --- admixture analysis
# sample size
N = 5e3
# mate correlation
AM_cor = 0.75
# variance in ancestry transmission
genetic_sd = 0.025
# wealth gap data from: https://economics.yale.edu/sites/default/files/2023-01/HannahYang_Senior_Essay.pdf
mean_g1 = 114252
mean_g0 = 573133
sd_g1 = 284907
sd_g0 = 2114999
# variance in trait across generation
child_sd = sd_g1
par(mfrow=c(2,5))
# run using a latent trait that's related to the transmitted trait
for ( latent in c(FALSE,TRUE) ) {
# 25% of the cohort is in one population and the rest in another
g1 = rbinom(N,1,0.25)
y1 = rep(NA,N)
y1[ g1 == 0 ] = rnorm( sum(g1==0) , mean_g0 , sd_g0 )
y1[ g1 == 1 ] = rnorm( sum(g1==1) , mean_g1 , sd_g1 )
g2 = rbinom(N,1,0.25)
y2 = rep(NA,N)
y2[ g2 == 0 ] = rnorm( sum(g2==0) , mean_g0 , sd_g0 )
y2[ g2 == 1 ] = rnorm( sum(g2==1) , mean_g1 , sd_g1 )
# run through each generation
for ( gens in 1:5 ) {
samp = sample(N*2,4e3)
pop_g = c(g1,g2)[samp]
pop_y = scale(c(y1,y2)[samp])
if( latent ) {
# when the transmitted trait explains 20% of the variance in an observed trait
pop_y = sqrt(0.2) * scale(pop_y) + rnorm(length(pop_y),0,sqrt(1-0.2))
}
reg = lm( pop_y ~ pop_g )
plot(pop_g,pop_y,xlim=c(0,1),ylim=c(-4,4),type="n",xlab="Admixture fraction",ylab="Trait",las=1,main=paste("Generation ",gens,"\nSlope = ",round(reg$coef[2],2),sep=''))
points( pop_g , pop_y , col="#8c96c610" , cex=0.5 , pch=19 )
points( pop_g , pop_y , col="#8c96c610" , cex=0.5 , pch=1 )
abline( reg , lty=3 )
# assortative mating on wealth
ord1 = order(y1)
ord2 = order( AM_cor * scale(y2) + rnorm(N,0,sqrt(1-AM_cor^2)))
g1 = g1[ord1]
y1 = y1[ord1]
g2 = g2[ord2]
y2 = y2[ord2]
# mate and have kids
childa_g = (g1 + g2)/2
childa_g[ g1 != g2 ] = childa_g[ g1 != g2 ] + rnorm(sum(g1 != g2),0,genetic_sd)
childa_y = (y1 + y2)/2 + rnorm(N,0,child_sd)
# rerun for second child
ord1 = order(y1)
ord2 = order( AM_cor * scale(y2) + rnorm(N,0,sqrt(1-AM_cor^2)))
g1 = g1[ord1]
y1 = y1[ord1]
g2 = g2[ord2]
y2 = y2[ord2]
childb_g = (g1 + g2)/2
childb_g[ g1 != g2 ] = childb_g[ g1 != g2 ] + rnorm(sum(g1 != g2),0,genetic_sd)
childb_y = (y1 + y2)/2 + rnorm(N,0,child_sd)
# replace the next generation with the current generation
g1 = childb_g
y1 = childb_y
g2 = childa_g
y2 = childa_y
# bound the ancestry
g1[ g1 > 1 ] = 1
g1[ g1 < 0 ] = 0
g2[ g2 > 1 ] = 1
g2[ g2 < 0 ] = 0
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment