sashagusev/admixture_transmission.R

## admixture_transmission.R
# --- admixture analysis

# sample size
N = 5e3

# mate correlation
AM_cor = 0.75

# variance in ancestry transmission
genetic_sd = 0.025

# wealth gap data from: https://economics.yale.edu/sites/default/files/2023-01/HannahYang_Senior_Essay.pdf
mean_g1 = 114252
mean_g0 = 573133
sd_g1 = 284907
sd_g0 = 2114999

# variance in trait across generation
child_sd = sd_g1

par(mfrow=c(2,5))

# run using a latent trait that's related to the transmitted trait
for ( latent in c(FALSE,TRUE) ) {

  # 25% of the cohort is in one population and the rest in another
  g1 = rbinom(N,1,0.25)
  y1 = rep(NA,N)
  y1[ g1 == 0 ] = rnorm( sum(g1==0) , mean_g0 , sd_g0 )
  y1[ g1 == 1 ] = rnorm( sum(g1==1) , mean_g1 , sd_g1 )

  g2 = rbinom(N,1,0.25)
  y2 = rep(NA,N)
  y2[ g2 == 0 ] = rnorm( sum(g2==0) , mean_g0 , sd_g0 )
  y2[ g2 == 1 ] = rnorm( sum(g2==1) , mean_g1 , sd_g1 )

  # run through each generation
  for ( gens in 1:5 ) {
    samp = sample(N*2,4e3)
    pop_g = c(g1,g2)[samp]
    pop_y = scale(c(y1,y2)[samp])
    if( latent ) {
      # when the transmitted trait explains 20% of the variance in an observed trait
      pop_y = sqrt(0.2) * scale(pop_y) + rnorm(length(pop_y),0,sqrt(1-0.2))
    }
    reg = lm( pop_y ~ pop_g )
    plot(pop_g,pop_y,xlim=c(0,1),ylim=c(-4,4),type="n",xlab="Admixture fraction",ylab="Trait",las=1,main=paste("Generation ",gens,"\nSlope = ",round(reg$coef[2],2),sep=''))
    points( pop_g , pop_y , col="#8c96c610" , cex=0.5 , pch=19 )
    points( pop_g , pop_y , col="#8c96c610" , cex=0.5 , pch=1 )
    abline( reg , lty=3 )

    # assortative mating on wealth
    ord1 = order(y1)
    ord2 = order( AM_cor * scale(y2) + rnorm(N,0,sqrt(1-AM_cor^2)))
    g1 = g1[ord1]
    y1 = y1[ord1]
    g2 = g2[ord2]
    y2 = y2[ord2]

    # mate and have kids
    childa_g = (g1 + g2)/2
    childa_g[ g1 != g2 ] = childa_g[ g1 != g2 ] + rnorm(sum(g1 != g2),0,genetic_sd)
    childa_y = (y1 + y2)/2 + rnorm(N,0,child_sd)

    # rerun for second child
    ord1 = order(y1)
    ord2 = order( AM_cor * scale(y2) + rnorm(N,0,sqrt(1-AM_cor^2)))
    g1 = g1[ord1]
    y1 = y1[ord1]
    g2 = g2[ord2]
    y2 = y2[ord2]

    childb_g = (g1 + g2)/2
    childb_g[ g1 != g2 ] = childb_g[ g1 != g2 ] + rnorm(sum(g1 != g2),0,genetic_sd)
    childb_y = (y1 + y2)/2 + rnorm(N,0,child_sd)

    # replace the next generation with the current generation
    g1 = childb_g
    y1 = childb_y
    g2 = childa_g
    y2 = childa_y

    # bound the ancestry
    g1[ g1 > 1 ] = 1
    g1[ g1 < 0 ] = 0
    g2[ g2 > 1 ] = 1
    g2[ g2 < 0 ] = 0
  }
}
	# --- admixture analysis

	# sample size
	N = 5e3

	# mate correlation
	AM_cor = 0.75

	# variance in ancestry transmission
	genetic_sd = 0.025

	# wealth gap data from: https://economics.yale.edu/sites/default/files/2023-01/HannahYang_Senior_Essay.pdf
	mean_g1 = 114252
	mean_g0 = 573133
	sd_g1 = 284907
	sd_g0 = 2114999

	# variance in trait across generation
	child_sd = sd_g1

	par(mfrow=c(2,5))

	# run using a latent trait that's related to the transmitted trait
	for ( latent in c(FALSE,TRUE) ) {

	# 25% of the cohort is in one population and the rest in another
	g1 = rbinom(N,1,0.25)
	y1 = rep(NA,N)
	y1[ g1 == 0 ] = rnorm( sum(g1==0) , mean_g0 , sd_g0 )
	y1[ g1 == 1 ] = rnorm( sum(g1==1) , mean_g1 , sd_g1 )

	g2 = rbinom(N,1,0.25)
	y2 = rep(NA,N)
	y2[ g2 == 0 ] = rnorm( sum(g2==0) , mean_g0 , sd_g0 )
	y2[ g2 == 1 ] = rnorm( sum(g2==1) , mean_g1 , sd_g1 )

	# run through each generation
	for ( gens in 1:5 ) {
	samp = sample(N*2,4e3)
	pop_g = c(g1,g2)[samp]
	pop_y = scale(c(y1,y2)[samp])
	if( latent ) {
	# when the transmitted trait explains 20% of the variance in an observed trait
	pop_y = sqrt(0.2) * scale(pop_y) + rnorm(length(pop_y),0,sqrt(1-0.2))
	}
	reg = lm( pop_y ~ pop_g )
	plot(pop_g,pop_y,xlim=c(0,1),ylim=c(-4,4),type="n",xlab="Admixture fraction",ylab="Trait",las=1,main=paste("Generation ",gens,"\nSlope = ",round(reg$coef[2],2),sep=''))
	points( pop_g , pop_y , col="#8c96c610" , cex=0.5 , pch=19 )
	points( pop_g , pop_y , col="#8c96c610" , cex=0.5 , pch=1 )
	abline( reg , lty=3 )

	# assortative mating on wealth
	ord1 = order(y1)
	ord2 = order( AM_cor * scale(y2) + rnorm(N,0,sqrt(1-AM_cor^2)))
	g1 = g1[ord1]
	y1 = y1[ord1]
	g2 = g2[ord2]
	y2 = y2[ord2]

	# mate and have kids
	childa_g = (g1 + g2)/2
	childa_g[ g1 != g2 ] = childa_g[ g1 != g2 ] + rnorm(sum(g1 != g2),0,genetic_sd)
	childa_y = (y1 + y2)/2 + rnorm(N,0,child_sd)

	# rerun for second child
	ord1 = order(y1)
	ord2 = order( AM_cor * scale(y2) + rnorm(N,0,sqrt(1-AM_cor^2)))
	g1 = g1[ord1]
	y1 = y1[ord1]
	g2 = g2[ord2]
	y2 = y2[ord2]

	childb_g = (g1 + g2)/2
	childb_g[ g1 != g2 ] = childb_g[ g1 != g2 ] + rnorm(sum(g1 != g2),0,genetic_sd)
	childb_y = (y1 + y2)/2 + rnorm(N,0,child_sd)

	# replace the next generation with the current generation
	g1 = childb_g
	y1 = childb_y
	g2 = childa_g
	y2 = childa_y

	# bound the ancestry
	g1[ g1 > 1 ] = 1
	g1[ g1 < 0 ] = 0
	g2[ g2 > 1 ] = 1
	g2[ g2 < 0 ] = 0
	}
	}