sashagusev/breeders_threshold.R

## breeders_threshold.R
# ---
# Code to estimate and simulate selection on healthy individuals (controls) for a threshold trait
# Updated with alternative simulation setup (producing the same result) and averaging to get smoother curves
# ---

library("VGAM")

# --- Estimate using the Breeder's Equation for threshold traits (Walsh & Lynch 2018; Dempster & Lerner 1950)
# We will select on being "healthy" where 1-prev of the population is healthy
ThresholdSelection <- function( FractionHealthy, Heritability ) {
  # --- Estimate the mean liability in the healthy population
  ProbitFraction = probitlink(FractionHealthy)
  # Derived in WL 14.6 using the inverse:
  # ProbitFraction = -1 * probitlink(1 - FractionHealthy)
  # Sanity check: pnorm(ProbitFraction) should equal FractionHealthy

  # --- Estimate the selection differential
  # Derived in WL 14.9 when only cases (in this case healthy) are selected
  Selection = dnorm(ProbitFraction) / FractionHealthy

  # --- Estimate the new healthy fraction in response to selection
  NewFractionHealthy = pnorm( ProbitFraction + Heritability * Selection )
  # Derived in WL 14.7 using the inverse:
  # FractionHealthyNew = 1 - pnorm( -1 * ProbitFraction - Heritability * Selection )

  return( NewFractionHealthy )
}

# --- Simulate under the liability threshold model (here cases are cases)
SimulatedSelection <- function( Prevalence , Heritability , N = 50e3 ) {
  LiabThreshold = qnorm(1-Prevalence)

  # Simulate offspring as parents plus transmission variance
  # gv_dad = rnorm(N)
  # gv_mum = rnorm(N)
  # y_dad = sqrt(Heritability) * gv_dad + rnorm(N,0,sqrt(1-Heritability))
  # y_mum = sqrt(Heritability) * gv_mum + rnorm(N,0,sqrt(1-Heritability))
  # gv_kid = (0.5) * gv_dad + (0.5) * gv_mum + sqrt(0.5)*rnorm(N,0,1)
  # y_kid = sqrt(Heritability) * gv_kid + rnorm(N,0,sqrt(1-Heritability))

  # Simulate from parent haplotypes
  gv_dad_a = rnorm(N)
  gv_dad_b = rnorm(N)
  gv_mum_a = rnorm(N)
  gv_mum_b = rnorm(N)
  gv_dad = sqrt(Heritability*0.5) * gv_dad_a + sqrt(Heritability*0.5) * gv_dad_b
  gv_mum = sqrt(Heritability*0.5) * gv_mum_a + sqrt(Heritability*0.5) * gv_mum_b
  y_dad = gv_dad + rnorm(N,0,sqrt(1-Heritability))
  y_mum = gv_mum + rnorm(N,0,sqrt(1-Heritability))

  # Simulate offspring
  gv_kid = sqrt(Heritability*0.5) * gv_dad_a + sqrt(Heritability*0.5) * gv_mum_a
  y_kid = gv_kid + rnorm(N,0,sqrt(1-Heritability))

  # Select only the kids with healthy parents
  keep = y_dad < LiabThreshold & y_mum < LiabThreshold
  NewPrev = mean(y_kid[keep]>LiabThreshold)
  # estimate the new heritability of liability
  NewHeritability = cor( gv_kid[keep] , y_kid[keep] )^2
  return( c(NewPrev,NewHeritability) )
}

# --- Results for schizophrenia-like phenotype
# Prevalance of the disease
Prev = 0.01
# Heritability
Heritability = 0.8
NewPrev = 1 - ThresholdSelection( 1 - Prev , Heritability )
cat( "Breeder's Equation:" , "Starting Prevalance:" , Prev , "; New Prevalance:" , NewPrev , "; Ratio:" , NewPrev / Prev , '\n' )
NewPrev = SimulatedSelection( Prev , Heritability )[1]
cat( "Liability Threshold Sim:" , "Starting Prevalance:" , Prev , "; New Prevalance:" , NewPrev , "; Ratio:" , NewPrev / Prev , '\n' )

# --- Compute across several different heritabilities
hsq_try = c(0.1,0.2,0.5,0.8)
prev_try = c(0.001,0.005,seq(0.01,0.1,by=0.01))
results_breeder = matrix(NA,nrow=length(hsq_try),ncol=length(prev_try))
results_sim = matrix(NA,nrow=length(hsq_try),ncol=length(prev_try))
seeds = 10
for( h in 1:length(hsq_try) ) {
  Heritability = hsq_try[h]
  cat(Heritability,'\n')
  for ( i in 1:length(prev_try) ) {
    results_breeder[h,i] = 1 - ThresholdSelection( 1 - prev_try[i] , Heritability )
    # simulate more people for very common phenotypes
    res = rep(NA,seeds)
    for ( s in 1:seeds ) {
      res[s] = SimulatedSelection( prev_try[i] , Heritability , N=1e4/prev_try[i] )[1]
    }
    results_sim[h,i] = mean(res)
  }
}

# --- Visualize

par(mfrow=c(1,3))
library("RColorBrewer")
clr = rev(brewer.pal(9,"Blues"))

for ( h in c(2,4) ) {
  plot( prev_try , results_breeder[h,]/prev_try , ylim=c(0.5,1) , log="x" , xlab="Starting Prevalance" , ylab="New / Starting Prevalance" ,las=1 , type="l" , lwd=2 , col="gray" , main=paste("Heritability = ",hsq_try[h],sep=''))
  lines( prev_try , results_sim[h,]/prev_try , col=clr[h] ,lwd=2 )
  points( prev_try , results_sim[h,]/prev_try , col=clr[h] , pch=19 )
  legend("bottomleft",legend=c("Breeder's Equation","Simulation"),col=c("gray",clr[h]) , lty=c(1,1) , lwd=c(2,2) , bty="n")
}

plot( prev_try , prev_try , type="n" , bty="n" , log="x" , xlab="Starting Prevalance" , ylab="Simulation / Equation Prevalance" , ylim=c(0.8,1.1), las=1  , main="Ratio of New Prevalances")
for( h in 1:length(hsq_try) ) {
  lines( prev_try , results_sim[h,]/results_breeder[h,] , col=clr[h] , lwd=1.5)
  points( prev_try , results_sim[h,]/results_breeder[h,] , col=clr[h] , cex=0.5 , pch=19)
}
abline(h=1,lty=3)
legend("bottomleft",legend=hsq_try,col=clr,bty="n",lwd=2,title="Heritability")
	# ---
	# Code to estimate and simulate selection on healthy individuals (controls) for a threshold trait
	# Updated with alternative simulation setup (producing the same result) and averaging to get smoother curves
	# ---

	library("VGAM")

	# --- Estimate using the Breeder's Equation for threshold traits (Walsh & Lynch 2018; Dempster & Lerner 1950)
	# We will select on being "healthy" where 1-prev of the population is healthy
	ThresholdSelection <- function( FractionHealthy, Heritability ) {
	# --- Estimate the mean liability in the healthy population
	ProbitFraction = probitlink(FractionHealthy)
	# Derived in WL 14.6 using the inverse:
	# ProbitFraction = -1 * probitlink(1 - FractionHealthy)
	# Sanity check: pnorm(ProbitFraction) should equal FractionHealthy

	# --- Estimate the selection differential
	# Derived in WL 14.9 when only cases (in this case healthy) are selected
	Selection = dnorm(ProbitFraction) / FractionHealthy

	# --- Estimate the new healthy fraction in response to selection
	NewFractionHealthy = pnorm( ProbitFraction + Heritability * Selection )
	# Derived in WL 14.7 using the inverse:
	# FractionHealthyNew = 1 - pnorm( -1 * ProbitFraction - Heritability * Selection )

	return( NewFractionHealthy )
	}

	# --- Simulate under the liability threshold model (here cases are cases)
	SimulatedSelection <- function( Prevalence , Heritability , N = 50e3 ) {
	LiabThreshold = qnorm(1-Prevalence)

	# Simulate offspring as parents plus transmission variance
	# gv_dad = rnorm(N)
	# gv_mum = rnorm(N)
	# y_dad = sqrt(Heritability) * gv_dad + rnorm(N,0,sqrt(1-Heritability))
	# y_mum = sqrt(Heritability) * gv_mum + rnorm(N,0,sqrt(1-Heritability))
	# gv_kid = (0.5) * gv_dad + (0.5) * gv_mum + sqrt(0.5)*rnorm(N,0,1)
	# y_kid = sqrt(Heritability) * gv_kid + rnorm(N,0,sqrt(1-Heritability))

	# Simulate from parent haplotypes
	gv_dad_a = rnorm(N)
	gv_dad_b = rnorm(N)
	gv_mum_a = rnorm(N)
	gv_mum_b = rnorm(N)
	gv_dad = sqrt(Heritability0.5) gv_dad_a + sqrt(Heritability0.5) gv_dad_b
	gv_mum = sqrt(Heritability0.5) gv_mum_a + sqrt(Heritability0.5) gv_mum_b
	y_dad = gv_dad + rnorm(N,0,sqrt(1-Heritability))
	y_mum = gv_mum + rnorm(N,0,sqrt(1-Heritability))

	# Simulate offspring
	gv_kid = sqrt(Heritability0.5) gv_dad_a + sqrt(Heritability0.5) gv_mum_a
	y_kid = gv_kid + rnorm(N,0,sqrt(1-Heritability))

	# Select only the kids with healthy parents
	keep = y_dad < LiabThreshold & y_mum < LiabThreshold
	NewPrev = mean(y_kid[keep]>LiabThreshold)
	# estimate the new heritability of liability
	NewHeritability = cor( gv_kid[keep] , y_kid[keep] )^2
	return( c(NewPrev,NewHeritability) )
	}

	# --- Results for schizophrenia-like phenotype
	# Prevalance of the disease
	Prev = 0.01
	# Heritability
	Heritability = 0.8
	NewPrev = 1 - ThresholdSelection( 1 - Prev , Heritability )
	cat( "Breeder's Equation:" , "Starting Prevalance:" , Prev , "; New Prevalance:" , NewPrev , "; Ratio:" , NewPrev / Prev , '\n' )
	NewPrev = SimulatedSelection( Prev , Heritability )[1]
	cat( "Liability Threshold Sim:" , "Starting Prevalance:" , Prev , "; New Prevalance:" , NewPrev , "; Ratio:" , NewPrev / Prev , '\n' )

	# --- Compute across several different heritabilities
	hsq_try = c(0.1,0.2,0.5,0.8)
	prev_try = c(0.001,0.005,seq(0.01,0.1,by=0.01))
	results_breeder = matrix(NA,nrow=length(hsq_try),ncol=length(prev_try))
	results_sim = matrix(NA,nrow=length(hsq_try),ncol=length(prev_try))
	seeds = 10
	for( h in 1:length(hsq_try) ) {
	Heritability = hsq_try[h]
	cat(Heritability,'\n')
	for ( i in 1:length(prev_try) ) {
	results_breeder[h,i] = 1 - ThresholdSelection( 1 - prev_try[i] , Heritability )
	# simulate more people for very common phenotypes
	res = rep(NA,seeds)
	for ( s in 1:seeds ) {
	res[s] = SimulatedSelection( prev_try[i] , Heritability , N=1e4/prev_try[i] )[1]
	}
	results_sim[h,i] = mean(res)
	}
	}

	# --- Visualize

	par(mfrow=c(1,3))
	library("RColorBrewer")
	clr = rev(brewer.pal(9,"Blues"))

	for ( h in c(2,4) ) {
	plot( prev_try , results_breeder[h,]/prev_try , ylim=c(0.5,1) , log="x" , xlab="Starting Prevalance" , ylab="New / Starting Prevalance" ,las=1 , type="l" , lwd=2 , col="gray" , main=paste("Heritability = ",hsq_try[h],sep=''))
	lines( prev_try , results_sim[h,]/prev_try , col=clr[h] ,lwd=2 )
	points( prev_try , results_sim[h,]/prev_try , col=clr[h] , pch=19 )
	legend("bottomleft",legend=c("Breeder's Equation","Simulation"),col=c("gray",clr[h]) , lty=c(1,1) , lwd=c(2,2) , bty="n")
	}

	plot( prev_try , prev_try , type="n" , bty="n" , log="x" , xlab="Starting Prevalance" , ylab="Simulation / Equation Prevalance" , ylim=c(0.8,1.1), las=1 , main="Ratio of New Prevalances")
	for( h in 1:length(hsq_try) ) {
	lines( prev_try , results_sim[h,]/results_breeder[h,] , col=clr[h] , lwd=1.5)
	points( prev_try , results_sim[h,]/results_breeder[h,] , col=clr[h] , cex=0.5 , pch=19)
	}
	abline(h=1,lty=3)
	legend("bottomleft",legend=hsq_try,col=clr,bty="n",lwd=2,title="Heritability")