Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Last active August 4, 2023 21:21
Show Gist options
  • Save sashagusev/09a148edc2f13486f3cda566ccde9c52 to your computer and use it in GitHub Desktop.
Save sashagusev/09a148edc2f13486f3cda566ccde9c52 to your computer and use it in GitHub Desktop.
N = 50000
get_correlated_trait = function(input,r) {
r * scale(input) + sqrt(1-r^2) * rnorm(length(input),0,1)
}
get_noisy_trait = function(input,noise) {
sqrt(1-noise) * scale(input) + sqrt(noise) * rnorm(length(input),0,1)
}
# good parameters from a 2-dof grid search
AM = 0.34
ENV = 0
err = 0.42
# Data from Table 2
clark_obs = c(0.578,0.502,0.431,0.266,0.139,0.071)
set.seed(42)
# --- Simulate lineages
kins = c(1,2,3,5,7,9)
labs = c("Sibling","Sibling-r","Cousin","Cousin2","Cousin3","Cousin4")
# corrs will store the simulated correlations which we will later compare to the ground truth
corrs = rep(NA,length(kins))
# simulate individuals with some amount of wealth
parent_A = rnorm(N,0,1)
# simulate their partner with correlated wealth
parent_B = get_correlated_trait( parent_A , AM )
# measurement error
m_parent_A = get_noisy_trait(parent_A,err)
m_parent_B = get_noisy_trait(parent_A,err)
# simulate siblings
# generate two offspring who get mean wealth of the parents + noise
child_A = get_noisy_trait( scale(parent_A+parent_B) , ENV )
child_B = get_noisy_trait( scale(parent_A+parent_B) , ENV )
# measurement error
m_child_A = get_noisy_trait(child_A,err)
m_child_B = get_noisy_trait(child_B,err)
corrs[1] = cor(m_child_A,m_child_B)
# simulate their partner with correlated wealth
child_A_partner = get_correlated_trait(child_A,AM)
child_B_partner = get_correlated_trait(child_B,AM)
# generate two offspring who get mean wealth of the parents + noise
child_A_child = get_noisy_trait( scale(child_A+child_A_partner) , ENV )
child_B_child = get_noisy_trait( scale(child_B+child_B_partner) , ENV )
# add measurement error
m_child_A_child = get_noisy_trait(child_A_child,err)
m_child_B_child = get_noisy_trait(child_B_child,err)
# --- sibling-r
corrs[2] = cor(m_child_A_child,m_child_B)
# -- cousins
corrs[3] = cor(m_child_A_child,m_child_B_child)
# continue for three more generations
for ( ctr in 4:6 ) {
child_A = child_A_child
child_B = child_B_child
child_A_partner = get_correlated_trait(child_A,AM)
child_B_partner = get_correlated_trait(child_B,AM)
# generate two offspring who get mean wealth of the parents + noise
child_A_child = get_noisy_trait( scale(child_A+child_A_partner) , ENV )
child_B_child = get_noisy_trait( scale(child_B+child_B_partner) , ENV )
# -- cousins-2
# add measurement error
m_child_A_child = get_noisy_trait(child_A_child,err)
m_child_B_child = get_noisy_trait(child_B_child,err)
corrs[ctr] = cor(m_child_A_child,m_child_B_child)
}
# fit the exponential model from Clark et al to this simulated data
y = log(corrs)
x = (kins)
reg = lm(y~x)
# This is what the Clark estimate would be for
# 1. assortative mating
b = exp(reg$coef[2])
m = 2*b-1
# 2. heritability
hsq = exp(reg$coef[1])
cat( "Clark estimate from simulation:" , hsq , m , summary(reg)$r.sq , '\n' )
# --- Generate Figures
# function for visualizing the data
plot_clark = function( plot_corrs , plot_obs , plot_labs , title ) {
plot(plot_corrs,plot_obs,las=1,cex=1.5,col="yellow",pch=19,xlim=c(0,0.8),ylim=c(0,0.8),xlab="Predicted Correlation",ylab="Observed Correlation",main=title )
points(plot_corrs,plot_obs,cex=1.5,lwd=2)
text(plot_corrs,plot_obs,plot_labs,pos=4)
reg = lm(plot_obs~plot_corrs)
abline(reg,lty=2,lwd=2)
rsq = cor(plot_obs,plot_corrs)^2
model_rmse = sqrt(mean((c(0,plot_obs) - c(0,plot_corrs))^2))
legend("bottomright",legend=c(paste("R2 = ",round(rsq,2),sep=''),paste("RMSE = ",round(model_rmse,2))),bty="n")
}
par(mfrow=c(1,2))
# --- Plot the Clark model parameters from Table S2 for "Occupational Status - 1780/1859"
clark_pred = 0.722*( (1+0.624)/2 )^kins
# add in zero correlation for unrelateds
plot_clark( plot_corrs = c(0,clark_pred) ,
plot_obs = c(0,clark_obs) ,
plot_labs = c("Unrelated",labs) , title="Clark Estimate\n(assortment = 0.624, hsq = 0.722)" )
# Plot the best non-genetic model
# add in zero correlation for unrelateds
plot_clark( plot_corrs = c(0,corrs) ,
plot_obs = c(0,clark_obs) ,
plot_labs = c("Unrelated",labs) , title=paste("Fit from non-genetic model\n(assortment=",AM,", measurement error=",err,")",sep='') )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment