Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Created July 2, 2023 04:54
Show Gist options
  • Save sashagusev/073af07c561bc896715cb39818346d85 to your computer and use it in GitHub Desktop.
Save sashagusev/073af07c561bc896715cb39818346d85 to your computer and use it in GitHub Desktop.
get_correlated_trait = function(input,r) {
r * (input) + sqrt(1-r^2) * rnorm(length(input),0,1)
}
get_noisy_trait = function(input,noise) {
sqrt(1-noise) * (input) + sqrt(noise) * rnorm(length(input),0,1)
}
simulate_non_genetic = function(N,AM,ENV,err) {
set.seed(42)
kins = c(1,2,3,5,7,9)
corrs = rep(NA,length(kins))
# simulate individuals with some amount of wealth
parent_A = rnorm(N,0,1)
# simulate their partner with correlated wealth
parent_B = get_correlated_trait( parent_A , AM )
# measurement error
m_parent_A = get_noisy_trait(parent_A,err)
m_parent_B = get_noisy_trait(parent_A,err)
# simulate siblings
# generate two offspring who get mean wealth of the parents + noise
child_A = get_noisy_trait( ((parent_A+parent_B)/2) , ENV )
child_B = get_noisy_trait( ((parent_A+parent_B)/2) , ENV )
# measurement error
m_child_A = get_noisy_trait(child_A,err)
m_child_B = get_noisy_trait(child_B,err)
corrs[1] = cor(m_child_A,m_child_B)
# simulate their partner with correlated wealth
child_A_partner = get_correlated_trait(child_A,AM)
child_B_partner = get_correlated_trait(child_B,AM)
# generate two offspring who get mean wealth of the parents + noise
child_A_child = get_noisy_trait( ((child_A+child_A_partner)/2) , ENV )
child_B_child = get_noisy_trait( ((child_B+child_B_partner)/2) , ENV )
# add measurement error
m_child_A_child = get_noisy_trait(child_A_child,err)
m_child_B_child = get_noisy_trait(child_B_child,err)
# --- sibling-r
corrs[2] = cor(m_child_A_child,m_child_B)
# -- cousins
corrs[3] = cor(m_child_A_child,m_child_B_child)
# continue for three more generations
for ( ctr in 4:6 ) {
child_A = child_A_child
child_B = child_B_child
child_A_partner = get_correlated_trait(child_A,AM)
child_B_partner = get_correlated_trait(child_B,AM)
# generate two offspring who get mean wealth of the parents + noise
child_A_child = get_noisy_trait( ((child_A+child_A_partner)/2) , ENV )
child_B_child = get_noisy_trait( ((child_B+child_B_partner)/2) , ENV )
# -- cousins-2
# add measurement error
m_child_A_child = get_noisy_trait(child_A_child,err)
m_child_B_child = get_noisy_trait(child_B_child,err)
corrs[ctr] = cor(m_child_A_child,m_child_B_child)
}
corrs
}
labs = c("Sibling","Sibling-r","Cousin","Cousin2","Cousin3","Cousin4")
kins = c(1,2,3,5,7,9)
# Clark Occ-Stat correlations:
clark_obs = c(0.578,0.502,0.431,0.266,0.139,0.071)
# Best fitting parameters
N = 50000
AM = 0.55
ENV = 0
err = 0.35
corrs = simulate_non_genetic(N,AM,ENV,err)
plot_clark = function( plot_corrs , plot_obs , plot_labs , title ) {
plot(plot_corrs,plot_obs,las=1,cex=1.5,col="yellow",pch=19,xlim=c(0,0.8),ylim=c(0,0.8),xlab="Predicted Correlation",ylab="Observed Correlation",main=title )
points(plot_corrs,plot_obs,cex=1.5,lwd=2)
text(plot_corrs,plot_obs,plot_labs,pos=4)
reg = lm(plot_obs~plot_corrs)
abline(reg,lty=2,lwd=2)
rsq = cor(plot_obs,plot_corrs)^2
model_rmse = sqrt(mean((c(0,plot_corrs) - c(0,plot_obs))^2))
legend("bottomright",legend=c(paste("R2 = ",round(rsq,2),sep=''),paste("RMSE = ",round(model_rmse,2))),bty="n")
}
par(mfrow=c(1,2))
clark_pred = 0.722*( (1+0.624)/2 )^kins
plot_clark( plot_corrs = c(0,clark_pred) ,
plot_obs = c(0,clark_obs) ,
plot_labs = c("Unrelated",labs) , title="Clark Estimate\n(assortment = 0.624, hsq = 0.722)" )
plot_clark( plot_corrs = c(0,corrs) ,
plot_obs = c(0,clark_obs) ,
plot_labs = c("Unrelated",labs) , title=paste("Fit from non-genetic model\n(assortment=",AM,", measurement error=",err,")",sep='') )
### Find best parameters for all Clark results
labs = c("Sibling","Sibling-r","Cousin","Cousin2","Cousin3","Cousin4")
kins = c(1,2,3,5,7,9)
clark_obs = matrix(nrow=9,ncol=length(kins))
clark_obs[,1] = c(0.375,0.336,0.269,0.168,0.578,0.522,0.479,0.326,0.431)
clark_obs[,2] = c(0.256, 0.246, 0.172, 0.057, 0.502, 0.380, 0.382, 0.228, 0.273)
clark_obs[,3] = c(0.208, 0.211, 0.144, 0.061, 0.431, 0.299, 0.291, 0.173, 0.234)
clark_obs[,4] = c(0.133, 0.141, 0.073, 0.069, 0.266, 0.234, 0.176, 0.186, 0.200)
clark_obs[,5] = c(0.097, 0.100, 0.052, 0.053, 0.139, 0.174, 0.071, 0.085, 0.150)
clark_obs[,6] = c(0.074, 0.078, 0.028, 0.021, 0.071, 0.066, 0.080, 0.014, 0.107)
clark_measures = c("ModStat 1910–1997", "House value 1910–1997", "IMD 1910–1997", "CoDir 1910–1997", "OccStat 1780–1859", "OccStat 1860–1919", "HighEd 1780–1859", "HighEd 1860–1919", "Literacy 1725–1869" )
for ( i in 1:nrow(clark_obs) ) {
cat("\nOptimizing",clark_measures[i],'\n')
current_obs = clark_obs[i,]
# fit the Clark model
y = log(current_obs)
x = (kins)
reg = lm(y~x)
# compute inferred "assortative mating" and "heritability"
b = exp(reg$coef[2])
m = 2*b-1
hsq = exp(reg$coef[1])
clark_pred = hsq*( (1+m)/2 )^kins
rsq = cor( c(0,clark_pred) , c(0,current_obs) )^2
rmse = sqrt(mean((c(0,clark_pred) - c(0,current_obs))^2))
cat("Clark model:",rsq,rmse,hsq,m,'\n')
all.AM = seq(0.2,1,by=0.05)
all.ENV = seq(0,1,by=0.05)
all.ENV = 0
all.err = seq(0,1,by=0.05)
max_rsq = 0
min_rmse = 100
best_param = c(NA,NA,NA,NA)
for ( AM in all.AM ) {
for ( ENV in all.ENV ) {
for ( err in all.err ) {
corrs = simulate_non_genetic(N,AM,ENV,err)
rsq = cor( c(0,corrs) , c(0,current_obs) )^2
rmse = sqrt(mean((c(0,corrs) - c(0,current_obs))^2))
if ( rmse < min_rmse ) {
min_rmse = rmse
best_param = c(rsq,rmse,AM,err)
}
}
}
}
cat( "Non-genetic model:", best_param , '\n' )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment