Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Created September 13, 2023 00:24
Show Gist options
  • Save sashagusev/5eaf43c672cbca4ceecafd6e2084c1af to your computer and use it in GitHub Desktop.
Save sashagusev/5eaf43c672cbca4ceecafd6e2084c1af to your computer and use it in GitHub Desktop.
# Num individuals
N = 2e3
# Num markers
M = 1e3
# Affected cutoff
aff_thresh = 70
# Minor allele frequency
maf = 0.5
# simulate different parameter settings
for ( param in c("negative","positive_AM","positive_noAM")) {
# run the simulation five times
for ( s in 1:5 ) {
if ( param == "positive_AM" ) {
# Assortative Mating:
#for no assortative mating: AM_gap = 100
AM_gap = 1.75
# Direct heritability
var_direct = 0.1
# Indirect heritability
var_indirect = 0.3
# Environment
var_env = 1 - var_direct - var_indirect
# Direct/indirect correlation
cor_direct_indirect = 1
} else if ( param == "positive_noAM" ) {
# Assortative Mating:
AM_gap = 100
# Direct heritability
var_direct = 0.1
# Indirect heritability
var_indirect = 0.3
# Environment
var_env = 1 - var_direct - var_indirect
# Direct/indirect correlation
cor_direct_indirect = 1
} else if ( param == "negative" ) {
# Assortative Mating:
#for no assortative mating: AM_gap = 100
AM_gap = 1.75
# Direct heritability
var_direct = 0.2
# Indirect heritability
var_indirect = 0.6
# Environment
var_env = 1 - var_direct - var_indirect
# Direct/indirect correlation
cor_direct_indirect = -0.8
}
# ---
# sample effect sizes for direct effect (hsq_direct)
b_direct = rnorm(M,0,sqrt(var_direct/(M*sqrt(maf*(1-maf)) ) ))
# sample effect sizes for indirect effect (cor_direct_indirect , hsq_indirect)
b_indirect = sqrt(var_indirect/(M*sqrt(maf*(1-maf)) )) * (cor_direct_indirect * scale(b_direct) + rnorm(M,0,sqrt(1-cor_direct_indirect^2)))
# sample 0/1 haplotypes
mate1_hap_a = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
mate1_hap_b = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
# sample a random mate
mate2_hap_a = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
mate2_hap_b = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
# compute genetic value for direct phenotype
gv_mate1_direct = ( mate1_hap_a + mate1_hap_b ) %*% b_direct
gv_mate2_direct = ( mate2_hap_a + mate2_hap_b ) %*% b_direct
# compute genetic value for indirect phenotype
gv_mate1_indirect = ( mate1_hap_a + mate1_hap_b ) %*% b_indirect
gv_mate2_indirect = ( mate2_hap_a + mate2_hap_b ) %*% b_indirect
# normalize by population mean and store the population mean for future normalizations
mean_direct = mean( c(gv_mate1_direct,gv_mate2_direct) )
mean_indirect = mean( c(gv_mate1_indirect,gv_mate2_indirect) )
gv_mate1_direct = gv_mate1_direct - mean_direct
gv_mate2_direct = gv_mate2_direct - mean_direct
gv_mate1_indirect = gv_mate1_indirect - mean_indirect
gv_mate2_indirect = gv_mate2_indirect - mean_indirect
# sanity check heritability:
pheno_mate1 = gv_mate1_direct + gv_mate1_indirect + rnorm(N,0,sqrt(var_env))
pheno_mate2 = gv_mate2_direct + gv_mate2_indirect + rnorm(N,0,sqrt(var_env))
sd_multiplier = 15/sd(c(pheno_mate1,pheno_mate2))
estimated_hsq = summary(lm(pheno_mate1~gv_mate1_direct+gv_mate1_indirect))$adj.r.sq
#cat("hsq_total", estimated_hsq, '\n' , sep='\t' )
estimated_hsq = summary(lm(pheno_mate1~gv_mate1_direct))$adj.r.sq
#cat("hsq_direct", estimated_hsq, '\n', sep='\t' )
# Induce assortative mating by reordering the mate
# identify a mate with that phenotype and pair up
# Note: there should be a more elegant way to do this!
AM_pair = rep(NA,N)
for ( i in 1:N ){
keep = ( scale(pheno_mate2) - scale(pheno_mate1)[i] )^2 < (AM_gap)^2
keep[i] = FALSE
if ( sum(keep) > 0 ) {
keep = sample( which(keep) , 1 )
AM_pair[i] = keep
}
}
mate2_hap_a = mate2_hap_a[ AM_pair , ]
mate2_hap_b = mate2_hap_b[ AM_pair , ]
pheno_mate2 = pheno_mate2[ AM_pair ]
#cat( "AM_corr", cor( pheno_mate1 , pheno_mate2 , use="complete" ) , '\n' , sep='\t' )
cat( s , param , "G0", mean(pheno_mate1*sd_multiplier) , mean(100 + pheno_mate1*sd_multiplier < aff_thresh) , '\n' , sep='\t' )
# screen 10 embryos:
for ( emb in 1:10 ) {
# generate embryo by sampling maternal and paternal hapotypes
child_hap_a = mate1_hap_a
sampled_vars_mate1 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child_hap_a[ !sampled_vars_mate1 ] = mate1_hap_b[ !sampled_vars_mate1 ]
child_hap_b = mate2_hap_a
sampled_vars_mate2 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child_hap_b[ !sampled_vars_mate2 ] = mate2_hap_b[ !sampled_vars_mate2 ]
# compute genetic value for direct phenotype
gv_child_direct = ( child_hap_a + child_hap_b ) %*% b_direct - mean_direct
# compute genetic value for indirect phenotype
gv_child_indirect = ( child_hap_a + child_hap_b ) %*% b_indirect - mean_indirect
# pick the embryo with the highest genetic value
gv_to_select_on = gv_child_direct
if ( emb == 1 ) {
best_gv = gv_to_select_on
best_hap_a = child_hap_a
best_hap_b = child_hap_b
} else {
samples_to_update = gv_to_select_on > best_gv
best_hap_a[ samples_to_update , ] = child_hap_a[ samples_to_update , ]
best_hap_b[ samples_to_update , ] = child_hap_b[ samples_to_update , ]
best_gv[ samples_to_update ] = gv_to_select_on[ samples_to_update ]
}
#cat( emb , mean(best_gv) , '\n' )
}
child_hap_a = best_hap_a
child_hap_b = best_hap_b
# recompute parental indirect effects
gv_mate1_indirect = ( mate1_hap_a + mate1_hap_b ) %*% b_indirect - mean_indirect
gv_mate2_indirect = ( mate2_hap_a + mate2_hap_b ) %*% b_indirect - mean_indirect
# sample embryo phenotype = direct_genetics + paternal_indirect + maternal_indirect + environment
gv_child_direct = ( child_hap_a + child_hap_b ) %*% b_direct - mean_direct
gv_child_indirect = ( child_hap_a + child_hap_b ) %*% b_indirect - mean_indirect
pheno_child = gv_child_direct + gv_mate1_indirect/2 + gv_mate2_indirect/2 + rnorm(N,0,sqrt(var_env))
cat( s , param , "G1", mean(pheno_child*sd_multiplier) , mean(100 + pheno_child*sd_multiplier < aff_thresh) , '\n' , sep='\t' )
#estimated_hsq = summary(lm(pheno_child ~ gv_child_direct + gv_mate1_indirect + gv_mate2_indirect ))$adj.r.sq
#cat("Estimated total heritability:", estimated_hsq, '\n')
#estimated_hsq = summary(lm(pheno_child ~ gv_child_direct))$adj.r.sq
#cat("Estimated direct heritability:", estimated_hsq, '\n')
# then have mating for five generations
for ( g in 1:5 ) {
# --- restart the simulation in the next generation
mate1_hap_a = child_hap_a
mate1_hap_b = child_hap_b
# sample a random mate
mate2_hap_a = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
mate2_hap_b = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
# compute genetic value for direct phenotype
gv_mate1_direct = ( mate1_hap_a + mate1_hap_b ) %*% b_direct - mean_direct
gv_mate2_direct = ( mate2_hap_a + mate2_hap_b ) %*% b_direct - mean_direct
# compute genetic value for indirect phenotype
gv_mate1_indirect = ( mate1_hap_a + mate1_hap_b ) %*% b_indirect - mean_indirect
gv_mate2_indirect = ( mate2_hap_a + mate2_hap_b ) %*% b_indirect - mean_indirect
pheno_mate1 = gv_mate1_direct + gv_mate1_indirect + rnorm(N,0,sqrt(var_env))
pheno_mate2 = gv_mate2_direct + gv_mate2_indirect + rnorm(N,0,sqrt(var_env))
# --- Assortative Mating
AM_pair = rep(NA,N)
for ( i in 1:N ){
keep = ( scale(pheno_mate2) - scale(pheno_mate1)[i] )^2 < (AM_gap)^2
keep[i] = FALSE
if ( sum(keep) > 0 ) {
keep = sample( which(keep) , 1 )
AM_pair[i] = keep
}
}
# reorder the mates for assortment
mate2_hap_a = mate2_hap_a[ AM_pair , ]
mate2_hap_b = mate2_hap_b[ AM_pair , ]
pheno_mate2 = pheno_mate2[ AM_pair ]
gv_mate2_direct = gv_mate2_direct[ AM_pair ]
gv_mate2_indirect = gv_mate2_indirect[ AM_pair ]
# generate the child
child_hap_a = mate1_hap_a
sampled_vars_mate1 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child_hap_a[ !sampled_vars_mate1 ] = mate1_hap_b[ !sampled_vars_mate1 ]
child_hap_b = mate2_hap_a
sampled_vars_mate2 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child_hap_b[ !sampled_vars_mate2 ] = mate2_hap_b[ !sampled_vars_mate2 ]
# compute genetic value for direct phenotype
gv_child_direct = ( child_hap_a + child_hap_b ) %*% b_direct - mean_direct
# compute genetic value for indirect phenotype
gv_child_indirect = ( child_hap_a + child_hap_b ) %*% b_indirect - mean_indirect
pheno_child = gv_child_direct + gv_mate1_indirect/2 + gv_mate2_indirect/2 + rnorm(N,0,sqrt(var_env))
cat( s , param , paste("G",g+1,sep=''), mean(pheno_child*sd_multiplier) , mean(100 + pheno_child*sd_multiplier < aff_thresh) , '\n' , sep='\t' )
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment