Created
September 13, 2023 00:24
-
-
Save sashagusev/5eaf43c672cbca4ceecafd6e2084c1af to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Num individuals | |
N = 2e3 | |
# Num markers | |
M = 1e3 | |
# Affected cutoff | |
aff_thresh = 70 | |
# Minor allele frequency | |
maf = 0.5 | |
# simulate different parameter settings | |
for ( param in c("negative","positive_AM","positive_noAM")) { | |
# run the simulation five times | |
for ( s in 1:5 ) { | |
if ( param == "positive_AM" ) { | |
# Assortative Mating: | |
#for no assortative mating: AM_gap = 100 | |
AM_gap = 1.75 | |
# Direct heritability | |
var_direct = 0.1 | |
# Indirect heritability | |
var_indirect = 0.3 | |
# Environment | |
var_env = 1 - var_direct - var_indirect | |
# Direct/indirect correlation | |
cor_direct_indirect = 1 | |
} else if ( param == "positive_noAM" ) { | |
# Assortative Mating: | |
AM_gap = 100 | |
# Direct heritability | |
var_direct = 0.1 | |
# Indirect heritability | |
var_indirect = 0.3 | |
# Environment | |
var_env = 1 - var_direct - var_indirect | |
# Direct/indirect correlation | |
cor_direct_indirect = 1 | |
} else if ( param == "negative" ) { | |
# Assortative Mating: | |
#for no assortative mating: AM_gap = 100 | |
AM_gap = 1.75 | |
# Direct heritability | |
var_direct = 0.2 | |
# Indirect heritability | |
var_indirect = 0.6 | |
# Environment | |
var_env = 1 - var_direct - var_indirect | |
# Direct/indirect correlation | |
cor_direct_indirect = -0.8 | |
} | |
# --- | |
# sample effect sizes for direct effect (hsq_direct) | |
b_direct = rnorm(M,0,sqrt(var_direct/(M*sqrt(maf*(1-maf)) ) )) | |
# sample effect sizes for indirect effect (cor_direct_indirect , hsq_indirect) | |
b_indirect = sqrt(var_indirect/(M*sqrt(maf*(1-maf)) )) * (cor_direct_indirect * scale(b_direct) + rnorm(M,0,sqrt(1-cor_direct_indirect^2))) | |
# sample 0/1 haplotypes | |
mate1_hap_a = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M ) | |
mate1_hap_b = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M ) | |
# sample a random mate | |
mate2_hap_a = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M ) | |
mate2_hap_b = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M ) | |
# compute genetic value for direct phenotype | |
gv_mate1_direct = ( mate1_hap_a + mate1_hap_b ) %*% b_direct | |
gv_mate2_direct = ( mate2_hap_a + mate2_hap_b ) %*% b_direct | |
# compute genetic value for indirect phenotype | |
gv_mate1_indirect = ( mate1_hap_a + mate1_hap_b ) %*% b_indirect | |
gv_mate2_indirect = ( mate2_hap_a + mate2_hap_b ) %*% b_indirect | |
# normalize by population mean and store the population mean for future normalizations | |
mean_direct = mean( c(gv_mate1_direct,gv_mate2_direct) ) | |
mean_indirect = mean( c(gv_mate1_indirect,gv_mate2_indirect) ) | |
gv_mate1_direct = gv_mate1_direct - mean_direct | |
gv_mate2_direct = gv_mate2_direct - mean_direct | |
gv_mate1_indirect = gv_mate1_indirect - mean_indirect | |
gv_mate2_indirect = gv_mate2_indirect - mean_indirect | |
# sanity check heritability: | |
pheno_mate1 = gv_mate1_direct + gv_mate1_indirect + rnorm(N,0,sqrt(var_env)) | |
pheno_mate2 = gv_mate2_direct + gv_mate2_indirect + rnorm(N,0,sqrt(var_env)) | |
sd_multiplier = 15/sd(c(pheno_mate1,pheno_mate2)) | |
estimated_hsq = summary(lm(pheno_mate1~gv_mate1_direct+gv_mate1_indirect))$adj.r.sq | |
#cat("hsq_total", estimated_hsq, '\n' , sep='\t' ) | |
estimated_hsq = summary(lm(pheno_mate1~gv_mate1_direct))$adj.r.sq | |
#cat("hsq_direct", estimated_hsq, '\n', sep='\t' ) | |
# Induce assortative mating by reordering the mate | |
# identify a mate with that phenotype and pair up | |
# Note: there should be a more elegant way to do this! | |
AM_pair = rep(NA,N) | |
for ( i in 1:N ){ | |
keep = ( scale(pheno_mate2) - scale(pheno_mate1)[i] )^2 < (AM_gap)^2 | |
keep[i] = FALSE | |
if ( sum(keep) > 0 ) { | |
keep = sample( which(keep) , 1 ) | |
AM_pair[i] = keep | |
} | |
} | |
mate2_hap_a = mate2_hap_a[ AM_pair , ] | |
mate2_hap_b = mate2_hap_b[ AM_pair , ] | |
pheno_mate2 = pheno_mate2[ AM_pair ] | |
#cat( "AM_corr", cor( pheno_mate1 , pheno_mate2 , use="complete" ) , '\n' , sep='\t' ) | |
cat( s , param , "G0", mean(pheno_mate1*sd_multiplier) , mean(100 + pheno_mate1*sd_multiplier < aff_thresh) , '\n' , sep='\t' ) | |
# screen 10 embryos: | |
for ( emb in 1:10 ) { | |
# generate embryo by sampling maternal and paternal hapotypes | |
child_hap_a = mate1_hap_a | |
sampled_vars_mate1 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M ) | |
child_hap_a[ !sampled_vars_mate1 ] = mate1_hap_b[ !sampled_vars_mate1 ] | |
child_hap_b = mate2_hap_a | |
sampled_vars_mate2 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M ) | |
child_hap_b[ !sampled_vars_mate2 ] = mate2_hap_b[ !sampled_vars_mate2 ] | |
# compute genetic value for direct phenotype | |
gv_child_direct = ( child_hap_a + child_hap_b ) %*% b_direct - mean_direct | |
# compute genetic value for indirect phenotype | |
gv_child_indirect = ( child_hap_a + child_hap_b ) %*% b_indirect - mean_indirect | |
# pick the embryo with the highest genetic value | |
gv_to_select_on = gv_child_direct | |
if ( emb == 1 ) { | |
best_gv = gv_to_select_on | |
best_hap_a = child_hap_a | |
best_hap_b = child_hap_b | |
} else { | |
samples_to_update = gv_to_select_on > best_gv | |
best_hap_a[ samples_to_update , ] = child_hap_a[ samples_to_update , ] | |
best_hap_b[ samples_to_update , ] = child_hap_b[ samples_to_update , ] | |
best_gv[ samples_to_update ] = gv_to_select_on[ samples_to_update ] | |
} | |
#cat( emb , mean(best_gv) , '\n' ) | |
} | |
child_hap_a = best_hap_a | |
child_hap_b = best_hap_b | |
# recompute parental indirect effects | |
gv_mate1_indirect = ( mate1_hap_a + mate1_hap_b ) %*% b_indirect - mean_indirect | |
gv_mate2_indirect = ( mate2_hap_a + mate2_hap_b ) %*% b_indirect - mean_indirect | |
# sample embryo phenotype = direct_genetics + paternal_indirect + maternal_indirect + environment | |
gv_child_direct = ( child_hap_a + child_hap_b ) %*% b_direct - mean_direct | |
gv_child_indirect = ( child_hap_a + child_hap_b ) %*% b_indirect - mean_indirect | |
pheno_child = gv_child_direct + gv_mate1_indirect/2 + gv_mate2_indirect/2 + rnorm(N,0,sqrt(var_env)) | |
cat( s , param , "G1", mean(pheno_child*sd_multiplier) , mean(100 + pheno_child*sd_multiplier < aff_thresh) , '\n' , sep='\t' ) | |
#estimated_hsq = summary(lm(pheno_child ~ gv_child_direct + gv_mate1_indirect + gv_mate2_indirect ))$adj.r.sq | |
#cat("Estimated total heritability:", estimated_hsq, '\n') | |
#estimated_hsq = summary(lm(pheno_child ~ gv_child_direct))$adj.r.sq | |
#cat("Estimated direct heritability:", estimated_hsq, '\n') | |
# then have mating for five generations | |
for ( g in 1:5 ) { | |
# --- restart the simulation in the next generation | |
mate1_hap_a = child_hap_a | |
mate1_hap_b = child_hap_b | |
# sample a random mate | |
mate2_hap_a = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M ) | |
mate2_hap_b = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M ) | |
# compute genetic value for direct phenotype | |
gv_mate1_direct = ( mate1_hap_a + mate1_hap_b ) %*% b_direct - mean_direct | |
gv_mate2_direct = ( mate2_hap_a + mate2_hap_b ) %*% b_direct - mean_direct | |
# compute genetic value for indirect phenotype | |
gv_mate1_indirect = ( mate1_hap_a + mate1_hap_b ) %*% b_indirect - mean_indirect | |
gv_mate2_indirect = ( mate2_hap_a + mate2_hap_b ) %*% b_indirect - mean_indirect | |
pheno_mate1 = gv_mate1_direct + gv_mate1_indirect + rnorm(N,0,sqrt(var_env)) | |
pheno_mate2 = gv_mate2_direct + gv_mate2_indirect + rnorm(N,0,sqrt(var_env)) | |
# --- Assortative Mating | |
AM_pair = rep(NA,N) | |
for ( i in 1:N ){ | |
keep = ( scale(pheno_mate2) - scale(pheno_mate1)[i] )^2 < (AM_gap)^2 | |
keep[i] = FALSE | |
if ( sum(keep) > 0 ) { | |
keep = sample( which(keep) , 1 ) | |
AM_pair[i] = keep | |
} | |
} | |
# reorder the mates for assortment | |
mate2_hap_a = mate2_hap_a[ AM_pair , ] | |
mate2_hap_b = mate2_hap_b[ AM_pair , ] | |
pheno_mate2 = pheno_mate2[ AM_pair ] | |
gv_mate2_direct = gv_mate2_direct[ AM_pair ] | |
gv_mate2_indirect = gv_mate2_indirect[ AM_pair ] | |
# generate the child | |
child_hap_a = mate1_hap_a | |
sampled_vars_mate1 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M ) | |
child_hap_a[ !sampled_vars_mate1 ] = mate1_hap_b[ !sampled_vars_mate1 ] | |
child_hap_b = mate2_hap_a | |
sampled_vars_mate2 = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M ) | |
child_hap_b[ !sampled_vars_mate2 ] = mate2_hap_b[ !sampled_vars_mate2 ] | |
# compute genetic value for direct phenotype | |
gv_child_direct = ( child_hap_a + child_hap_b ) %*% b_direct - mean_direct | |
# compute genetic value for indirect phenotype | |
gv_child_indirect = ( child_hap_a + child_hap_b ) %*% b_indirect - mean_indirect | |
pheno_child = gv_child_direct + gv_mate1_indirect/2 + gv_mate2_indirect/2 + rnorm(N,0,sqrt(var_env)) | |
cat( s , param , paste("G",g+1,sep=''), mean(pheno_child*sd_multiplier) , mean(100 + pheno_child*sd_multiplier < aff_thresh) , '\n' , sep='\t' ) | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment