Created
February 22, 2024 17:26
-
-
Save sashagusev/251d7e89470765167f8a26f4ad8abbb2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("RColorBrewer") | |
library("umap") | |
library("Rtsne") | |
# Number of markers | |
M = 2e3 | |
# Number of people | |
N = 1e3 | |
# frequencies in the ancestral population | |
p0 = runif(M,0.1,0.9) | |
# amount of drift | |
drift = 0.25 | |
# drift to one population | |
p1 = p0 + rnorm(M,0,drift) | |
p1[ p1 < 0 ] = 0 | |
p1[ p1 > 1 ] = 1 | |
# drift to another population | |
p2 = p0 + rnorm(M,0,drift) | |
p2[ p2 < 0 ] = 0 | |
p2[ p2 > 1 ] = 1 | |
# drift from the ancestral population | |
p0 = p0 + rnorm(M,0,drift) | |
p0[ p0 < 0 ] = 0 | |
p0[ p0 > 1 ] = 1 | |
# We may want to compute FST | |
#fst = ((p0 - p1)^2) / ( p0*(1-p1) + p1*(1-p0)) | |
# Define the ancestry fractions | |
pct0 = runif(N,0,1) | |
pct1 = runif(N,0,1) | |
pct2 = runif(N,0,1) | |
# Set some individuals to be two-way admixed | |
pct1[1:(N/2-50)] = 0 | |
pct2[(N-(N/2-50)):N] = 0 | |
# Normalize | |
sum = pct0+pct1+pct2 | |
pct0 = pct0 / (sum) | |
pct1 = pct1 / (sum) | |
pct2 = pct2 / (sum) | |
# Set up the matrix for population source | |
pop = matrix(NA,nrow=N,ncol=M) | |
for ( i in 1:N ) { | |
pop[i,] = rep(0,M) | |
# sample pop1 + pop2 | |
keep = sample(1:M,M*(pct1[i]+pct2[i])) | |
if ( length(keep) > 0 ) { | |
pop[i,keep] = 1 | |
# sub sample pop2 | |
keep = sample(keep,length(keep)*(pct2[i]/(pct1[i]+pct2[i])) ) | |
if ( length(keep) > 0 ) { | |
pop[i,keep] = 2 | |
} | |
} | |
} | |
# Set up the matrix for genotypes | |
mat = matrix(NA,nrow=N,ncol=M) | |
# Sample genotypes for each individual | |
for ( i in 1:N ) { | |
p_cur = rep(NA,M) | |
p_cur[ pop[i,] == 0 ] = p0[ pop[i,] == 0 ] | |
p_cur[ pop[i,] == 1 ] = p1[ pop[i,] == 1 ] | |
p_cur[ pop[i,] == 2 ] = p2[ pop[i,] == 2 ] | |
mat[i,] = rbinom(M,2,p_cur) | |
} | |
mat = mat[,apply(mat,2,sd)!=0] | |
# Color code. by whether someone has >cutoff ancestry from one pop | |
cutoff = 0.8 | |
clr_pal = brewer.pal(4,"Set2") | |
clr = rep(clr_pal[4],N) | |
# two-way admixed (one pop has 0%) are colored. black | |
clr[ pct0 == 0 | pct1 == 0 | pct2 == 0 ] = "#000000" | |
# other pops above the cutoff are colored | |
clr[ pct0 > cutoff ] = clr_pal[1] | |
clr[ pct1 > cutoff ] = clr_pal[2] | |
clr[ pct2 > cutoff ] = clr_pal[3] | |
# --- Dimensionality reduction | |
# scale for PCA | |
s_mat = scale(mat) | |
# PCA | |
pca = prcomp(t(s_mat),center=F,scale=F) | |
pca_1 = pca$rotation[,1] | |
pca_2 = pca$rotation[,2] | |
# UMAP | |
umap_run = umap(scale(mat),init="random") | |
umap_1 = umap_run$layout[,1] | |
umap_2 = umap_run$layout[,2] | |
# TSNE | |
tsne_run = Rtsne(scale(mat)) | |
tsne_1 = tsne_run$Y[,1] | |
tsne_2 = tsne_run$Y[,2] | |
# STRUCTURE (not shown but included) | |
#lda_run = LDA(mat,k=3,method="Gibbs" , control = list(alpha = 1/3) ) | |
#barplot( t(posterior(lda_run)$topics[,1:3]) , beside=F , col=clr_pal , border=clr_pal ) | |
par(mfrow=c(1,3)) | |
plot(pca_1,pca_2,main="PCA",col=clr,cex=0.5,pch=19,bty="n",xlab="PC1",ylab="PC2",cex.axis=0.5,las=1) | |
legend("bottomright",legend=c("Pop 1","Pop 2","Pop 3","2-way admix","3-way admix"),pch=19,col=c(clr_pal[1],clr_pal[2],clr_pal[3],"#000000",clr_pal[4]),bty="n") | |
plot( umap_1 , umap_2 , main="UMAP",col=clr , pch=19 , cex=0.5 ,bty="n",xlab="UMAP1",ylab="UMAP2",cex.axis=0.5,las=1) | |
plot( tsne_1 , tsne_2 , main="tSNE",col=clr , pch=19 , cex=0.5 ,bty="n",xlab="tSNE1",ylab="tSNE2",cex.axis=0.5,las=1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment