Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Created February 22, 2024 17:26
Show Gist options
  • Save sashagusev/251d7e89470765167f8a26f4ad8abbb2 to your computer and use it in GitHub Desktop.
Save sashagusev/251d7e89470765167f8a26f4ad8abbb2 to your computer and use it in GitHub Desktop.
library("RColorBrewer")
library("umap")
library("Rtsne")
# Number of markers
M = 2e3
# Number of people
N = 1e3
# frequencies in the ancestral population
p0 = runif(M,0.1,0.9)
# amount of drift
drift = 0.25
# drift to one population
p1 = p0 + rnorm(M,0,drift)
p1[ p1 < 0 ] = 0
p1[ p1 > 1 ] = 1
# drift to another population
p2 = p0 + rnorm(M,0,drift)
p2[ p2 < 0 ] = 0
p2[ p2 > 1 ] = 1
# drift from the ancestral population
p0 = p0 + rnorm(M,0,drift)
p0[ p0 < 0 ] = 0
p0[ p0 > 1 ] = 1
# We may want to compute FST
#fst = ((p0 - p1)^2) / ( p0*(1-p1) + p1*(1-p0))
# Define the ancestry fractions
pct0 = runif(N,0,1)
pct1 = runif(N,0,1)
pct2 = runif(N,0,1)
# Set some individuals to be two-way admixed
pct1[1:(N/2-50)] = 0
pct2[(N-(N/2-50)):N] = 0
# Normalize
sum = pct0+pct1+pct2
pct0 = pct0 / (sum)
pct1 = pct1 / (sum)
pct2 = pct2 / (sum)
# Set up the matrix for population source
pop = matrix(NA,nrow=N,ncol=M)
for ( i in 1:N ) {
pop[i,] = rep(0,M)
# sample pop1 + pop2
keep = sample(1:M,M*(pct1[i]+pct2[i]))
if ( length(keep) > 0 ) {
pop[i,keep] = 1
# sub sample pop2
keep = sample(keep,length(keep)*(pct2[i]/(pct1[i]+pct2[i])) )
if ( length(keep) > 0 ) {
pop[i,keep] = 2
}
}
}
# Set up the matrix for genotypes
mat = matrix(NA,nrow=N,ncol=M)
# Sample genotypes for each individual
for ( i in 1:N ) {
p_cur = rep(NA,M)
p_cur[ pop[i,] == 0 ] = p0[ pop[i,] == 0 ]
p_cur[ pop[i,] == 1 ] = p1[ pop[i,] == 1 ]
p_cur[ pop[i,] == 2 ] = p2[ pop[i,] == 2 ]
mat[i,] = rbinom(M,2,p_cur)
}
mat = mat[,apply(mat,2,sd)!=0]
# Color code. by whether someone has >cutoff ancestry from one pop
cutoff = 0.8
clr_pal = brewer.pal(4,"Set2")
clr = rep(clr_pal[4],N)
# two-way admixed (one pop has 0%) are colored. black
clr[ pct0 == 0 | pct1 == 0 | pct2 == 0 ] = "#000000"
# other pops above the cutoff are colored
clr[ pct0 > cutoff ] = clr_pal[1]
clr[ pct1 > cutoff ] = clr_pal[2]
clr[ pct2 > cutoff ] = clr_pal[3]
# --- Dimensionality reduction
# scale for PCA
s_mat = scale(mat)
# PCA
pca = prcomp(t(s_mat),center=F,scale=F)
pca_1 = pca$rotation[,1]
pca_2 = pca$rotation[,2]
# UMAP
umap_run = umap(scale(mat),init="random")
umap_1 = umap_run$layout[,1]
umap_2 = umap_run$layout[,2]
# TSNE
tsne_run = Rtsne(scale(mat))
tsne_1 = tsne_run$Y[,1]
tsne_2 = tsne_run$Y[,2]
# STRUCTURE (not shown but included)
#lda_run = LDA(mat,k=3,method="Gibbs" , control = list(alpha = 1/3) )
#barplot( t(posterior(lda_run)$topics[,1:3]) , beside=F , col=clr_pal , border=clr_pal )
par(mfrow=c(1,3))
plot(pca_1,pca_2,main="PCA",col=clr,cex=0.5,pch=19,bty="n",xlab="PC1",ylab="PC2",cex.axis=0.5,las=1)
legend("bottomright",legend=c("Pop 1","Pop 2","Pop 3","2-way admix","3-way admix"),pch=19,col=c(clr_pal[1],clr_pal[2],clr_pal[3],"#000000",clr_pal[4]),bty="n")
plot( umap_1 , umap_2 , main="UMAP",col=clr , pch=19 , cex=0.5 ,bty="n",xlab="UMAP1",ylab="UMAP2",cex.axis=0.5,las=1)
plot( tsne_1 , tsne_2 , main="tSNE",col=clr , pch=19 , cex=0.5 ,bty="n",xlab="tSNE1",ylab="tSNE2",cex.axis=0.5,las=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment