Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Last active June 2, 2023 13:22
Show Gist options
  • Save sashagusev/9d4574a09db8ce34e9cf270084fe9184 to your computer and use it in GitHub Desktop.
Save sashagusev/9d4574a09db8ce34e9cf270084fe9184 to your computer and use it in GitHub Desktop.
library("NMF")
library("ggplot2")
library("reshape2")
N_cells = 500
N_genes = 100
N_cells_program = 20
N_genes_program = 20
seeds = 10
mat_results = data.frame( "Program" = vector() , "Dropout" = vector() , "PCA" = vector() , "NMF" = vector() , "Scaled NMF" = vector() , "Truncated NMF" = vector() )
num_factors = 2
for ( dropout in c(0.2,0.05,0) ) {
for ( program_type in c("Random","FoldChange_2","FoldChange_0.5") ) {
for ( s in 1:seeds ) {
# sample mean gene expression values
gene_exp_mean = 100 * rgamma( N_genes , 2 , 2 )
# define cells with program
program_cell = rep(FALSE,N_cells)
program_cell[ sample(1:N_cells,N_cells_program) ] = TRUE
# define genes with program
program_gene = rep(FALSE,N_genes)
program_gene[ sample(1:N_genes,N_genes_program) ] = TRUE
if ( program_type == "Random" ) {
# sample expression values for the program
program_mean = 100 * rgamma( N_genes_program , 2 , 2 )
} else if ( program_type == "FoldChange_2") {
program_mean = gene_exp_mean[ program_gene ] * 2
} else if ( program_type == "FoldChange_0.5") {
program_mean = gene_exp_mean[ program_gene ] * 0.5
}
# sample total UMIs around 10k/cell
N_UMIs = rnorm(N_cells , 3000 , 500 )
N_UMIs[ N_UMIs < 0 ] = 0
counts = matrix(NA,nrow=N_cells,ncol=N_genes)
# populate each cell
for ( i in 1:N_cells) {
cur_exp = rpois( N_genes , gene_exp_mean )
if ( program_cell[i] ) {
cur_exp[ program_gene ] = rpois( N_genes_program , program_mean )
}
# add dropout
if ( dropout > 0 ) {
cur_exp[ sample( 1:N_genes , dropout * N_genes ) ] = 0
}
# normalize
cur_exp = as.integer(N_UMIs[i] * (cur_exp/sum(cur_exp)))
counts[i,] = 1e6 * cur_exp / N_UMIs[i]
}
counts = counts[ , apply(counts,2,sd) != 0 ]
# PCA
pca = prcomp( t(scale(counts)) , center = FALSE, scale = FALSE)
pca_factor = pca$rotation[,"PC1"]
# plot(pca$rotation[,"PC1"] , pca$rotation[,"PC2"] , col=program_cell+1)
# Standard NMF
nmf_out = nmf( counts , num_factors )
# Scaled/truncated NMF
trunc_counts = scale(counts)
trunc_counts[ trunc_counts < 0 ] = 0
nmf2_out = nmf( trunc_counts , num_factors )
# Scaled only
sds = apply(counts,2,sd)
trunc_counts = counts
for ( i in 1:nrow(trunc_counts) ) trunc_counts[i,] = counts[i,] / sds
nmf3_out = nmf( trunc_counts , num_factors )
# total r2 captured by all components
#cur_results = data.frame( "Program" = program_type ,
# "PCA" = summary( lm(program_cell ~ pca$rotation[,1:2] ))$adj.r.sq ,
# "NMF" = summary( lm(program_cell ~ basis(nmf_out)[,1:2] ))$adj.r.sq ,
# "Scaled NMF" = summary( lm(program_cell ~ basis(nmf3_out)[,1:2] ))$adj.r.sq ,
# "Truncated NMF" = summary( lm(program_cell ~ basis(nmf2_out)[,1:2] ))$adj.r.sq)
# best r2 across any component
cur_results = data.frame( "Program" = program_type ,
"Dropout" = dropout ,
"PCA" = max( cor(program_cell , pca$rotation[,1:2])^2 ),
"NMF" = max( cor(program_cell , basis(nmf_out)[,1:num_factors] )^2 ) ,
"Scaled NMF" = max(cor(program_cell , basis(nmf3_out)[,1:num_factors] )^2) ,
"Truncated NMF" = max(cor(program_cell , basis(nmf2_out)[,1:num_factors] )^2 ) )
cat( unlist(cur_results) , '\n' )
mat_results = rbind(mat_results,cur_results)
}
}
}
df = melt( mat_results , id.vars = c("Program","Dropout") , variable.name = "Method" , value.name = "R2" )
labs = c("Program = Random","Program = Fold Change 2","Program = Fold Change 0.5")
names(labs) = c("Random","FoldChange_2","FoldChange_0.5")
ggplot(df, aes(x=Method, y=R2 , color=Method)) +
ylim(0,1) +
geom_violin(trim = TRUE) +
geom_boxplot(width=0.25) +
facet_grid(Program ~ Dropout , labeller = label_both ) +
theme_bw() + theme(axis.text.x=element_blank())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment