Skip to content

Instantly share code, notes, and snippets.

@yannabraham
Last active June 29, 2021 08:16
Show Gist options
  • Save yannabraham/9890df6f18fc1e337a42dd8ec89d4e1b to your computer and use it in GitHub Desktop.
Save yannabraham/9890df6f18fc1e337a42dd8ec89d4e1b to your computer and use it in GitHub Desktop.
The source code to my presentation "Embracing complexity: From discrete analysis to multivariate visualizations" (2021-06-24 - Front Line Genomics)

Embracing complexity - from discrete analysis to multivariate visualizations

This gist contains the source code used to generate the plots presented at the Front Line Genomics Single Cell and Spatial Omics ONLINE webinar. Owing to difficulties with FlowRepository the code is provided together with a snapshot of the original data (~10%) to allow anyone to replicate the analysis, although some visualizations may look different.

#
# Author: ABRAHYA1
###############################################################################
rm(list=ls())
graphics.off()
## libraries
library(ggplot2)
library(Radviz)
library(dplyr)
library(tidyr)
library(stringr)
library(purrr)
library(kohonen)
library(hilbertSimilarity)
library(cytofan)
library(tsne)
set.seed(20210624)
## load the data
channels <- c('CD64','CD16','CD83','CD163','CD86','CD14',
'MSR1','TREM2','CD38','CD206','MerTK')
live.df <- readRDS("live_cells.rds") %>%
ungroup()
treatment.cols <- setNames(scales::hue_pal()(nlevels(live.df$treatment)),
levels(live.df$treatment))
## TREM2 CD206
live.df %>%
filter(treatment %in% c("M-CSF","Dex")) %>%
group_by(treatment) %>%
sample_n(10000) %>%
ggplot(aes(x = TREM2,
y = CD206))+
geom_point(aes(color = treatment),
alpha = 1/5)+
scale_color_manual(values = treatment.cols)+
theme_light(base_size = 16)
live.df %>%
filter(treatment %in% c("M-CSF","Dex")) %>%
group_by(treatment) %>%
sample_n(10000) %>%
ggplot(aes(x = TREM2,
y = CD206))+
geom_point(aes(color = treatment),
alpha = 1/5)+
geom_hline(yintercept = 1,
col = 2, lty = 2, size = 1.1)+
geom_vline(xintercept = 1,
col = 2, lty = 2, size = 1.1)+
scale_color_manual(values = treatment.cols)+
theme_light(base_size = 16)
live.df %>%
filter(treatment %in% c("M-CSF","Dex")) %>%
mutate(isDex = CD206>1 & TREM2<1) %>%
group_by(treatment,donor_id,source) %>%
summarize(positive = sum(isDex)/length(isDex)) %>%
ggplot(aes(x = treatment,
y = positive))+
geom_line(aes(group = donor_id),
color = "grey80")+
geom_point(aes(color = treatment),
size = 3)+
scale_y_continuous(label = scales::percent_format())+
scale_color_manual(values = treatment.cols)+
ylab("percent CD206+ TREM2- cells")+
theme_light(base_size = 16)
## quick tSNE
proj.df <- live.df %>%
filter(treatment %in% c("M-CSF","Dex")) %>%
group_by(treatment) %>%
sample_n(250) %>%
ungroup()
proj <- proj.df %>%
select(one_of(channels)) %>%
as.matrix() %>%
tsne()
proj.df$tx <- proj[,1]
proj.df$ty <- proj[,2]
ggplot(proj.df,
aes(x = tx,
y = ty))+
geom_point(aes(color = treatment),
size = 3,
alpha = 1/2)+
xlab("tSNE-1")+
ylab("tSNE-2")+
scale_color_manual(values = treatment.cols)+
theme_light(base_size = 16)
ggplot(proj.df,
aes(x = tx,
y = ty))+
geom_point(aes(color = CD206),
size = 3,
alpha = 1/2)+
xlab("tSNE-1")+
ylab("tSNE-2")+
scale_color_gradient(low = "grey80",
high = "orangered2")+
theme_light(base_size = 16)
## prepare Radviz
all.S <- make.S(c('CD14','CD86','CD64','TREM2','MSR1','CD83',
'MerTK','CD16','CD206','CD163','CD38'))
live.df %>%
group_by(treatment,dose) %>%
summarize_at(vars(all_of(rownames(all.S))),median)
live.df %>%
group_by(source,treatment,dose) %>%
summarize_at(vars(all_of(rownames(all.S))),
list(lower = min,
upper = ~ quantile(.,0.975))) %>%
gather("Channel","value",matches("(lower|upper)")) %>%
separate(Channel,c("Channel","stat"),sep="_") %>%
unite("i",c(source,treatment,dose,stat),remove = FALSE) %>%
ggplot(aes(x = Channel,
y = value))+
geom_line(aes(group = i))+
geom_point(aes(color = source),
size = 3)+
facet_wrap(~treatment)+
scale_color_manual(values = treatment.cols)+
theme_light(base_size = 16)
do.scaled.L <- function(x) do.L(x,fun = function(y) quantile(y,c(0,0.975)))
live.rv <- do.radviz(live.df,
all.S,
trans = do.scaled.L)
plot(live.rv)+
geom_density_2d(data = . %>%
filter(treatment %in% c("M-CSF","Dex")) %>%
group_by(treatment,dose) %>%
sample_n(10000),
aes(color = treatment),
size = 1.1)+
scale_color_manual(values = treatment.cols)
plot(live.rv)+
geom_density_2d(data = . %>%
group_by(treatment,dose) %>%
sample_n(10000),
aes(color = treatment),
size = 1.1)+
scale_color_manual(values = treatment.cols)
plot(subset(live.rv,
!is.na(dose)))+
geom_density_2d(data = live.rv$proj$data %>%
filter(is.na(dose)) %>%
group_by(treatment) %>%
sample_n(20000) %>%
ungroup() %>%
mutate(dose = sample(c("1uM","10uM"),
length(treatment),
replace = TRUE),
dose = factor(dose,
levels = c("1uM","10uM"))),
aes(color = treatment),
size = 1.1)+
geom_density_2d(data = . %>%
group_by(treatment,dose) %>%
sample_n(10000),
aes(color = treatment),
size = 1.1)+
scale_color_manual(values = treatment.cols)+
facet_grid(cols = vars(dose),
drop = TRUE)
## prepare Freeviz
# train <- live.df %>%
# group_by(condition) %>%
# sample_n(2000)
#
# fv.S <- do.optimFreeviz(train[,rownames(all.S)],
# train$condition)
# saveRDS(fv.S,
# "output/presentation/fv.rds")
fv.S <- readRDS("fv.rds")
live.fv <- do.radviz(live.df,
fv.S,
trans = identity)
plot(live.fv)
plot(live.fv)+
geom_density_2d(data = . %>%
filter(treatment %in% c("M-CSF","Dex")) %>%
group_by(treatment,dose) %>%
sample_n(10000),
aes(color = treatment),
size = 1.1)+
scale_color_manual(values = treatment.cols)
plot(subset(live.fv,
!is.na(dose)))+
geom_density_2d(data = live.fv$proj$data %>%
filter(is.na(dose)) %>%
group_by(treatment) %>%
sample_n(20000) %>%
ungroup() %>%
mutate(dose = sample(c("1uM","10uM"),
length(treatment),
replace = TRUE),
dose = factor(dose,
levels = c("1uM","10uM"))),
aes(color = treatment),
size = 1.1)+
geom_density_2d(data = . %>%
group_by(treatment,dose) %>%
sample_n(10000),
aes(color = treatment),
size = 1.1)+
scale_color_manual(values = treatment.cols)+
facet_grid(cols = vars(dose),
drop = TRUE)
# perfect!!
plot(subset(live.fv,
!is.na(dose)))+
geom_density_2d(data = . %>%
group_by(treatment,dose) %>%
sample_n(10000),
aes(color = dose),
size = 1.1)+
facet_grid(cols = vars(treatment),
drop = TRUE)
## Polyfunctionality
## create the SOM map
nbins <- 10
## Generate hilbert bins to downsample the data in a density-driven way
live.cuts <- make.cut(as.matrix(live.fv$proj$data[,c("rx","ry")]),
n=floor(1.5*nbins)+1)
live.cuts <- do.cut(as.matrix(live.fv$proj$data[,c("rx","ry")]),
live.cuts,
type = "fixed")
live.hc <- do.hilbert(live.cuts,
horder = floor(1.5*nbins))
live.hc <- factor(live.hc)
som.grid <- somgrid(xdim = nbins,
ydim = nbins,
topo = 'hexagonal',
toroidal = T)
hcweights <- table(live.hc)/length(live.hc)
hc.sub <- lapply(levels(live.hc),function(i) {
lapply(levels(live.df$condition),function(live.cond) {
if(sum(live.hc==i &
live.df$condition==live.cond)>0) {
sample(which(live.hc==i &
live.df$condition==live.cond),100,replace = TRUE)
}
})
})
hc.sub <- unlist(unlist(hc.sub))
som.model <- som(as.matrix(live.fv$proj$data[hc.sub,c("rx","ry")]),
grid = som.grid)
live.som <- predict(object = som.model,
newdata = as.matrix(live.fv$proj$data[,c("rx","ry")]))
live.som <- factor(live.som$unit.classif)
plot(live.fv)+
geom_density_2d(data = . %>%
group_by(treatment,dose) %>%
sample_n(10000),
aes(color = treatment),
size = 1.1)+
geom_point(data = . %>%
mutate(hIndex = live.som) %>%
group_by(hIndex) %>%
summarize(rx = median(rx),
ry = median(ry),
N = length(hIndex)),
aes(size = N),
alpha = 1/2)+
scale_color_manual(values = treatment.cols)+
scale_size_area()
N <- length(unique(live.som))
live.clr <- live.fv$proj$data %>%
mutate(hIndex = live.som) %>%
count(name,source,donor_id,treatment,dose,condition,hIndex) %>%
group_by(name) %>%
mutate(pc = n/sum(n)) %>%
do({
df <- .
## minimum rouding error ~ pseudo count
minim <- 1/(nrow(df)+1)
## how many missing bins?
m <- N-length(unique(df$hIndex))
## zero-replacement
ta <- minim*(m+1)*(N-m)/N^2
## adjustment
ts <- minim*m*(m+1)/N^2
## perform adjustment
df %>%
mutate(ta = ta,
pc = pc - pc*ts) %>%
complete(hIndex,
nesting(name,
source,
donor_id,
treatment,
dose,
condition),
fill=list(n=0,
pc=ta))
}) %>%
group_by(name) %>%
mutate(clr = log2(pc/exp(mean(log2(pc)))))
live.clr %>%
filter(treatment %in% c("M-CSF","Dex")) %>%
group_by(hIndex,treatment) %>%
summarize(clr = mean(clr)) %>%
spread(treatment,clr) %>%
ggplot(aes(x = `M-CSF`,
y = Dex))+
geom_point(size = 3)+
geom_abline(slope = 1,
intercept = 0,
col = 2, lty = 2)+
theme_light(base_size = 16)
live.clr %>%
ungroup() %>%
filter(treatment %in% c("M-CSF","Dex")) %>%
select(donor_id, treatment, hIndex, clr) %>%
group_by(donor_id,hIndex) %>%
filter(length(treatment)==2) %>%
# filter(donor_id=="Mono396",
# hIndex=="2") %>%
spread(treatment,clr) %>%
ggplot(aes(x = `M-CSF`,
y = Dex))+
geom_point(aes(color = donor_id),
size = 3)+
geom_abline(slope = 1,
intercept = 0,
col = 2, lty = 2)+
theme_light(base_size = 16)
live.diffs <- live.clr %>%
group_by(hIndex) %>%
filter(sum(n)>100) %>%
filter(treatment %in% c("M-CSF","Dex")) %>%
# filter(treatment %in% c("M-CSF","cpd6")) %>%
group_by(hIndex) %>%
nest() %>%
ungroup() %>%
mutate(model = purrr::map(data,~ wilcox.test(clr ~ treatment,
data = .,
conf.int = TRUE)),
estimate = map_dbl(model,function(md) -md$estimate),
p.value = map_dbl(model,function(md) md$p.value),
log.p.value = -log10(p.value),
`N samples` = map_dbl(data,~ sum(.$n!=0)),
`N cells` = map_dbl(data,~ sum(.$n)))
live.diffs %>%
ggplot(aes(x = estimate,
y = log.p.value))+
geom_point(aes(size = `N cells`))+
geom_hline(yintercept = -log10(0.01),
col = 2,lty = 2)+
geom_vline(xintercept = c(-1,1),
col = 2, lty = 2)+
scale_size(range = c(2,6))+
theme_light(base_size = 16)
sig.live.diffs <- live.diffs %>%
filter(abs(estimate)>=1,
p.value<0.01)
sig.live.diffs %>%
select(hIndex,estimate,p.value) %>%
arrange(estimate)
live.diffs.lims <- max(abs(live.diffs$estimate))
live.plot <- plot(live.fv)+
geom_density_2d(data = . %>%
filter(treatment %in% c("M-CSF","Dex")) %>%
group_by(treatment,dose) %>%
sample_n(10000),
aes(color = treatment),
size = 1.1)
if(nrow(sig.live.diffs)>0) {
live.plot <- live.plot+
geom_point(data = . %>%
mutate(hIndex = factor(live.som)) %>%
group_by(hIndex) %>%
summarize(rx = median(rx),
ry = median(ry),
N = length(hIndex)) %>%
left_join(live.diffs %>%
select(hIndex,estimate,p.value),
by = "hIndex") %>%
arrange(abs(estimate)) %>%
mutate(estimate = ifelse(hIndex %in% sig.live.diffs$hIndex,
estimate,
NA)),
aes(size = N,
fill = estimate),
shape = 21)
} else {
live.plot <- live.plot+
geom_point(data = . %>%
mutate(hIndex = factor(live.som)) %>%
group_by(hIndex) %>%
summarize(rx = median(rx),
ry = median(ry),
N = length(hIndex)),
aes(size = N),
fill = NA,
shape = 21)
}
live.plot <- live.plot+
scale_size(range = c(2,6))+
scale_color_manual(values = treatment.cols)+
scale_fill_gradient2(limits = c(-live.diffs.lims,
live.diffs.lims),
low = blues9[7],
mid = "grey60",
high = "orangered2",
na.value = NA)+
guides(size = "none")
print(live.plot)
live.fan <- live.fv$proj$data %>%
mutate(hIndex = live.som) %>%
filter(hIndex %in% sig.live.diffs$hIndex) %>%
left_join(sig.live.diffs %>%
select(hIndex,estimate,p.value),
by = "hIndex") %>%
mutate(Direction = sign(estimate),
Direction = factor(Direction,
levels = c(-1,1),
labels = c("depleted","enriched"))) %>%
group_by(Direction) %>%
sample_n(5000) %>%
gather("Channel","value",one_of(rownames(fv.S))) %>%
mutate(Channel = factor(Channel,
levels=rownames(fv.S)))
polz.channels <- c("CD206","TREM2")
live.fan %>%
ggplot(aes(x=Channel,y=value))+
geom_fan()+
geom_vline(data = . %>%
distinct(Channel) %>%
mutate(Channel = droplevels(Channel),
i = as.numeric(Channel)) %>%
filter(Channel %in% polz.channels),
aes(xintercept = i),
col = 2,
lty = 2)+
facet_grid(rows = vars(Direction))+
theme_light(base_size = 16)+
theme(axis.text.x=element_text(angle=45,hjust=1))
bind_rows(live.fv$proj$data %>%
mutate(hIndex = live.som) %>%
filter(!hIndex %in% sig.live.diffs$hIndex) %>%
sample_n(10000) %>%
gather("Channel","value",one_of(rownames(fv.S))) %>%
mutate(Channel = factor(Channel,
levels=rownames(fv.S)),
Direction = "reference"),
live.fan %>%
mutate(Direction = as.character(Direction))) %>%
mutate(Direction = factor(Direction,
levels = c("depleted","enriched","reference"))) %>%
ggplot(aes(x=Channel,y=value))+
geom_fan()+
geom_vline(data = . %>%
distinct(Channel) %>%
mutate(Channel = droplevels(Channel),
i = as.numeric(Channel)) %>%
filter(Channel %in% polz.channels),
aes(xintercept = i),
col = 2,
lty = 2)+
facet_grid(rows = vars(Direction))+
theme_light(base_size = 16)+
theme(axis.text.x=element_text(angle=45,hjust=1))
## dose effect
cur.cpd <- "cpd11"
dose.diffs <- live.clr %>%
group_by(hIndex) %>%
filter(sum(n)>100) %>%
filter(treatment==cur.cpd) %>%
group_by(hIndex) %>%
nest() %>%
ungroup() %>%
mutate(model = purrr::map(data,~ wilcox.test(clr ~ dose,
data = .,
conf.int = TRUE)),
estimate = map_dbl(model,function(md) -md$estimate),
p.value = map_dbl(model,function(md) md$p.value),
log.p.value = -log10(p.value),
`N samples` = map_dbl(data,~ sum(.$n!=0)),
`N cells` = map_dbl(data,~ sum(.$n)))
dose.diffs %>%
ggplot(aes(x = estimate,
y = log.p.value))+
geom_point(aes(size = `N cells`))+
geom_hline(yintercept = -log10(0.01),
col = 2,lty = 2)+
geom_vline(xintercept = c(-1,1),
col = 2, lty = 2)+
scale_size(range = c(2,6))+
theme_light(base_size = 16)
sig.dose.diffs <- dose.diffs %>%
filter(abs(estimate)>=1,
p.value<0.01)
sig.dose.diffs %>%
select(hIndex,estimate,p.value) %>%
arrange(estimate)
dose.diffs.lims <- max(abs(dose.diffs$estimate))
dose.plot <- plot(live.fv)+
geom_density_2d(data = . %>%
filter(treatment==cur.cpd) %>%
group_by(dose) %>%
sample_n(10000),
aes(color = dose),
size = 1.1)
if(nrow(sig.dose.diffs)>0) {
dose.plot <- dose.plot+
geom_point(data = . %>%
mutate(hIndex = factor(live.som)) %>%
filter(treatment==cur.cpd) %>%
group_by(hIndex) %>%
summarize(rx = median(rx),
ry = median(ry),
N = length(hIndex)) %>%
left_join(dose.diffs %>%
select(hIndex,estimate,p.value),
by = "hIndex") %>%
arrange(abs(estimate)) %>%
mutate(estimate = ifelse(hIndex %in% sig.dose.diffs$hIndex,
estimate,
NA)),
aes(size = N,
fill = estimate),
shape = 21)
} else {
dose.plot <- dose.plot+
geom_point(data = . %>%
mutate(hIndex = factor(live.som)) %>%
group_by(hIndex) %>%
summarize(rx = median(rx),
ry = median(ry),
N = length(hIndex)),
aes(size = N),
fill = NA,
shape = 21)
}
dose.plot <- dose.plot+
scale_size(range = c(2,6))+
scale_fill_gradient2(limits = c(-dose.diffs.lims,
dose.diffs.lims),
low = blues9[7],
mid = "grey60",
high = "orangered2",
na.value = NA)+
guides(size = "none")
print(dose.plot)
dose.fan <- live.fv$proj$data %>%
mutate(hIndex = live.som) %>%
filter(hIndex %in% sig.dose.diffs$hIndex) %>%
left_join(sig.dose.diffs %>%
select(hIndex,estimate,p.value),
by = "hIndex") %>%
mutate(Direction = sign(estimate),
Direction = factor(Direction,
levels = c(-1,1),
labels = c("depleted","enriched"))) %>%
group_by(Direction) %>%
sample_n(5000) %>%
gather("Channel","value",one_of(rownames(fv.S))) %>%
mutate(Channel = factor(Channel,
levels=rownames(fv.S)))
dose.fan %>%
ggplot(aes(x=Channel,y=value))+
geom_fan()+
geom_vline(data = . %>%
distinct(Channel) %>%
mutate(Channel = droplevels(Channel),
i = as.numeric(Channel)) %>%
filter(Channel %in% polz.channels),
aes(xintercept = i),
col = 2,
lty = 2)+
facet_grid(rows = vars(Direction))+
theme_light(base_size = 16)+
theme(axis.text.x=element_text(angle=45,hjust=1))
bind_rows(live.fv$proj$data %>%
mutate(hIndex = live.som) %>%
filter(!hIndex %in% sig.dose.diffs$hIndex) %>%
sample_n(10000) %>%
gather("Channel","value",one_of(rownames(fv.S))) %>%
mutate(Channel = factor(Channel,
levels=rownames(fv.S)),
Direction = "reference"),
dose.fan %>%
mutate(Direction = as.character(Direction))) %>%
mutate(Direction = factor(Direction,
levels = c("depleted","enriched","reference"))) %>%
ggplot(aes(x=Channel,y=value))+
geom_fan()+
geom_vline(data = . %>%
distinct(Channel) %>%
mutate(Channel = droplevels(Channel),
i = as.numeric(Channel)) %>%
filter(Channel %in% polz.channels),
aes(xintercept = i),
col = 2,
lty = 2)+
facet_grid(rows = vars(Direction))+
theme_light(base_size = 16)+
theme(axis.text.x=element_text(angle=45,hjust=1))
bind_rows(live.fan %>%
mutate(Condition = "Stimulation") %>%
group_by(Direction),
dose.fan %>%
mutate(Condition = "Dose")) %>%
mutate(Condition = factor(Condition,
levels = c("Stimulation",
"Dose"))) %>%
ggplot(aes(x=Channel,y=value))+
geom_fan()+
geom_vline(data = . %>%
distinct(Channel) %>%
mutate(Channel = droplevels(Channel),
i = as.numeric(Channel)) %>%
filter(Channel %in% polz.channels),
aes(xintercept = i),
col = 2,
lty = 2)+
facet_grid(rows = vars(Condition,Direction))+
theme_light(base_size = 16)+
theme(axis.text.x=element_text(angle=45,hjust=1))
@yannabraham
Copy link
Author

Next step : produce a markdown report from the code!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment