Skip to content

Instantly share code, notes, and snippets.

@jvelezmagic
Last active November 1, 2021 22:42
Show Gist options
  • Save jvelezmagic/216eee1c1711ce7aa722fbe2b159b76c to your computer and use it in GitHub Desktop.
Save jvelezmagic/216eee1c1711ce7aa722fbe2b159b76c to your computer and use it in GitHub Desktop.
---
title: "Cydar batch correction"
author: ""
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
toc_depth: 3
code_download: true
code_folding: show
df_print: paged
params:
fcs_dir:
label: "FCS directory"
value: !r here::here()
output_dir:
label: "Output directory for results"
value : !r here::here("batch_corrected")
output_suffix:
label: "Output suffix (e.g., my-sample-name{value}.fcs)"
value: "_batchCorrected"
metadata_sep:
label: "Filename metadata separator"
value: "_"
batch_position:
label: "Batch position after splitting filename by the metadata separator"
value: 1
condition_position:
label: "Condition position after splitting filename by the metadata separator"
value: 2
control_string:
label: "String to identify which filenames are control samples"
value: "Sp"
fcs_glob:
label: "Glob to find files to use in batch correction"
value: "*.fcs"
plots_in_tabs:
label: "Should the report split comparisson plots by markes in tabs?"
value: TRUE
cytofkit_params: !r list(comp = FALSE, transformMethod = "arcsinh", mergeMethod = "all", sampleSeed = 1234)
cydar_params: !r list(mode = "range")
---
```{r bcwc-setup, include=FALSE}
knitr::opts_chunk$set(
echo = TRUE,
message = FALSE,
warning = FALSE,
fig.align = "center"
)
```
## Libraries
```{r bcwc-libraries}
library(fs)
library(tidyverse)
library(cytofkit2)
library(cydar)
library(flowCore)
```
## Identify FCS files
```{r bcwc-identify-fcs-files}
fcs_complete_files <- params$fcs_dir |>
fs::dir_ls(glob = params$fcs_glob)
```
```{r bcwc-show-files, echo=FALSE}
fcs_complete_files %>%
fs::path_file() %>%
tibble::tibble(
`FCS used` = .
)
```
## Read FCS files and merge them
```{r bcwc-read-and-merge-data}
merged_data <- purrr::exec(
.fn = cytof_exprsMerge,
fcsFiles = fcs_complete_files,
!!!params$cytofkit_params
) %>%
as.data.frame()
merged_data %>%
str()
```
## Prepare data for batch correction
```{r bcwc-prepare-data-for-batch-correction}
rownames_merged_data <- rownames(merged_data)
batch_data_prep <- tibble::tibble(
fcs_filename = fs::path_file(fcs_complete_files)
) %>%
dplyr::mutate(
sample = fs::path_ext_remove(fcs_filename),
metadata_info <- stringr::str_split(
string = sample,
pattern = params$metadata_sep
),
batch = purrr::map_chr(
.x = metadata_info,
.f = dplyr::nth,
params$batch_position
),
batch = stringr::str_c(
batch,
params$metadata_sep
),
condition = purrr::map_chr(
.x = metadata_info,
.f = dplyr::nth,
params$condition_position
),
condition = dplyr::case_when(
stringr::str_detect(condition, params$control_string) ~ "Control",
TRUE ~ "Sample"
),
condition = factor(
x = condition,
levels = c("Control", "Sample")
),
sample_data = purrr::map(
.x = sample,
.f = ~ merged_data[grep(.x, rownames_merged_data), ]
)
) %>%
dplyr::arrange(dplyr::desc(condition)) %>%
dplyr::select(sample, batch, condition, sample_data) %>%
dplyr::group_by(batch) %>%
dplyr::summarize(
data = list(sample_data),
data = purrr::map(
.x = data,
.f = ~ {
purrr::set_names(x = .x, nm = sample) %>%
list() %>%
purrr::set_names(nm = dplyr::cur_group())
}
),
composition = list(condition),
composition = purrr::map(
.x = composition,
.f = ~ {
purrr::set_names(x = .x, nm = condition) %>%
list() %>%
purrr::set_names(nm = dplyr::cur_group())
}
)
) %>%
identity()
batch_data_prep %>%
dplyr::slice_head(n = 1) %>%
dplyr::glimpse(width = 80)
```
## Run batch normalization
```{r bcwc-run-batch-normalization}
batch_x <- unlist(batch_data_prep$data, recursive = FALSE)
batch_comp <- unlist(batch_data_prep$composition, recursive = FALSE)
batch_corrected_results <- purrr::exec(
.fn = cydar::normalizeBatch,
batch.x = batch_x,
batch.comp = batch_comp,
!!!params$cydar_params
)
batch_corrected_results %>%
dplyr::first() %>%
str()
```
## Save results
```{r bcwc-save-results}
fs::dir_create(params$output_dir)
batch_corrected_results %>%
purrr::walk(
.f = ~ {
flowframe <- new(
"flowFrame",
exprs = as.matrix(.x[[1]])
)
flowframe_name <- fs::path(
params$output_dir,
stringr::str_c(names(.x)[[1]], params$output_suffix),
ext = "fcs"
)
flowCore::write.FCS(
x = flowframe,
filename = flowframe_name
)
}
)
```
## Compare batch corrections {.tabset .tabset-pills}
```{r bcwc-compare-batch-corrections, results='asis', echo=FALSE}
cols <- names(batch_corrected_results) %>%
length() %>%
scales::hue_pal()(.)
for (k in 1:ncol(batch_x[[1]][[1]])) {
collected <- list()
for (i in 1:length(batch_x)) {
collected[[names(batch_x)[i]]] <- batch_x[[i]][[1]][, k]
}
collected2 <- list()
for (i in 1:length(batch_x)) {
collected2[[names(batch_x)[i]]] <- batch_corrected_results[[i]][[1]][, k]
}
if (params$plots_in_tabs) {
cat("\n")
cat("###", colnames(batch_corrected_results[[i]][[1]])[k], "\n")
}
par(mfrow = c(1, 2))
multiIntHist(collected, main = colnames(batch_corrected_results[[i]][[1]])[k], cols = cols)
legend(
"topright",
legend = names(collected),
pch = 16,
col = cols
)
multiIntHist(collected2, main = "Corrected", cols = cols)
legend(
"topright",
legend = names(collected2),
pch = 16,
col = cols
)
if (params$plots_in_tabs) {
cat("\n")
}
}
```
```{r bcwc-plots-to-pdf, echo=FALSE, results='hide'}
pdf(fs::path(params$output_dir, "batch_corrected.pdf"), width = 10)
cols <- names(batch_corrected_results) %>%
length() %>%
scales::hue_pal()(.)
for (k in 1:ncol(batch_x[[1]][[1]])) {
collected <- list()
for (i in 1:length(batch_x)) {
collected[[names(batch_x)[i]]] <- batch_x[[i]][[1]][, k]
}
collected2 <- list()
for (i in 1:length(batch_x)) {
collected2[[names(batch_x)[i]]] <- batch_corrected_results[[i]][[1]][, k]
}
par(mfrow = c(1, 2))
multiIntHist(collected, main = colnames(batch_corrected_results[[i]][[1]])[k], cols = cols)
legend(
"topright",
legend = names(collected),
pch = 16,
col = cols
)
multiIntHist(collected2, main = "Corrected", cols = cols)
legend(
"topright",
legend = names(collected2),
pch = 16,
col = cols
)
}
dev.off()
```
## Document reproducibility
### Code used to render this document
```{r bcwc-toy-call, echo=FALSE, results='asis', class.source = 'fold-show', fig.width=10}
cat("```r", "\n")
rlang::call2(
.fn = expr(rmarkdown::render),
input = "batch-correction-with-cydar.Rmd",
params = list(
fcs_dir = params$fcs_dir,
output_dir = params$output_dir,
output_suffix = params$output_suffix,
metadata_sep = params$metadata_sep,
batch_position = params$batch_position,
condition_position = params$condition_position,
control_string = params$control_string,
fcs_glob = params$fcs_glob,
plots_in_tabs = params$plots_in_tabs,
cytofkit_params = params$cytofkit_params,
cydar_params = params$cydar_params
)
) |>
rlang::expr_text(width = 20) |>
styler::style_text()
cat("```")
```
### Session info
Cydar batch correction was built using `r R.version.string`.
```{r bcwc-print-options, echo=FALSE}
knitr::opts_chunk$set(rows.print = 1000)
```
```{r bcwc-session-info, echo=FALSE}
pkgs <- c(
"fs",
"tidyverse",
"cytofkit2",
"cydar",
"purrr",
"stringr",
"flowCore",
"here",
"tibble",
"dplyr",
"scales",
"styler",
"rlang"
)
pkgs |>
sessioninfo::package_info(dependencies = FALSE) |>
tibble::as_tibble() |>
dplyr::transmute(
Package = package,
Version = ondiskversion,
Source = stringr::str_replace(source, "@", "\\\\@"),
Source = stringr::str_remove(Source, "\\\\@[a-f0-9]*")
)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment