Create a Formula from a String
In the most basic case, use as.formula():
This returns a string:
"y ~ x1 + x2"
#> [1] "y ~ x1 + x2"
This returns a formula:
# Parse the string into a formula object
stats::as.formula("y ~ x1 + x2")
#> y ~ x1 + x2
#> <environment: 0x3361710>
Here is an example of how it might be used. These are the variable names:
# Name of the left-hand-side variable and names of the right-hand-side terms
measurevar <- "y"
groupvars <- c("x1", "x2", "x3")
This creates the appropriate string:
# Inner paste() joins the group variables with " + ";
# outer paste() joins the measure variable and that result with " ~ "
paste(measurevar, paste(groupvars, collapse=" + "), sep=" ~ ")
#> [1] "y ~ x1 + x2 + x3"
This returns the formula:
# Build the formula text with paste(), then convert it with as.formula()
as.formula(paste(measurevar, paste(groupvars, collapse=" + "), sep=" ~ "))
#> y ~ x1 + x2 + x3
#> <environment: 0x3361710>
File Handling
List file names in a directory
# Expand the glob pattern <selected_data_path>/*.rds into the matching file paths
filenames <- Sys.glob(file.path(selected_data_path,"*.rds"))
Gather and Spread
Gather converts a table from wide to long form by putting the column names in a single column titled with a chosen “key” and all the values from those columns in an adjacent column titled with a chosen “value”. If there are columns that should be kept the way they are, then use -Name of the column or -c(vector with column names). Spread does the reverse, converting a long-form table back to wide form, as in the second example below.
# Wide to long: every column except CellPopulation is stacked into a
# "Markers" column (the former column names) and an "Expression" column
# (the values). String arguments need straight quotes; curly quotes
# (e.g. pasted from a word processor) are a syntax error in R.
# gather() is superseded in current tidyr; the newer form is
# pivot_longer(data, -CellPopulation, names_to = "Markers", values_to = "Expression").
Df <- gather(data, key = "Markers", value = "Expression", -CellPopulation)
# Long to wide: keep three columns, turn each stim_type into its own column
# filled with estimate, move cell_population into the row names, and return
# the result as a matrix.
mate <- comb_dat %>%
  dplyr::select(cell_population, stim_type, estimate) %>%
  spread(key = stim_type, value = estimate) %>%
  column_to_rownames(var = "cell_population") %>%
  as.matrix()
Examples
Select columns from a dataframe and coerce the data to a matrix. Also, group the data and perform min-max scaling per group.
# Build a cell_population x state_marker matrix of importances that are
# min-max scaled within each cell population.
cytof_mat <- cytof_m_ranks$attribute_stats %>%
  # Tag every population name with the " CYTOF" suffix
  mutate(cell_population = paste0(cell_population, " CYTOF")) %>%
  dplyr::select(cell_population, state_marker, meanImp) %>%
  dplyr::group_by(cell_population) %>%
  # Per-group scaling. If all meanImp values in a group are equal, the
  # denominator is 0 and the result is NaN.
  mutate(min_max = (meanImp - min(meanImp)) / (max(meanImp) - min(meanImp))) %>%
  # The grouping is only needed for the scaling step; drop it so it does not
  # carry over into the reshaping steps.
  dplyr::ungroup() %>%
  dplyr::select(cell_population, state_marker, min_max) %>%
  # One column per state_marker, then population names become row names
  spread(key = state_marker, value = min_max) %>%
  column_to_rownames(var = "cell_population") %>%
  as.matrix()
Mutate
Mutate at
The example below rounds values to six decimal places in all the columns specified by vars(). You do not need to select columns to use mutate_at(); it performs the operation on the specified columns and keeps the rest of the data as it is.
# Round every column named in `markers` to six decimal places; all other
# columns are left as they are. round() already returns a numeric vector, so
# no format()/as.numeric() round trip is needed -- and that round trip can
# drop digits, because format() defaults to 7 significant digits
# (e.g. 123456.123456 would come back as 123456.1).
# dplyr >= 1.0 equivalent: mutate(across(all_of(markers), ~ round(.x, 6)))
manual_dat %>% mutate_at(vars(all_of(markers)), ~ round(., 6))
Plots
Colorblind-friendly palette
# Palette that starts with grey
cbPalette <- c(
  "#999999", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)
# Palette that starts with black
cbbPalette <- c(
  "#000000", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)
# For fill colours, add this to the plot
scale_fill_manual(values = cbPalette)
# For line and point colours, add this to the plot
scale_colour_manual(values = cbPalette)
Main article: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
Complex Heatmaps
Put Text in the Cells
# Clustered heatmap of `mat` with a letter drawn in selected cells:
# "C" where mat_decision is "Confirmed", "T" where it is "Tentative";
# all other cells get no text.
hmap <- ComplexHeatmap::Heatmap(
  mat,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  heatmap_legend_param = list(title = "Mean\nImportance"),
  column_split = c_breaks,
  cell_fun = function(j, i, x, y, width, height, fill) {
    # Look up the decision for this cell once, then pick the label
    decision <- mat_decision[i, j]
    if (decision == "Confirmed") {
      grid.text("C", x, y, gp = gpar(fontsize = 6))
    } else if (decision == "Tentative") {
      grid.text("T", x, y, gp = gpar(fontsize = 6))
    }
  },
  column_names_gp = gpar(fontsize = 10),
  row_names_gp = gpar(fontsize = 10)
)
Parallel processing
doMC
# Load doMC and register it as the parallel backend with 2 cores
library(doMC)
registerDoMC(cores = 2)
# Evaluate sqrt(i) for i = 1, 2, 3 via %dopar% and row-bind the results
foreach(i = 1:3, .combine = rbind) %dopar% sqrt(i)