Skip to content

Instantly share code, notes, and snippets.

@pcantalupo
Last active March 15, 2023 14:35
Show Gist options
  • Save pcantalupo/c5d68cd947ff1133545021b08ffded11 to your computer and use it in GitHub Desktop.
Save pcantalupo/c5d68cd947ff1133545021b08ffded11 to your computer and use it in GitHub Desktop.
single cell cluster score scores aggregate
# Aggregate a score table from the single-cell level to the cluster level.
# Params
#   scores   - data.frame or matrix where rows are cells and columns are celltypes.
#              Rownames and colnames are not required.
#   clusters - vector with one entry per row of 'scores'. The values specify the
#              group for each row (i.e. c(3, 1, 0, 1, 2, 0, 3, ...)).
# Return - A matrix with the mean score of every celltype in every cluster.
#          Columns are in the same order as in 'scores'.
#          Rows are sorted by cluster value (numeric -> numeric order,
#          character -> character order, factor -> level order) and the
#          rownames are set to the cluster value.
#          The result is always a matrix, also when there is a single cluster.
scores_clusterlevel <- function(scores, clusters) {
  scores <- as.matrix(scores)
  if (length(clusters) != nrow(scores)) {
    stop("'clusters' must have one entry per row of 'scores'", call. = FALSE)
  }

  # factor() orders the distinct cluster values and drops NA, which fixes both
  # the row order and the row names of the result.
  groups <- factor(clusters)
  n_groups <- nlevels(groups)

  # One vector of per-cluster means for every celltype column. as.numeric()
  # strips the dim/names that tapply() attaches.
  means <- vapply(
    seq_len(ncol(scores)),
    function(j) as.numeric(tapply(scores[, j], groups, mean)),
    numeric(n_groups)
  )

  # vapply() returns a plain vector when there is only one cluster, so the
  # matrix shape is imposed explicitly instead of relying on its simplification.
  matrix(
    means,
    nrow = n_groups,
    ncol = ncol(scores),
    dimnames = list(levels(groups), colnames(scores))
  )
}
# Determine the label for each row based on the maximum celltype score in the row.
# Params
#   scores   - rows are cells or clusters and columns are celltypes.
#              Rownames and colnames are required (an error is raised otherwise).
#   unknown  - do you want to calculate 'Unknown' labels?
#              - only supported for a cluster scores table
#   clusters - vector that specifies the cluster for each cell. Accepted so that
#              existing calls keep working, but not used by the current
#              implementation.
#   cutoff   - if unknown is TRUE and the maximum score is less than this value,
#              the label is 'Unknown' (default is 0.25; based on ScType)
# Return - The scores table with 3 prepended columns 'clusters', 'labels', and 'score'
#          'clusters' values are taken from the rownames
#          'labels'   the celltype with the maximum score
#          'score'    the maximum score
#          A row without any non-NA score gets an NA label and an NA score; when
#          unknown is TRUE such a row is labelled 'Unknown' as well.
add_labels_based_on_max <- function(scores, unknown = FALSE, clusters, cutoff = 0.25) {
  score_matrix <- as.matrix(scores)
  n_rows <- nrow(score_matrix)

  celltypes <- colnames(score_matrix)
  if (is.null(celltypes)) {
    stop("'scores' must have column names (celltypes)", call. = FALSE)
  }
  row_ids <- rownames(scores)
  if (is.null(row_ids)) {
    if (n_rows > 0L) {
      stop("'scores' must have row names (cells or clusters)", call. = FALSE)
    }
    row_ids <- character(0)
  }

  # Column index of the maximum in every row. which.max() returns integer(0)
  # for a row without any non-NA value, which is mapped to NA here so that the
  # result keeps one entry per row.
  best_col <- vapply(
    seq_len(n_rows),
    function(i) {
      idx <- which.max(score_matrix[i, ])
      if (length(idx) == 0L) NA_integer_ else as.integer(idx)
    },
    integer(1)
  )
  best_col <- unname(best_col)

  labels <- celltypes[best_col]
  # Matrix indexing picks one (row, column) cell per row; NA indices give NA.
  score <- score_matrix[cbind(seq_len(n_rows), best_col)]

  # stringsAsFactors = FALSE keeps 'labels' a character column on every R
  # version, so assigning "Unknown" below never hits a missing factor level.
  data <- data.frame(
    clusters = row_ids,
    labels = labels,
    score = score,
    scores,
    stringsAsFactors = FALSE
  )

  if (unknown) {
    below_cutoff <- is.na(data$score) | data$score < cutoff
    data$labels[below_cutoff] <- "Unknown"
  }
  data
}
# Purpose: Expand cluster-level labels (from your cluster scores table) to the cell level,
#          so that they can be added to a table of cell metadata.
# Params
#   scores   - data.frame of cluster scores containing at least two columns:
#              'clusters' and 'labels' (rows are clusters)
#   clusters - vector that specifies the cluster for each cell
# Return - A vector with one label per entry of 'clusters'; NA where the cluster
#          is not found in scores$clusters.
deconvolute_clusterlabel <- function(scores, clusters) {
  # Row of the cluster table that each cell's cluster corresponds to.
  cluster_row <- match(clusters, scores$clusters)
  scores$labels[cluster_row]
}
@pcantalupo
Copy link
Author

pcantalupo commented Mar 15, 2023

# Example: walk a small random score table through all three helpers.
# Fix the RNG seed so the random scores are reproducible.
set.seed(15)
# 10 cells x 3 celltypes of standard-normal scores.
m = matrix(rnorm(30), nrow = 10)
colnames(m) = c("foo","bar","baz")
m
# First five cells belong to cluster 0, last five to cluster 1.
clusters = c(rep(0,5),rep(1,5))
clusters
# Mean score of every celltype within each cluster.
cluster.scores = scores_clusterlevel(m, clusters)
cluster.scores
# Label each cluster with its highest-scoring celltype.
cluster.scores.labels = add_labels_based_on_max(cluster.scores)
cluster.scores.labels
# Same labelling, but clusters whose best score is below 0.35 become "Unknown".
# NOTE: `$label` reaches the 'labels' column through `$` partial matching.
add_labels_based_on_max(cluster.scores, unknown = TRUE, clusters, cutoff = 0.35)$label
# Expand the cluster labels back to one label per cell.
deconvolute_clusterlabel(cluster.scores.labels, clusters)

Output:

> m
             foo          bar        baz
 [1,]  0.2588229  0.855010750  1.3738887
 [2,]  1.8311207 -0.364980139  1.4123323
 [3,] -0.3396186  0.165554295 -0.4021738
 [4,]  0.8971982 -1.242784991 -0.4391439
 [5,]  0.4880163  1.459287685  1.0106085
 [6,] -1.2553858 -0.003612769  0.4308192
 [7,]  0.0227882 -0.020883173  0.7339284
 [8,]  1.0907732  0.032106002 -0.6806749
 [9,] -0.1321224 -1.167278006  0.3261963
[10,] -1.0750013 -0.519571618  0.9070296
> clusters = c(rep(0,5),rep(1,5))
> clusters
 [1] 0 0 0 0 0 1 1 1 1 1
> cluster.scores = scores_clusterlevel(m, clusters)
> cluster.scores
         foo        bar       baz
0  0.6271079  0.1744175 0.5911024
1 -0.2697896 -0.3358479 0.3434597
> cluster.scores.labels = add_labels_based_on_max(cluster.scores)
> cluster.scores.labels
  clusters labels     score        foo        bar       baz
0        0    foo 0.6271079  0.6271079  0.1744175 0.5911024
1        1    baz 0.3434597 -0.2697896 -0.3358479 0.3434597
> add_labels_based_on_max(cluster.scores, unknown = TRUE, clusters, cutoff = 0.35)$label
[1] "foo"     "Unknown"
> deconvolute_clusterlabel(cluster.scores.labels, clusters)
 [1] "foo" "foo" "foo" "foo" "foo" "baz" "baz" "baz" "baz" "baz"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment