Skip to content

Instantly share code, notes, and snippets.

@GabrielHoffman
Last active March 4, 2024 17:59
Show Gist options
  • Save GabrielHoffman/fee69865d20f9a965d5a053a8204cd14 to your computer and use it in GitHub Desktop.
Save GabrielHoffman/fee69865d20f9a965d5a053a8204cd14 to your computer and use it in GitHub Desktop.
Composite posterior test on mashr results
# Example code for analysis of
# 1) Trait specificity
# 2) Cell-type specificity
# 1) Trait specificity
######################
library(arrow)
library(tidyverse)
library(dreamlet)
df_prob = read_parquet('mashr_posterior.parquet')
# probabilities from first dataset
df1 = df_prob %>%
filter( Dataset == "MSSM",
AnnoLevel == "class",
coef == "c02xAD - c02xControl" ) %>%
pivot_wider(names_from = "assay", values_from="Probability")
# probabilities from second dataset
df2 = df_prob %>%
filter( Dataset == "RUSH",
AnnoLevel == "class",
coef == "c03xAD - c03xControl" ) %>%
pivot_wider(names_from = "assay", values_from="Probability")
df_join = inner_join(df1, df2, by="ID") %>%
select( -coef.x, -AnnoLevel.x, -Dataset.x,
-coef.y, -AnnoLevel.y, -Dataset.y)
# Identify genes differentially expressed in Immune in both datsets, but no other cell types
include = c('Immune.x', 'Immune.y')
exclude = setdiff(colnames(df_join)[-1], include)
res = df_join %>%
column_to_rownames(var='ID') %>%
filter(rowSums(is.na(.)) != ncol(.)) %>%
compositePosteriorTest(
include = include,
exclude = exclude,
test = "all")
# top genes
head(sort(res, decreasing=TRUE))
# Identify genes differentially expressed in Immune in both datsets, regardless of results in other cell types
include = c('Immune.x', 'Immune.y')
exclude = NULL
res = df_join %>%
column_to_rownames(var='ID') %>%
filter(rowSums(is.na(.)) != ncol(.)) %>%
compositePosteriorTest(
include = include,
exclude = exclude,
test = "all")
# top genes
head(sort(res, decreasing=TRUE))
# Identify genes differentially expressed in at least one neuron type
include = c('EN.x', 'EN.y', "IN.x", "IN.y")
exclude = setdiff(colnames(df_join)[-1], include)
res = df_join %>%
column_to_rownames(var='ID') %>%
filter(rowSums(is.na(.)) != ncol(.)) %>%
compositePosteriorTest(
include = include,
exclude = exclude)
# top genes
head(sort(res, decreasing=TRUE))
# 2) Cell-type specificity
##########################
library(tidyverse)
library(arrow)
library(ggplot2)
library(dreamlet)
df = read_parquet("mashr_posterior.parquet")
plot_heat = function(df, ids){
df %>%
filter(ID %in% ids) %>%
ggplot(aes(ID, assay, fill=Probability)) +
geom_tile() +
theme_classic(22) +
scale_fill_gradient(limits=c(0, 1),low="white", high="red",na.value = "grey92", name="Probability\nnon-zero") +
coord_equal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
xlab('') +
ylab('')
}
# filter
df_sub = df %>%
filter(Dataset == 'meta',
coef == 'm01x', # AD vs controls
AnnoLevel == 'subclass')
# Define query
# Microglia
cellTypes = unique(df_sub$assay)
include = c("Micro")
exclude = c(cellTypes[cellTypes!=include])
# rank genes based on query
res = df_sub %>%
pivot_wider(names_from = "assay", values_from="Probability") %>%
select( -coef, -AnnoLevel, -Dataset) %>%
column_to_rownames(var='ID') %>%
filter(rowSums(is.na(.)) != ncol(.)) %>%
compositePosteriorTest(include, exclude) %>%
data.frame(prob=.) %>%
arrange(prob)
genes = rownames(tail(res))
plot_heat( df_sub, genes )
# Only EN
include = grep("^EN", cellTypes, value=TRUE)
exclude = cellTypes[!cellTypes %in% include]
# rank genes based on query
res = df_sub %>%
pivot_wider(names_from = "assay", values_from="Probability") %>%
select( -coef, -AnnoLevel, -Dataset) %>%
column_to_rownames(var='ID') %>%
filter(rowSums(is.na(.)) != ncol(.)) %>%
filter(rowSums(!is.na(.)) >= 7) %>% # gene must be expressed in at least 7 cell types
compositePosteriorTest(include, exclude) %>%
data.frame(prob=.) %>%
arrange(prob)
genes = rownames(tail(res))
plot_heat( df_sub, genes )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment