Skip to content

Instantly share code, notes, and snippets.

@slowkow
Last active August 29, 2022 18:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save slowkow/1866791169bdee7e1661d0ee20d31391 to your computer and use it in GitHub Desktop.
Save slowkow/1866791169bdee7e1661d0ee20d31391 to your computer and use it in GitHub Desktop.
Compute a hypergeometric p-value for a gene set of interest.
# Try this with:
# - https://github.com/jefworks/genesets
# - https://github.com/slowkow/tftargets
#' Compute a hypergeometric p-value for your gene set of interest relative to
#' a universe of genes that you have defined.
#'
#' @param ids A vector with genes of interest.
#' @param universe A vector with all genes, including the genes of interest.
#' @param gene_sets A list of gene vectors.
#' @return A dataframe with one row for each gene set, ordered by p-value.
do_hyper <- function(ids, universe, gene_sets) {
retval <- as.data.frame(t(sapply(gene_sets, function(gene_set) {
x <- sum(ids %in% gene_set)
n <- length(ids)
X <- sum(universe %in% gene_set)
N <- length(universe)
if (x > 0 && X > 0) {
pval <- phyper(x - 1, X, N - X, n, lower.tail = FALSE)
retvec <- c(
"x" = x, "n" = n, "X" = X, "N" = N,
"enrichment" = (x / n) / (X / N),
"pval" = pval
)
} else {
retvec <- c(
"x" = x, "n" = n, "X" = X, "N" = N,
"enrichment" = 0,
"pval" = 1
)
}
retvec
})))
retval$Name <- names(gene_sets)
retval <- retval[order(retval$pval),]
retval$qval <- p.adjust(retval$pval, method = "fdr")
retval
}
#' Compute a p-value with Fisher's exact test for your gene set of interest.
#'
#' @param ids A vector with genes of interest.
#' @param universe A vector with all genes, including the genes of interest.
#' @param gene_sets A list of gene vectors.
#' @return A dataframe with one row for each gene set, ordered by p-value.
do_fisher <- function(ids, universe, gene_sets) {
retval <- as.data.frame(t(sapply(gene_sets, function(gene_set) {
x <- sum(ids %in% gene_set)
n <- length(ids)
X <- sum(universe %in% gene_set)
N <- length(universe)
if (x > 0 && X > 0) {
fish <- fisher.test(
x = matrix(c(x, n - x, X - x, N - X - (n - x)), 2),
alternative = "greater"
)
retvec <- c(
"x" = x, "n" = n, "X" = X, "N" = N,
"enrichment" = (x / n) / (X / N),
"orlow" = fish$conf.int[1],
"orhigh" = fish$conf.int[2],
"or" = unname(fish$estimate),
"pval" = fish$p.value
)
} else {
retvec <- c(
"x" = x, "n" = n, "X" = X, "N" = N,
"enrichment" = 1,
"orlow" = 1,
"orhigh" = 1,
"oddsratio" = 1,
"pval" = 1
)
}
retvec
})))
retval$Name <- names(gene_sets)
retval <- retval[order(retval$pval),]
retval$qval <- p.adjust(retval$pval, method = "fdr")
retval
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment