Last active
April 2, 2019 21:45
-
-
Save jhrcook/a97e052b8f84e4d37620c681a434bc3f to your computer and use it in GitHub Desktop.
Wilcoxon Rank Sum for Shikha
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load libraries individually | |
library(purrr) | |
library(magrittr) | |
library(tidyr) | |
library(dplyr) | |
# or just load | |
library(tidyverse) | |
# some random data to make my own data frame with | |
# the same structure as yours | |
set.seed(0) | |
df <- tibble(Protein.Id = letters[1:5], | |
Gene.Symbol = LETTERS[1:5], | |
Description = letters[1:5], | |
Number.of.peptides = sample(1:20, 5), | |
C1_G12D_vitro_1 = rnorm(5, 4, 2), | |
C1_G12D_vitro_2 = rnorm(5, 4, 2), | |
C1_G12D_vitro_3 = rnorm(5, 4, 2), | |
C1_WT_vitro_1 = rnorm(5, 6, 2), | |
C1_WT_vitro_2 = rnorm(5, 6, 2), | |
C1_WT_vitro_3 = rnorm(5, 6, 2), | |
C1_G12D_vivo_1 = rnorm(5, 4, 2), | |
C1_G12D_vivo_2 = rnorm(5, 4, 2), | |
C1_G12D_vivo_3 = rnorm(5, 4, 2), | |
C1_WT_vivo_1 = rnorm(5, 6, 2), | |
C1_WT_vivo_2 = rnorm(5, 6, 2), | |
C1_WT_vivo_3 = rnorm(5, 6, 2)) | |
# a wrapper around 'wilcox.test()' to take one row at a time | |
wrap_wilcox_test <- function(Gene.Symbol, data) { | |
g12d <- unlist(data[, 1:3]) # G12D sample data | |
wt <- unlist(data[, 4:6]) # WT sample data | |
# run test and return p-value | |
return(wilcox.test(g12d, wt)$p.value) | |
} | |
# get p-values for "in vitro" data | |
df$vitro_pval <- df %>% | |
dplyr::select(Gene.Symbol, contains("vitro")) %>% | |
dplyr::group_by(Gene.Symbol) %>% | |
tidyr::nest() %>% | |
purrr::pmap(wrap_wilcox_test) | |
# get p-values for "in vivo" data | |
df$vivo_pval <- df %>% | |
dplyr::select(Gene.Symbol, contains("vivo")) %>% | |
dplyr::group_by(Gene.Symbol) %>% | |
tidyr::nest() %>% | |
purrr::pmap(wrap_wilcox_test) | |
# There should now be 2 new columns "vitro_pval" and "vivo_pval" | |
# in your data frame | |
# The two steps above use the following workflow: | |
# 1. select the gene symbol and either vitro or vivo columns | |
# 2. group the data by gene (i.e. each row is separate) | |
# 3. make each group (i.e. row, i.e. gene) into a separate data frame | |
# 4. run each through the wilcox.test wrapper function | |
# From here, adjusting for multiple hypothesis testing (with BH method) is easy | |
df <- df %>% | |
mutate(vitro_pval_adj = p.adjust(vitro_pval, method = "BH"), | |
vivo_pval_adj = p.adjust(vivo_pval, method = "BH")) | |
# This should create the columns "vivo_pval_adj" and "vivo_pval_adj" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment