brshallo/tidy_ks_test_all_combos.md

## tidy_ks_test_all_combos.md

      
    Raw
  

              tidy_ks_test_all_combos.md
            
          
    library(tidyverse)

# Simulated sample: groups "a" and "c" are drawn with the same parameters,
# while "b" comes from a different distribution. The seed is fixed so the
# draws are reproducible.
set.seed(12)
data <- tibble(
  group = rep(c("a", "b", "c"), times = c(30, 20, 20)),
  value = c(
    rnorm(n = 30, mean = 8, sd = 5), # group "a"
    rt(n = 20, df = 11, ncp = 5),    # group "b": noncentral t
    rnorm(n = 20, mean = 8, sd = 5)  # group "c": same parameters as "a"
  )
)

# Overlaid, semi-transparent density curves of `value`, filled by group.
ggplot(data, aes(value, fill = group)) +
  geom_density(alpha = 0.3)

# Collapse to one row per group, storing that group's observations as a
# numeric vector inside the list-column `value`.
nested_data <- summarise(
  group_by(data, group),
  value = list(value)
)

# Pair every group with every group (including itself) by attaching a copy of
# `nested_data` to each row and unnesting it.
#
# The copy's columns are renamed up front, so the result has `group`/`value`
# for the first sample and `group1`/`value1` for the second. This avoids
# relying on unnest()'s legacy auto-suffixing of duplicate column names.
# `.drop` is omitted: it is deprecated since tidyr 1.0.0, where unnest()
# always preserves the other list-columns (here, `value`).
nested_data_both <- nested_data %>%
  mutate(
    data2 = list(rename(nested_data, group1 = group, value1 = value))
  ) %>%
  unnest(cols = data2)

# Run a two-sample Kolmogorov-Smirnov test on each pair of samples. The full
# `htest` result is kept in the list-column `ks_test`, and its test statistic
# is pulled out into the numeric column `ks_stat`.
mutate(
  nested_data_both,
  ks_test = map2(value, value1, stats::ks.test),
  ks_stat = map_dbl(ks_test, "statistic")
)
#> Warning in .f(.x[[i]], .y[[i]], ...): cannot compute exact p-value with
#> ties
#> Warning in .f(.x[[i]], .y[[i]], ...): cannot compute exact p-value with
#> ties

#> Warning in .f(.x[[i]], .y[[i]], ...): cannot compute exact p-value with
#> ties
#> # A tibble: 9 x 6
#>   group value      group1 value1     ks_test ks_stat
#>   <chr> <list>     <chr>  <list>     <list>    <dbl>
#> 1 a     <dbl [30]> a      <dbl [30]> <htest>   0    
#> 2 a     <dbl [30]> b      <dbl [20]> <htest>   0.567
#> 3 a     <dbl [30]> c      <dbl [20]> <htest>   0.167
#> 4 b     <dbl [20]> a      <dbl [30]> <htest>   0.567
#> 5 b     <dbl [20]> b      <dbl [20]> <htest>   0    
#> 6 b     <dbl [20]> c      <dbl [20]> <htest>   0.55 
#> 7 c     <dbl [20]> a      <dbl [30]> <htest>   0.167
#> 8 c     <dbl [20]> b      <dbl [20]> <htest>   0.55 
#> 9 c     <dbl [20]> c      <dbl [20]> <htest>   0

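# To avoid running each test twice (e.g. on a,b and again on b,a) and to skip
# the self-comparisons (a,a), filter the pairs before running the tests,
# e.g. with `filter(group < group1)`.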
<sup>Created on 2019-08-23 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup>