Skip to content

Instantly share code, notes, and snippets.

@brshallo
Last active June 7, 2021 20:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brshallo/4db0d91daee1e638c2c5e1ec816c1054 to your computer and use it in GitHub Desktop.
Save brshallo/4db0d91daee1e638c2c5e1ec816c1054 to your computer and use it in GitHub Desktop.
Mockup piecewise weighted Spearman Correlation based on derivatives of GAM model... per conversations with Shaina and Ricky
library(tidyverse)
library(mgcv)
library(gratia)

n_obs <- 10000

set.seed(12345)
new_dat <- tibble(x = rnorm(n_obs, sd = 2*pi),
                  y = sin(x) + rnorm(n_obs, sd = 0.5))

# GAM model
mod <- gam(y ~ s(x), data = new_dat, method = "REML")

p <- bind_cols(new_dat, tibble(.pred = predict(mod, new_dat))) %>% 
  ggplot(aes(x = x, y = y)) + 
  geom_point(alpha = 0.01) + 
  geom_line(aes(y = .pred), colour = "red")+
  scale_x_continuous(limits = c(-6*pi, 6*pi), 
                     breaks = seq(-11 * pi / 2, 11 * pi / 2, pi) %>% 
                       round(2),
                     guide = guide_axis(n.dodge = 2))+
  labs(caption = "Breaks where derivitive of 0 would be expected for sin(x).")

p
#> Warning: Removed 25 rows containing missing values (geom_point).
#> Warning: Removed 25 row(s) containing missing values (geom_path).

# dervitive of GAM
deriv <- fderiv(mod, terms = "x")

signs_same <- function(x1, x2){
  sign(x1) == sign(x2)
}

# Identify mins / maxs
extrema <- 
  bind_cols(tibble(deriv = as.numeric(deriv$derivatives[[1]]$deriv)),
            deriv$eval) %>%
  filter(!signs_same(deriv, lead(deriv)))

split_points <- tibble(lt = c(extrema$x, Inf)) %>% 
  mutate(gtoe = lag(lt),
         gtoe = ifelse(is.na(gtoe), -Inf, gtoe)) %>% 
  relocate(gtoe)

# Split data by and calculate individual Spearman correlations
data_splits <- map2(
  split_points$lt,
  split_points$gtoe,
  ~ filter(new_dat, x < .x, x >= .y)
)

spearman_cors <- split_points %>% 
  bind_cols(tibble(data = data_splits)) %>% 
  mutate(n_obs = map_int(data, nrow),
         spearman_cor = map_dbl(data, ~cor(.x, method = "spearman")[2,1]))

spearman_cors
#> # A tibble: 9 x 5
#>      gtoe     lt data                 n_obs spearman_cor
#>     <dbl>  <dbl> <list>               <int>        <dbl>
#> 1 -Inf    -14.3  <tibble [127 × 2]>     127       -0.489
#> 2  -14.3   -9.31 <tibble [573 × 2]>     573        0.296
#> 3   -9.31  -5.42 <tibble [1,202 × 2]>  1202        0.593
#> 4   -5.42  -1.77 <tibble [1,960 × 2]>  1960       -0.798
#> 5   -1.77   1.66 <tibble [2,214 × 2]>  2214        0.834
#> 6    1.66   5.32 <tibble [1,972 × 2]>  1972       -0.818
#> 7    5.32   9.20 <tibble [1,230 × 2]>  1230        0.632
#> 8    9.20  14.0  <tibble [597 × 2]>     597        0.191
#> 9   14.0  Inf    <tibble [125 × 2]>     125       -0.638

# Plot bin points
p +
  geom_vline(xintercept = spearman_cors$gtoe[-1], color = "blue", alpha = 0.5)
#> Warning: Removed 25 rows containing missing values (geom_point).
#> Warning: Removed 25 row(s) containing missing values (geom_path).

# Weighted average absolute Spearman correlation
weighted_abs_spearman <- spearman_cors %>% 
  with(weighted.mean(abs(spearman_cor), n_obs))

weighted_abs_spearman
#> [1] 0.6940779

Created on 2021-06-07 by the reprex package (v2.0.0)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment