This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(gganimate) | |
# sample of 100 data points | |
tibble(x = rexp(1e2)) -> d | |
# 20 bootstrap samples, you likely want several orders of magnitude more. | |
tibble(boot_samples = seq_len(2e1)) %>% | |
mutate(data = map(boot_samples, ~ d %>% sample_frac(1, replace = TRUE))) %>% | |
unnest(cols = c(data)) %>% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(fitdistrplus) | |
library(countreg) | |
library(tidyverse) | |
library(rtweet) | |
get_timeline("statwonk", n = 1e4) %>% | |
filter(!is_retweet) %>% | |
pull(retweet_count) -> my_retweets | |
# I know a priori it's negbin, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install.packages("countreg", repos="http://R-Forge.R-project.org") | |
# https://www.fromthebottomoftheheap.net/2016/06/07/rootograms/ | |
# https://channel9.msdn.com/Events/useR-international-R-User-conferences/useR-International-R-User-2017-Conference/countreg-Tools-for-count-data-regression | |
library(countreg) | |
rzinbinom(3e3, size = 4, mu = 20, pi = 0.05) -> x | |
table(x) | |
hist(x, col = "orange") | |
rootogram(glm(x ~ 1, family = "poisson")) # zeros under fit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# https://www.kellogg.northwestern.edu/faculty/weber/decs-430/Notes%20on%20the%20Poisson%20and%20exponential%20distributions.pdf | |
# http://www.mscs.mu.edu/~jsta/issues/11(3)/JSTA11(3)p2.pdf | |
# Case 1: memoryless intra-arrival times. They're not correlated. | |
tibble::tibble( | |
durations_between = rexp(3e4, rate = 20) | |
) %>% | |
mutate(time = cumsum(durations_between)) %>% | |
mutate(time_interval = cut(time, breaks = seq(0, 1e4, 1), labels = FALSE)) %>% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(brms) | |
library(tidybayes) | |
library(ggthemes) | |
# Simulation settings | |
innings <- 9 | |
games <- 162 | |
number_of_opposing_pitchers <- 20 # purposly set low to illustrate variation | |
opposing_pitchers <- rnorm(number_of_opposing_pitchers, sd = 0.5) # a set of opposing pitcher effects |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
plot.new() | |
par(bg = "black") | |
plot(density(rbeta(1e6, 50, 50)), col = "pink3", lwd = 1, ylim = c(0, 8), | |
xlab = "", ylab = "", main = "Beta distribution", axes = FALSE, | |
col.main = 'white') | |
map2(c(seq(1, 49, 1), 50), | |
c(seq(1, 49, 1), 50), | |
function(x, y){ message(x,y);rbeta(1e6, x + 1, y + 1)}) %>% | |
map(function(x) { density(x) %>% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
expand.grid( | |
risk = seq(0.001, 0.02, 0.001), | |
units_of_exposure = seq_len(24*7) | |
) %>% as_tibble() %>% | |
mutate(total_risk = map2_dbl(risk, units_of_exposure, ~ 1 - (1 - .x)^(.y))) %>% | |
ggplot(aes(x = risk, y = units_of_exposure)) + | |
geom_raster(aes(fill = total_risk), alpha = 0.90) + | |
scale_fill_gradient2(name = "Total chance", | |
low = "white", mid = "white", high = "purple4", midpoint = 0.5, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://news.ycombinator.com/item?id=20356610 | |
# chance of the cluster size being >5 when we can cluster events within a one-week period | |
# and the rate of events is one every 2 months (debatable, but feels right to me)? | |
# My re-write: | |
# probability of more than four outages in a given week given a rate of every other month | |
# _assuming_: totally unrelated infra (none share AWS, etc.) | |
ppois(4, lambda = 1/2/4.5, lower.tail = FALSE) # ~ 1/7M chance. | |
# Given that we've observed five in one-week, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(survival) | |
rweibull_cens <- function(n, shape, scale) { | |
# http://statwonk.com/weibull.html | |
a_random_death_time <- rweibull(n, shape = shape, scale = scale) | |
a_random_censor_time <- rweibull(n, shape = shape, scale = scale) | |
observed_time <- pmin(a_random_censor_time, a_random_death_time) | |
censor <- observed_time == a_random_death_time | |
tibble(time = observed_time, censor = censor) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(brms) | |
library(tidybayes) | |
N <- 1e3 | |
number_of_groups <- 12 | |
group_means <- rnorm(number_of_groups) | |
tibble(obs = seq_len(N)) %>% | |
mutate(group = sample(seq_len(number_of_groups), n(), replace = TRUE), | |
contribution = group_means[group], |