Skip to content

Instantly share code, notes, and snippets.

@turgeonmaxime
turgeonmaxime / extract_annotations.R
Created March 15, 2024 15:18
Extract some filename annotations using regex
library(stringr)
data_path <- 'somefolder/planet04_rgbn_c9_r8.txt'
# We want to extract 9 and 8
basename(data_path)
# Too many numbers
str_extract_all(basename(data_path), "([0-9]+)")
# [[1]]
@turgeonmaxime
turgeonmaxime / sens_spec_sim.R
Last active June 17, 2022 22:10
Sensitivity and specificity are not properties of the test; they depend on the population
library(tidyverse)
# Inverse logit (logistic function): maps a real-valued `t` into the unit
# interval. Vectorized over `t`.
# Delegates to stats::plogis() because the textbook form exp(t) / (1 + exp(t))
# evaluates to Inf / Inf = NaN once exp(t) overflows (t above roughly 709).
expit <- function(t) stats::plogis(t)
n <- 1000000
prev_vec <- c(0.01, 0.05, 0.1, 0.25, 0.5)
results <- purrr::map_df(prev_vec, \(prev) {
# Generate data
dvec <- rbinom(n, prob = prev, size = 1)
@turgeonmaxime
turgeonmaxime / dotplot_ToothGrowth.R
Created April 28, 2022 20:47
Dotplot ggplot bug?
library(tidyverse)
library(cowplot)
# Dot plot of tooth length with dots filled by dose level; the fill legend is
# suppressed. `guides(fill = FALSE)` is deprecated (ggplot2 >= 3.3.4 warns),
# so the legend is dropped with the supported value "none" instead.
gg1 <- ggplot(ToothGrowth, aes(x = len)) +
  geom_dotplot(aes(fill = as.factor(dose)),
               binwidth = .5) +
  guides(fill = "none")
ToothGrowth2 <- arrange(ToothGrowth, len)
gg2 <- ggplot(ToothGrowth, aes(x = len, group = factor(dose))) +
library(tidyverse)
library(nycflights13)

# Mean departure delay per destination; missing delays are ignored.
delay <- summarize(
  group_by(flights, dest),
  means = mean(dep_delay, na.rm = TRUE)
)

# Attach each destination's mean delay to every flight going there.
joined <- inner_join(x = delay, y = flights, by = "dest")
@turgeonmaxime
turgeonmaxime / cox_cb_poisson_comparison.R
Last active November 7, 2021 18:11
Comparing Cox regression, Poisson regression with splines, and case-base sampling with splines on two datasets: lung from survival, ERSPC from casebase
#----
# Poisson vs casebase
# authors: Max Turgeon, Jesse Islam and Sahir Bhatnagar
# date: 10/1/2021
#----
set.seed(1952)
library(casebase)
library(cowplot)
library(Epi)
@turgeonmaxime
turgeonmaxime / t-test_outliers.md
Last active October 28, 2021 19:26
Simulation study looking at the effect of outliers on the Type I error rate of the t-test
B <- 1000
n <- 20
sigma <- 10
p <- 0.9
alpha <- 0.05

results <- replicate(B, {
    norm_vars1 <- rnorm(n)
 # Contaminated normal
@turgeonmaxime
turgeonmaxime / casebase_inset.R
Created September 30, 2021 20:37
Adding inset plots using the cowplot package
library(casebase)
library(survival)
library(splines)
library(tidyverse)
library(cowplot)
# 1. Fit casebase with splines----
data("ERSPC")
ERSPC <- mutate(ERSPC, ScrArm = factor(ScrArm,
levels = c(0,1),
@turgeonmaxime
turgeonmaxime / sk_vaccine.R
Last active August 4, 2021 19:54
Visualize vaccine uptake in Saskatchewan using the Government's official numbers
library(tidyverse)
library(RcppRoll)
library(rvest)
# Use the session's temporary directory as the download location
tmp_dir <- tempdir()
# file.path() is the idiomatic way to join path components (no hand-written
# separator); the resulting string is the same as before.
file_path <- file.path(tmp_dir, "vaccine_sk.csv")
# Download file
@turgeonmaxime
turgeonmaxime / covid_alberta_sask.R
Last active August 8, 2020 19:23
Visualization of COVID-19 in Canada
library(tidyverse)
library(lubridate)
library(readr)
data_hr <- read_csv("https://raw.githubusercontent.com/ishaberry/Covid19Canada/master/timeseries_hr/cases_timeseries_hr.csv")
# Coerce string to dates and factor provinces
data_cum <- data_hr %>%
filter(province %in% c("Alberta", "Saskatchewan")) %>%
mutate(date_report = lubridate::dmy(date_report)) %>%
@turgeonmaxime
turgeonmaxime / covid_sk_jitter.R
Last active June 5, 2020 20:36
Visualize COVID cases in Saskatchewan
library(tidyverse)
library(lubridate)
library(readr)
library(ggbeeswarm)
# Download data on cases by health region
data_hr <- read_csv("https://raw.githubusercontent.com/ishaberry/Covid19Canada/master/timeseries_hr/cases_timeseries_hr.csv")
# Filter for province, coerce string to dates, and add variable
# to highlight certain regions