Skip to content

Instantly share code, notes, and snippets.

View danlewer's full-sized avatar

Dan Lewer danlewer

View GitHub Profile
# linear interpolation of lines between x and y coordinates, with option to find specific intercepts
# x must be positive monotonic, y does not need to be
interpolate <- function (x, y, xIncrements = 0.01, findx = NULL, findy = NULL) {
if (length(x) != length(y)) stop('length(x) does not equal length(y)')
if (is.unsorted(x)) stop('x is not positive monotonic')
dx <- diff(x)
dy <- diff(y)
xNotches <- round(dx / xIncrements, 0)
yIncrements <- dy / xNotches
# compare Cohen's kappa and % agreement
# for two raters making a binary (yes/no) decision
# assuming the same prevalence for both raters
library(viridisLite)
k <- function (Po, prevalence) {
Pe <- (prevalence^2) + ((1-prevalence)^2)
(Po - Pe) / (1 - Pe)
}
# Here's a function that allows you to specify the number of letters you want (eg. 5 would be A, B, C, D, and E), the length of the code (eg. 3 would be AAA, AAB, AAC, etc), the number of results you want (NA for all of them), and and separating character (eg. '-' would give A-A-A, A-A-B, A-A-C.)
letterCodes <- function(nletters, case = 'upper', lengthCode, nResults = NA, sep = '') {
f <- if (case == 'upper') LETTERS else letters
a <- expand.grid(rep(list(f[1:nletters]), lengthCode))
a <- a[, ncol(a):1]
a <- do.call("paste", c(a, sep = sep))
if (is.na(nResults)) a else a[1:min(nResults, length(a))]
}
# return a table, but specify the values you want to be tabulated
# works like base::table, but supplies only specified values, and returns 0 if none of those values exist in the vector
tab_specific_values <- function(vector, values = unique(vector)) `names<-`(rowSums(outer(values, vector, `==`)), values)
which.median <- function (x, ties.method = c('first', 'last', 'random', 'all'), ...) {
med <- median(x, ...)
dif <- abs(med - x)
ind <- which(dif == min(dif))
if (length(ind) > 1 & ties.method[1] != 'all') {
ind <- if (ties.method[1] == 'first') ind[1] else {if (ties.method[1] == 'last') tail(ind, 1) else sample(ind, 1)}
}
return (ind)
}
install.packages('Rtools')
install.packages('data.table')
install.packages('stringi')
install.packages('stringr')
install.packages('Amelia')
install.packages('epitools')
install.packages('epiR')
install.packages('RColorBrewer')
install.packages('viridis')
install.packages('viridisLite')
@danlewer
danlewer / montyHall.R
Created December 3, 2021 08:47
Simulate the Monty Hall problem
# function runs the monty hall problem 'n' times and returns the proportions of 'wins' under sticking and switching strategies
montyHall <- function (n = 1e5) {
prize <- sample(1:3, n, replace = T)
firstChoice <- sample(1:3, n, replace = T)
reveal <- matrix(rep.int(1L, n * 3), ncol = 3)
reveal[cbind(rep(seq_len(n), 2), c(prize, firstChoice))] <- 0L
reveal <- max.col(reveal, ties.method = 'random')
switchTo <- 6L - (firstChoice + reveal)
c(stick = mean(firstChoice == prize), switch = mean(switchTo == prize))
# Calculate predicted Forced Exhaled Volume in 1 second (FEV1), based on age, sex, height, and ethnicity
# Function takes vectors
# Formula is from:
# ERS TASK FORCE REPORT. Multi-ethnic reference values for spirometry for the 3-95-yr age range: the global lung function 2012 equations
# Philip H.
# Source: Guideline 2012
# https://www.ers-education.org/lr/show-details/?idP=138497
fev1pred <- function (sex = 'f', heightCM = 180, ageYears = 50, eth = 'White') {
ind <- (sex == 'f') + 1
# following the observation in Lewis et al (2005) https://onlinelibrary.wiley.com/doi/abs/10.1002/pds.1115
# that incidence of various events is higher in the months after patients join an electronic database
# this code provides a function for measuring incidence stratified by time after joining a cohort
# -- function reporting incidence stratified by duration after cohort entry --
# reformat dates as integers with an arbitrary origin, e.g. 1970-01-01
# entry = date of cohort entry
# exit = date of cohort exit
# diagnosis = date of event
# enter a maximum value in your data and generate tick-points for a plot axis
yax <- function(x, tickabove = F, ntick = 5) {
l <- c(c(1, 2, 4, 5, 25) %o% 10^(0:8))
d <- l[which.min(abs(x/ntick - l))]
d <- 0:(ntick+1) * d
i <- findInterval(x, d)
if (tickabove) {i <- i + 1}
d[seq_len(i)]
}