Dan Lewer danlewer

## gist:10581359
# save your sudoku as a tab delimited text file. Stringr is for the function str_count

library(stringr)
puz <- as.matrix(read.csv("sudpuz.txt", sep="\t", header = F))
y <- c(puz)

# make keys

sq_start <- c(1, 4, 7, 28, 31, 34, 55, 58, 61)
key <- matrix(1:81, 9, 9)

## direct_standardise.R
#-------------
# example data
#-------------

actual <- expand.grid(age = 1:3, sex = c('m', 'f')) # age groups 1:3
actual$n <- c(103, 313, 584, 606, 293, 101)
actual$true_prob <- c(0.1, 0.2, 0.4, 0.15, 0.25, 0.45)

dat <- actual
dat$sample_success <- rbinom(nrow(dat), dat$n, dat$true_prob) # study sample

## bootstrap_means_paired.R
# function

mbPaired <- function(x, y, B = 1000) {
  lx <- length(x)
  ly <- length(y)
  if (lx != ly) stop('x and y differ in length')
  # sample indices
  M <- sample(seq_len(lx), size = lx * B, replace = T)
  # calculate differences and mean of differences
  diffs <- x[M] - y[M]

## make_age_groups.R
make_age_group <- function(ages, min_val = seq(10, 100, 10)) {
  x <- findInterval(ages, min_val)
  y <- setNames(as.data.frame.list(aggregate(ages, list(x), range)), c('group', 'min', 'max'))
  y$label <- ifelse(y$min == y$max, y$min, paste0(y$min,'-',y$max))
  return(factor(x, y$group, y$label))
}

## easyRoc.R
# function
# 'screen' is numeric vector of scores from the screening tool
# 'gs' is a logical vector showing gold standard disease status
# 'vals' is a numeric vector of screening tool values to test (including all values in 'screen')
# returns a ROC plot and a list where the first value is the sensitivity and specificity for all values of 'val', and the second is the area under ROC

easyRoc <- function(screen, gs, vals) {
  pos <- screen >= rep(vals, each = length(screen))
  pos <- matrix(pos, ncol = length(vals))
  sens <- colSums(gs & pos) / sum(gs)

## linear_rescaling.R
# rescale a numeric vector to new maximum and minimum values
# original values will map to new values in a linear fashion

rescale <- function(x, MIN, MAX) { # linear rescaling
  y <- x - min(x)
  y <- y * (MAX - MIN) / max(y)
  y + MIN
}

# example

## exact_match.R
# function requires data.table

library(data.table)

#------------------------
# exact matching function
#------------------------

# stratifies dataset and then selects random observations within strata
# data = dataset containing:

## life_tables_mc.R
# this code calculates a life table using mortality rates by single-year-of-age
# confidence intervals are calculated assuming that each year-of-age is an independent sample

#-----------------
# make sample data
#-----------------

set.seed(56)
dat <- data.frame(age = 0:99)
dat$person_years <- 50000 / (1 + exp(-(seq(10, -5, length.out = nrow(dat)))))

## Last status in a vector equalling c(x, y, z)
# 'a' is a vector
# 'vals' is a vector
# returns a vector of same lengths as 'a' showing the last value in 'a' equal to any value in 'vals'
# 'fill' is the value given if none of the values in 'vals' has yet occurred in 'a'
# 'excl' is a vector specifying values of 'vals' that are to be excluded. Best used when vals is not specified (and defaults to unique(a))

last_status <- function(a, vals = unique(a), excl = NA, fill = NA) {
  if (!is.na(excl)) {vals <- setdiff(vals, excl)}
  i <- a %in% vals
  j <- a[i][cumsum(i)]

## merrimekko.R
# make a merrimekko chart (100% stacked bar chart with variable widths)
# m is a matrix
# cols is a vector of colours of same length as number of rows in matrix
# ... is other arguments passed to plot
# set par(xpd = NA) to see x-axis labels

mm <- function(m, cols = NA, ...) {
  widths <- colSums(m)
  xs <- c(0, cumsum(widths)) / sum(m)
  xl <- rep(xs[-length(xs)], each = nrow(m)) # x-left
	# save your sudoku as a tab delimited text file. Stringr is for the function str_count

	library(stringr)
	puz <- as.matrix(read.csv("sudpuz.txt", sep="\t", header = F))
	y <- c(puz)

	# make keys

	sq_start <- c(1, 4, 7, 28, 31, 34, 55, 58, 61)
	key <- matrix(1:81, 9, 9)
	#-------------
	# example data
	#-------------

	actual <- expand.grid(age = 1:3, sex = c('m', 'f')) # age groups 1:3
	actual$n <- c(103, 313, 584, 606, 293, 101)
	actual$true_prob <- c(0.1, 0.2, 0.4, 0.15, 0.25, 0.45)

	dat <- actual
	dat$sample_success <- rbinom(nrow(dat), dat$n, dat$true_prob) # study sample
	# function

	mbPaired <- function(x, y, B = 1000) {
	lx <- length(x)
	ly <- length(y)
	if (lx != ly) stop('x and y differ in length')
	# sample indices
	M <- sample(seq_len(lx), size = lx * B, replace = T)
	# calculate differences and mean of differences
	diffs <- x[M] - y[M]
	make_age_group <- function(ages, min_val = seq(10, 100, 10)) {
	x <- findInterval(ages, min_val)
	y <- setNames(as.data.frame.list(aggregate(ages, list(x), range)), c('group', 'min', 'max'))
	y$label <- ifelse(y$min == y$max, y$min, paste0(y$min,'-',y$max))
	return(factor(x, y$group, y$label))
	}
	# function
	# 'screen' is numeric vector of scores from the screening tool
	# 'gs' is a logical vector showing gold standard disease status
	# 'vals' is a numeric vector of screening tool values to test (including all values in 'screen')
	# returns a ROC plot and a list where the first value is the sensitivity and specificity for all values of 'val', and the second is the area under ROC

	easyRoc <- function(screen, gs, vals) {
	pos <- screen >= rep(vals, each = length(screen))
	pos <- matrix(pos, ncol = length(vals))
	sens <- colSums(gs & pos) / sum(gs)
	# rescale a numeric vector to new maximum and minimum values
	# original values will map to new values in a linear fashion

	rescale <- function(x, MIN, MAX) { # linear rescaling
	y <- x - min(x)
	y <- y * (MAX - MIN) / max(y)
	y + MIN
	}

	# example
	# function requires data.table

	library(data.table)

	#------------------------
	# exact matching function
	#------------------------

	# stratifies dataset and then selects random observations within strata
	# data = dataset containing:
	# this code calculates a life table using mortality rates by single-year-of-age
	# confidence intervals are calculated assuming that each year-of-age is an independent sample

	#-----------------
	# make sample data
	#-----------------

	set.seed(56)
	dat <- data.frame(age = 0:99)
	dat$person_years <- 50000 / (1 + exp(-(seq(10, -5, length.out = nrow(dat)))))
	# 'a' is a vector
	# 'vals' is a vector
	# returns a vector of same lengths as 'a' showing the last value in 'a' equal to any value in 'vals'
	# 'fill' is the value given if none of the values in 'vals' has yet occurred in 'a'
	# 'excl' is a vector specifying values of 'vals' that are to be excluded. Best used when vals is not specified (and defaults to unique(a))

	last_status <- function(a, vals = unique(a), excl = NA, fill = NA) {
	if (!is.na(excl)) {vals <- setdiff(vals, excl)}
	i <- a %in% vals
	j <- a[i][cumsum(i)]
	# make a merrimekko chart (100% stacked bar chart with variable widths)
	# m is a matrix
	# cols is a vector of colours of same length as number of rows in matrix
	# ... is other arguments passed to plot
	# set par(xpd = NA) to see x-axis labels

	mm <- function(m, cols = NA, ...) {
	widths <- colSums(m)
	xs <- c(0, cumsum(widths)) / sum(m)
	xl <- rep(xs[-length(xs)], each = nrow(m)) # x-left