Skip to content

Instantly share code, notes, and snippets.

@nachocab
Last active Dec 15, 2015
Embed
What would you like to do?
Worry about correctness and repeatability, not p-values.
# generate the input data
input_data <- data.frame(muscle_strength = c("lower", "middle", "upper"), num_deaths = c(214, 143, 146), group_size = c(2920, 2919, 2923))
input_data$mortality_rate <- input_data$num_deaths / input_data$group_size # this is a ratio, the sample proportion, not the population probability
input_data$standard_error <- sqrt(input_data$mortality_rate*(1-input_data$mortality_rate)/input_data$group_size)
# visualize it
library(plyr)
viz_data <- ddply(input_data, 1, function(group){
range_num_deaths <- 0:300
df <- data.frame(muscle_strength = group$muscle_strength,
num_deaths = range_num_deaths,
mortality_rate = range_num_deaths/group$group_size ,
probability = sapply(range_num_deaths, function(x) dbinom(x, group$group_size, group$mortality_rate)))
df
})
library(ggplot2)
ggplot(viz_data, aes(mortality_rate, probability)) +
geom_area(aes(fill = muscle_strength), alpha=.5) +
geom_vline(data = input_data, aes(xintercept = mortality_rate, color = muscle_strength), linetype = 2) + xlim(.025,.1) +
geom_text(data = input_data, aes(c(.079, .045, .054), 2e-3, label = paste0(signif(mortality_rate,2)*100, "%"), color = muscle_strength), size = 8)
# Fit a logistic regression model
model_data <- data.frame(muscle_strength=rep(levels(input_data$muscle_strength), times=2), outcome = rep(c("survived","died"), each = 3), counts = c(input_data$group_size - input_data$num_deaths, input_data$num_deaths))
model_data$muscle_strength <- relevel(model_data$muscle_strength, ref = "upper")
model <- glm(outcome == "died" ~ muscle_strength, weights = model_data$counts, family = binomial(link = "logit"), data = model_data)
summary(model)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment