Skip to content

Instantly share code, notes, and snippets.

@dantalus
Last active April 3, 2017 06:01
Show Gist options
  • Save dantalus/d619b129867aee7ed4063e31d13ee2e5 to your computer and use it in GitHub Desktop.
Save dantalus/d619b129867aee7ed4063e31d13ee2e5 to your computer and use it in GitHub Desktop.
# Table 1
# A basic, descriptive table that you would usually see as Table 1 in a
# publication
# Tests if multiple groups (data$arm)
tests.1 <- function(data, ...) {
tests.list <- list()
require(dplyr)
require(broom)
for (j in seq_along(data)) {
if(is.numeric(data[[j]])){
t <- aov(data[[j]] ~ arm, data) %>%
tidy()
tests.list[[j]] <- round(t$p.value[1], 2)
}
if(is.factor(data[[j]])){
c <- table(data[[j]], data$arm) %>%
chisq.test() %>%
tidy()
tests.list[[j]] <- c(round(c$p.value[1], 2),
rep("", length(levels(data[[j]]))))
}
}
unlist(tests.list)
}
tests.2 <- function(data, ...) {
tests.list <- list()
require(dplyr)
require(broom)
for (j in seq_along(data)) {
if(is.numeric(data[[j]])){
k <- kruskal.test(data[[j]] ~ arm, data) %>%
tidy()
tests.list[[j]] <- round(k$p.value[1], 2)
}
if(is.factor(data[[j]])){
c <- table(data[[j]], data$arm) %>%
chisq.test() %>%
tidy()
tests.list[[j]] <- c(round(c$p.value[1], 2),
rep("", length(levels(data[[j]]))))
}
}
unlist(tests.list)
}
# Generate the list of names for the table
name.1 <- function(x, ...) {
var.names <- list()
for (i in seq_along(x)) {
if(is.numeric(x[[i]])){
var.names[[i]] <- names(x[i])
}
if(is.factor(x[[i]])){
var.names[[i]] <- c(names(x[i]), levels(x[[i]]))
}
}
unlist(var.names)
}
# Means(sds) or counts(%)
summary.1 <- function(x, ...) {
summary.list <- list()
for (i in seq_along(x)) {
if(is.numeric(x[[i]])){
summary.list[[i]] <- paste0(round(mean(x[[i]], na.rm = TRUE), 1),
" \u00B1 ",
round(sd(x[[i]], na.rm = TRUE), 1))
}
if(is.factor(x[[i]])){
summary.list[[i]] <- c("", paste0(table(x[[i]]),
" (",
round(table(x[[i]]) /
sum(table(x[[i]])), 3) * 100,
"%)"))
}
}
unlist(summary.list)
}
summary.2 <- function(x, ...) {
summary.list <- list()
for (i in seq_along(x)) {
if(is.numeric(x[[i]])){
summary.list[[i]] <- paste0(round(quantile(x[[i]], probs = c(0.50),
na.rm = TRUE), 1),
" [",
round(quantile(x[[i]], probs = c(0.25),
na.rm = TRUE), 1),
", ",
round(quantile(x[[i]], probs = c(0.75),
na.rm = TRUE), 1),
"]")
}
if(is.factor(x[[i]])){
summary.list[[i]] <- c("", paste0(table(x[[i]]),
" (",
round(table(x[[i]]) /
sum(table(x[[i]])), 3) * 100,
"%)"))
}
}
unlist(summary.list)
}
# Missing observations
n.miss <- function(x, ...) {
miss.list <- list()
for (i in seq_along(x)) {
if(is.numeric(x[[i]])){
miss.list[[i]] <- length(x[[i]][!is.na(x[[i]])])
}
if(is.factor(x[[i]])){
miss.list[[i]] <- c(length(x[[i]][!is.na(x[[i]])]),
rep("", length(levels(x[[i]]))))
}
}
unlist(miss.list)
}
# Min and max
min.max <- function(x, ...) {
min.max.list <- list()
for (i in seq_along(x)) {
if(is.numeric(x[[i]])){
min.max.list[[i]] <- paste0("(",
round(min(x[[i]], na.rm = TRUE), 1),
", ",
round(max(x[[i]], na.rm = TRUE), 1),
")")
}
if(is.factor(x[[i]])){
min.max.list[[i]] <- c("", rep("", length(levels(x[[i]]))))
}
}
unlist(min.max.list)
}
# Quartiles
tiles <- function(x, ...) {
quantiles.list <- list()
for (i in seq_along(x)) {
if(is.numeric(x[[i]])){
quantiles.list[[i]] <- paste0(round(quantile(x[[i]], probs = c(0.25),
na.rm = TRUE), 1),
", ",
round(quantile(x[[i]], probs = c(0.50),
na.rm = TRUE), 1),
", ",
round(quantile(x[[i]], probs = c(0.75),
na.rm = TRUE), 1))
}
if(is.factor(x[[i]])){
quantiles.list[[i]] <- c("", rep("", length(levels(x[[i]]))))
}
}
unlist(quantiles.list)
}
# Median, IQR
med.iqr <- function(x, ...) {
quantiles.list <- list()
for (i in seq_along(x)) {
if(is.numeric(x[[i]])){
quantiles.list[[i]] <- paste0(round(quantile(x[[i]], probs = c(0.5),
na.rm = TRUE), 1),
" (",
round(quantile(x[[i]], probs = c(0.25),
na.rm = TRUE), 1),
", ",
round(quantile(x[[i]], probs = c(0.75),
na.rm = TRUE), 1),
")")
}
if(is.factor(x[[i]])){
quantiles.list[[i]] <- c("", rep("", length(levels(x[[i]]))))
}
}
unlist(quantiles.list)
}
# Select the data
table.1.data <- select(data,
var1, var2)
# Give more descriptive names
colnames(table.1.data) <- c("var1", "var2")
# Give factor levels better names if neeed
# Put it all together
data_frame(Variable = name.1(x),
Obs = n.miss(x),
col2 = summary.1(x),
"(Min, Max)" = min.max(x),
"25th, 50th, 75th quantiles" = tiles(x)) %>%
# Export html table for Word
stargazer(type = "html",
summary = FALSE,
out = "table1.htm",
digits = 1, rownames = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment