Skip to content

Instantly share code, notes, and snippets.

@dantalus
Last active June 9, 2017 10:54
Show Gist options
  • Save dantalus/3ffe96112c77be50c7d1c53f2c9b100a to your computer and use it in GitHub Desktop.
Save dantalus/3ffe96112c77be50c7d1c53f2c9b100a to your computer and use it in GitHub Desktop.
# Packages ----
# install.packages(c("ggthemes", "ggbeeswarm"))
library(ggthemes)
library(ggbeeswarm)
library(ggplot2)
library(dplyr)
library(readr)

# Load the workshop data ----
urlfile <- "https://raw.githubusercontent.com/dantalus/intro_workshop/master/plot.csv"
plotDf <- read.csv(urlfile)

# What is a tibble? ----
class(plotDf)                    # what read.csv() handed back
as_tibble(plotDf)                # print as a tibble; plotDf itself is unchanged
plotDf <- as_tibble(plotDf)      # convert for real
plotDf <- as.data.frame(plotDf)  # ...and back to a plain data.frame

# Three ways to end up with a tibble.
# as_tibble() replaces the deprecated as.tbl(); there is no as_tbl() in dplyr.
plotDf <- read_csv(urlfile)
plotDf <- as_tibble(read.csv(urlfile))
plotDf <- read.csv(urlfile) %>% as_tibble()
# Rename variables ----
# Base R
# Option 1: copy the column under the new name, then drop the old one.
plotDf$age <- plotDf$demo_age
plotDf[, -5]                                 # drop by position; assumes demo_age is column 5
plotDf[, !grepl("demo_age", names(plotDf))]  # drop by name (safer)
# Both drops above only print; undo the copy so that the in-place rename
# below does not leave two columns called "age".
plotDf$age <- NULL
# Option 2: rename in place, locating the column by name rather than by a
# hard-coded position.
names(plotDf)[names(plotDf) == "demo_age"] <- "age"
# dplyr
plotDf <- read_csv(urlfile)  # reload so demo_age exists again
rename(plotDf, age = demo_age)
# Select rows ####
# Base R: index rows by position or by a logical vector
plotDf[1:5, ]
plotDf[plotDf$id > 199, ]
plotDf[with(plotDf, id > 199 & demo_gender == "Male"), ]
plotDf[grepl(" A", plotDf$arm), ]  # pattern match on the arm column
# The logical index can be built first and reused
r <- plotDf$id > 199
plotDf[r, ]
r <- plotDf$id %in% c(201, 202, 101)
plotDf[r, ]
subset(plotDf, id > 199)
# dplyr: conditions separated by commas are combined with AND
plotDf %>% filter(id > 199, demo_gender == "Male")
plotDf %>% filter(id > 199 | demo_gender == "Male")
plotDf %>%
  filter(
    (id > 199 & demo_gender == "Male") |
      (id < 199 & demo_gender == "Female")
  )
# Putting rows back together ----
a <- filter(plotDf, id > 199)
b <- filter(plotDf, id <= 199)
rbind(a, b)

# rbind() on data frames matches columns by name, so it fails as soon as one
# piece has a renamed column.
a <- filter(plotDf, id > 199) %>%
  rename(age = demo_age)
b <- filter(plotDf, id <= 199)
# Intentional error for demonstration; try() reports it without aborting the
# rest of the script when the file is source()d.
try(rbind(a, b))
# Fix: make the column names agree (positional, so the column order must
# already be the same in a and b), then bind again.
names(b) <- names(a)
rbind(a, b)
# select columns ####
# Base R
class(plotDf$glvef)   # `$` pulls out the column itself
class(plotDf[, 1])    # result type differs between data.frame and tibble
plotDf[[1]]           # `[[` extracts a single column, dropping the container
plotDf[1]             # `[` keeps the container: a one-column table
plotDf[1, 1]          # single cell: row 1, column 1
plotDf[[1]][1]        # first element of the first column
plotDf[, "glvef"]
plotDf[, grepl("demo", names(plotDf))]  # every column whose name contains "demo"
# dplyr
plotDf %>% select(id, glvef)
plotDf %>% select(glvef:id)               # a range of adjacent columns
plotDf %>% select(starts_with("demo"))
plotDf %>% select(id, GLVEF = glvef)      # select and rename in one step
# Move the key columns to the front and keep everything else after them
plotDf <- plotDf %>% select(arm, id, time, everything())
# Putting columns back together
a <- plotDf %>% select(id, glvef)
b <- plotDf %>% select(starts_with("demo"))
cbind(a, b) # danger: columns are glued by row position, no key is checked
# Safer: carry the key columns in both pieces and join on them
a <- plotDf %>% select(id, time, glvef)
b <- plotDf %>% select(id, time, starts_with("demo"))
g <- full_join(a, b, by = c("id", "time"))
cbind(a, b) # danger: same positional glue, and id/time now appear twice
# New/modify variables ----
# Base R
plotDf$bmi <- plotDf$demo_wt_kg / (plotDf$demo_ht_cm / 100)^2
# Overwrite selected values by indexing the column directly. This avoids
# extracting and re-inserting whole rows, and does not fail when the
# condition contains NA the way a data-frame row assignment does.
# Note: bmi was computed above and is not refreshed by this edit.
plotDf$demo_ht_cm[plotDf$id == 107] <- NA
View(plotDf)
plotDf$demo_age[plotDf$demo_gender == "Male"] <- NA
plotDf <- read_csv(urlfile)  # reload to discard the demo edits above
# dplyr
plotDf <- mutate(plotDf, bmi = demo_wt_kg / (demo_ht_cm / 100)^2)
mutate(plotDf, bmi_rank = percent_rank(bmi)) %>% View()
# The same two steps chained, keeping the result
plotDf <- mutate(plotDf, bmi = demo_wt_kg / (demo_ht_cm / 100)^2) %>%
  mutate(bmi_rank = percent_rank(bmi))
# Arrange by rows
# Base R: order() returns the row permutation, which is then used as an index
plotDf[with(plotDf, order(demo_age)), ]
plotDf[with(plotDf, order(arm, id, time)), ]
# dplyr
plotDf %>% arrange(arm, id, time)
# Unique rows
# Base R
plotDf[!duplicated(plotDf$id), ]  # first row seen for each id
unique(plotDf["id"])              # one-column table of distinct ids
# dplyr
plotDf %>% distinct(id, .keep_all = TRUE)
as.numeric(unlist(distinct(plotDf, id)))  # distinct ids as a bare numeric vector
plotDf %>% distinct(id, time, .keep_all = TRUE)
# Summarizing
# Whole-table summary: one row out
plotDf %>%
  summarise(
    n = n(),
    mean_glvef = mean(glvef, na.rm = TRUE),
    min = min(glvef, na.rm = TRUE)
  )
# One row per arm x gender combination
plotDf %>%
  group_by(arm, demo_gender) %>%
  summarise(
    n = n(),
    mean_glvef = mean(glvef, na.rm = TRUE)
  )
# Group, summarise, join
# Compute per-arm statistics, then attach them to every row of that arm
plotDf %>%
  group_by(arm) %>%
  summarise(
    n = n(),
    mean_glvef = mean(glvef, na.rm = TRUE)
  ) %>%
  full_join(plotDf, by = "arm") %>%
  View()
# Group, mutate (no join needed) ----
# A grouped mutate() returns one row per input row with all original columns
# still attached, so there is nothing to join back. Joining the result to
# plotDf on demo_gender would pair every row with every other row of the same
# gender. The rank is computed from height within gender, hence `ht_rank`.
group_by(plotDf, demo_gender) %>%
  mutate(ht_rank = percent_rank(demo_ht_cm)) %>%
  ungroup() %>%
  View()
# Back to our plot ####
# Reload the raw data and turn arm and time into factors in a single mutate().
# `labels` are applied, in order, to arm's existing levels; `levels` fixes the
# display order of time.
plotDf <- read.csv(urlfile) %>%
  mutate(
    arm = factor(arm, labels = c("Placebo", "Low Dose", "High Dose")),
    time = factor(time, levels = c("Baseline", "8 weeks"))
  )
# Mean and SD of GLVEF for every arm x time cell, computed once.
# This replaces six copy-pasted geom_pointrange() layers that each filtered
# plotDf and recomputed the same mean/SD for every row of the subset (drawing
# one identical glyph per row). `x` places the summary just outside the
# violins: 0.5 for Baseline, 2.5 for 8 weeks.
glvefSummary <- plotDf %>%
  filter(!is.na(arm), !is.na(time)) %>%
  group_by(arm, time) %>%
  summarise(
    mean_glvef = mean(glvef, na.rm = TRUE),
    sd_glvef = sd(glvef, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(x = ifelse(time == "Baseline", 0.5, 2.5))

# time is a factor; as.numeric() maps Baseline -> 1 and 8 weeks -> 2 so the
# x axis is continuous and the per-patient lines can connect the two visits.
ggplot(plotDf, aes(x = as.numeric(time), y = glvef, shape = arm)) +
  # Background distribution at each visit
  geom_violin(aes(group = time),
              color = "grey90", fill = "grey90", width = .5) +
  # One dashed line and a pair of points per patient
  geom_line(aes(group = id), alpha = 0.7, linetype = "dashed") +
  geom_point(aes(group = id)) +
  # Overall linear trend within each arm.
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
  # `size` is kept for compatibility with older installs.
  geom_smooth(method = "lm", se = FALSE, size = 2, color = "black") +
  # Mean +/- 1 SD per arm and visit
  geom_pointrange(data = glvefSummary,
                  aes(x = x,
                      y = mean_glvef,
                      ymin = mean_glvef - sd_glvef,
                      ymax = mean_glvef + sd_glvef)) +
  facet_wrap(~arm) +
  scale_x_continuous(breaks = c(1, 2),
                     labels = c("Baseline", "8 weeks")) +
  # scale_color_brewer(guide = FALSE, palette = "Set1") +
  # scale_fill_manual(guide = FALSE, palette = "Set1") +
  # The facets already label the arms, so the shape legend is suppressed.
  # guide = FALSE is deprecated; "none" is the supported spelling.
  scale_shape(guide = "none") +
  xlab("") +
  ylab("GLVEF (%)") +
  ylim(0, 75) +
  theme_base() +
  theme(panel.spacing = unit(2, "lines"),
        panel.border = element_rect(color = "white"),
        axis.text.y = element_text(size = 16),
        strip.text = element_text(size = 16))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment