Last active
June 9, 2017 10:54
-
-
Save dantalus/3ffe96112c77be50c7d1c53f2c9b100a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used throughout the script. Attach them before anything else runs.
# install.packages(c("ggthemes", "ggbeeswarm"))
library(ggthemes)
library(ggbeeswarm)
library(ggplot2)
library(dplyr)

# Location of the CSV that every section of this script reads.
urlfile <- "https://raw.githubusercontent.com/dantalus/intro_workshop/master/plot.csv"
# Base R reader: the result is a plain data.frame.
plotDf <- read.csv(urlfile)
# What is a tibble?
# Inspect the class of the object that is currently in plotDf.
class(plotDf)
# as_tibble() returns a tibble version of the data; printing it shows the
# more compact tibble display. (as.tbl() is deprecated and as_tbl() does not
# exist in dplyr, so as_tibble() is used throughout.)
as_tibble(plotDf)
plotDf <- as_tibble(plotDf)
# Overwriting the class vector strips the tibble classes again.
class(plotDf) <- "data.frame"
library(readr)
# Three ways to end up with a tibble:
# 1. read it as a tibble directly with readr,
plotDf <- read_csv(urlfile)
# 2. wrap the base R reader,
plotDf <- as_tibble(read.csv(urlfile))
# 3. or pipe the base R result into the converter.
plotDf <- read.csv(urlfile) %>% as_tibble()
# Rename variables
# Base R
# Option 1: copy the column under the new name, then leave the old one out.
plotDf$age <- plotDf$demo_age
plotDf[, -5] # drop by position; presumably demo_age is column 5 -- confirm
plotDf[, !grepl("demo_age", names(plotDf))] # drop by name
# Option 2: rename in place. Remove the copy made above first, otherwise the
# data would end up with two columns called "age". The column is located by
# name rather than by a hard-coded position so a reordered file cannot cause
# the wrong column to be renamed.
plotDf$age <- NULL
names(plotDf)[names(plotDf) == "demo_age"] <- "age"
# dplyr
# Start again from the file so demo_age exists under its original name.
plotDf <- read_csv(urlfile)
# rename() returns a modified copy; plotDf itself is left unchanged here.
rename(plotDf, age = demo_age)
# Select rows ####
# Base R: index rows with positions or with a logical vector.
plotDf[1:5, ]
plotDf[plotDf$id > 199, ]
plotDf[plotDf$id > 199 & plotDf$demo_gender == "Male", ]
plotDf[grepl(" A", plotDf$arm), ] # character matching
# The logical vector can be stored first and reused.
r <- plotDf$id > 199
plotDf[r, ]
# %in% tests membership in a set of values.
r <- plotDf$id %in% c(201, 202, 101)
plotDf[r, ]
# subset() evaluates the condition inside the data frame, so no plotDf$ prefix.
subset(plotDf, id > 199)
# dplyr: filter() also looks column names up inside the data.
filter(plotDf, id > 199 & demo_gender == "Male")
filter(plotDf, id > 199 | demo_gender == "Male")
filter(
  plotDf,
  (id > 199 & demo_gender == "Male") |
    (id < 199 & demo_gender == "Female")
)
# Putting rows back together
# Two pieces with identical columns stack without trouble.
a <- filter(plotDf, id > 199)
b <- filter(plotDf, id <= 199)
rbind(a, b)
# Rename a column in only one piece and the column names no longer agree.
a <- filter(plotDf, id > 199) %>%
  rename(age = demo_age)
b <- filter(plotDf, id <= 199)
# try() shows the error but lets a sourced script carry on to the fix below.
try(rbind(a, b)) # error
# Fix: give both pieces the same names. This positional copy is only valid
# because a and b still have their columns in the same order.
names(b) <- names(a)
rbind(a, b)
# select columns ####
# Base R
# Compare what each indexing form gives back: `$` and `[[` extract a single
# column, `[` with one index keeps a one-column table, and two indices pick
# out cells.
plotDf$glvef %>% class()
plotDf[, 1] %>% class()
plotDf[[1]]
plotDf[1]
plotDf[1, 1]
plotDf[[1]][1]
plotDf[, "glvef"]
# Keep every column whose name contains "demo".
plotDf[, grepl("demo", names(plotDf))]
# dplyr
select(plotDf, id, glvef)
select(plotDf, glvef:id) # a range of adjacent columns
select(plotDf, starts_with("demo"))
select(plotDf, id, GLVEF = glvef) # select and rename in one step
# Move arm, id and time to the front and keep all other columns after them.
plotDf <- select(plotDf, arm, id, time, everything())
# Putting columns back together
a <- select(plotDf, id, glvef)
b <- select(plotDf, starts_with("demo"))
# cbind() pairs rows purely by position; nothing checks that row i of a and
# row i of b describe the same observation.
cbind(a, b) # danger
# Safer: carry the key columns in both pieces and join on them.
a <- select(plotDf, id, time, glvef)
b <- select(plotDf, id, time, starts_with("demo"))
g <- full_join(a, b, by = c("id", "time"))
# The same two pieces through cbind() are still matched only by position, and
# the id and time columns now appear twice.
cbind(a, b) # danger
# New/modify variables
# Base R
# BMI = weight (kg) / height (m)^2; height is stored in cm, hence the / 100.
plotDf$bmi <- plotDf$demo_wt_kg / (plotDf$demo_ht_cm / 100)^2
# Overwrite selected values by indexing the column itself. Unlike
# plotDf[cond, ]$col <- NA, this form does not fail when the condition
# contains NA or matches no rows.
plotDf$demo_ht_cm[plotDf$id == 107] <- NA
View(plotDf)
plotDf$demo_age[plotDf$demo_gender == "Male"] <- NA
# Throw the edits away and start again from the file.
plotDf <- read_csv(urlfile)
# dplyr
plotDf <- mutate(plotDf, bmi = demo_wt_kg / (demo_ht_cm / 100)^2)
# Pipe a derived table straight into the viewer without storing it.
mutate(plotDf, bmi_rank = percent_rank(bmi)) %>% View()
# Chain both steps and keep the result this time.
plotDf <- mutate(plotDf, bmi = demo_wt_kg / (demo_ht_cm / 100)^2) %>%
  mutate(bmi_rank = percent_rank(bmi))
# Arrange by rows
# Base R: order() returns the row permutation that sorts by the given keys.
plotDf[order(plotDf$demo_age), ]
plotDf[order(plotDf$arm, plotDf$id, plotDf$time), ]
# dplyr: same three-key sort, with the columns named directly.
arrange(plotDf, arm, id, time)
# Unique rows
# Base R
# Keep the first row seen for each id, with all of its columns.
plotDf[!duplicated(plotDf$id), ]
# Only the distinct id values, still as a one-column table.
unique(plotDf[c("id")])
# dplyr
# .keep_all = TRUE keeps the other columns of the first row per id.
distinct(plotDf, id, .keep_all = TRUE)
# Distinct ids flattened to a plain numeric vector.
distinct(plotDf, id) %>% unlist() %>% as.numeric()
# One row per id/time combination.
distinct(plotDf, id, time, .keep_all = TRUE)
# Summarizing
# Whole-table summary: a single row of statistics.
summarise(
  plotDf,
  n = n(),
  mean_glvef = mean(glvef, na.rm = TRUE),
  min = min(glvef, na.rm = TRUE)
)
# Grouped summary: one row per arm/gender combination. ungroup() removes the
# grouping that summarise() leaves on the result, so later verbs applied to
# it are not silently computed per arm.
group_by(plotDf, arm, demo_gender) %>%
  summarise(
    n = n(),
    mean_glvef = mean(glvef, na.rm = TRUE)
  ) %>%
  ungroup()
# Group, summarise, join
# Compute one row of statistics per arm, then attach those per-arm values to
# every original row that belongs to the same arm.
group_by(plotDf, arm) %>%
  summarise(
    n = n(),
    mean_glvef = mean(glvef, na.rm = TRUE)
  ) %>%
  full_join(plotDf, by = "arm") %>%
  View()
# Group, mutate, join
# Rank each height within its gender group. The new column is called ht_rank
# because it is computed from demo_ht_cm; calling it bmi_rank would overwrite
# the BMI-based bmi_rank column created earlier in this script.
# A grouped mutate() keeps every row, so only the key columns and the new
# column are carried into the join. Joining the full table back on
# demo_gender alone would pair every row with every other row of the same
# gender and multiply the row count.
# NOTE(review): assumes id + time identify a row uniquely, as the earlier
# join in this script does -- confirm against the data.
group_by(plotDf, demo_gender) %>%
  mutate(ht_rank = percent_rank(demo_ht_cm)) %>%
  ungroup() %>%
  select(id, time, ht_rank) %>%
  full_join(plotDf, by = c("id", "time")) %>%
  View()
# Back to our plot ####
# Start again from the raw file and turn the two grouping variables into
# factors.
# NOTE(review): factor(arm, labels = ...) relabels the sorted unique values of
# arm in order, so it presumably expects exactly three arm codes -- confirm.
# time is given explicit levels so that as.numeric(time) is 1 for "Baseline"
# and 2 for "8 weeks".
plotDf <- read.csv(urlfile) %>%
  mutate(
    arm = factor(arm, labels = c("Placebo", "Low Dose", "High Dose")),
    time = factor(time, levels = c("Baseline", "8 weeks"))
  )

# Mean and SD of glvef for every arm/time cell, computed once. This replaces
# six copy-pasted geom_pointrange() layers that each filtered one cell and
# evaluated mean()/sd() inside aes(). x_pos places the marker just outside
# the data columns: 0.5 (left of Baseline) or 2.5 (right of 8 weeks).
glvefSummary <- plotDf %>%
  filter(!is.na(arm), !is.na(time)) %>%
  group_by(arm, time) %>%
  summarise(
    glvef_mean = mean(glvef, na.rm = TRUE),
    glvef_sd = sd(glvef, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(x_pos = ifelse(time == "Baseline", 0.5, 2.5))

# x is shared by all raw-data layers, so it is mapped once here; the summary
# layer overrides it with x_pos.
ggplot(plotDf, aes(x = as.numeric(time), y = glvef, shape = arm)) +
  # Light grey violins as a backdrop showing the distribution per time point.
  geom_violin(aes(group = time),
              color = "grey90", fill = "grey90", width = .5) +
  # One dashed line per id connecting its measurements.
  geom_line(aes(group = id), alpha = 0.7, linetype = "dashed") +
  geom_point(aes(group = id)) +
  # Straight-line fit per panel. In ggplot2 >= 3.4.0 `linewidth` replaces
  # `size` for lines; `size` is kept for compatibility with older versions.
  geom_smooth(method = "lm", se = FALSE, size = 2, color = "black") +
  # Mean +/- 1 SD for each arm/time cell; the arm column routes each marker
  # to its own facet.
  geom_pointrange(data = glvefSummary,
                  aes(x = x_pos,
                      y = glvef_mean,
                      ymin = glvef_mean - glvef_sd,
                      ymax = glvef_mean + glvef_sd)) +
  facet_wrap(~arm) +
  theme_base() +
  scale_x_continuous(breaks = c(1, 2),
                     labels = c("Baseline", "8 weeks")) +
  # scale_color_brewer(guide = FALSE, palette = "Set1") +
  # scale_fill_manual(guide = FALSE, palette = "Set1") +
  # The facet strips already name the arm, so the shape legend is hidden.
  # "none" is the non-deprecated spelling of guide = FALSE.
  scale_shape(guide = "none") +
  xlab("") +
  ylab("GLVEF (%)") +
  ylim(0, 75) +
  theme(panel.spacing = unit(2, "lines"),
        panel.border = element_rect(color = "white"),
        axis.text.y = element_text(size = 16),
        strip.text = element_text(size = 16))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment