# Data via Goodreads + Day One
library(tidyverse)
library(forcats)
library(ggstance)
library(gridExtra)
library(viridis)
library(scales)
library(lubridate)
# Shared ggplot2 theme for the figures in this script: theme_minimal() set in
# Raleway, with different Raleway weights for the title, subtitle, and caption.
#
# base_size: base font size handed to theme_minimal() (default 11).
# Returns theme_minimal() with the text overrides added on top.
theme_reading <- function(base_size = 11) {
  text_overrides <- theme(
    plot.title = element_text(
      family = "Raleway SemiBold", face = "plain", size = rel(1.4)
    ),
    plot.subtitle = element_text(family = "Raleway ExtraLight", face = "plain"),
    plot.caption = element_text(
      family = "Raleway ExtraLight", face = "plain", size = rel(0.8)
    )
  )

  theme_minimal(base_family = "Raleway", base_size = base_size) + text_overrides
}
# Load books
# Read the Goodreads export, give the columns used later short snake_case
# names, and apply a handful of manual value fixes.
books_raw <- read_csv("data/goodreads_library_export.csv") %>%
  rename(
    date_read = `Date Read`,
    pages = `Number of Pages`,
    year_published = `Year Published`,
    rating = `My Rating`,
    avg_rating = `Average Rating`
  ) %>%
  mutate(
    # Replace one long title with a shorter label
    Title = recode(
      Title,
      `Holy Bible with The Apocrypha New Revised Standard Version` =
        "The New Testament (NRSV)"
    ),
    # Collapse the binding labels into fewer categories
    Binding = recode(
      Binding,
      `Kindle Edition` = "Kindle",
      `ebook` = "Kindle",
      `paper` = "Paperback"
    ),
    # Override the page count for Book Id 6599138
    pages = ifelse(`Book Id` == 6599138, 205, pages)
  )
# Books whose Bookshelves value is exactly "religious" and that were finished
# after 2015-01-21, ordered from earliest to latest finish date
books_religious <- books_raw %>%
  filter(
    Bookshelves == "religious",
    date_read > ymd("2015-01-21")
  ) %>%
  arrange(date_read)
# Export books for manual categorization
# (written under a throwaway name; the file read back below is a separate one)
write_csv(
  select(books_religious, Title, Author),
  "data/categorized_WILL_BE_OVERWRITTEN.csv"
)

# Read the categorized file; Author is dropped so the join below does not
# bring in a second copy of that column
categorized <- select(read_csv("data/categorized.csv"), -Author)

# Add manual categorization, matching on Title
books <- left_join(books_religious, categorized, by = "Title")
# Pretty output for Markdown
# One row per book with reader-friendly column names. Additional authors, when
# present, are appended to the main author in parentheses.
books_pretty <- books %>%
  mutate(
    author_full = ifelse(
      is.na(`Additional Authors`),
      Author,
      paste0(Author, " (", `Additional Authors`, ")")
    )
  ) %>%
  select(
    Title,
    Author = author_full,
    Publisher,
    `Year published` = year_published,
    `Date finished` = date_read,
    Rating = rating,
    Category,
    Type = Binding,
    Pages = pages
  )
# Commented-out one-off that writes data/reading_times.csv from a JSON export:
# library(jsonlite)
# fromJSON("data/Gospel study.json")$entries %>%
#   select(timeZone, creationDate) %>%
#   write_csv("data/reading_times.csv")
reading_raw <- read_csv("data/reading_times.csv")

# Convert each UTC timestamp to local wall-clock time using that row's own
# timeZone value.
#
# lubridate::with_tz() only accepts a single time zone:
# https://stackoverflow.com/questions/33848563/with-tz-with-a-vector-of-timezones
# https://github.com/tidyverse/lubridate/issues/359
# so the timestamps are formatted one timeZone group at a time and then
# re-parsed. The previous version nested every column into a single row and
# therefore applied the first row's time zone to all entries.
#
# After re-parsing, local_datetime holds the local clock time labelled as UTC
# (the ymd_hms() default), which is all that the hour-of-day logic below needs.
reading <- reading_raw %>%
  mutate(creationDate = ymd_hms(creationDate, tz = "UTC")) %>%
  group_by(timeZone) %>%
  # An explicit format keeps the time part even when every value in a group is
  # exactly midnight (the default format would drop it and break ymd_hms())
  mutate(local_datetime = format(creationDate,
                                 format = "%Y-%m-%d %H:%M:%S",
                                 tz = timeZone[1])) %>%
  ungroup() %>%
  mutate(local_datetime = ymd_hms(local_datetime)) %>%
  # If the reading time is after midnight, but before 5 AM, count it as the
  # previous day
  mutate(reading_date_actual = if_else(hour(local_datetime) < 5,
                                       local_datetime - days(1),
                                       local_datetime),
         reading_date_only = as_date(reading_date_actual))
# Days with a logged reading up to the start of the streak.
# The cutoff is built as a POSIXct (tz = "UTC", matching reading_date_actual):
# comparing a POSIXct against a Date dispatches to incompatible methods and
# ends up comparing seconds with days.
# distinct() keeps one row per day so the join below cannot duplicate
# calendar days when there were several readings on the same day.
reading_pre <- reading %>%
  filter(reading_date_actual <= ymd("2015-01-21", tz = "UTC")) %>%
  select(reading_day = reading_date_only) %>%
  distinct() %>%
  mutate(read = TRUE)

# One row per calendar day from 2014-10-06 through 2017-10-15.
# `read` is TRUE for days found in reading_pre; for all other days it is TRUE
# from 2015-01-20 onward and FALSE before that.
calendar <- tibble(reading_day = seq(ymd("2014-10-06"), ymd("2017-10-15"),
                                     by = "1 day")) %>%
  left_join(reading_pre, by = "reading_day") %>%
  mutate(read = coalesce(read, reading_day >= ymd("2015-01-20"))) %>%
  mutate(month_year = paste(month(reading_day), year(reading_day)),
         day = day(reading_day)) %>%
  # Levels follow order of appearance (chronological) and are then reversed,
  # which puts the earliest month at the top of the y axis
  mutate(month_year = fct_rev(fct_inorder(month_year, ordered = TRUE)))
# Axis labels for the calendar: one row per month from October 2014 through
# October 2017. The year is shown only for each January and for the very
# first month (October 2014); every other row is just the month name.
nice_months <- expand.grid(
  month = month(1:12, label = TRUE, abbr = FALSE),
  year = 2014:2017
) %>%
  mutate(temp_date = ymd(paste(year, month, "1"))) %>%
  filter(
    temp_date >= ymd("2014-10-01"),
    temp_date <= ymd("2017-10-01")
  ) %>%
  mutate(
    month_year_clean = if_else(
      month == "January" | (month == "October" & year == 2014),
      paste0(year, " ", month),
      as.character(month)
    )
  )
# Calendar heatmap: one tile per day, one row per month, filled according to
# whether the day counts as read.
plot_calendar <- ggplot(calendar, aes(x = day, y = month_year, fill = read)) +
  geom_tile(color = "white", size = 0.25)

# Scales: grey for FALSE, orange for TRUE; the month labels are reversed to
# line up with the reversed month_year factor
plot_calendar <- plot_calendar +
  scale_fill_manual(values = c("grey90", "#F0A84F")) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_discrete(labels = rev(nice_months$month_year_clean))

# Labels, square tiles, no legend
plot_calendar <- plot_calendar +
  labs(
    x = NULL,
    y = NULL,
    title = "Don’t break the chain!",
    subtitle = "After months of sputtering, the chain started on January 15, 2015"
  ) +
  guides(fill = FALSE) +
  coord_equal()

# Theme: no grid lines and no day-of-month labels
plot_calendar <- plot_calendar +
  theme_reading() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    axis.text.x = element_blank()
  )

plot_calendar

ggsave(
  filename = "output/chain-calendar.png",
  plot = plot_calendar,
  width = 6, height = 6, dpi = 300, type = "cairo"
)
# Readings logged after the start of the streak.
# The cutoff is a POSIXct (tz = "UTC", matching local_datetime): comparing a
# POSIXct against a Date dispatches to incompatible methods and ends up
# comparing seconds with days.
reading_streak <- reading %>%
  filter(local_datetime > ymd("2015-01-21", tz = "UTC")) %>%
  # Put every reading on the same dummy date so only the time of day matters.
  # For the sake of the histogram, times after midnight but before 5 AM go on
  # the following dummy day (2017-10-14) so they appear after the late-evening
  # readings; 5 AM is the same day boundary used when building `reading`.
  mutate(time_only = case_when(
    hour(local_datetime) < 5 ~
      update(local_datetime, year = 2017, month = 10, mday = 14),
    TRUE ~ update(local_datetime, year = 2017, month = 10, mday = 13)
  ))

# Histogram of reading times in 15-minute bins (binwidth is in seconds).
# theme_reading() is a complete theme, so any theme() tweak has to be added
# after it to have an effect.
plot_times <- ggplot(reading_streak, aes(x = time_only)) +
  geom_histogram(binwidth = 15 * 60, fill = "#84329F") +
  scale_x_datetime(breaks = date_breaks("2 hour"),
                   labels = date_format("%H:%M")) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = NULL, y = "Times read at given time",
       title = "Do not procrastinate the [time] of your [reading]*",
       subtitle = "lolz so many midnight readings", caption = "*Alma 34:33") +
  theme_reading() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        axis.text.x = element_text(angle = 0))

plot_times

ggsave(plot_times, filename = "output/reading-times.png",
       width = 6, height = 3, dpi = 300, type = "cairo")
# Markdown flavour of the pretty table: titles wrapped in * for italics
md_books <- books_pretty %>%
  mutate(Title = paste0("*", Title, "*"))

# md_books %>% knitr::kable()

# Plain CSV copy of the pretty table. The output path is passed positionally,
# as in the earlier write_csv() call, rather than through the deprecated
# `path` argument.
write_csv(books_pretty, "output/books-read.csv")

# DT::datatable(books_pretty)

# Markdown table built from md_books (the cleaned-up columns with italic
# titles) instead of the raw `books` data frame; split.tables = Inf keeps
# pander from breaking the table into several narrower ones
pander::pandoc.table.return(md_books, split.tables = Inf, style = "rmarkdown") %>%
  cat(file = "output/books-read.md")
# Number of books per binding, most common first; Binding becomes an ordered
# factor in that same order
books_types <- summarize(group_by(books, Binding), num = n())
books_types <- books_types %>%
  ungroup() %>%
  arrange(desc(num)) %>%
  mutate(Binding = fct_inorder(Binding, ordered = TRUE))

# Horizontal lollipop chart: a line from 0 to the count for each binding,
# with the most common binding at the top
plot_types <- ggplot(
  books_types,
  aes(x = num, y = fct_rev(Binding), color = Binding)
) +
  geom_pointrangeh(aes(xmin = 0, xmax = num), size = 2, fatten = 3) +
  scale_color_viridis(discrete = TRUE, option = "plasma", begin = 0.3, end = 0.8) +
  scale_x_continuous(expand = expand_scale(add = c(0, 1))) +
  labs(
    x = "Number of books",
    y = NULL,
    title = "Physical > electronic",
    subtitle = "I obviously prefer physical books over electronic ones"
  ) +
  guides(color = FALSE) +
  theme_reading() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank()
  )

plot_types

ggsave(
  filename = "output/types.png",
  plot = plot_types,
  width = 6, height = 2, dpi = 300, type = "cairo"
)
# Book count and total page count per Category, largest book count first;
# Category becomes an ordered factor in that same order
books_categories <- summarize(
  group_by(books, Category),
  num = n(),
  pages = sum(pages)
)
books_categories <- books_categories %>%
  ungroup() %>%
  arrange(desc(num)) %>%
  mutate(Category = fct_inorder(Category, ordered = TRUE))
# Left panel: books per category. The x axis is reversed so the bars grow to
# the left, and the category labels are hidden on this panel.
num_books <- ggplot(
  books_categories,
  aes(x = num, y = fct_rev(Category), fill = Category)
) +
  geom_barh(stat = "identity") +
  scale_fill_viridis(discrete = TRUE, option = "plasma") +
  scale_x_reverse(expand = c(.1, .1)) +
  labs(
    x = "Number of books",
    y = NULL,
    title = "I’m a Mormon... obviously",
    subtitle = "I read more pages from the Qur’an than from either the Bible or Book of Mormon,\nbut that’s because of the book's intensely detailed commentaries"
  ) +
  guides(fill = FALSE) +
  theme_reading() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_blank()
  )

# Right panel: pages per category, with the category labels centered so they
# sit between the two panels
num_pages <- ggplot(
  books_categories,
  aes(x = pages, y = fct_rev(Category), fill = Category)
) +
  geom_barh(stat = "identity") +
  scale_fill_viridis(discrete = TRUE, option = "plasma") +
  scale_x_continuous(labels = scales::comma, expand = c(.1, .1)) +
  labs(x = "Number of pages", y = NULL) +
  guides(fill = FALSE) +
  theme_reading() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(hjust = 0.5)
  )

# Put the two panels side by side as grobs, draw the result on a fresh page,
# and save it
books_pages_combined <- cbind(
  ggplotGrob(num_books),
  ggplotGrob(num_pages)
)

grid::grid.newpage()
grid::grid.draw(books_pages_combined)

ggsave(
  filename = "output/books-pages.png",
  plot = books_pages_combined,
  width = 6, height = 4, dpi = 300, type = "cairo"
)
# Use Font Awesome icons!
# Look at the Unicode lookup table at http://fontawesome.io/cheatsheet/
#  = \UF005
#
# Download font from https://github.com/FortAwesome/Font-Awesome
# Code points used for the filled and outline star glyphs
fa_star <- "\Uf005"
fa_star_o <- "\Uf006"

# Five-glyph rating labels: N filled stars followed by outline stars, with the
# glyphs separated by single spaces
stars3 <- paste(c(rep(fa_star, 3), rep(fa_star_o, 2)), collapse = " ")
stars4 <- paste(c(rep(fa_star, 4), rep(fa_star_o, 1)), collapse = " ")
stars5 <- paste(rep(fa_star, 5), collapse = " ")
# Number of books per rating, with ratings 3, 4, and 5 replaced by their star
# strings (stars3 / stars4 / stars5)
books_ratings <- books %>%
  group_by(rating) %>%
  summarize(num = n()) %>%
  ungroup() %>%
  mutate(rating = recode(rating, `3` = stars3, `4` = stars4, `5` = stars5))

# Horizontal lollipop chart of the rating counts. The y-axis text is drawn in
# the FontAwesome family so the star code points render as icons.
# (Subtitle spelling corrected: "Augustine's".)
plot_ratings <- ggplot(books_ratings, aes(x = num, y = fct_rev(rating), color = rating)) +
  geom_pointrangeh(aes(xmin = 0, xmax = num), size = 2, fatten = 3) +
  scale_color_viridis(discrete = TRUE, option = "plasma") +
  scale_x_continuous(expand = expand_scale(add = c(0, 1))) +
  labs(x = "Number of books", y = NULL, title = "Stars everywhere",
       subtitle = "I loved pretty much every book I read (sorry Augustine's Confessions, though)") +
  guides(color = FALSE) +
  theme_reading() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        axis.text.y = element_text(family = "FontAwesome"))

plot_ratings

ggsave(plot_ratings, filename = "output/ratings.png",
       width = 6, height = 2, dpi = 300, type = "cairo")