andrewheiss/reading-charts.md

## reading-charts.md

      
    Raw
  

              reading-charts.md
            
          
    Data via Goodreads + Day One
Load and process data

library(tidyverse)
library(forcats)
library(ggstance)
library(gridExtra)
library(viridis)
library(scales)
library(lubridate)

# Shared ggplot2 theme for the charts in this file: theme_minimal() set in
# Raleway, with a semi-bold title and extra-light subtitle and caption.
#
# base_size: base font size, passed straight through to theme_minimal().
# Returns a theme object that can be added to a plot with `+`.
theme_reading <- function(base_size = 11) {
  title_text <- element_text(family = "Raleway SemiBold", face = "plain",
                             size = rel(1.4))
  subtitle_text <- element_text(family = "Raleway ExtraLight", face = "plain")
  caption_text <- element_text(family = "Raleway ExtraLight", face = "plain",
                               size = rel(0.8))

  minimal_raleway <- theme_minimal(base_family = "Raleway",
                                   base_size = base_size)

  minimal_raleway +
    theme(plot.title = title_text,
          plot.subtitle = subtitle_text,
          plot.caption = caption_text)
}
# Load the Goodreads export, give the columns used later syntactic names, and
# patch a few values by hand.
books_raw <- read_csv("data/goodreads_library_export.csv") %>%
  rename(
    date_read = `Date Read`,
    pages = `Number of Pages`,
    year_published = `Year Published`,
    rating = `My Rating`,
    avg_rating = `Average Rating`
  ) %>%
  mutate(
    # Replace one unwieldy title with a shorter label
    Title = recode(
      Title,
      `Holy Bible with The Apocrypha New Revised Standard Version` =
        "The New Testament (NRSV)"
    ),
    # Collapse the binding labels into fewer categories
    Binding = recode(
      Binding,
      `Kindle Edition` = "Kindle",
      `ebook` = "Kindle",
      `paper` = "Paperback"
    ),
    # Override the page count of the book with Goodreads id 6599138
    pages = ifelse(`Book Id` == 6599138, 205, pages)
  )
  
# Keep only the books whose shelf is exactly "religious" and that were
# finished after 2015-01-21, sorted by the date they were finished.
books_religious <- books_raw %>%
  filter(Bookshelves == "religious", date_read > ymd("2015-01-21")) %>%
  arrange(date_read)
  
# Write the title/author pairs to a scratch file. As the file name warns, it
# is overwritten on every run; the hand-edited copy is read from
# data/categorized.csv below.
select(books_religious, Title, Author) %>%
  write_csv("data/categorized_WILL_BE_OVERWRITTEN.csv")

# Read the hand-categorized list, dropping Author so the join below does not
# bring in a second Author column.
categorized <- select(read_csv("data/categorized.csv"), -Author)

# Attach the manual categorization to each book, matching on Title
books <- left_join(books_religious, categorized, by = "Title")
  
# Presentation version of the book list: additional authors are appended to
# the main author in parentheses, and the columns are picked and renamed
# for display.
books_pretty <- books %>%
  mutate(Author = ifelse(is.na(`Additional Authors`),
                         Author,
                         paste0(Author, " (", `Additional Authors`, ")"))) %>%
  select(Title,
         Author,
         Publisher,
         `Year published` = year_published,
         `Date finished` = date_read,
         Rating = rating,
         Category,
         Type = Binding,
         Pages = pages)
# library(jsonlite)
# fromJSON("data/Gospel study.json")$entries %>%
#   select(timeZone, creationDate) %>%
#   write_csv("data/reading_times.csv")

reading_raw <- read_csv("data/reading_times.csv")

# Turn each entry's UTC creation time into local wall-clock time, then derive
# the "reading day" that the entry counts towards.
reading <- reading_raw %>%
  mutate(creationDate = ymd_hms(creationDate, tz = "UTC")) %>%
  # Convert UTC datetime to local datetime based on the timezone column.
  # lubridate::with_tz() only accepts a single time zone:
  # https://stackoverflow.com/questions/33848563/with-tz-with-a-vector-of-timezones
  # https://github.com/tidyverse/lubridate/issues/359
  # so the rows are grouped by time zone and every group is formatted with its
  # own zone. Grouping is what makes `timeZone[1]` the right zone for each row;
  # without it the first zone in the file would be applied to every entry.
  # NOTE(review): assumes every row has a valid time zone name - confirm.
  group_by(timeZone) %>%
  # An explicit format string keeps the time part even when every timestamp in
  # a group falls exactly on midnight (the default format would drop it).
  mutate(local_datetime = format(creationDate,
                                 format = "%Y-%m-%d %H:%M:%S",
                                 tz = timeZone[1])) %>%
  ungroup() %>%
  # Re-parse the local wall-clock string; the result is labelled UTC but holds
  # local clock time.
  mutate(local_datetime = ymd_hms(local_datetime)) %>%
  # If the reading time is after midnight, but before 5 AM, count it as the
  # previous day
  mutate(reading_date_actual = if_else(hour(local_datetime) < 5,
                                       local_datetime - days(1),
                                       local_datetime),
         reading_date_only = ymd(format(reading_date_actual, "%Y-%m-%d")))
The chain!

# Days with a recorded reading up to the cutoff, one row per day.
reading_pre <- reading %>%
  # Compare datetime with datetime: passing `tz` makes ymd() return a POSIXct.
  # Comparing a POSIXct against a Date falls back to comparing the raw numbers
  # (seconds vs. days since the epoch), which is never the intended test.
  filter(reading_date_actual <= ymd("2015-01-21", tz = "UTC")) %>%
  select(reading_day = reading_date_only) %>%
  # Several readings on the same day would otherwise duplicate that day when
  # this table is joined onto the calendar.
  distinct() %>%
  mutate(read = TRUE)
  
# One row per calendar day from 2014-10-06 to 2017-10-15, flagged with whether
# something was read that day.
calendar <- tibble(reading_day = seq(ymd("2014-10-06"), ymd("2017-10-15"),
                                     by = "1 day")) %>%
  left_join(reading_pre, by = "reading_day") %>%
  # Days found in reading_pre are already TRUE. Any other day counts as read
  # from 2015-01-20 onwards and as not read before that.
  mutate(read = coalesce(read, reading_day >= ymd("2015-01-20"))) %>%
  mutate(month_year = paste(month(reading_day), year(reading_day)),
         day = day(reading_day)) %>%
  # Keep months in chronological order, reversed so the earliest month is the
  # last factor level.
  mutate(month_year = fct_rev(fct_inorder(month_year, ordered = TRUE)))
  
# Axis labels for the calendar: one row per month from October 2014 through
# October 2017. Every January, plus the very first month, is prefixed with its
# year; all other months show only the month name.
nice_months <- expand.grid(
  month = month(1:12, label = TRUE, abbr = FALSE),
  year = 2014:2017
) %>%
  mutate(temp_date = ymd(paste(year, month, "1"))) %>%
  filter(temp_date >= ymd("2014-10-01") & temp_date <= ymd("2017-10-01")) %>%
  mutate(
    month_year_clean = if_else(
      month == "January" | (month == "October" & year == 2014),
      paste0(year, "           ", month),
      as.character(month)
    )
  )
  
# Calendar heatmap: one tile per day, one row per month, coloured by whether
# something was read that day.
plot_calendar <- ggplot(calendar, aes(x = day, y = month_year, fill = read)) +
  geom_tile(color = "white", size = 0.25) +
  # Name the colours so each one stays tied to its value even if only one of
  # TRUE/FALSE is present in the data.
  scale_fill_manual(values = c("FALSE" = "grey90", "TRUE" = "#F0A84F")) +
  scale_x_continuous(expand = c(0, 0)) +
  # month_year is in reversed chronological order, so the labels are reversed
  # to match.
  scale_y_discrete(labels = rev(nice_months$month_year_clean)) +
  labs(x = NULL, y = NULL, title = "Don’t break the chain!",
       subtitle = "After months of sputtering, the chain started on January 15, 2015") +
  # "none" is the supported way to drop a legend; a logical here is deprecated
  guides(fill = "none") +
  coord_equal() +
  theme_reading() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        axis.text.x = element_blank())

plot_calendar

ggsave(filename = "output/chain-calendar.png", plot = plot_calendar,
       width = 6, height = 6, dpi = 300, type = "cairo")
Reading times

# Readings made after the cutoff, with every timestamp moved onto one pair of
# dummy dates so that only the time of day matters in the histogram.
reading_streak <- reading %>%
  # Compare datetime with datetime: passing `tz` makes ymd() return a POSIXct
  # rather than a Date, which cannot be compared meaningfully with a POSIXct.
  filter(local_datetime > ymd("2015-01-21", tz = "UTC")) %>%
  # For the sake of the histogram, move any times between midnight and 5 AM to
  # the next day. The cutoff is `< 5`, the same one used to compute
  # reading_date_actual, so a 5:30 AM reading counts as a morning reading.
  mutate(time_only = if_else(
    hour(local_datetime) < 5,
    update(local_datetime, year = 2017, month = 10, mday = 14),
    update(local_datetime, year = 2017, month = 10, mday = 13)
  ))
  
# Histogram of the time of day of each reading, in 15-minute bins
# (binwidth is given in seconds).
plot_times <- ggplot(reading_streak, aes(x = time_only)) +
  geom_histogram(binwidth = 15 * 60, fill = "#84329F") +
  scale_x_datetime(breaks = date_breaks("2 hour"),
                   labels = date_format("%H:%M")) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = NULL, y = "Times read at given time",
       title = "Do not procrastinate the [time] of your [reading]*",
       subtitle = "lolz so many midnight readings", caption = "*Alma 34:33") +
  # All theme tweaks come after theme_reading(): it is a complete theme, so
  # any theme() call placed before it would be discarded.
  theme_reading() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        axis.text.x = element_text(angle = 0))

plot_times

ggsave(filename = "output/reading-times.png", plot = plot_times,
       width = 6, height = 3, dpi = 300, type = "cairo")
List of books read

# Markdown version of the pretty table, with titles wrapped in asterisks so
# they render in italics.
md_books <- books_pretty %>%
  mutate(Title = paste0("*", Title, "*"))

# md_books %>% knitr::kable()

# The CSV gets the plain titles. The output file is passed positionally, the
# same way as in the other write_csv() call in this file.
write_csv(books_pretty, "output/books-read.csv")

# DT::datatable(books_pretty)

# Render the Markdown table from md_books (the cleaned-up columns with
# italicized titles) rather than from the raw `books` table.
pander::pandoc.table.return(md_books, split.tables = Inf, style = "rmarkdown") %>%
  cat(file = "output/books-read.md")
Binding type

# Number of books per binding, most common first. Binding becomes an ordered
# factor whose levels follow that order.
books_types <- books %>%
  group_by(Binding) %>%
  # Summarising over the only grouping variable already returns an ungrouped
  # table
  summarise(num = n()) %>%
  arrange(desc(num)) %>%
  mutate(Binding = fct_inorder(Binding, ordered = TRUE))
  
# Horizontal lollipop chart of the number of books per binding; each line runs
# from 0 to the count.
plot_types <- ggplot(books_types, aes(x = num, y = fct_rev(Binding), color = Binding)) +
  geom_pointrangeh(aes(xmin = 0, xmax = num), size = 2, fatten = 3) +
  scale_color_viridis(discrete = TRUE, option = "plasma", begin = 0.3, end = 0.8) +
  scale_x_continuous(expand = expand_scale(add = c(0, 1))) +
  labs(x = "Number of books", y = NULL, title = "Physical > electronic",
       subtitle = "I obviously prefer physical books over electronic ones") +
  # "none" is the supported way to drop a legend; a logical here is deprecated
  guides(color = "none") +
  theme_reading() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank())

plot_types

ggsave(filename = "output/types.png", plot = plot_types,
       width = 6, height = 2, dpi = 300, type = "cairo")
Genre of books

# Book count and total page count per category, largest book count first.
# Category becomes an ordered factor whose levels follow that order.
books_categories <- books %>%
  group_by(Category) %>%
  # Summarising over the only grouping variable already returns an ungrouped
  # table
  summarise(num = n(), pages = sum(pages)) %>%
  arrange(desc(num)) %>%
  mutate(Category = fct_inorder(Category, ordered = TRUE))
  
# Left half of the category chart: number of books per category. The x axis is
# reversed so the bars grow leftwards, and the category labels are hidden.
num_books <- ggplot(books_categories, aes(x = num, y = fct_rev(Category), fill = Category)) +
  geom_barh(stat = "identity") +
  scale_fill_viridis(discrete = TRUE, option = "plasma") +
  scale_x_reverse(expand = c(.1, .1)) +
  labs(x = "Number of books", y = NULL, title = "I’m a Mormon... obviously",
       subtitle = "I read more pages from the Qur’an than from either the Bible or Book of Mormon,\nbut that’s because of the book's intensely detailed commentaries") +
  # "none" is the supported way to drop a legend; a logical here is deprecated
  guides(fill = "none") +
  theme_reading() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        axis.text.y = element_blank())
        
# Right half of the category chart: number of pages per category, with the
# category labels centred on the y axis.
num_pages <- ggplot(books_categories, aes(x = pages, y = fct_rev(Category), fill = Category)) +
  geom_barh(stat = "identity") +
  scale_fill_viridis(discrete = TRUE, option = "plasma") +
  scale_x_continuous(labels = scales::comma, expand = c(.1, .1)) +
  labs(x = "Number of pages", y = NULL) +
  # "none" is the supported way to drop a legend; a logical here is deprecated
  guides(fill = "none") +
  theme_reading() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        axis.text.y = element_text(hjust = 0.5))
        
# Put the two category charts side by side by binding their grobs column-wise
books_pages_combined <- cbind(
  ggplotGrob(num_books),
  ggplotGrob(num_pages)
)

# Draw the combined figure on a fresh page
grid::grid.newpage()
grid::grid.draw(books_pages_combined)

ggsave(filename = "output/books-pages.png", plot = books_pages_combined,
       width = 6, height = 4, dpi = 300, type = "cairo")
Book ratings

# Use Font Awesome icons!
# Look at the Unicode lookup table at http://fontawesome.io/cheatsheet/
# &#xf005; = \UF005
#
# Download font from https://github.com/FortAwesome/Font-Awesome
fa_star <- "\Uf005"
fa_star_o <- "\Uf006"

# Five-glyph rating labels: filled stars first, then outlined ones, with a
# single space between neighbouring glyphs.
stars3 <- paste(rep(c(fa_star, fa_star_o), times = c(3, 2)), collapse = " ")
stars4 <- paste(rep(c(fa_star, fa_star_o), times = c(4, 1)), collapse = " ")
stars5 <- paste(rep(fa_star, times = 5), collapse = " ")

# Number of books per rating, with the numeric ratings 3, 4 and 5 replaced by
# their star strings.
books_ratings <- books %>%
  group_by(rating) %>%
  # Summarising over the only grouping variable already returns an ungrouped
  # table
  summarise(num = n()) %>%
  mutate(rating = recode(rating, `3` = stars3, `4` = stars4, `5` = stars5))
  
# Horizontal lollipop chart of the number of books per star rating. The y-axis
# labels are Font Awesome glyphs, so that axis text uses the FontAwesome font.
plot_ratings <- ggplot(books_ratings, aes(x = num, y = fct_rev(rating), color = rating)) +
  geom_pointrangeh(aes(xmin = 0, xmax = num), size = 2, fatten = 3) +
  scale_color_viridis(discrete = TRUE, option = "plasma") +
  scale_x_continuous(expand = expand_scale(add = c(0, 1))) +
  labs(x = "Number of books", y = NULL, title = "Stars everywhere",
       subtitle = "I loved pretty much every book I read (sorry Augustine's Confessions, though)") +
  # "none" is the supported way to drop a legend; a logical here is deprecated
  guides(color = "none") +
  theme_reading() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        axis.text.y = element_text(family = "FontAwesome"))

plot_ratings

ggsave(filename = "output/ratings.png", plot = plot_ratings,
       width = 6, height = 2, dpi = 300, type = "cairo")