Skip to content

Instantly share code, notes, and snippets.

@mine-cetinkaya-rundel
Created September 5, 2020 22:50
Show Gist options
  • Save mine-cetinkaya-rundel/ac9471a03500b15a82eba233037af9d5 to your computer and use it in GitHub Desktop.
Save mine-cetinkaya-rundel/ac9471a03500b15a82eba233037af9d5 to your computer and use it in GitHub Desktop.
The one with the most words
# Load packages ----------------------------------------------------------------
library(tidytext)
library(friends)
library(tidyverse)
# The six friends, by the full names matched against `speaker` below ----------
six_friends <- c(
  "Phoebe Buffay",
  "Chandler Bing",
  "Rachel Green",
  "Monica Geller",
  "Joey Tribbiani",
  "Ross Geller"
)
# Episodes per season ----------------------------------------------------------
# One row per season; `episode_n` is the number of rows `friends_info` holds
# for that season.
episode_count <- count(friends_info, season, name = "episode_n")
# Make df to plot --------------------------------------------------------------
# Word counts per season and speaker (the six friends by first name, everyone
# else pooled as "Others"), plus `n_avg`: that count divided by the season's
# episode count.
df <- friends %>%
  # Drop scene directions and lines tagged "#ALL#"
  filter(speaker != "Scene Directions", speaker != "#ALL#") %>%
  mutate(
    # Single recode step: the six friends keep their first name, every other
    # speaker is pooled into "Others"
    speaker = if_else(
      speaker %in% six_friends,
      stringr::word(speaker, 1),
      "Others"
    ),
    # Fix the level order; the legend order in the plot follows it
    speaker = fct_relevel(
      speaker,
      "Ross", "Rachel", "Phoebe", "Monica", "Joey", "Chandler", "Others"
    )
  ) %>%
  # One row per word of `text`, so counting rows below counts words
  unnest_tokens(word, text) %>%
  # Left disabled by the author; enabling it would exclude `stop_words`
  # anti_join(stop_words) %>%
  count(season, speaker, sort = TRUE) %>%
  # Explicit key: avoids the implicit natural-join message and keeps the join
  # stable if either table ever gains another shared column
  left_join(episode_count, by = "season") %>%
  mutate(n_avg = n / episode_n)
# Plot -------------------------------------------------------------------------
# Smoothed words-per-episode trend across seasons, one line per speaker,
# saved to friends.png.

# Typeface applied to every text element of the plot.
# NOTE(review): the font presumably has to be available to the graphics device
# on the machine running this script -- confirm before relying on the output.
friends_font <- "Gabriel Weiss' Friends Font"

# Colours keyed by speaker name so the mapping no longer depends on the order
# of the factor levels in `df$speaker`.
speaker_colors <- c(
  Ross = "#42a2db",
  Rachel = "#00944B",
  Phoebe = "#ff4238",
  Monica = "#9C8CD4",
  Joey = "#ffdc00",
  Chandler = "#9a0006",
  Others = "grey"
)

friends_plot <- ggplot(
  df,
  aes(x = season, y = n_avg, group = speaker, color = speaker)
) +
  # Method and formula are spelled out (same values ggplot2 would auto-select
  # for data this small) so the call is explicit and prints no message
  geom_smooth(
    method = "loess",
    formula = y ~ x,
    se = FALSE,
    span = 0.4,
    show.legend = FALSE
  ) +
  # The smooth layer is excluded from the legend, so these size-0 points are
  # what supplies the legend keys (enlarged through guides() below)
  geom_point(size = 0) +
  scale_x_continuous(breaks = 1:10, minor_breaks = NULL) +
  scale_y_continuous(minor_breaks = NULL) +
  scale_color_manual(values = speaker_colors) +
  theme_minimal() +
  guides(
    color = guide_legend(override.aes = list(size = 5))
  ) +
  theme(
    plot.title = element_text(size = 14, family = friends_font, hjust = 0.5),
    plot.subtitle = element_text(size = 10, family = friends_font),
    axis.title = element_text(size = 9, family = friends_font),
    axis.text = element_text(size = 9, family = friends_font),
    legend.text = element_text(size = 10, family = friends_font),
    legend.title = element_blank(),
    legend.box.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"),
    # `linewidth` replaces the deprecated `size` for lines (ggplot2 >= 3.4.0)
    panel.grid.major = element_line(linewidth = 0.3),
    legend.position = "bottom"
  ) +
  labs(
    x = "Season",
    y = "Number of words per episode",
    title = "The one with the most words",
    subtitle = ""
  )

# Top-level expression, so the plot is still displayed when run interactively
friends_plot

# Pass the plot explicitly instead of relying on last_plot(); the height keeps
# the original 0.618 ratio to the width
ggsave(
  filename = "friends.png",
  plot = friends_plot,
  width = 7,
  height = 7 * 0.618,
  bg = "transparent"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment