library(tidyverse)
library(tidytext)
library(ggraph)
library(igraph)
#>
#> Attaching package: 'igraph'
#> The following objects are masked from 'package:dplyr':
#>
#> as_data_frame, groups, union
#> The following objects are masked from 'package:purrr':
#>
#> compose, simplify
#> The following object is masked from 'package:tidyr':
#>
#> crossing
#> The following object is masked from 'package:tibble':
#>
#> as_data_frame
#> The following objects are masked from 'package:stats':
#>
#> decompose, spectrum
#> The following object is masked from 'package:base':
#>
#> union
library(widyr)
# Read the TidyTuesday horror movies data set (2022-11-01 release) straight
# from the rfordatascience GitHub repository. Column types are left for readr
# to guess.
horror_movies <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-01/horror_movies.csv"
)
#> Rows: 32540 Columns: 20
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (10): original_title, title, original_language, overview, tagline, post...
#> dbl (8): id, popularity, vote_count, vote_average, budget, revenue, runtim...
#> lgl (1): adult
#> date (1): release_date
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Split every movie overview into individual word tokens (one row per token),
# then remove stop words. No `by` is supplied to the join, so it matches on
# the column the two tables share (`word`).
horror_tidy <-
  horror_movies %>%
  unnest_tokens(output = word, input = overview) %>%
  anti_join(y = get_stopwords())
#> Joining, by = "word"
# Pairwise correlation between words, based on which `id` values they occur
# under (presumably one `id` per movie; verify against the data dictionary).
# Only words that appear more than 300 times overall are considered, and only
# pairs with a correlation above 0.15 are kept.
words_cooccur <-
  horror_tidy %>%
  add_count(word, name = "word_total") %>%
  filter(word_total > 300) %>%
  select(-word_total) %>%
  pairwise_cor(item = word, feature = id, sort = TRUE, upper = FALSE) %>%
  filter(correlation > 0.15)
# Overall frequency of each word that takes part in at least one retained
# correlation pair (on either side of the pair), most frequent first.
word_counts <-
  horror_tidy %>%
  count(word, sort = TRUE) %>%
  filter(word %in% c(words_cooccur$item1, words_cooccur$item2))
# Seed the RNG before building the plot so the result is repeatable
# (presumably the "nicely" layout and the label repulsion draw on it).
set.seed(2021)

# Word network: vertices are words (sized by overall count `n`), edges are the
# retained correlations (more transparent when the correlation is weaker).
ggraph(
  graph_from_data_frame(d = words_cooccur, vertices = word_counts),
  layout = "nicely"
) +
  geom_edge_link(
    aes(edge_alpha = correlation),
    edge_width = 0.5,
    show.legend = FALSE
  ) +
  geom_node_point(
    aes(size = n),
    color = "midnightblue",
    alpha = 0.8
  ) +
  geom_node_text(
    aes(label = name),
    repel = TRUE,
    family = "IBMPlexSans"
  ) +
  theme_graph(base_family = "IBMPlexSans-Bold") +
  theme(legend.position = "none") +
  labs(
    title = "What words are used to describe horror movies?",
    subtitle = "Common words that are highly correlated in #TidyTuesday horror movie overviews"
  )
# Created on 2022-11-03 with reprex v2.0.2