# Breizh Data Day Tweets
library(tidyverse) | |
library(rtweet) | |
library(proustr) | |
library(tidytext) | |
# Fetch up to 3000 tweets matching the event hashtag.
bdd <- search_tweets("#BreizhDataDay", n = 3000)

# Keep only the tweets created strictly after 2017-10-18 23:59:59 and
# strictly before 2017-10-20 00:00:00.
# NOTE(review): the bounds are plain strings with no time zone; if
# `created_at` is a POSIXct column they are presumably parsed in the session
# time zone -- confirm this matches the intended day.
bdd_day <- filter(
  bdd,
  created_at > "2017-10-18 23:59:59" & created_at < "2017-10-20 00:00:00"
)
# Tweets per hour

# Draw `x` colours at random, without replacement, from a 100-step viridis
# palette. Because the draw is without replacement, `x` must not exceed 100.
random_viridis <- function(x) {
  colours <- viridisLite::viridis(100)
  colours[sample.int(length(colours), x)]
}
# Histogram of tweet timestamps over the event day (50 bins), filled with a
# single randomly drawn viridis colour.
ggplot(bdd_day, aes(x = created_at)) +
  geom_histogram(bins = 50, fill = random_viridis(1)) +
  labs(
    title = "#BreizhDataDay — Tweets dans la journée",
    x = "heure",
    y = "volume",
    # Use the same credit as every other chart in this script.
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
# Tweets and retweets during the day
# Density of tweet timestamps, split by `is_retweet`: each value gets its own
# facet row and its own fill colour from the discrete viridis scale.
ggplot(bdd_day, aes(x = created_at, fill = is_retweet)) +
  geom_density() +
  facet_grid(rows = vars(is_retweet)) +
  scale_fill_viridis_d() +
  theme_minimal() +
  labs(
    title = "Tweets et retweets pendant la journée",
    x = "",
    y = "Densité de tweets",
    caption = "@breizhdataclub"
  )
# Most active accounts
# Count tweets per account, keep the ten highest counts (ties are kept, so
# more than ten accounts may appear) and draw them as horizontal bars ordered
# by count.
bdd_day %>%
  count(screen_name) %>%
  # Name the ranking column explicitly instead of letting top_n() fall back
  # on the last column of the table.
  top_n(10, wt = n) %>%
  ggplot(aes(x = reorder(screen_name, n), y = n)) +
  geom_col(fill = random_viridis(1)) +
  coord_flip() +
  labs(
    title = "Comptes les plus actifs pendant le #BreizhDataDay",
    x = "",
    y = "Nombre de tweets",
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
# Most mentioned accounts
# Split `mentions_screen_name` into one token per row, count each token, drop
# rows with missing values and keep the accounts mentioned five times or more,
# sorted by decreasing count.
df <- bdd_day %>%
  unnest_tokens(comptes, mentions_screen_name) %>%
  count(comptes) %>%
  na.omit() %>%
  filter(n >= 5) %>%
  arrange(desc(n))

# Horizontal bar chart, bars ordered by number of mentions.
ggplot(df, aes(x = reorder(comptes, n), y = n)) +
  geom_col(fill = random_viridis(1)) +
  coord_flip() +
  labs(
    title = "Comptes les plus cités pendant le #BreizhDataDay",
    subtitle = "5 fois ou plus",
    x = "",
    y = "volume",
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
# Most used hashtags (other than BreizhDataDay)
# Split the `hashtags` column into one token per row and count each token.
# Keep hashtags seen five times or more, leaving out the event hashtag itself;
# the comparison is in lower case because unnest_tokens() lowercases tokens by
# default.
df <- bdd_day %>%
  unnest_tokens(hashtags, hashtags) %>%
  count(hashtags) %>%
  filter(n >= 5 & hashtags != "breizhdataday") %>%
  arrange(desc(n))

# Horizontal bar chart, bars ordered by number of uses.
ggplot(df, aes(x = reorder(hashtags, n), y = n)) +
  geom_col(fill = random_viridis(1)) +
  coord_flip() +
  labs(
    title = "Hashtags accompagnant le plus #BreizhDataDay",
    x = "",
    y = "volume",
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
# Sentiment analysis
# Tokenise the tweet text into one word per row, attach the polarity of each
# word found in the proustr sentiment lexicon, and drop the words that have no
# match.
sentiments <- proust_sentiments()
df <- bdd_day %>%
  unnest_tokens(word, text) %>%
  # Join on the token column explicitly rather than on whichever columns the
  # two tables happen to share.
  left_join(sentiments, by = "word") %>%
  select(word, polarity, created_at) %>%
  na.omit()

# One histogram per polarity (50 bins), stacked in facet rows, with two
# randomly drawn viridis colours.
# NOTE(review): each row of `df` is a word, so the bars count matched words
# rather than tweets, despite the y-axis label.
ggplot(df, aes(x = created_at, fill = polarity)) +
  geom_histogram(bins = 50) +
  facet_grid(polarity ~ .) +
  scale_fill_manual(values = random_viridis(2)) +
  labs(
    title = "Sentiment analysis pendant le #BreizhDataDay",
    x = "",
    y = "volume de tweets",
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment