Created
October 25, 2017 20:21
-
-
Save ColinFay/bfd08c7c016e4acf719c8659c98ec0f8 to your computer and use it in GitHub Desktop.
Breizh Data Day Tweets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(rtweet) | |
library(proustr) | |
library(tidytext) | |
# Fetch up to 3000 tweets carrying the event hashtag, then keep only the ones
# posted on 19 October 2017.
bdd <- search_tweets("#BreizhDataDay", n = 3000)

# The day boundaries are built as explicit date-times rather than bare strings:
# when a date-time is compared with a string, R converts the string using the
# session's local time zone, so the selected rows would change from one machine
# to another. The interval is half-open: [19 Oct 00:00:00, 20 Oct 00:00:00).
# NOTE(review): assumes created_at is a POSIXct column and that the day is meant
# in French local time (Europe/Paris) — confirm.
bdd_day <- bdd %>%
  filter(
    created_at >= as.POSIXct("2017-10-19 00:00:00", tz = "Europe/Paris"),
    created_at < as.POSIXct("2017-10-20 00:00:00", tz = "Europe/Paris")
  )
# Tweet par heure | |
# Draw `x` colours at random, without replacement, from a 100-step viridis
# palette. Used to give each chart a randomly chosen fill colour.
#
# x: number of colours to draw.
# Returns the `x` sampled elements of viridisLite::viridis(100).
random_viridis <- function(x) {
  palette <- viridisLite::viridis(100)
  sample(palette, size = x)
}
# Tweets per hour: histogram (50 bins) of tweet timestamps over the event day,
# filled with one randomly chosen viridis colour.
ggplot(bdd_day, aes(x = created_at)) +
  geom_histogram(bins = 50, fill = random_viridis(1)) +
  labs(
    title = "#BreizhDataDay — Tweets dans la journée",
    x = "heure",
    y = "volume",
    # Credit the same account as every other chart in this script.
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
# Tweets and retweets during the day: density of tweet timestamps, coloured by
# the is_retweet flag and drawn in one facet row per flag value.
ggplot(bdd_day, aes(x = created_at, fill = is_retweet)) +
  geom_density() +
  facet_grid(is_retweet ~ .) +
  scale_fill_viridis_d() +
  labs(
    title = "Tweets et retweets pendant la journée",
    x = "",
    y = "Densité de tweets",
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
# Most active accounts: count tweets per screen name, keep the ten highest
# counts, and draw them as a horizontal bar chart ordered by count.
bdd_day %>%
  count(screen_name) %>%
  # Rank explicitly on the tweet count `n`; without `wt`, top_n() silently falls
  # back to the last column and prints a "Selecting by n" message. Ties are
  # kept, so more than ten accounts can appear.
  top_n(10, wt = n) %>%
  ggplot() +
  aes(reorder(screen_name, n), n) +
  geom_col(fill = random_viridis(1)) +
  labs(
    title = "Comptes les plus actifs pendant le #BreizhDataDay",
    x = "",
    y = "Nombre de tweets",
    caption = "@breizhdataclub"
  ) +
  coord_flip() +
  theme_minimal()
# Most mentioned accounts: split the mentions field into one token per row,
# count each token, drop rows with missing values, and keep the accounts
# mentioned at least five times, sorted by decreasing count.
df <- bdd_day %>%
  unnest_tokens(comptes, mentions_screen_name) %>%
  count(comptes) %>%
  na.omit() %>%
  filter(n >= 5) %>%
  arrange(desc(n))

# Horizontal bar chart, bars ordered by number of mentions.
ggplot(df, aes(x = reorder(comptes, n), y = n)) +
  geom_col(fill = random_viridis(1)) +
  coord_flip() +
  labs(
    title = "Comptes les plus cités pendant le #BreizhDataDay",
    subtitle = "5 fois ou plus",
    x = "",
    y = "volume",
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
# Most used hashtags (other than BreizhDataDay): tokenise the hashtags field in
# place, count each token, and keep those seen at least five times, excluding
# the event's own tag, sorted by decreasing count.
df <- bdd_day %>%
  unnest_tokens(hashtags, hashtags) %>%
  count(hashtags) %>%
  filter(n >= 5, hashtags != "breizhdataday") %>%
  arrange(desc(n))

# Horizontal bar chart, bars ordered by number of uses.
ggplot(df, aes(x = reorder(hashtags, n), y = n)) +
  geom_col(fill = random_viridis(1)) +
  coord_flip() +
  labs(
    title = "Hashtags accompagnant le plus #BreizhDataDay",
    x = "",
    y = "volume",
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
# Sentiment analysis: split tweet text into one word per row, attach the
# polarity given by the proustr sentiment lexicon, and drop every row with a
# missing value (which removes words that have no lexicon entry).
sentiments <- proust_sentiments()
df <- unnest_tokens(bdd_day, word, text) %>%
  # Join on the token column only. Without `by`, the join key is every column
  # name the two tables happen to share, and dplyr prints a message.
  # NOTE(review): assumes the lexicon's key column is named `word` — confirm.
  left_join(sentiments, by = "word") %>%
  select(word, polarity, created_at) %>%
  na.omit()

# Histogram (50 bins) of matched words over time, one facet row per polarity,
# filled with two randomly drawn viridis colours.
ggplot(df) +
  aes(created_at, fill = polarity) +
  geom_histogram(bins = 50) +
  facet_grid(polarity ~ .) +
  scale_fill_manual(values = random_viridis(2)) +
  labs(
    title = "Sentiment analysis pendant le #BreizhDataDay",
    x = "",
    y = "volume de tweets",
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment