Skip to content

Instantly share code, notes, and snippets.

@ColinFay
Created October 25, 2017 20:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ColinFay/bfd08c7c016e4acf719c8659c98ec0f8 to your computer and use it in GitHub Desktop.
Save ColinFay/bfd08c7c016e4acf719c8659c98ec0f8 to your computer and use it in GitHub Desktop.
Breizh Data Day Tweets
library(tidyverse)
library(rtweet)
library(proustr)
library(tidytext)
# Request up to 3000 tweets matching the event hashtag.
bdd <- search_tweets(q = "#BreizhDataDay", n = 3000)

# Keep only the tweets created strictly between the two bounds below, i.e.
# during 2017-10-19.
# The bounds are parsed by as.POSIXct() without a `tz`, so they are read in
# the session's time zone -- the same conversion R performs when a date-time
# is compared with a bare character string.
# NOTE(review): the selected window therefore depends on the machine's time
# zone; confirm which zone is intended.
bdd_day <- filter(
  bdd,
  created_at > as.POSIXct("2017-10-18 23:59:59"),
  created_at < as.POSIXct("2017-10-20 00:00:00")
)
# Tweet par heure
# Draw `x` distinct colours at random from a 100-step viridis palette.
#
# x: number of colours to draw (sampling is without replacement, so at
#    most 100).
# Returns a character vector of `x` colour codes.
random_viridis <- function(x) {
  palette <- viridisLite::viridis(100)
  sample(palette, size = x)
}
# Histogram of tweet timestamps over the event day, split into 50 bins and
# filled with a single colour drawn at random from the viridis palette.
ggplot(bdd_day, aes(x = created_at)) +
  geom_histogram(bins = 50, fill = random_viridis(1)) +
  labs(
    title = "#BreizhDataDay — Tweets dans la journée",
    x = "heure",
    y = "volume",
    # Same credit as every other chart in this script (was "@breizhdataday").
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
# Tweets and retweets over the course of the day: one density curve per
# value of `is_retweet`, each drawn in its own facet row.
ggplot(data = bdd_day, mapping = aes(x = created_at, fill = is_retweet)) +
  geom_density() +
  facet_grid(is_retweet ~ .) +
  scale_fill_viridis_d() +
  labs(
    title = "Tweets et retweets pendant la journée",
    x = "",
    y = "Densité de tweets",
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
# Most active accounts: count tweets per `screen_name`, keep the 10 highest
# counts (ties included) and draw them as a horizontal bar chart.
bdd_day %>%
  count(screen_name) %>%
  # Rank explicitly on the tweet count `n`; without `wt`, top_n() falls back
  # to the last column and prints a "Selecting by n" message.
  top_n(10, wt = n) %>%
  ggplot(aes(x = reorder(screen_name, n), y = n)) +
  geom_col(fill = random_viridis(1)) +
  labs(
    title = "Comptes les plus actifs pendant le #BreizhDataDay",
    x = "",
    y = "Nombre de tweets",
    caption = "@breizhdataclub"
  ) +
  coord_flip() +
  theme_minimal()
# Most mentioned accounts: split `mentions_screen_name` into one token per
# row, count each token, drop rows with missing values and keep the tokens
# seen at least 5 times, sorted by decreasing count.
df <- bdd_day %>%
  unnest_tokens(comptes, mentions_screen_name) %>%
  count(comptes) %>%
  na.omit() %>%
  filter(n >= 5) %>%
  arrange(desc(n))

# Horizontal bar chart, bars ordered by number of mentions.
ggplot(data = df, mapping = aes(x = reorder(comptes, n), y = n)) +
  geom_col(fill = random_viridis(1)) +
  coord_flip() +
  labs(
    title = "Comptes les plus cités pendant le #BreizhDataDay",
    subtitle = "5 fois ou plus",
    x = "",
    y = "volume",
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
# Most used hashtags (other than BreizhDataDay itself): tokenise the
# `hashtags` column in place, count each token and keep those seen at least
# 5 times, excluding the event hashtag.
df <- bdd_day %>%
  unnest_tokens(hashtags, hashtags) %>%
  count(hashtags) %>%
  filter(n >= 5, hashtags != "breizhdataday") %>%
  arrange(desc(n))

# Horizontal bar chart, bars ordered by frequency.
ggplot(data = df, mapping = aes(x = reorder(hashtags, n), y = n)) +
  geom_col(fill = random_viridis(1)) +
  coord_flip() +
  labs(
    title = "Hashtags accompagnant le plus #BreizhDataDay",
    x = "",
    y = "volume",
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
# Sentiment analysis: tokenise the tweet text into words, attach the polarity
# of each word from the proustr lexicon and keep only the words that have one.
sentiments <- proust_sentiments()
df <- bdd_day %>%
  unnest_tokens(word, text) %>%
  # Join on `word` explicitly rather than on whatever columns the two tables
  # happen to share; this also silences the "Joining, by = ..." message.
  left_join(sentiments, by = "word") %>%
  select(word, polarity, created_at) %>%
  # Words absent from the lexicon have an NA polarity and are dropped here.
  na.omit()

# One histogram per polarity over the day. Each row of `df` is a matched
# word, so the bars count words rather than whole tweets.
ggplot(df, aes(x = created_at, fill = polarity)) +
  geom_histogram(bins = 50) +
  facet_grid(polarity ~ .) +
  # Two random viridis colours, one per polarity level.
  scale_fill_manual(values = random_viridis(2)) +
  labs(
    title = "Sentiment analysis pendant le #BreizhDataDay",
    x = "",
    y = "volume de tweets",
    caption = "@breizhdataclub"
  ) +
  theme_minimal()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment