# Created November 8, 2019 19:02
# Gist: raynamharris/cf598a1cfdda4d5c150c99b8e9d87235
# código para un análisis `rtweet` de tweets de SACNAS 2019
# (Code for an `rtweet` analysis of SACNAS 2019 tweets.)
# Note from the hosting page: this file contains bidirectional Unicode text
# that may be interpreted or compiled differently than what appears below.
# To review, open the file in an editor that reveals hidden Unicode characters.
# Packages ----
library(rtweet)    # search_tweets(), ts_plot()
library(tidyverse) # dplyr verbs, the %>% pipe and ggplot2
library(magick)    # image_read()
library(cowplot)   # ggdraw(), draw_image(), plot_grid()
# Data ----
# Search Twitter (through rtweet) for up to 2000 matching statuses.
# NOTE(review): the query mixes AND and OR without grouping; confirm that the
# search API's operator precedence yields the intended set of tweets.
estados <- search_tweets(
  "2019sacnas AND CienciaPR OR CienciaBoricua OR PuertoRico",
  n = 2000
)

# Original tweets only (retweets removed), reduced to the columns of interest
# and sorted from most to least favorited.
estaditos <- estados %>%
  filter(!is_retweet) %>%
  select(screen_name, favorite_count, retweet_count, text) %>%
  arrange(desc(favorite_count))

head(estaditos)
nrow(estaditos) # total number of original tweets

# Total number of retweets and favorites across the original tweets.
estaditos %>%
  select(retweet_count, favorite_count) %>%
  colSums()
# Per-user summary of the original tweets, sorted by total favorites.
resumen <- estaditos %>%
  group_by(screen_name) %>%
  summarize(
    n_tweets = n(),                                        # tweets per user
    n_fav = sum(favorite_count),                           # total favorites
    n_rt = sum(retweet_count),                             # total retweets
    promedio_fav = round(mean(favorite_count), digits = 1), # mean favorites
    promedio_rt = round(mean(retweet_count), digits = 1)    # mean retweets
  ) %>%
  arrange(desc(n_fav))

head(resumen)
# Custom ggplot2 theme used by the bar charts: the minimal theme with a base
# font size of 8 and the panel grid lines removed.
#
# Takes no arguments and returns a theme object to be added to a plot with `+`.
mitema <- function() {
  theme_minimal(base_size = 8) +
    theme(panel.grid = element_blank())
}
# Images ----
# Remote images read with magick; img1 and img2 are overlaid on the time
# series plot, img is converted to a raster grob for annotation_custom().
# NOTE(review): judging by the URLs, img1 is a SACNAS image and img2 the flag
# of Puerto Rico; img is presumably the photo credited in the plot caption.
img1 <- image_read(
  "http://www.gradpost.ucsb.edu/images/default-source/default-album/sacnas.jpg?sfvrsn=1"
)
img2 <- image_read(
  "https://upload.wikimedia.org/wikipedia/commons/thumb/2/28/Flag_of_Puerto_Rico.svg/1024px-Flag_of_Puerto_Rico.svg.png"
)
img <- image_read(
  "https://pbs.twimg.com/media/EIPodEKX0AAdF8C?format=jpg&name=small"
)
rast <- grid::rasterGrob(img, interpolate = TRUE)
# Time series ----
# Frequency of all retrieved statuses (tweets and retweets) in 8-hour bins.
tiempodetweets <- ts_plot(estados, "8 hour") +
  ggplot2::labs(
    y = "Número de tweets y retweets por 8 horas",
    x = "Datos obtenidos de la API REST de Twitter a través de rtweet",
    title = "Estados de Twitter con #2019SACNAS y #CienciaPR o #CienciaBoricua"
  ) +
  theme_minimal(base_size = 8) +
  theme(panel.grid = element_blank())

# Draw the plot on a cowplot canvas and overlay the two images on top of it.
ggdraw(tiempodetweets) +
  draw_image(img1, scale = 0.3, x = 0.07, y = 0.25) +
  draw_image(img2, scale = 0.25, x = 0.3, y = 0.25)
# Top tweeters ----
# Horizontal bar chart of the 15 users with the most original tweets, with the
# count printed inside each bar and a photo placed on the panel.
resumen %>%
  top_n(15, n_tweets) %>%
  ggplot() +
  geom_col(
    aes(x = reorder(screen_name, n_tweets), y = n_tweets),
    fill = "#505050"
  ) +
  geom_text(
    aes(label = n_tweets, y = n_tweets, x = screen_name),
    hjust = 1, size = 2, color = "#E1E9E8"
  ) +
  labs(
    x = NULL, y = "Tweets por usuario",
    title = "¿Quién usó #2019SACNAS y #CienciaPR o #CienciaBoricua más?",
    caption = "Foto de @ncoloncarrion"
  ) +
  coord_flip() + # swap the axes so the bars are horizontal
  mitema() +     # custom theme
  annotation_custom(rast, ymin = 2, ymax = 4, xmin = -7) # add the photo
# Panel a: the 7 users with the most total favorites.
a <- resumen %>%
  top_n(7, n_fav) %>%
  ggplot() +
  geom_col(
    aes(x = reorder(screen_name, n_fav), y = n_fav),
    fill = "blue"
  ) +
  geom_text(
    aes(label = n_fav, y = n_fav, x = screen_name),
    hjust = 1, size = 2.5, color = "white"
  ) +
  labs(
    x = NULL, y = "Número de favoritos",
    title = "¿Quién recibió la mayoría de los favoritos?"
  ) +
  coord_flip() +
  mitema()
# Panel b: the 7 users with the most total retweets.
b <- resumen %>%
  top_n(7, n_rt) %>%
  ggplot() +
  geom_col(
    aes(x = reorder(screen_name, n_rt), y = n_rt),
    fill = "red"
  ) +
  geom_text(
    aes(label = n_rt, y = n_rt, x = screen_name),
    hjust = 1, size = 2.5, color = "white"
  ) +
  labs(
    x = NULL, y = "Número de retweets",
    title = "¿La mayoría de los retweets?"
  ) +
  coord_flip() +
  mitema()
# Panel c: the 7 users with the highest mean favorites per tweet.
# NOTE(review): the name `c` shadows base::c as a variable; function calls to
# c() still resolve to the base function, but a different name would be clearer.
c <- resumen %>%
  top_n(7, promedio_fav) %>%
  ggplot() +
  geom_col(
    aes(x = reorder(screen_name, promedio_fav), y = promedio_fav),
    fill = "blue"
  ) +
  geom_text(
    aes(label = promedio_fav, y = promedio_fav, x = screen_name),
    hjust = 1, size = 2.5, color = "white"
  ) +
  labs(
    x = NULL, y = "Promedio de favoritos por tweet",
    title = "¿Quién recibió la mayoría de los favoritos en promedio?"
  ) +
  coord_flip() +
  mitema()
# Panel d: the 7 users with the highest mean retweets per tweet.
d <- resumen %>%
  top_n(7, promedio_rt) %>%
  ggplot() +
  geom_col(
    aes(x = reorder(screen_name, promedio_rt), y = promedio_rt),
    fill = "red"
  ) +
  geom_text(
    aes(label = promedio_rt, y = promedio_rt, x = screen_name),
    hjust = 1, size = 2.5, color = "white"
  ) +
  labs(
    x = NULL, y = "Promedio de retweets por tweet",
    title = "La mayoría de los retweets en promedio?"
  ) +
  coord_flip() +
  mitema()
# Arrange the four favorites/retweets panels in a single grid.
plot_grid(a, b, c, d)
# Retweets from outside the conference ----
# Users who posted at least one original tweet matching the search; the script
# treats these as the people who were at SACNAS.
personasenSACNAS <- resumen$screen_name
personasenSACNAS

# All retweets and quote tweets among the retrieved statuses.
retweets_total <- estados %>%
  filter(is_retweet | is_quote) %>%
  select(screen_name, retweet_screen_name, retweet_count, text)
retweets_total

# The subset posted by users who are not in personasenSACNAS.
retweets_sinpersonasenSACNAS <- retweets_total %>%
  filter(!screen_name %in% personasenSACNAS)
retweets_sinpersonasenSACNAS

# Percentage of retweets/quotes that came from users outside that list.
nrow(retweets_sinpersonasenSACNAS) / nrow(retweets_total) * 100
# Panel e: the 10 users outside personasenSACNAS who retweeted the most.
e <- retweets_sinpersonasenSACNAS %>%
  group_by(screen_name) %>%
  summarize(n_rt = n()) %>%
  arrange(desc(n_rt)) %>%
  head(10) %>%
  ggplot() +
  geom_col(
    aes(x = reorder(screen_name, n_rt), y = n_rt),
    fill = "#00acee"
  ) +
  geom_text(
    aes(label = n_rt, y = n_rt, x = screen_name),
    hjust = 1, size = 2.5, color = "white"
  ) +
  labs(
    x = "Non-SACNAS retweeters", y = "Cantidad de retweets",
    title = "¿Quién no estaba en SACNAS pero amplificó nuestro mensaje?"
  ) +
  coord_flip() +
  mitema()
# Panel f: for the 10 statuses most often retweeted by users outside
# personasenSACNAS, the share of their retweet count that those users
# contributed.
f <- retweets_sinpersonasenSACNAS %>%
  group_by(retweet_count, retweet_screen_name, text) %>%
  summarize(n_rt = n()) %>%
  ungroup() %>% # summarize() only drops the last grouping key
  arrange(desc(n_rt)) %>%
  mutate(
    quien_texto = paste(retweet_screen_name, text, sep = " - "),
    # Truncate the label to 80 characters so it fits on the axis.
    texto_corto = substr(quien_texto, start = 1, stop = 80),
    # NOTE(review): a retweet_count of 0 (possible for quote tweets) makes
    # this Inf/NaN; confirm whether such rows should be filtered out first.
    porcentaje = round((n_rt / retweet_count * 100), 2)
  ) %>%
  head(10) %>%
  ggplot() +
  geom_col(
    aes(x = reorder(texto_corto, porcentaje), y = porcentaje),
    fill = "#00acee"
  ) +
  geom_text(
    aes(label = porcentaje, y = porcentaje, x = texto_corto),
    hjust = 1, size = 2.5, color = "white"
  ) +
  labs(
    x = "Tweeter y texto",
    y = "% retweets de personas que no están en SACNAS",
    title = "¿Qué tweets se compartieron más ampliamente?"
  ) +
  coord_flip() +
  mitema()
# Stack the two outside-retweeter panels vertically.
plot_grid(e, f, nrow = 2)
# Hosting-page footer (not part of the script):
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment.