Skip to content

Instantly share code, notes, and snippets.

@lgelape
Created March 10, 2021 21:56
Show Gist options
  • Save lgelape/d854f7f23a900531e3fd4977d574e492 to your computer and use it in GitHub Desktop.
Save lgelape/d854f7f23a900531e3fd4977d574e492 to your computer and use it in GitHub Desktop.
Código de análise da matéria "Em meio ao caos, cientistas brasileiros florescem no Twitter", Núcleo Jornalismo
###################################################################################################
#####
##### ENGAJAMENTO DE PERFIS NO SCIENCE PULSE AO LONGO DA COBERTURA
#####
#####
# Pacotes
library(lubridate)
library(stringr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(zoo)
library(forcats)
###################################################################################################
# Abre a base de tweets e perfis
perfis <- readRDS("perfis_022021.rds")
base <- readRDS("sciencepulse_ate09032021.rds") %>%
select(-c(categoria, person_institution))
# Cria variaveis de interesse e filtra pelo periodo da analise
base <- base %>%
select(-c(followers_count, screen_name)) %>%
# Transforma a variavel created_at em data
# (a primeira linha e pra evitar um problema na leitura de POSICXt)
mutate(created_at = substr(created_at, 1, 10),
created_at = ymd(created_at)) %>%
# Filtra entre junho e janeiro
filter(created_at > ymd("2020-05-31") & created_at < ymd("2021-03-08")) %>%
# Cria uma variavel semana_ano em formato character
mutate(semana = week(created_at),
ano = year(created_at),
semana = str_pad(semana, 2, "left", "0"),
semana_ano = paste0(ano, semana)) %>%
# Engajamento de cada tweet
mutate(engajamento = retweet_count + favorite_count,
engajamento_w = retweet_count + (0.166666*favorite_count)) %>%
left_join(perfis, by = "user_id")
# Cria banco que sinaliza o dia inicial de cada semana
semana_dia <- select(base, created_at, semana_ano) %>%
distinct() %>%
arrange(created_at) %>%
group_by(semana_ano) %>%
slice_min(created_at, n = 1) %>%
ungroup() %>%
rename(dia_inicial = created_at)
# Incorpora a informacao do dia inicial de cada semana
base <- left_join(base, semana_dia)
# Vetor com o screen_name dos perfis brasileiros
perfis_brasileiros <- c("AndersonBrito_", "dadourado", "marciacastrorj", "MBittencourtMD", "mellziland", "oatila",
"otavio_ranzani", "PauloLotufo", "TaschnerNatalia", "ThomasVConti",
"dogarrett", "luizacaires3")
###################################################################################################
### ANALISE DO ENGAJAMENTO DOS PRINCIPAIS DIVULGADORES - SP/IBPAD
# Calcula o engajamento de cada perfil por semana
engajamento_semana <- base %>%
filter(is_retweet == "FALSE") %>%
group_by(user_id, screen_name, categoria, semana_ano, dia_inicial) %>%
mutate(n_tweets = n()) %>%
summarise(taxa = (sum(retweet_count) + sum(favorite_count))/unique(n_tweets),
n_tweets = unique(n_tweets),
engajamento = sum(retweet_count) + sum(favorite_count)) %>%
ungroup()
###################################################################################################
## Total de engajamento da primeira e ultima semana
# Calcula o total de engajamento das 12 contas brasileiras
# Primeira semana que cada um esteve na base
primeira_semana <- engajamento_semana %>%
filter(screen_name %in% perfis_brasileiros) %>%
group_by(user_id) %>%
slice_min(semana_ano, n = 1) %>%
ungroup() %>%
mutate(tipo = 1)
# Ultima semana de fevereiro
ultima_semana <- engajamento_semana %>%
filter(screen_name %in% perfis_brasileiros,
dia_inicial == ymd("2021-02-26"))
# Empilha os dois bancos
semana_inicio_fim <- bind_rows(primeira_semana, ultima_semana) %>%
group_by(tipo) %>%
mutate(taxa = sum(engajamento)/sum(n_tweets),
engajamento_p = 100*(engajamento/sum(engajamento))) %>%
ungroup()
###################################################################################################
## Engajamento total dessas contas por semana (5+)
# Calcula o agregado de engajamento das 12 contas brasileiras,
# a partir da semana em que todas estavam na base
total_semana_brasileiros <- engajamento_semana %>%
filter(screen_name %in% perfis_brasileiros,
semana_ano > "202029") %>%
group_by(semana_ano) %>%
summarise(taxa = sum(engajamento)/sum(n_tweets),
engajamento = sum(engajamento)) %>%
ungroup() %>%
slice_max(engajamento, n = 5)
###################################################################################################
## Calcula a taxa e tendencia de engajamento diario de cada perfil
# Somente a partir do dia em que todos os perfis estavam na base
total_dia_brasileiros <- base %>%
filter(is_retweet == "FALSE") %>%
filter(screen_name %in% perfis_brasileiros,
created_at > as.Date("2020-07-26")) %>%
group_by(created_at) %>%
mutate(n_tweets = n()) %>%
summarise(taxa = sum(engajamento)/unique(n_tweets),
total = sum(engajamento)) %>%
ungroup() %>%
mutate(media_15d = zoo::rollmean(taxa, k = 15, fill = NA, align = "right"))
# Primeiro grafico da materia
total_dia_brasileiros %>%
ggplot() +
geom_point(aes(x = created_at, y = media_15d), size = 0.1) +
geom_line(aes(x = created_at, y = media_15d)) +
geom_smooth(aes(x = created_at, y = media_15d), method = "lm", color = "red", se = F) +
geom_smooth(aes(x = created_at, y = media_15d), method = "loess", se = F) +
theme_light() +
theme(axis.text = element_text(size = 6)) +
scale_x_date(breaks = c(as.Date("2020-08-01"),
as.Date("2020-09-01"),
as.Date("2020-10-01"),
as.Date("2020-11-01"),
as.Date("2020-12-01"),
as.Date("2021-01-01"),
as.Date("2021-02-01"),
as.Date("2021-03-01")),
labels = c("Ago", "Set",
"Out", "Nov", "Dez",
"Jan", "Fev", "Mar")) +
labs(title = "Tendência de Engajamento diário (média móvel, 15 dias)",
x = "", y = "\nEngajamento")
###################################################################################################
## Calcula a tendencia de engajamento para os 12 perfis individualmente
# Cria tabela base pra todas as combinacoes de dia-perfil
# (para preencher quando usuarios nao tuitam)
tabela_base_dia <- base %>%
select(created_at, user_id) %>%
distinct() %>%
complete(created_at, user_id)
# Calcula estatisticas sobre engajamento de cada perfil por dia
engajamento_dia <- base %>%
filter(is_retweet == "FALSE") %>%
group_by(user_id, created_at) %>%
# Conta o n. de tweets/dia do usuario
mutate(n_tweets = n()) %>%
# Calcula a taxa de engajamento e o total diario
summarise(taxa = sum(engajamento)/unique(n_tweets),
total = sum(engajamento)) %>%
ungroup() %>%
# Junta a tabela base para ter info de quando nao tuitaram
right_join(tabela_base_dia) %>%
group_by(user_id) %>%
# Calcula a media movel de 15 dias so dentro de cada usuario
mutate(media_15d = zoo::rollmean(taxa, k = 15, fill = NA, align = "right")) %>%
ungroup() %>%
left_join(perfis)
# Grafico base para segunda imagem da materia
engajamento_dia %>%
filter(screen_name %in% perfis_brasileiros) %>%
ggplot() +
geom_point(aes(x = created_at, y = media_15d), size = 0.1) +
geom_line(aes(x = created_at, y = media_15d)) +
geom_smooth(aes(x = created_at, y = media_15d), method = "lm", color = "red", se = F) +
geom_smooth(aes(x = created_at, y = media_15d), method = "loess", se = F) +
facet_wrap(~ screen_name, nrow = 4, scales = "free") +
theme_light() +
theme(axis.text = element_text(size = 6)) +
scale_x_date(breaks = c(as.Date("2020-07-01"),
as.Date("2020-08-01"),
as.Date("2020-09-01"),
as.Date("2020-10-01"),
as.Date("2020-11-01"),
as.Date("2020-12-01"),
as.Date("2021-01-01"),
as.Date("2021-02-01"),
as.Date("2021-03-01")),
labels = c("Jul", "Ago", "Set",
"Out", "Nov", "Dez",
"Jan", "Fev", "Mar")) +
labs(title = "Tendência de engajamento diário (média móvel 15 dias)",
x = "", y = "\nEngajamento")
###################################################################################################
## Pluralizacao e dados do Atila
# Calcula o percentual de engajamento de cada perfil
# em relacao ao total de cada semana, considerando somente
# semanas em que estavam todas as contas.
semana_brasileiros <- engajamento_semana %>%
filter(screen_name %in% perfis_brasileiros,
semana_ano > "202029") %>%
group_by(semana_ano) %>%
mutate(taxa = sum(engajamento)/sum(n_tweets),
engajamento_p = 100*(engajamento/sum(engajamento))) %>%
ungroup()
# Grafico base da terceira figura da materia
semana_brasileiros %>%
mutate(screen_name = factor(screen_name,
levels = c("oatila", "luizacaires3", "otavio_ranzani",
"MBittencourtMD", "mellziland", "ThomasVConti",
"dadourado", "TaschnerNatalia", "dogarrett",
"PauloLotufo", "marciacastrorj", "AndersonBrito_")),
screen_name = fct_rev(screen_name)) %>%
ggplot() +
geom_col(aes(x = semana_ano, y = engajamento_p, fill = screen_name)) +
theme_light() +
theme(axis.text.x = element_blank(),
legend.position = "bottom",
legend.direction = "horizontal") +
scale_fill_grey(start = 0.8, end = 0.2) +
labs(title = "Volume de engajamento entre as principais contas",
x = "\nSemanas", y = "\nEngajamento (em %)")
# Perfis com 20 semanas com mais engajamento
semana_brasileiros %>%
slice_max(engajamento, n = 20) %>%
group_by(screen_name) %>%
summarise(numero = n()) %>%
ungroup()
# Semanas com menor engajamento do atila
semana_brasileiros %>%
filter(screen_name == "oatila") %>%
slice_min(engajamento_p, n = 5)
# Ranking geral desses perfis pela taxa de engajamento
semana_brasileiros %>%
group_by(screen_name) %>%
summarise(soma_engajamento = sum(engajamento),
soma_tweets = sum(n_tweets)) %>%
ungroup() %>%
mutate(taxa = soma_engajamento/soma_tweets) %>%
arrange(desc(taxa))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment