Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Código de análise da matéria "Em meio ao caos, cientistas brasileiros florescem no Twitter", Núcleo Jornalismo
###################################################################################################
#####
##### ENGAJAMENTO DE PERFIS NO SCIENCE PULSE AO LONGO DA COBERTURA
#####
#####
# Pacotes
library(lubridate)
library(stringr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(zoo)
library(forcats)
###################################################################################################
# Abre a base de tweets e perfis
perfis <- readRDS("perfis_022021.rds")
base <- readRDS("sciencepulse_ate09032021.rds") %>%
select(-c(categoria, person_institution))
# Cria variaveis de interesse e filtra pelo periodo da analise
base <- base %>%
select(-c(followers_count, screen_name)) %>%
# Transforma a variavel created_at em data
# (a primeira linha e pra evitar um problema na leitura de POSICXt)
mutate(created_at = substr(created_at, 1, 10),
created_at = ymd(created_at)) %>%
# Filtra entre junho e janeiro
filter(created_at > ymd("2020-05-31") & created_at < ymd("2021-03-08")) %>%
# Cria uma variavel semana_ano em formato character
mutate(semana = week(created_at),
ano = year(created_at),
semana = str_pad(semana, 2, "left", "0"),
semana_ano = paste0(ano, semana)) %>%
# Engajamento de cada tweet
mutate(engajamento = retweet_count + favorite_count,
engajamento_w = retweet_count + (0.166666*favorite_count)) %>%
left_join(perfis, by = "user_id")
# Cria banco que sinaliza o dia inicial de cada semana
semana_dia <- select(base, created_at, semana_ano) %>%
distinct() %>%
arrange(created_at) %>%
group_by(semana_ano) %>%
slice_min(created_at, n = 1) %>%
ungroup() %>%
rename(dia_inicial = created_at)
# Incorpora a informacao do dia inicial de cada semana
base <- left_join(base, semana_dia)
# Vetor com o screen_name dos perfis brasileiros
perfis_brasileiros <- c("AndersonBrito_", "dadourado", "marciacastrorj", "MBittencourtMD", "mellziland", "oatila",
"otavio_ranzani", "PauloLotufo", "TaschnerNatalia", "ThomasVConti",
"dogarrett", "luizacaires3")
###################################################################################################
### ANALISE DO ENGAJAMENTO DOS PRINCIPAIS DIVULGADORES - SP/IBPAD
# Calcula o engajamento de cada perfil por semana
engajamento_semana <- base %>%
filter(is_retweet == "FALSE") %>%
group_by(user_id, screen_name, categoria, semana_ano, dia_inicial) %>%
mutate(n_tweets = n()) %>%
summarise(taxa = (sum(retweet_count) + sum(favorite_count))/unique(n_tweets),
n_tweets = unique(n_tweets),
engajamento = sum(retweet_count) + sum(favorite_count)) %>%
ungroup()
###################################################################################################
## Total de engajamento da primeira e ultima semana
# Calcula o total de engajamento das 12 contas brasileiras
# Primeira semana que cada um esteve na base
primeira_semana <- engajamento_semana %>%
filter(screen_name %in% perfis_brasileiros) %>%
group_by(user_id) %>%
slice_min(semana_ano, n = 1) %>%
ungroup() %>%
mutate(tipo = 1)
# Ultima semana de fevereiro
ultima_semana <- engajamento_semana %>%
filter(screen_name %in% perfis_brasileiros,
dia_inicial == ymd("2021-02-26"))
# Empilha os dois bancos
semana_inicio_fim <- bind_rows(primeira_semana, ultima_semana) %>%
group_by(tipo) %>%
mutate(taxa = sum(engajamento)/sum(n_tweets),
engajamento_p = 100*(engajamento/sum(engajamento))) %>%
ungroup()
###################################################################################################
## Engajamento total dessas contas por semana (5+)
# Calcula o agregado de engajamento das 12 contas brasileiras,
# a partir da semana em que todas estavam na base
total_semana_brasileiros <- engajamento_semana %>%
filter(screen_name %in% perfis_brasileiros,
semana_ano > "202029") %>%
group_by(semana_ano) %>%
summarise(taxa = sum(engajamento)/sum(n_tweets),
engajamento = sum(engajamento)) %>%
ungroup() %>%
slice_max(engajamento, n = 5)
###################################################################################################
## Calcula a taxa e tendencia de engajamento diario de cada perfil
# Somente a partir do dia em que todos os perfis estavam na base
total_dia_brasileiros <- base %>%
filter(is_retweet == "FALSE") %>%
filter(screen_name %in% perfis_brasileiros,
created_at > as.Date("2020-07-26")) %>%
group_by(created_at) %>%
mutate(n_tweets = n()) %>%
summarise(taxa = sum(engajamento)/unique(n_tweets),
total = sum(engajamento)) %>%
ungroup() %>%
mutate(media_15d = zoo::rollmean(taxa, k = 15, fill = NA, align = "right"))
# Primeiro grafico da materia
total_dia_brasileiros %>%
ggplot() +
geom_point(aes(x = created_at, y = media_15d), size = 0.1) +
geom_line(aes(x = created_at, y = media_15d)) +
geom_smooth(aes(x = created_at, y = media_15d), method = "lm", color = "red", se = F) +
geom_smooth(aes(x = created_at, y = media_15d), method = "loess", se = F) +
theme_light() +
theme(axis.text = element_text(size = 6)) +
scale_x_date(breaks = c(as.Date("2020-08-01"),
as.Date("2020-09-01"),
as.Date("2020-10-01"),
as.Date("2020-11-01"),
as.Date("2020-12-01"),
as.Date("2021-01-01"),
as.Date("2021-02-01"),
as.Date("2021-03-01")),
labels = c("Ago", "Set",
"Out", "Nov", "Dez",
"Jan", "Fev", "Mar")) +
labs(title = "Tendência de Engajamento diário (média móvel, 15 dias)",
x = "", y = "\nEngajamento")
###################################################################################################
## Calcula a tendencia de engajamento para os 12 perfis individualmente
# Cria tabela base pra todas as combinacoes de dia-perfil
# (para preencher quando usuarios nao tuitam)
tabela_base_dia <- base %>%
select(created_at, user_id) %>%
distinct() %>%
complete(created_at, user_id)
# Calcula estatisticas sobre engajamento de cada perfil por dia
engajamento_dia <- base %>%
filter(is_retweet == "FALSE") %>%
group_by(user_id, created_at) %>%
# Conta o n. de tweets/dia do usuario
mutate(n_tweets = n()) %>%
# Calcula a taxa de engajamento e o total diario
summarise(taxa = sum(engajamento)/unique(n_tweets),
total = sum(engajamento)) %>%
ungroup() %>%
# Junta a tabela base para ter info de quando nao tuitaram
right_join(tabela_base_dia) %>%
group_by(user_id) %>%
# Calcula a media movel de 15 dias so dentro de cada usuario
mutate(media_15d = zoo::rollmean(taxa, k = 15, fill = NA, align = "right")) %>%
ungroup() %>%
left_join(perfis)
# Grafico base para segunda imagem da materia
engajamento_dia %>%
filter(screen_name %in% perfis_brasileiros) %>%
ggplot() +
geom_point(aes(x = created_at, y = media_15d), size = 0.1) +
geom_line(aes(x = created_at, y = media_15d)) +
geom_smooth(aes(x = created_at, y = media_15d), method = "lm", color = "red", se = F) +
geom_smooth(aes(x = created_at, y = media_15d), method = "loess", se = F) +
facet_wrap(~ screen_name, nrow = 4, scales = "free") +
theme_light() +
theme(axis.text = element_text(size = 6)) +
scale_x_date(breaks = c(as.Date("2020-07-01"),
as.Date("2020-08-01"),
as.Date("2020-09-01"),
as.Date("2020-10-01"),
as.Date("2020-11-01"),
as.Date("2020-12-01"),
as.Date("2021-01-01"),
as.Date("2021-02-01"),
as.Date("2021-03-01")),
labels = c("Jul", "Ago", "Set",
"Out", "Nov", "Dez",
"Jan", "Fev", "Mar")) +
labs(title = "Tendência de engajamento diário (média móvel 15 dias)",
x = "", y = "\nEngajamento")
###################################################################################################
## Pluralizacao e dados do Atila
# Calcula o percentual de engajamento de cada perfil
# em relacao ao total de cada semana, considerando somente
# semanas em que estavam todas as contas.
semana_brasileiros <- engajamento_semana %>%
filter(screen_name %in% perfis_brasileiros,
semana_ano > "202029") %>%
group_by(semana_ano) %>%
mutate(taxa = sum(engajamento)/sum(n_tweets),
engajamento_p = 100*(engajamento/sum(engajamento))) %>%
ungroup()
# Grafico base da terceira figura da materia
semana_brasileiros %>%
mutate(screen_name = factor(screen_name,
levels = c("oatila", "luizacaires3", "otavio_ranzani",
"MBittencourtMD", "mellziland", "ThomasVConti",
"dadourado", "TaschnerNatalia", "dogarrett",
"PauloLotufo", "marciacastrorj", "AndersonBrito_")),
screen_name = fct_rev(screen_name)) %>%
ggplot() +
geom_col(aes(x = semana_ano, y = engajamento_p, fill = screen_name)) +
theme_light() +
theme(axis.text.x = element_blank(),
legend.position = "bottom",
legend.direction = "horizontal") +
scale_fill_grey(start = 0.8, end = 0.2) +
labs(title = "Volume de engajamento entre as principais contas",
x = "\nSemanas", y = "\nEngajamento (em %)")
# Perfis com 20 semanas com mais engajamento
semana_brasileiros %>%
slice_max(engajamento, n = 20) %>%
group_by(screen_name) %>%
summarise(numero = n()) %>%
ungroup()
# Semanas com menor engajamento do atila
semana_brasileiros %>%
filter(screen_name == "oatila") %>%
slice_min(engajamento_p, n = 5)
# Ranking geral desses perfis pela taxa de engajamento
semana_brasileiros %>%
group_by(screen_name) %>%
summarise(soma_engajamento = sum(engajamento),
soma_tweets = sum(n_tweets)) %>%
ungroup() %>%
mutate(taxa = soma_engajamento/soma_tweets) %>%
arrange(desc(taxa))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment