Skip to content

Instantly share code, notes, and snippets.

@sergiospagnuolo
Last active July 1, 2021 20:46
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sergiospagnuolo/a342c1179284deafa5c508dad33373f5 to your computer and use it in GitHub Desktop.
Save sergiospagnuolo/a342c1179284deafa5c508dad33373f5 to your computer and use it in GitHub Desktop.
Código para captura de busca de tuítes
library(lubridate)
library(scales)
library(tidyverse)
# Normalize the tweet timestamp format and shift it to the target
# timezone (UTC-3).
# NOTE(review): strptime() parses in the session's local timezone —
# confirm the machine's TZ before relying on the fixed -3h shift.
d$created_at <- as.POSIXct(strptime(d$created_at, "%Y-%m-%d %H:%M:%S"))
d$created_at <- d$created_at - hours(3)
# Day-level timestamp (the "%Y-%m-%d" format drops the time of day).
d$dia <- as.POSIXct(strptime(d$created_at, "%Y-%m-%d"))
# Column with the timestamp rounded, used as the aggregation bucket.
# Rounding alternatives: "secs", "mins", "hours", "days", "months", "years".
# NOTE(review): the column is named min_redondo ("rounded minute") but the
# code rounds to "hours" — rename or change units if minutes were intended.
d$min_redondo <- round(d$created_at, units = "hours")
d$new <- as.character(d$min_redondo)
# Store status_id as character instead of a number.
d$status_id <- as.character(d$status_id)
# Count tweets per rounded-timestamp bucket (column `new`).
contagem <- d %>%
group_by(new) %>%
#group_by(dia) %>%
#summarize(count=n())
count()
# Re-parse the bucket label back into POSIXct for plotting.
contagem$new <- as.POSIXct(strptime(contagem$new, "%Y-%m-%d %H:%M:%S"))
# NOTE(review): created_at was already shifted by -3h before bucketing;
# this second -3h shift may double-apply the timezone offset — confirm
# against a known tweet time before publishing the chart.
contagem$new <- contagem$new - hours(3)
# Optional: restrict the plot to a date window first, e.g.:
# filtrado <- contagem %>%
#   filter(new > "2019-11-07 23:59:00")

# Tweet volume over time: one bar per rounded time bucket, with
# semi-transparent points overlaid on the bar tops.
ggplot(contagem, aes(new, n)) +
  geom_point(alpha = 0.6) +
  # geom_col() is the documented shorthand for geom_bar(stat = "identity").
  geom_col()
library(rtweet)
## Twitter API credentials.
## SECURITY NOTE: never commit real keys — load them from the environment
## instead, e.g. api_key <- Sys.getenv("TWITTER_API_KEY").
api_key <- ""
api_secret_key <- ""
# Was "0": use the same empty-string placeholder as the other credentials.
access_token <- ""
access_token_secret <- ""

## Authenticate (rtweet falls back to browser-based auth when the
## access token fields are left empty).
token <- create_token(
  app = "popularity_R",
  consumer_key = api_key,
  consumer_secret = api_secret_key,
  access_token = access_token,
  access_secret = access_token_secret)
# Run the search.
tt_corona <- search_tweets(
  # The query itself already excludes retweets and replies via operators.
  q = 'coronavirus OR covid-19 -filter:retweets -filter:replies',
  n = 18000,
  type = "recent",
  # NOTE(review): include_rts = TRUE has no visible effect here because
  # the query filters retweets out with -filter:retweets.
  include_rts = TRUE,
  geocode = NULL,
  max_id = NULL,  # was '': rtweet's signature expects NULL, not ""
  parse = TRUE,
  token = token,  # was NULL, which discarded the token created above
  lang = "pt",
  retryonratelimit = TRUE,
  verbose = TRUE)
# Copy the search result into the working data frame.
d <- tt_corona
# Drop the list-columns that cannot be serialized to CSV directly.
d0 <- d %>%
  select(-c(symbols,
            urls_expanded_url,
            media_t.co,
            media_expanded_url,
            ext_media_url,
            ext_media_t.co,
            ext_media_expanded_url,
            geo_coords,
            coords_coords,
            bbox_coords,
            urls_t.co))
# Column cleanup: flatten the remaining list-columns into
# comma-separated strings so the data frame can be written as CSV.
# flatten_list_col: collapse each list element into one "a, b, c" string.
flatten_list_col <- function(col) {
  vapply(col, paste, character(1L), collapse = ", ")
}
list_cols <- c("hashtags", "mentions_user_id", "mentions_screen_name",
               "urls_url", "media_url", "media_type")
for (nm in list_cols) {
  d0[[nm]] <- flatten_list_col(d0[[nm]])
}
# Write the CSV (fill in the actual date in the file name before running).
write.csv(d0, "tweets_covid_XX_XX_marco.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment