Skip to content

Instantly share code, notes, and snippets.

@gubiithefish
Created April 9, 2021 22:15
Show Gist options
  • Save gubiithefish/0b0425d287e9950688834e069784cb08 to your computer and use it in GitHub Desktop.
Save gubiithefish/0b0425d287e9950688834e069784cb08 to your computer and use it in GitHub Desktop.
Recursive twitter full_search
# Based on: https://github.com/twitterdev/Twitter-API-v2-sample-code/blob/master/Full-Archive-Search/full-archive-search.r
# API reference : https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-all#tab1
library(httr)
# SETUP
#################
### Call the full-archive search endpoint once and return the raw httr response.
### Kept as a function because the endpoint is called once per page of results.
###   params  : named list of query parameters sent with the request
###   headers : named character vector of HTTP headers (carries the bearer token)
### Both arguments default to the script-level `api_params` / `api_headers`.
### Defaults are evaluated at call time, so a plain `history_search()` always
### uses the current values (including an updated `next_token`).
history_search <- function(params = api_params, headers = api_headers) {
  httr::GET(
    url = "https://api.twitter.com/2/tweets/search/all",
    httr::add_headers(.headers = headers),
    query = params
  )
}
### Bearer token for the API, and the Authorization header built from it
bearer_token <- "INSERT_TOKEN_HERE"
api_headers <- c(Authorization = paste("Bearer", bearer_token))
### Query parameters used for the first request; the paging loop later adds
### a `next_token` entry to this list
api_params <- list(
  query = "from:elonmusk lang:en",
  max_results = "500",
  tweet.fields = "created_at,lang,context_annotations",
  start_time = "2020-01-01T00:00:00Z",
  end_time = "2021-01-01T00:00:00Z"
)
# RUNNING THE CALLING OF THE API
##################################
### Seconds to wait between two consecutive page requests
page_delay_secs <- 5
### Each fetched page is stored here and the pages are combined once after the
### loop; if a request fails part-way, the pages retrieved so far stay available
query_pages <- list()
### This variable will be TRUE if it's the first run
### Or if there is a "next_token" in the query result
call_next <- TRUE
while (isTRUE(call_next)) {
  ### Call the API function
  api_response <- history_search()
  ### Turn HTTP errors into R errors; otherwise an error body (which has no
  ### `data` and no `meta`) would be mistaken for "no more tweets"
  httr::stop_for_status(api_response)
  ### Get content from API query
  api_contents <- httr::content(
    api_response,
    as = "parsed",
    type = "application/json",
    simplifyDataFrame = TRUE
  )
  ### Keep this page's tweets (a page may legitimately carry no `data`)
  if (!is.null(api_contents$data)) {
    query_pages[[length(query_pages) + 1L]] <- as.data.frame(api_contents$data)
  }
  ### Check and see if there is a next_token in the response
  if ("next_token" %in% names(api_contents$meta)) {
    print(sprintf("Getting next chunk of data in %s seconds", page_delay_secs))
    api_params[["next_token"]] <- api_contents$meta$next_token
    Sys.sleep(page_delay_secs)
  } else {
    print("No more tweets were found")
    call_next <- FALSE
  }
}
### Pages can differ in their columns (a field absent from every tweet of a
### page yields no column), and rbind() needs matching columns: fill the gaps
### with NA before combining
all_columns <- unique(unlist(lapply(query_pages, names)))
query_pages <- lapply(query_pages, function(page) {
  for (missing_col in setdiff(all_columns, names(page))) {
    page[[missing_col]] <- rep(NA, nrow(page))
  }
  page
})
### Save the content as a single DataFrame (empty if nothing was returned)
query_df <- if (length(query_pages) > 0) {
  do.call(rbind, query_pages)
} else {
  data.frame()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment