Created
April 9, 2021 22:15
-
-
Save gubiithefish/0b0425d287e9950688834e069784cb08 to your computer and use it in GitHub Desktop.
Recursive twitter full_search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on: https://github.com/twitterdev/Twitter-API-v2-sample-code/blob/master/Full-Archive-Search/full-archive-search.r
# API reference: https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-all#tab1
library(httr) | |
# SETUP
#################

#' Query the Twitter API v2 full-archive search endpoint once.
#'
#' Sends a single GET request to `/2/tweets/search/all`. It is wrapped in a
#' function because the pagination loop calls it once per page.
#'
#' @param params Named list of query parameters. Defaults to the script-level
#'   `api_params`; the default is evaluated at call time, so a `next_token`
#'   added to `api_params` between calls is picked up automatically.
#' @param headers Named character vector of HTTP headers. Defaults to the
#'   script-level `api_headers`.
#' @return The response object returned by `httr::GET()`.
history_search <- function(params = api_params, headers = api_headers) {
  httr::GET(
    url = "https://api.twitter.com/2/tweets/search/all",
    httr::add_headers(.headers = headers),
    query = params
  )
}
### Set the bearer token for the API and set the API header.
### The token is read from the BEARER_TOKEN environment variable so the
### secret does not have to be written into this file. The placeholder is
### only used when the variable is unset or empty.
bearer_token <- Sys.getenv("BEARER_TOKEN", unset = "")
if (!nzchar(bearer_token)) {
  bearer_token <- "INSERT_TOKEN_HERE"
}
api_headers <- c(Authorization = sprintf("Bearer %s", bearer_token))

### Set the parameters for the first call of the API
api_params <- list(
  query = "from:elonmusk lang:en",
  max_results = "500",
  tweet.fields = "created_at,lang,context_annotations",
  start_time = "2020-01-01T00:00:00Z",
  end_time = "2021-01-01T00:00:00Z"
)
# RUNNING THE CALLING OF THE API
##################################
### Seconds to wait between two consecutive page requests. The status
### message below is built from this value so the two cannot drift apart.
page_delay_secs <- 5

### `call_next` is TRUE for the first run and stays TRUE for as long as the
### previous response carried a "next_token"
call_next <- TRUE

### Each page is stored here and combined once after the loop, instead of
### growing a data frame with rbind() on every iteration
result_pages <- list()

while (isTRUE(call_next)) {
  ### Call the API function
  api_response <- history_search()

  ### An error response has no `meta$next_token`, so without this check a
  ### failed request would be reported as "No more tweets were found".
  ### Warn and stop paging, keeping whatever was fetched so far.
  if (httr::http_error(api_response)) {
    warning(
      "Twitter API request failed with HTTP status ",
      httr::status_code(api_response),
      "; keeping the pages fetched so far",
      call. = FALSE
    )
    call_next <- FALSE
    break
  }

  ### Get content from API query
  api_contents <- httr::content(
    api_response,
    as = "parsed",
    type = "application/json",
    simplifyDataFrame = TRUE
  )

  ### Save the page; a response without a `data` element adds nothing
  if (!is.null(api_contents$data)) {
    page_df <- as.data.frame(api_contents$data)
    result_pages[[length(result_pages) + 1L]] <- page_df
  }

  ### Check and see if there is a next_token in the response.
  ### `[[` is used for exact name matching (`$` on a list matches partially).
  next_token <- api_contents$meta[["next_token"]]
  if (!is.null(next_token)) {
    print(sprintf(
      "Getting next chunk of data in %s seconds",
      page_delay_secs
    ))
    api_params[["next_token"]] <- next_token
    Sys.sleep(page_delay_secs)
  } else {
    print("No more tweets were found")
    call_next <- FALSE
  }
}

### Combine all pages into one DataFrame.
### NOTE(review): rbind() needs every page to have the same columns, as the
### original per-iteration rbind() did; confirm this holds for the
### requested `tweet.fields`.
query_df <- if (length(result_pages) > 0) {
  do.call(rbind, result_pages)
} else {
  data.frame()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment