gubiithefish/twitter_history_search.r

## twitter_history_search.r
# Based on: https://github.com/twitterdev/Twitter-API-v2-sample-code/blob/master/Full-Archive-Search/full-archive-search.r
# API reference : https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-all#tab1

library(httr)

# SETUP
#################
### Create a function for calling the API, which is cleaner to make as a function, as we will be calling it several times
history_search <- function(){
  return(httr::GET(url = 'https://api.twitter.com/2/tweets/search/all', httr::add_headers(.headers=api_headers), query = api_params))
}

### Set the bearer token for the API and set the API header
bearer_token <- "INSERT_TOKEN_HERE"
api_headers  <-  c(`Authorization` = sprintf('Bearer %s', bearer_token))

### Set the parameters for the first calling of the API
api_params   <- list(
  `query` = 'from:elonmusk lang:en',
  `max_results` = '500',
  `tweet.fields` = 'created_at,lang,context_annotations',
  `start_time` = '2020-01-01T00:00:00Z',
  `end_time` = '2021-01-01T00:00:00Z'
)

# RUNNING THE CALLING OF THE API
##################################
### This variable will be TRUE if it's the first run
### Or if there is a "next_token" in the query result
call_next <- TRUE
query_df <- FALSE

while (isTRUE(call_next)){
  ### Call the API function
  api_response <- history_search()

  ### Get content from API query
  api_contents <- content(
    api_response,
    as = 'parsed',
    type = 'application/json',
    simplifyDataFrame = TRUE
  )

  ### Save the content as DataFrame
  if (isFALSE(query_df)){
    ### If it's the first time running, then create DataFrame
    query_df <- as.data.frame(api_contents$data)
  } else {
    ### If DataFrame exist, then append data to end of DataFrame
    query_df <- rbind(query_df, api_contents$data)
  }

  ### Check and see if there is a next_token in the response
  if ('next_token' %in% names(api_contents$meta)){
    print("Getting next chunk of data in 30 seconds")
    api_params['next_token'] <- api_contents$meta$next_token
    Sys.sleep(5)
  } else {
    print("No more tweets were found")
    call_next <- FALSE
  }
}
	# Based on: https://github.com/twitterdev/Twitter-API-v2-sample-code/blob/master/Full-Archive-Search/full-archive-search.r
	# API reference : https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-all#tab1

	library(httr)

	# SETUP
	#################
	### Create a function for calling the API, which is cleaner to make as a function, as we will be calling it several times
	history_search <- function(){
	return(httr::GET(url = 'https://api.twitter.com/2/tweets/search/all', httr::add_headers(.headers=api_headers), query = api_params))
	}

	### Set the bearer token for the API and set the API header
	bearer_token <- "INSERT_TOKEN_HERE"
	api_headers <- c(`Authorization` = sprintf('Bearer %s', bearer_token))

	### Set the parameters for the first calling of the API
	api_params <- list(
	`query` = 'from:elonmusk lang:en',
	`max_results` = '500',
	`tweet.fields` = 'created_at,lang,context_annotations',
	`start_time` = '2020-01-01T00:00:00Z',
	`end_time` = '2021-01-01T00:00:00Z'
	)

	# RUNNING THE CALLING OF THE API
	##################################
	### This variable will be TRUE if it's the first run
	### Or if there is a "next_token" in the query result
	call_next <- TRUE
	query_df <- FALSE

	while (isTRUE(call_next)){
	### Call the API function
	api_response <- history_search()

	### Get content from API query
	api_contents <- content(
	api_response,
	as = 'parsed',
	type = 'application/json',
	simplifyDataFrame = TRUE
	)

	### Save the content as DataFrame
	if (isFALSE(query_df)){
	### If it's the first time running, then create DataFrame
	query_df <- as.data.frame(api_contents$data)
	} else {
	### If DataFrame exist, then append data to end of DataFrame
	query_df <- rbind(query_df, api_contents$data)
	}

	### Check and see if there is a next_token in the response
	if ('next_token' %in% names(api_contents$meta)){
	print("Getting next chunk of data in 30 seconds")
	api_params['next_token'] <- api_contents$meta$next_token
	Sys.sleep(5)
	} else {
	print("No more tweets were found")
	call_next <- FALSE
	}
	}