# AdamSpannbauer/get_twitter_time_dataframe.R

## get_twitter_time_dataframe.R
#' Download a user's recent tweets from the Twitter REST API as a data frame
#'
#' Pages through `statuses/user_timeline.json` in requests of at most 200
#' tweets until `n_tweets` have been requested, or until a page comes back
#' with no tweets that were not already collected.
#'
#' @param user Screen name of the account whose timeline is fetched.
#' @param n_tweets Total number of tweets to request (default 200).
#' @param oauth_sig Request configuration passed straight to `httr::GET()`
#'   (presumably an OAuth signature; confirm against the caller).
#' @return A tibble with the columns `id`, `text`, `favorite_count`,
#'   `retweet_count` and `created_at` with duplicate rows removed, or `NULL`
#'   when no tweets were returned at all. An HTTP error status raises an error.
get_timeline_df <- function(user, n_tweets=200, oauth_sig) {
  timeline_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"
  fields_i_care_about <- c("id", "text", "favorite_count", "retweet_count",
                           "created_at")
  n_left <- n_tweets
  timeline_df <- NULL
  # id of the oldest tweet seen so far; NULL until the first page has arrived
  max_id <- NULL
  # loop until n_tweets have been requested or the timeline runs dry
  while (n_left > 0) {
    n_to_get <- min(200, n_left)
    # let httr assemble and URL-encode the query string; max_id is only sent
    # after the first page so the same 200 tweets are not downloaded repeatedly
    query <- list(screen_name = user, count = as.character(n_to_get))
    if (!is.null(max_id)) {
      query$max_id <- max_id
    }
    response <- httr::GET(timeline_url, oauth_sig, query = query)
    # fail loudly instead of parsing an error payload as if it were tweets
    httr::stop_for_status(response)
    # round-trip through JSON text so problem characters can be stripped
    response_content <- httr::content(response)
    json_content <- jsonlite::toJSON(response_content)
    json_conv <- iconv(json_content, "UTF-8", "ASCII", sub = "")
    # special character (^C) not caught by the iconv clean above
    json_conv <- stringr::str_replace_all(json_conv, "\003", "")
    timeline_list <- jsonlite::fromJSON(json_conv)
    page_ids <- unlist(timeline_list[["id"]])
    # stop when the page is empty or holds only tweets we already have;
    # max_id is inclusive per the Twitter timeline docs, so an exhausted
    # timeline keeps echoing the boundary tweet back
    if (length(page_ids) == 0 || all(page_ids %in% timeline_df$id)) {
      break
    }
    # extract desired fields, one flat column per field
    page_cols <- lapply(fields_i_care_about,
                        function(field) unlist(timeline_list[[field]]))
    names(page_cols) <- fields_i_care_about
    timeline_df <- dplyr::distinct(
      dplyr::bind_rows(dplyr::as_tibble(page_cols), timeline_df)
    )
    # store min id (oldest tweet) as max id for the next GET; format it
    # explicitly so a large id is never sent in scientific notation
    # NOTE(review): ids are parsed as doubles, which cannot hold every 64-bit
    # tweet id exactly; the API's id_str field would avoid that -- confirm
    # whether exact ids matter to callers
    max_id <- sprintf("%.0f", min(page_ids))
    # update number of tweets left
    n_left <- n_left - n_to_get
  }
  timeline_df
}
	#' Download a user's recent tweets from the Twitter REST API as a data frame
	#'
	#' Pages through `statuses/user_timeline.json` in requests of at most 200
	#' tweets until `n_tweets` have been requested, or until a page comes back
	#' with no tweets that were not already collected.
	#'
	#' NOTE(review): this file defines `get_timeline_df` twice; when the file is
	#' sourced, this later definition is the one that stays bound to the name.
	#'
	#' @param user Screen name of the account whose timeline is fetched.
	#' @param n_tweets Total number of tweets to request (default 200).
	#' @param oauth_sig Request configuration passed straight to `httr::GET()`
	#'   (presumably an OAuth signature; confirm against the caller).
	#' @return A tibble with the columns `id`, `text`, `favorite_count`,
	#'   `retweet_count` and `created_at` with duplicate rows removed, or `NULL`
	#'   when no tweets were returned at all. An HTTP error status raises an error.
	get_timeline_df <- function(user, n_tweets=200, oauth_sig) {
	  timeline_url <- "https://api.twitter.com/1.1/statuses/user_timeline.json"
	  fields_i_care_about <- c("id", "text", "favorite_count", "retweet_count",
	                           "created_at")
	  n_left <- n_tweets
	  timeline_df <- NULL
	  # id of the oldest tweet seen so far; NULL until the first page has arrived
	  max_id <- NULL
	  # loop until n_tweets have been requested or the timeline runs dry
	  while (n_left > 0) {
	    n_to_get <- min(200, n_left)
	    # let httr assemble and URL-encode the query string; max_id is only sent
	    # after the first page so the same 200 tweets are not downloaded repeatedly
	    query <- list(screen_name = user, count = as.character(n_to_get))
	    if (!is.null(max_id)) {
	      query$max_id <- max_id
	    }
	    response <- httr::GET(timeline_url, oauth_sig, query = query)
	    # fail loudly instead of parsing an error payload as if it were tweets
	    httr::stop_for_status(response)
	    # round-trip through JSON text so problem characters can be stripped
	    response_content <- httr::content(response)
	    json_content <- jsonlite::toJSON(response_content)
	    json_conv <- iconv(json_content, "UTF-8", "ASCII", sub = "")
	    # special character (^C) not caught by the iconv clean above
	    json_conv <- stringr::str_replace_all(json_conv, "\003", "")
	    timeline_list <- jsonlite::fromJSON(json_conv)
	    page_ids <- unlist(timeline_list[["id"]])
	    # stop when the page is empty or holds only tweets we already have;
	    # max_id is inclusive per the Twitter timeline docs, so an exhausted
	    # timeline keeps echoing the boundary tweet back
	    if (length(page_ids) == 0 || all(page_ids %in% timeline_df$id)) {
	      break
	    }
	    # extract desired fields, one flat column per field
	    page_cols <- lapply(fields_i_care_about,
	                        function(field) unlist(timeline_list[[field]]))
	    names(page_cols) <- fields_i_care_about
	    timeline_df <- dplyr::distinct(
	      dplyr::bind_rows(dplyr::as_tibble(page_cols), timeline_df)
	    )
	    # store min id (oldest tweet) as max id for the next GET; format it
	    # explicitly so a large id is never sent in scientific notation
	    # NOTE(review): ids are parsed as doubles, which cannot hold every 64-bit
	    # tweet id exactly; the API's id_str field would avoid that -- confirm
	    # whether exact ids matter to callers
	    max_id <- sprintf("%.0f", min(page_ids))
	    # update number of tweets left
	    n_left <- n_left - n_to_get
	  }
	  timeline_df
	}