# micahwoods/tweet_activity.R

## tweet_activity.R
# some retweet analysis

# load necessary packages
library("twitteR")
library("dplyr")
library("lubridate")
library("ggplot2")

# make a twitter app at https://apps.twitter.com/
# and get the api key, secret, and access token info
#
# Each credential is read from an environment variable so that real secrets
# do not have to be saved in this script (they can be set in ~/.Renviron,
# for example). When a variable is not set, the placeholder string is used,
# so replacing the placeholders in place still works as before.

api_key <- Sys.getenv("TWITTER_API_KEY", unset = "yourAPIkey")

api_secret <- Sys.getenv("TWITTER_API_SECRET", unset = "yourAPIsecret")

access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = "yourAccesstoken")

access_token_secret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET",
                                  unset = "yourAccesstokenSecret")

# hand the four credentials to twitteR for the requests that follow
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)

# download user timeline for a single account
# in this case to get last 500 tweets of @asianturfgrass
tweets <- userTimeline("asianturfgrass", n = 500, includeRts = TRUE)

# convert to data frame
tweetsDF <- twListToDF(tweets)

# look only at original tweets, not retweets
original <- filter(tweetsDF, isRetweet == FALSE)

# mean and median of various tweet activity
# the means are over original tweets only; the medians and the retweet
# share are over every downloaded tweet, retweets included
meanFav <- mean(original$favoriteCount)
meanRT <- mean(original$retweetCount)
medFav <- median(tweetsDF$favoriteCount)
medRT <- median(tweetsDF$retweetCount)
meanIsRT <- mean(tweetsDF$isRetweet)

# date of last tweet, useful to identify inactive accounts
# the value is read by position (row 1, column 5); presumably that is the
# `created` timestamp of the most recent tweet -- TODO confirm against the
# column order twListToDF() returns
lastTweet <- ymd_hms(tweetsDF[[1, 5]])

# split the original tweets into replies and non-replies, then calculate
# activity on the non-replies
# is.na() replaces the comparison with the string "NA": that comparison only
# worked because filter() drops rows whose condition is NA, and it would
# misclassify a reply to an account whose screen name is literally "NA"
reply <- filter(original, !is.na(replyToSN))
notReply <- filter(original, is.na(replyToSN))
nRFav <- mean(notReply$favoriteCount)
nRRT <- mean(notReply$retweetCount)
nRMedFav <- median(notReply$favoriteCount)
nRMedRT <- median(notReply$retweetCount)

# the rest of the script repeats these calculations for every follower and
# following account; the account to start from here is @asianturfgrass
atc <- getUser("asianturfgrass")

# look up the user records for everyone followed and for every follower
atc.following <- lookupUsers(atc$getFriendIDs())
atc.follower <- lookupUsers(atc$getFollowerIDs())

# turn both lists into data frames
following <- twListToDF(atc.following)
followers <- twListToDF(atc.follower)

# keep only the open (unprotected) followers, if not these requests are
# denied; for following, access to the account already granted
followers2 <- followers %>% filter(protected == FALSE)

# all following plus the open followers, without repeated rows
accounts <- dplyr::union(following, followers2)
accountsD <- unique(accounts)

# analyze only those accounts with at least 500 tweets
accountsBusy <- accountsD %>% filter(statusesCount >= 500)

# number of accounts the loop has to visit
j <- nrow(accountsBusy)

# empty data frame that the loop fills with all calculations
atcAdd <- data.frame()

# set this if one wants to save the downloaded user timelines
# tweetText <- "data/TweetsDFX.csv"

# this loop will download the timeline for all the accounts, calculate various
# measures of tweet activity, and write to a file for later analysis.
# the timelines can be written to file for string analysis
#
# accountsBusy is read by column position (2, 3, 5, 11, 14); column 14 is the
# value passed to userTimeline() -- presumably the account id, TODO confirm

# seq_len() skips the loop when no account qualifies; 1:j would run the body
# with i = 1 and then i = 0 when j is 0
for (i in seq_len(j)) {
  # one failed request must not abort the whole run: warn and treat the
  # account as having no tweets
  tweets <- tryCatch(
    userTimeline(accountsBusy[[i, 14]], n = 500, includeRts = TRUE),
    error = function(e) {
      warning("could not download timeline for account ",
              accountsBusy[[i, 14]], ": ", conditionMessage(e),
              call. = FALSE)
      NULL
    }
  )

  # nothing to summarise for this account; still pause before the next request
  if (length(tweets) == 0) {
    Sys.sleep(15)
    next
  }

  tweetsDF <- twListToDF(tweets)

  # activity over every downloaded tweet, retweets included
  allFav <- mean(tweetsDF$favoriteCount)
  allRT <- mean(tweetsDF$retweetCount)
  medFav <- median(tweetsDF$favoriteCount)
  medRT <- median(tweetsDF$retweetCount)
  meanIsRT <- mean(tweetsDF$isRetweet)

  # activity over original tweets only
  original <- filter(tweetsDF, isRetweet == FALSE)
  meanFav <- mean(original$favoriteCount)
  meanRT <- mean(original$retweetCount)
  medOFav <- median(original$favoriteCount)
  medORT <- median(original$retweetCount)
  orgTweet <- length(original$id)

  # read by position (row 1, column 5); presumably the `created` timestamp of
  # the most recent tweet -- TODO confirm
  lastTweet <- ymd_hms(tweetsDF[[1, 5]])

  # replies vs. non-replies among the original tweets; is.na() replaces the
  # comparison with the string "NA", which only worked because filter() drops
  # rows whose condition is NA
  reply <- filter(original, !is.na(replyToSN))
  notReply <- filter(original, is.na(replyToSN))
  nRFav <- mean(notReply$favoriteCount)
  nRRT <- mean(notReply$retweetCount)
  nRMedFav <- median(notReply$favoriteCount)
  nRMedRT <- median(notReply$retweetCount)
  nRTweet <- length(notReply$id)

  # one row per account; the expressions and variable names below become the
  # column names of the output file, so they are left exactly as they were
  newline <- cbind.data.frame(accountsBusy[[i, 2]],
                              accountsBusy[[i, 3]],
                              accountsBusy[[i, 5]],
                              accountsBusy[[i, 11]],
                              accountsBusy[[i, 14]],
                              meanFav, meanRT, meanIsRT, lastTweet,
                              allFav, allRT, medFav, medRT, medOFav, medORT,
                              nRFav, nRRT, nRMedFav, nRMedRT, orgTweet, nRTweet)
  atcAdd <- rbind.data.frame(atcAdd, newline)

  # the whole file is rewritten on every pass, which leaves the rows gathered
  # so far on disk if a later pass stops
  write.table(atcAdd, "data/atcAdd.csv", sep = ",", row.names = FALSE)
  # outputFile <- gsub("X", accountsBusy[[i, 14]], tweetText)
  # write.table(tweetsDF, outputFile, sep = ",", row.names = FALSE)

  # pause between accounts, presumably to stay under the API rate limit
  Sys.sleep(15)
}
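	# NOTE(review): the code from here on appears to repeat the script above
	# line for line (tab-indented), so sourcing this file would run the whole
	# analysis twice -- confirm whether the second copy is intentional
	# some retweet analysis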

	# load necessary packages
	library("twitteR")
	library("dplyr")
	library("lubridate")
	library("ggplot2")

	# make a twitter app at https://apps.twitter.com/
	# and get the api key, secret, and access token info
	#
	# Each credential is read from an environment variable so that real secrets
	# do not have to be saved in this script (they can be set in ~/.Renviron,
	# for example). When a variable is not set, the placeholder string is used,
	# so replacing the placeholders in place still works as before.

	api_key <- Sys.getenv("TWITTER_API_KEY", unset = "yourAPIkey")

	api_secret <- Sys.getenv("TWITTER_API_SECRET", unset = "yourAPIsecret")

	access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = "yourAccesstoken")

	access_token_secret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET",
	                                  unset = "yourAccesstokenSecret")

	# hand the four credentials to twitteR for the requests that follow
	setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)

	# download user timeline for a single account
	# in this case to get last 500 tweets of @asianturfgrass
	tweets <- userTimeline("asianturfgrass", n = 500, includeRts = TRUE)

	# convert to data frame
	tweetsDF <- twListToDF(tweets)

	# look only at original tweets, not retweets
	original <- filter(tweetsDF, isRetweet == FALSE)

	# mean and median of various tweet activity
	# the means are over original tweets only; the medians and the retweet
	# share are over every downloaded tweet, retweets included
	meanFav <- mean(original$favoriteCount)
	meanRT <- mean(original$retweetCount)
	medFav <- median(tweetsDF$favoriteCount)
	medRT <- median(tweetsDF$retweetCount)
	meanIsRT <- mean(tweetsDF$isRetweet)

	# date of last tweet, useful to identify inactive accounts
	# the value is read by position (row 1, column 5); presumably that is the
	# `created` timestamp of the most recent tweet -- TODO confirm against the
	# column order twListToDF() returns
	lastTweet <- ymd_hms(tweetsDF[[1, 5]])

	# split the original tweets into replies and non-replies, then calculate
	# activity on the non-replies
	# is.na() replaces the comparison with the string "NA": that comparison only
	# worked because filter() drops rows whose condition is NA, and it would
	# misclassify a reply to an account whose screen name is literally "NA"
	reply <- filter(original, !is.na(replyToSN))
	notReply <- filter(original, is.na(replyToSN))
	nRFav <- mean(notReply$favoriteCount)
	nRRT <- mean(notReply$retweetCount)
	nRMedFav <- median(notReply$favoriteCount)
	nRMedRT <- median(notReply$retweetCount)

	# the rest of the script repeats these calculations for every follower and
	# following account; the account to start from here is @asianturfgrass
	atc <- getUser("asianturfgrass")

	# look up the user records for everyone followed and for every follower
	atc.following <- lookupUsers(atc$getFriendIDs())
	atc.follower <- lookupUsers(atc$getFollowerIDs())

	# turn both lists into data frames
	following <- twListToDF(atc.following)
	followers <- twListToDF(atc.follower)

	# keep only the open (unprotected) followers, if not these requests are
	# denied; for following, access to the account already granted
	followers2 <- followers %>% filter(protected == FALSE)

	# all following plus the open followers, without repeated rows
	accounts <- dplyr::union(following, followers2)
	accountsD <- unique(accounts)

	# analyze only those accounts with at least 500 tweets
	accountsBusy <- accountsD %>% filter(statusesCount >= 500)

	# number of accounts the loop has to visit
	j <- nrow(accountsBusy)

	# empty data frame that the loop fills with all calculations
	atcAdd <- data.frame()

	# set this if one wants to save the downloaded user timelines
	# tweetText <- "data/TweetsDFX.csv"

	# this loop will download the timeline for all the accounts, calculate various
	# measures of tweet activity, and write to a file for later analysis.
	# the timelines can be written to file for string analysis
	#
	# accountsBusy is read by column position (2, 3, 5, 11, 14); column 14 is the
	# value passed to userTimeline() -- presumably the account id, TODO confirm

	# seq_len() skips the loop when no account qualifies; 1:j would run the body
	# with i = 1 and then i = 0 when j is 0
	for (i in seq_len(j)) {
	  # one failed request must not abort the whole run: warn and treat the
	  # account as having no tweets
	  tweets <- tryCatch(
	    userTimeline(accountsBusy[[i, 14]], n = 500, includeRts = TRUE),
	    error = function(e) {
	      warning("could not download timeline for account ",
	              accountsBusy[[i, 14]], ": ", conditionMessage(e),
	              call. = FALSE)
	      NULL
	    }
	  )

	  # nothing to summarise for this account; still pause before the next request
	  if (length(tweets) == 0) {
	    Sys.sleep(15)
	    next
	  }

	  tweetsDF <- twListToDF(tweets)

	  # activity over every downloaded tweet, retweets included
	  allFav <- mean(tweetsDF$favoriteCount)
	  allRT <- mean(tweetsDF$retweetCount)
	  medFav <- median(tweetsDF$favoriteCount)
	  medRT <- median(tweetsDF$retweetCount)
	  meanIsRT <- mean(tweetsDF$isRetweet)

	  # activity over original tweets only
	  original <- filter(tweetsDF, isRetweet == FALSE)
	  meanFav <- mean(original$favoriteCount)
	  meanRT <- mean(original$retweetCount)
	  medOFav <- median(original$favoriteCount)
	  medORT <- median(original$retweetCount)
	  orgTweet <- length(original$id)

	  # read by position (row 1, column 5); presumably the `created` timestamp of
	  # the most recent tweet -- TODO confirm
	  lastTweet <- ymd_hms(tweetsDF[[1, 5]])

	  # replies vs. non-replies among the original tweets; is.na() replaces the
	  # comparison with the string "NA", which only worked because filter() drops
	  # rows whose condition is NA
	  reply <- filter(original, !is.na(replyToSN))
	  notReply <- filter(original, is.na(replyToSN))
	  nRFav <- mean(notReply$favoriteCount)
	  nRRT <- mean(notReply$retweetCount)
	  nRMedFav <- median(notReply$favoriteCount)
	  nRMedRT <- median(notReply$retweetCount)
	  nRTweet <- length(notReply$id)

	  # one row per account; the expressions and variable names below become the
	  # column names of the output file, so they are left exactly as they were
	  newline <- cbind.data.frame(accountsBusy[[i, 2]],
	                              accountsBusy[[i, 3]],
	                              accountsBusy[[i, 5]],
	                              accountsBusy[[i, 11]],
	                              accountsBusy[[i, 14]],
	                              meanFav, meanRT, meanIsRT, lastTweet,
	                              allFav, allRT, medFav, medRT, medOFav, medORT,
	                              nRFav, nRRT, nRMedFav, nRMedRT, orgTweet, nRTweet)
	  atcAdd <- rbind.data.frame(atcAdd, newline)

	  # the whole file is rewritten on every pass, which leaves the rows gathered
	  # so far on disk if a later pass stops
	  write.table(atcAdd, "data/atcAdd.csv", sep = ",", row.names = FALSE)
	  # outputFile <- gsub("X", accountsBusy[[i, 14]], tweetText)
	  # write.table(tweetsDF, outputFile, sep = ",", row.names = FALSE)

	  # pause between accounts, presumably to stay under the API rate limit
	  Sys.sleep(15)
	}