Skip to content

Instantly share code, notes, and snippets.

@micahwoods
Last active August 29, 2015 14:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save micahwoods/ed605716649d1edf785d to your computer and use it in GitHub Desktop.
Save micahwoods/ed605716649d1edf785d to your computer and use it in GitHub Desktop.
Downloads user timelines and calculates favorites and retweets for the last X tweets
# some retweet analysis
# load necessary packages
library("twitteR")
library("dplyr")
library("lubridate")
library("ggplot2")
# make a twitter app at https://apps.twitter.com/
# and get the api key, secret, and access token info
# Each credential is read from an environment variable when that variable is
# set, so real secrets do not have to be saved inside this script. When the
# variable is not set, the placeholder string is used and must be replaced
# by hand before running.
api_key <- Sys.getenv("TWITTER_API_KEY", unset = "yourAPIkey")
api_secret <- Sys.getenv("TWITTER_API_SECRET", unset = "yourAPIsecret")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = "yourAccesstoken")
access_token_secret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET",
                                  unset = "yourAccesstokenSecret")
# register the credentials for the twitteR calls made later in the script
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
# download user timeline for a single account
# in this case to get last 500 tweets of @asianturfgrass
tweets <- userTimeline("asianturfgrass", n = 500, includeRts = TRUE)
# convert to data frame
tweetsDF <- twListToDF(tweets)
# look only at original tweets, not retweets
original <- filter(tweetsDF, !isRetweet)
# mean and median of various tweet activity
# means are computed on original tweets only
meanFav <- mean(original$favoriteCount)
meanRT <- mean(original$retweetCount)
# NOTE(review): the medians are taken over ALL downloaded tweets (retweets
# included), unlike the means above -- confirm this is intended
medFav <- median(tweetsDF$favoriteCount)
medRT <- median(tweetsDF$retweetCount)
# fraction of the downloaded tweets that are retweets
meanIsRT <- mean(tweetsDF$isRetweet)
# date of last tweet, useful to identify inactive accounts
# the first row is used; the column is referenced by name rather than by
# position so the lookup does not depend on column order
lastTweet <- ymd_hms(tweetsDF$created[1])
# find which tweets are replies, calculate activity on those
# a tweet is a reply when replyToSN is not missing; test with is.na()
# instead of comparing against the string "NA"
reply <- filter(original, !is.na(replyToSN))
notReply <- filter(original, is.na(replyToSN))
nRFav <- mean(notReply$favoriteCount)
nRRT <- mean(notReply$retweetCount)
nRMedFav <- median(notReply$favoriteCount)
nRMedRT <- median(notReply$retweetCount)
# build the list of follower and following accounts that the loop will process
# choose the account to work with, in this case I use @asianturfgrass
atc <- getUser("asianturfgrass")
# gets all following and followers
atc.following <- lookupUsers(atc$getFriendIDs())
atc.follower <- lookupUsers(atc$getFollowerIDs())
# converts lists to data frames
followers <- twListToDF(atc.follower)
following <- twListToDF(atc.following)
# filter followers to be only open accounts, if not these requests are denied
# for following, access to the account already granted
followers2 <- filter(followers, !protected)
# combine the accounts, all following, and open followers
accounts <- dplyr::union(following, followers2)
# union() only drops rows that are identical in every column; an account that
# is both followed and a follower can still appear twice if any field differs
# between the two lookups, so keep one row per account id
# (base subsetting is used so the column order is left untouched)
accountsD <- accounts[!duplicated(accounts$id), ]
# analyze only those accounts with >= than 500 tweets
accountsBusy <- filter(accountsD, statusesCount >= 500)
# set a blank data frame to fill in the loop with all calculations
atcAdd <- data.frame()
# set this if one wants to save the downloaded user timelines
# tweetText <- "data/TweetsDFX.csv"
# make sure the output directory exists before any download time is spent
if (!dir.exists("data")) {
  dir.create("data")
}
j <- length(accountsBusy$id)
# this loop will download the timeline for all the accounts, calculate various
# measures of tweet activity, and write to a file for later analysis.
# the timelines can be written to file for string analysis
# seq_len() is used so the loop body is skipped when no account qualifies
for (i in seq_len(j)) {
  # column 14 of accountsBusy is passed as the user to download
  # (presumably the account id -- verify against twListToDF output);
  # a failed download skips this account instead of aborting the whole run
  tweets <- tryCatch(
    userTimeline(accountsBusy[[i, 14]], n = 500, includeRts = TRUE),
    error = function(e) {
      message("Skipping account ", accountsBusy[[i, 14]], ": ",
              conditionMessage(e))
      NULL
    }
  )
  # nothing to summarise for this account; still pause between API requests
  if (length(tweets) == 0) {
    Sys.sleep(15)
    next
  }
  tweetsDF <- twListToDF(tweets)
  # activity over all downloaded tweets, retweets included
  allFav <- mean(tweetsDF$favoriteCount)
  allRT <- mean(tweetsDF$retweetCount)
  medFav <- median(tweetsDF$favoriteCount)
  medRT <- median(tweetsDF$retweetCount)
  # activity over original tweets only
  original <- filter(tweetsDF, !isRetweet)
  meanFav <- mean(original$favoriteCount)
  meanRT <- mean(original$retweetCount)
  medOFav <- median(original$favoriteCount)
  medORT <- median(original$retweetCount)
  # fraction of the downloaded tweets that are retweets
  meanIsRT <- mean(tweetsDF$isRetweet)
  # creation time of the first row, referenced by column name
  lastTweet <- ymd_hms(tweetsDF$created[1])
  # split original tweets into replies and non-replies; a missing replyToSN
  # marks a non-reply, tested with is.na() rather than the string "NA"
  reply <- filter(original, !is.na(replyToSN))
  notReply <- filter(original, is.na(replyToSN))
  nRFav <- mean(notReply$favoriteCount)
  nRRT <- mean(notReply$retweetCount)
  nRMedFav <- median(notReply$favoriteCount)
  nRMedRT <- median(notReply$retweetCount)
  # number of original tweets and of non-reply original tweets
  orgTweet <- length(original$id)
  nRTweet <- length(notReply$id)
  # one summary row for this account; the account columns are selected by
  # position and left exactly as written so the CSV header stays the same
  newline <- cbind.data.frame(accountsBusy[[i, 2]],
                              accountsBusy[[i, 3]],
                              accountsBusy[[i, 5]],
                              accountsBusy[[i, 11]],
                              accountsBusy[[i, 14]],
                              meanFav, meanRT, meanIsRT, lastTweet,
                              allFav, allRT, medFav, medRT, medOFav, medORT,
                              nRFav, nRRT, nRMedFav, nRMedRT, orgTweet, nRTweet)
  atcAdd <- rbind.data.frame(atcAdd, newline)
  # the full table is rewritten on every iteration, so the results gathered
  # so far are on disk if a later request fails
  write.table(atcAdd, "data/atcAdd.csv", sep = ",", row.names = FALSE)
  # outputFile <- gsub("X", accountsBusy[[i, 14]], tweetText)
  # write.table(tweetsDF, outputFile, sep = ",", row.names = FALSE)
  # pause between accounts before the next API request
  Sys.sleep(15)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment