Last active
August 29, 2015 14:14
-
-
Save micahwoods/ed605716649d1edf785d to your computer and use it in GitHub Desktop.
Downloads user timelines and calculates favorites and retweets for the last X tweets.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Retweet analysis: setup ----

# Load the packages this script relies on.
library("twitteR")
library("dplyr")
library("lubridate")
library("ggplot2")

# Create a Twitter app at https://apps.twitter.com/ and copy its API key,
# API secret, access token, and access token secret into the placeholders
# below. Do not commit real credentials to version control.
api_key <- "yourAPIkey"
api_secret <- "yourAPIsecret"
access_token <- "yourAccesstoken"
access_token_secret <- "yourAccesstokenSecret"

# Register the credentials with twitteR so later API calls are authorised.
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
# Single-account analysis ----

# Download the timeline of one account: here, up to the last 500 tweets of
# @asianturfgrass, retweets included.
tweets <- userTimeline("asianturfgrass", n = 500, includeRts = TRUE)

# Convert the list returned by userTimeline() to a data frame.
tweetsDF <- twListToDF(tweets)

# Keep only original tweets, dropping retweets.
original <- filter(tweetsDF, !isRetweet)

# Mean and median of tweet activity. The means use original tweets only;
# the medians and the retweet share use every downloaded tweet.
meanFav <- mean(original$favoriteCount)
meanRT <- mean(original$retweetCount)
medFav <- median(tweetsDF$favoriteCount)
medRT <- median(tweetsDF$retweetCount)
# Mean of a logical column, i.e. the fraction of downloaded tweets that are
# retweets.
meanIsRT <- mean(tweetsDF$isRetweet)

# Date of the last tweet, useful to identify inactive accounts.
# NOTE(review): row 1, column 5 is presumably the `created` timestamp of the
# most recent tweet -- confirm against the twListToDF() column order.
lastTweet <- ymd_hms(tweetsDF[[1, 5]])

# Split original tweets into replies and non-replies. A missing reply target
# is tested with is.na(): comparing against the string "NA" only worked
# because filter() drops rows whose condition evaluates to NA.
reply <- filter(original, !is.na(replyToSN))
notReply <- filter(original, is.na(replyToSN))

# Activity on the non-reply original tweets.
nRFav <- mean(notReply$favoriteCount)
nRRT <- mean(notReply$retweetCount)
nRMedFav <- median(notReply$favoriteCount)
nRMedRT <- median(notReply$retweetCount)
# Collect follower and following accounts ----
# These accounts feed the per-account loop that follows.

# Choose the account to work with; in this case @asianturfgrass.
atc <- getUser("asianturfgrass")

# Look up every account that @asianturfgrass follows and every account that
# follows it.
atc.following <- lookupUsers(atc$getFriendIDs())
atc.follower <- lookupUsers(atc$getFollowerIDs())

# Convert the user lists to data frames.
followers <- twListToDF(atc.follower)
following <- twListToDF(atc.following)

# Keep only followers with open (non-protected) accounts; timeline requests
# for protected accounts are denied. For accounts being followed, access has
# already been granted, so no filtering is needed.
followers2 <- filter(followers, !protected)

# Combine all followed accounts with the open followers, dropping duplicate
# rows.
accounts <- dplyr::union(following, followers2)
accountsD <- unique(accounts)

# Analyze only those accounts with at least 500 tweets.
accountsBusy <- filter(accountsD, statusesCount >= 500)
# Per-account timeline loop ----

# Start from an empty data frame; one row of results is appended per account.
atcAdd <- data.frame()

# Set this if one wants to save the downloaded user timelines.
# tweetText <- "data/TweetsDFX.csv"

# Number of accounts to process.
j <- length(accountsBusy$id)

# The results file is written under data/; create the directory up front so
# the first write cannot fail after a timeline has already been downloaded.
if (!dir.exists("data")) {
  dir.create("data")
}

# For every account: download its timeline, calculate various measures of
# tweet activity, and rewrite the results file so that progress survives an
# interrupted run. The timelines themselves can also be written to file for
# string analysis (see the commented-out lines).
#
# NOTE(review): accountsBusy columns are addressed by position. Column 14 is
# presumably the user id and columns 2, 3, 5, 11 presumably statusesCount,
# followersCount, friendsCount and screenName -- confirm against the
# twListToDF() column order. The positions and the unnamed cbind columns are
# kept so the CSV header stays the same for downstream readers.
#
# seq_len(j) yields an empty sequence when no account qualifies, whereas 1:j
# would iterate over c(1, 0).
for (i in seq_len(j)) {
  tweets <- userTimeline(accountsBusy[[i, 14]], n = 500, includeRts = TRUE)
  tweetsDF <- twListToDF(tweets)

  # Activity over every downloaded tweet, retweets included.
  allFav <- mean(tweetsDF$favoriteCount)
  allRT <- mean(tweetsDF$retweetCount)
  medFav <- median(tweetsDF$favoriteCount)
  medRT <- median(tweetsDF$retweetCount)

  # Activity over original tweets only.
  original <- filter(tweetsDF, !isRetweet)
  meanFav <- mean(original$favoriteCount)
  meanRT <- mean(original$retweetCount)
  medOFav <- median(original$favoriteCount)
  medORT <- median(original$retweetCount)

  # Fraction of downloaded tweets that are retweets.
  meanIsRT <- mean(tweetsDF$isRetweet)

  # NOTE(review): row 1, column 5 is presumably the `created` timestamp of
  # the most recent tweet -- confirm.
  lastTweet <- ymd_hms(tweetsDF[[1, 5]])

  # Split original tweets into replies and non-replies. Missing reply targets
  # are tested with is.na() rather than compared against the string "NA".
  reply <- filter(original, !is.na(replyToSN))
  notReply <- filter(original, is.na(replyToSN))
  nRFav <- mean(notReply$favoriteCount)
  nRRT <- mean(notReply$retweetCount)
  nRMedFav <- median(notReply$favoriteCount)
  nRMedRT <- median(notReply$retweetCount)

  # Counts of original and non-reply tweets behind the statistics above.
  orgTweet <- length(original$id)
  nRTweet <- length(notReply$id)

  # One row of results for this account.
  newline <- cbind.data.frame(accountsBusy[[i, 2]],
                              accountsBusy[[i, 3]],
                              accountsBusy[[i, 5]],
                              accountsBusy[[i, 11]],
                              accountsBusy[[i, 14]],
                              meanFav, meanRT, meanIsRT, lastTweet,
                              allFav, allRT, medFav, medRT, medOFav, medORT,
                              nRFav, nRRT, nRMedFav, nRMedRT, orgTweet, nRTweet)
  atcAdd <- rbind.data.frame(atcAdd, newline)

  # Rewrite the whole results file on every pass (checkpointing).
  write.table(atcAdd, "data/atcAdd.csv", sep = ",", row.names = FALSE)

  # outputFile <- gsub("X", accountsBusy[[i, 14]], tweetText)
  # write.table(tweetsDF, outputFile, sep = ",", row.names = FALSE)

  # Pause 15 seconds between accounts (presumably to stay under the Twitter
  # API rate limit).
  Sys.sleep(15)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment