tts/twsentiment.R

## twsentiment.R
########################################################################################################
#
# A light-weight sentiment analysis of the last update of (some) of my English-writing Twitter followers.
#
# Algorithm by http://jeffreybreen.wordpress.com/2011/07/04/twitter-text-mining-r-slides/
# at https://github.com/jeffreybreen/twitter-sentiment-analysis-tutorial-201107/blob/master/R/sentiment.R
#
# Word list from http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html
# (Linux: unrar e opinion-lexicon-English.rar)
#
# Tuija Sonkkila
# 9.7.2013
#
########################################################################################################

library(twitteR)
library(ROAuth)
library(plyr)

source("score.sentiment.R")

# Point RCurl's `cainfo` option at the cacert.pem file bundled with the
# RCurl package.
ca_bundle <- system.file("CurlSSL", "cacert.pem", package = "RCurl")
options(RCurlOptions = list(cainfo = ca_bundle))

# Application credentials -- replace the placeholders with your own values.
cKey <- "your consumer key"
cSecret <- "your consumer secret"

# Twitter OAuth endpoints.
reqURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"

credentials <- OAuthFactory$new(
  consumerKey = cKey,
  consumerSecret = cSecret,
  requestURL = reqURL,
  accessURL = accessURL,
  authURL = authURL
)

# Run the OAuth handshake, then hand the credentials object to twitteR.
credentials$handshake()
registerTwitterOAuth(credentials)

# Keep a copy of the credentials object on disk.
save(credentials, file = "twittercred.Rdata")

me <- getUser("ttso")

# My followers as a list
followers <- me$getFollowers()

# Transform to a data frame
fDf <- twListToDF(followers)

# The API limit is apparently 16 so I have to do this in parts.
# https://dev.twitter.com/docs/rate-limiting/1.1/limits
# Nevertheless, only 176 obs. is returned before the API throws a Client Error (429)
# I hit some other limit I guess.
# EDIT 10.7: this morning I got 208. Need to get my head around this limit thing...
# EDIT 11.7: look at https://gist.github.com/tts/5973930 where I circumvent the limit

# Empty, typed skeleton of the status data frame. It is what `allT` stays as
# when there are no followers to query.
allT <- data.frame(text = character(),
                   favorited = logical(),
                   replyToSN = character(),
                   created = character(),
                   truncated = logical(),
                   replyToSID = character(),
                   id = character(),
                   replyToUID = character(),
                   statusSource = character(),
                   screenName = character(),
                   retweetCount = integer(),
                   retweeted = logical(),
                   longitude = logical(),
                   latitude = logical())

# Fetch the last status of every follower in batches of `batch_size` rows.
# Each batch is an explicit row range clamped to the number of followers.
# The earlier version sliced from row i to the END of the frame and iterated
# over seq_along() of that data frame (i.e. its column count), so the batch
# size depended on the number of columns and, once fewer rows than columns
# remained, NA screen names were passed to getUser().
batch_size <- 16L
n_followers <- nrow(fDf)

# seq(1, 0, by = 16) is an error, so handle the no-followers case explicitly.
batch_starts <- if (n_followers > 0L) {
  seq(1L, n_followers, by = batch_size)
} else {
  integer(0)
}

batches <- lapply(batch_starts, function(first) {
  last <- min(first + batch_size - 1L, n_followers)
  statuses <- lapply(fDf$screenName[first:last],
                     function(sn) getUser(sn)$lastStatus)
  twListToDF(statuses)
})

# Bind all batches in one call instead of growing the data frame in the loop.
allT <- do.call(rbind, c(list(allT), batches))

# Opinion lexicon word lists, one word per line; lines starting with ";" are
# comments in the files and are skipped.
# http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html
#
# The two file names were previously swapped (pos read the negative list and
# vice versa), which inverted the sign of every score.
pos <- scan("positive-words.txt", what = "character", comment.char = ";")
neg <- scan("negative-words.txt", what = "character", comment.char = ";")

# Add few like Jeffrey does
neg <- c(neg, "wtf", "epicfail")

# Which tweets are in English?
# http://stackoverflow.com/questions/8078604/detect-text-language-in-r
library(cldr)
r <- detectLanguage(allT$text)
allT <- cbind(allT, r)

# Compute the English filter once. `%in%` never yields NA, so a row whose
# detected language is missing is excluded instead of turning into an all-NA
# row, which is what subsetting with `==` would produce.
is_english <- allT$detectedLanguage %in% "ENGLISH"
if (!any(is_english)) {
  stop("No English tweets found; nothing to score.", call. = FALSE)
}

# Analyze
result <- score.sentiment(allT$text[is_english], pos, neg)

# Group count by score
sc <- aggregate(result$score, by = list(result$score), FUN = length)

pre <- "Mood in some of my followers' last tweets (score:count). "

# "score:count" pairs followed by the number of English tweets, separated by
# single spaces.
summary_line <- paste(
  c(paste0(sc$Group.1, ":", sc$x),
    paste0("(N=", sum(is_english), ")")),
  collapse = " "
)

gistURL <- " http://tinyurl.com/ou2cdn8"
post <- " via twitteR"

status_text <- paste0(pre, summary_line, gistURL, post)

# Tweet the results
updateStatus(status_text)
	########################################################################################################
	#
	# A light-weight sentiment analysis of the last update of (some) of my English-writing Twitter followers.
	#
	# Algorithm by http://jeffreybreen.wordpress.com/2011/07/04/twitter-text-mining-r-slides/
	# at https://github.com/jeffreybreen/twitter-sentiment-analysis-tutorial-201107/blob/master/R/sentiment.R
	#
	# Word list from http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html
	# (Linux: unrar e opinion-lexicon-English.rar)
	#
	# Tuija Sonkkila
	# 9.7.2013
	#
	########################################################################################################

	library(twitteR)
	library(ROAuth)
	library(plyr)

	source("score.sentiment.R")

	# Point RCurl's `cainfo` option at the cacert.pem file bundled with the
	# RCurl package.
	ca_bundle <- system.file("CurlSSL", "cacert.pem", package = "RCurl")
	options(RCurlOptions = list(cainfo = ca_bundle))

	# Application credentials -- replace the placeholders with your own values.
	cKey <- "your consumer key"
	cSecret <- "your consumer secret"

	# Twitter OAuth endpoints.
	reqURL <- "https://api.twitter.com/oauth/request_token"
	accessURL <- "https://api.twitter.com/oauth/access_token"
	authURL <- "https://api.twitter.com/oauth/authorize"

	credentials <- OAuthFactory$new(
	  consumerKey = cKey,
	  consumerSecret = cSecret,
	  requestURL = reqURL,
	  accessURL = accessURL,
	  authURL = authURL
	)

	# Run the OAuth handshake, then hand the credentials object to twitteR.
	credentials$handshake()
	registerTwitterOAuth(credentials)

	# Keep a copy of the credentials object on disk.
	save(credentials, file = "twittercred.Rdata")

	me <- getUser("ttso")

	# My followers as a list
	followers <- me$getFollowers()

	# Transform to a data frame
	fDf <- twListToDF(followers)

	# The API limit is apparently 16 so I have to do this in parts.
	# https://dev.twitter.com/docs/rate-limiting/1.1/limits
	# Nevertheless, only 176 obs. is returned before the API throws a Client Error (429)
	# I hit some other limit I guess.
	# EDIT 10.7: this morning I got 208. Need to get my head around this limit thing...
	# EDIT 11.7: look at https://gist.github.com/tts/5973930 where I circumvent the limit

	# Empty, typed skeleton of the status data frame. It is what `allT` stays as
	# when there are no followers to query.
	allT <- data.frame(text = character(),
	                   favorited = logical(),
	                   replyToSN = character(),
	                   created = character(),
	                   truncated = logical(),
	                   replyToSID = character(),
	                   id = character(),
	                   replyToUID = character(),
	                   statusSource = character(),
	                   screenName = character(),
	                   retweetCount = integer(),
	                   retweeted = logical(),
	                   longitude = logical(),
	                   latitude = logical())

	# Fetch the last status of every follower in batches of `batch_size` rows.
	# Each batch is an explicit row range clamped to the number of followers.
	# The earlier version sliced from row i to the END of the frame and iterated
	# over seq_along() of that data frame (i.e. its column count), so the batch
	# size depended on the number of columns and, once fewer rows than columns
	# remained, NA screen names were passed to getUser().
	batch_size <- 16L
	n_followers <- nrow(fDf)

	# seq(1, 0, by = 16) is an error, so handle the no-followers case explicitly.
	batch_starts <- if (n_followers > 0L) {
	  seq(1L, n_followers, by = batch_size)
	} else {
	  integer(0)
	}

	batches <- lapply(batch_starts, function(first) {
	  last <- min(first + batch_size - 1L, n_followers)
	  statuses <- lapply(fDf$screenName[first:last],
	                     function(sn) getUser(sn)$lastStatus)
	  twListToDF(statuses)
	})

	# Bind all batches in one call instead of growing the data frame in the loop.
	allT <- do.call(rbind, c(list(allT), batches))

	# Opinion lexicon word lists, one word per line; lines starting with ";" are
	# comments in the files and are skipped.
	# http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html
	#
	# The two file names were previously swapped (pos read the negative list and
	# vice versa), which inverted the sign of every score.
	pos <- scan("positive-words.txt", what = "character", comment.char = ";")
	neg <- scan("negative-words.txt", what = "character", comment.char = ";")

	# Add few like Jeffrey does
	neg <- c(neg, "wtf", "epicfail")

	# Which tweets are in English?
	# http://stackoverflow.com/questions/8078604/detect-text-language-in-r
	library(cldr)
	r <- detectLanguage(allT$text)
	allT <- cbind(allT, r)

	# Compute the English filter once. `%in%` never yields NA, so a row whose
	# detected language is missing is excluded instead of turning into an all-NA
	# row, which is what subsetting with `==` would produce.
	is_english <- allT$detectedLanguage %in% "ENGLISH"
	if (!any(is_english)) {
	  stop("No English tweets found; nothing to score.", call. = FALSE)
	}

	# Analyze
	result <- score.sentiment(allT$text[is_english], pos, neg)

	# Group count by score
	sc <- aggregate(result$score, by = list(result$score), FUN = length)

	pre <- "Mood in some of my followers' last tweets (score:count). "

	# "score:count" pairs followed by the number of English tweets, separated by
	# single spaces.
	summary_line <- paste(
	  c(paste0(sc$Group.1, ":", sc$x),
	    paste0("(N=", sum(is_english), ")")),
	  collapse = " "
	)

	gistURL <- " http://tinyurl.com/ou2cdn8"
	post <- " via twitteR"

	status_text <- paste0(pre, summary_line, gistURL, post)

	# Tweet the results
	updateStatus(status_text)