Created
July 25, 2014 13:54
-
-
Save s13731105/bbcb6f83b8446747a38d to your computer and use it in GitHub Desktop.
Twitter Sentiment Analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used by this script. plyr and stringr are the two packages the
# sentiment scoring below relies on for iteration and string splitting.
library(twitteR)
library(ROAuth)
library(ggplot2)
library(plyr)
library(stringr)

# Fetch up to 1000 tweets carrying the #kejriwal hashtag.
# NOTE(review): no Twitter authentication step is visible in this file;
# presumably a session is set up elsewhere before this call - confirm.
kejriwal.list <- searchTwitter("#kejriwal", n = 1000)

# twListToDF() takes a list of objects from a single twitteR class and
# returns a data.frame version of the members.
kejriwal.df <- twListToDF(kejriwal.list)

# Persist the raw tweets so the scoring step can re-read them from disk.
write.csv(kejriwal.df, file = "c:/test/kejriwal.csv", row.names = FALSE)
# Score texts by counting hits against a positive and a negative word list.
#
# Arguments:
#   sentence   vector of texts to score; one score is produced per element.
#   pos.words  vector of positive words.
#   neg.words  vector of negative words.
#   .progress  name of the plyr progress bar, forwarded to plyr::laply()
#              ("none" by default).
#
# Returns a data.frame with one row per input element and two columns:
#   score  number of tokens found in pos.words minus the number found in
#          neg.words (every occurrence of a word counts)
#   text   the input exactly as it was passed in
#
# Tokens are compared after punctuation and digits have been stripped and the
# text lower-cased, so a lexicon entry containing upper-case letters, digits
# or punctuation can never match.
score.sentiment <- function(sentence, pos.words, neg.words, .progress = "none") {
  # Scores a single text; pos.words / neg.words come from the enclosing call.
  score_one <- function(tweet) {
    cleaned <- gsub("[[:punct:]]", "", tweet)
    # Control characters include newline and tab. Replace them with a space
    # rather than deleting them, otherwise words separated only by a line
    # break are fused into one token and miss the lexicon.
    cleaned <- gsub("[[:cntrl:]]", " ", cleaned)
    cleaned <- gsub("\\d+", "", cleaned)
    tokens <- unlist(stringr::str_split(tolower(cleaned), "\\s+"))
    sum(tokens %in% pos.words) - sum(tokens %in% neg.words)
  }

  # Namespace-qualified calls fail loudly when plyr/stringr are missing,
  # whereas require() would only return FALSE.
  scores <- plyr::laply(sentence, score_one, .progress = .progress)
  data.frame(score = scores, text = sentence)
}
# Score the saved tweets against the opinion lexicon and visualise the result.

# Directory holding the lexicon files and the tweet CSV; results go here too.
path <- "c:/test/"

# Load the word lists; anything after a ';' on a line is treated as a comment.
hu.liu.pos <- scan(paste0(path, "positive-words.txt"),
                   what = "character", comment.char = ";")
hu.liu.neg <- scan(paste0(path, "negative-words.txt"),
                   what = "character", comment.char = ";")

# Extend the lexicon with a few extra terms.
pos.words <- c(hu.liu.pos, "upgrade")
neg.words <- c(hu.liu.neg, "wtf", "wait", "waiting", "epicfail", "mechanical")

# Import the csv file. Tweet text is free text, not a categorical variable,
# so it is kept as character instead of being converted to a factor.
DatasetKejriwal <- read.csv(paste0(path, "kejriwal.csv"),
                            stringsAsFactors = FALSE)

# Score all tweets.
kejriwal.scores <- score.sentiment(DatasetKejriwal$text, pos.words, neg.words,
                                   .progress = "text")
write.csv(kejriwal.scores, file = paste0(path, "kejriwalScores.csv"),
          row.names = TRUE)

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(kejriwal.scores)
}

# Visualizing the tweets.
hist(kejriwal.scores$score, xlab = "Score of Tweets", col = "red")

# Explicit ggplot() call instead of the deprecated qplot(); print() makes the
# plot render when the script is run via source() or Rscript as well.
# Scores are whole numbers, hence one bin per score value.
print(
  ggplot(kejriwal.scores, aes(x = score)) +
    geom_histogram(binwidth = 1) +
    xlab("Score of Tweets")
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment