Skip to content

Instantly share code, notes, and snippets.

@sureshgorakala
Last active August 29, 2015 14:01
Show Gist options
  • Save sureshgorakala/dd10617a0d1093c8f654 to your computer and use it in GitHub Desktop.
Save sureshgorakala/dd10617a0d1093c8f654 to your computer and use it in GitHub Desktop.
test
library(twitteR)
library(tm)
library(stringr)
library(wordcloud)
# Fetch data ----
# Restore the saved OAuth credentials and register them with twitteR.
# NOTE(review): presumably the loaded file defines `twitCred` - confirm.
load("D:/Suresh R&D/Emirates Airlines SNA/twitteR_credentials")
registerTwitterOAuth(twitCred)
tweets <- searchTwitter(
  "@emirates",
  lang = "en",
  cainfo = "D:/Suresh R&D/Emirates Airlines SNA/cacert.pem"
)

# Remove retweets ----
# Filter() copes with an empty search result; an index loop over
# 1:length(tweets) would visit tweets[[1]] and tweets[[0]] and fail.
dm_tweets <- Filter(function(tweet) !isTRUE(tweet$isRetweet), tweets)

# Extract the text of every remaining tweet. vapply() guarantees a character
# vector (character(0) when nothing is left) instead of sapply()'s empty list.
tweets_txt <- vapply(dm_tweets, function(tweet) tweet$getText(), character(1))

# Build a tm corpus from the tweet text ----
corpus <- Corpus(VectorSource(tweets_txt))
# Normalise a tm corpus: strip punctuation, collapse whitespace, lower-case
# the text and drop English stop words.
#
# corpus: a tm corpus object.
# Returns the transformed corpus.
cleanCorpus <- function(corpus) {
  corpus.tmp <- tm_map(corpus, removePunctuation)
  corpus.tmp <- tm_map(corpus.tmp, stripWhitespace)
  # tolower() is a base function, not a tm transformation. Wrapping it in
  # content_transformer() keeps each document a TextDocument so the later
  # tm_map() steps keep working.
  corpus.tmp <- tm_map(corpus.tmp, content_transformer(tolower))
  corpus.tmp <- tm_map(corpus.tmp, removeWords, stopwords("english"))
  # corpus.tmp <- tm_map(corpus.tmp, stemDocument)
  corpus.tmp
}
#function to clean data
# Strip Twitter noise from raw tweet text so it can be split into words.
#
# tweets: character vector of raw tweet texts.
# Returns a character vector of the same length holding only letters,
# with words separated by single spaces and no leading/trailing space.
cleanTweets <- function(tweets) {
  # Drop "RT @user" / "via @user" attributions, then links and mentions.
  tweets_cl <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", tweets)
  tweets_cl <- gsub("http[^[:blank:]]+", "", tweets_cl)
  tweets_cl <- gsub("@\\w+", "", tweets_cl)
  # Turn every non-alphanumeric character (punctuation included) into a
  # space, so neighbouring words stay separated.
  tweets_cl <- gsub("[^[:alnum:]]", " ", tweets_cl)
  tweets_cl <- gsub("\\d+", "", tweets_cl)
  # Collapse whitespace runs to ONE space. Replacing them with "" would glue
  # together the words on either side of a removed link or mention.
  tweets_cl <- gsub("\\s+", " ", tweets_cl)
  # Trim last, because the substitutions above can leave spaces at the edges.
  gsub("^\\s+|\\s+$", "", tweets_cl)
}
#Sentiment function
# Count sentiment-lexicon hits in each sentence.
#
# sentences: character vector of raw texts.
# vNegTerms, negTerms, posTerms, vPosTerms: character vectors of very
#   negative, negative, positive and very positive words.
# Returns a list with one element per sentence. Each element is a 1 x 5
# character matrix (row name "newrow") holding the original sentence followed
# by the very-negative, negative, positive and very-positive hit counts.
sentimentScore <- function(sentences, vNegTerms, negTerms, posTerms, vPosTerms) {
  # Empty 0 x 5 template; every sentence binds its row onto a fresh copy.
  empty_scores <- matrix('', 0, 5)

  # Number of tokens that occur in the given lexicon.
  count_hits <- function(tokens, lexicon) {
    sum(!is.na(match(tokens, lexicon)))
  }

  score_one <- function(sentence) {
    # Clean, lower-case and split the sentence into words.
    tokens <- unlist(str_split(tolower(cleanTweets(sentence)), '\\s+'))
    # The variable name doubles as the row name that rbind() assigns.
    newrow <- c(
      sentence,
      count_hits(tokens, vNegTerms),
      count_hits(tokens, negTerms),
      count_hits(tokens, posTerms),
      count_hits(tokens, vPosTerms)
    )
    rbind(empty_scores, newrow)
  }

  scores <- lapply(sentences, score_one)
  return(scores)
}
#convert dataframe to list object
# Kept so existing references to the global `x` still resolve; data_txt() no
# longer reads it.
x <- list()

# Convert the first column of a data frame into a list of strings.
#
# data: a data frame (or matrix); only column 1 is used.
# Returns an unnamed list with one character element per row, and an empty
# list for a 0-row input.
data_txt <- function(data) {
  # Whole-column conversion avoids the 1:nrow() loop, which visited rows 1
  # and 0 of an empty input and returned list(NA).
  as.list(as.character(data[, 1]))
}
# Load the AFINN word list (word, score) ----
afinn_list <- setNames(
  read.delim(
    file = 'D:/Suresh R&D/sentiment words/AFINN/AFINN-111.txt',
    header = FALSE,
    stringsAsFactors = FALSE
  ),
  c('word', 'score')
)
afinn_list$word <- tolower(afinn_list$word)

# Bucket words from very negative to very positive by AFINN score, and append
# some movie-specific words to three of the buckets.
vNegTerms <- with(afinn_list, word[score == -5 | score == -4])
negTerms <- c(
  with(afinn_list, word[score == -3 | score == -2 | score == -1]),
  "second-rate", "moronic", "third-rate", "flawed", "juvenile", "boring",
  "distasteful", "ordinary", "disgusting", "senseless", "static", "brutal",
  "confused", "disappointing", "bloody", "silly", "tired", "predictable",
  "stupid", "uninteresting", "trite", "uneven", "outdated", "dreadful",
  "bland"
)
posTerms <- c(
  with(afinn_list, word[score == 3 | score == 2 | score == 1]),
  "first-rate", "insightful", "clever", "charming", "comical", "charismatic",
  "enjoyable", "absorbing", "sensitive", "intriguing", "powerful",
  "pleasant", "surprising", "thought-provoking", "imaginative",
  "unpretentious"
)
vPosTerms <- c(
  with(afinn_list, word[score == 5 | score == 4]),
  "uproarious", "riveting", "fascinating", "dazzling", "legendary"
)

# Score every tweet; as.data.frame() spreads the per-tweet matrices side by
# side as columns.
SentiResult <- as.data.frame(
  sentimentScore(
    tweets_txt,
    vNegTerms = vNegTerms,
    negTerms = negTerms,
    posTerms = posTerms,
    vPosTerms = vPosTerms
  )
)
# Kept so existing references to the global `new_lst` still resolve;
# Convert_toLST() no longer reads it.
new_lst <- list()

# Split a wide data frame into consecutive groups of columns.
#
# df: data frame whose columns come in blocks of `chunk_size`.
# chunk_size: number of columns per block (default 5).
# Returns a list with one element per complete block; each element is the
# block's columns as a named list. Columns left over after the last complete
# block are dropped with a warning.
Convert_toLST <- function(df, chunk_size = 5L) {
  stopifnot(length(chunk_size) == 1L, chunk_size >= 1)
  n_cols <- ncol(df)
  n_chunks <- n_cols %/% chunk_size
  if (n_cols %% chunk_size != 0) {
    warning(
      "Ignoring ", n_cols %% chunk_size, " trailing column(s): ncol(df) is ",
      "not a multiple of ", chunk_size,
      call. = FALSE
    )
  }
  # Build the result locally so it never starts from stale global contents.
  lapply(seq_len(n_chunks), function(k) {
    last <- k * chunk_size
    as.list(df[(last - chunk_size + 1):last])
  })
}
# Collapse the per-tweet score columns into NEG / POS counts and build the
# `results` table and the `out` object that the Shiny server below reads.
# Regroup the wide SentiResult data frame into one list per 5-column chunk.
Result_lst = Convert_toLST(SentiResult)
mod_lst = list()
# For each chunk, add the X2 and X3 values into NEG and the X4 and X5 values
# into POS.
# NOTE(review): presumably X1 is the tweet text and X2..X5 are the
# very-negative, negative, positive and very-positive counts, following the
# column order built in sentimentScore(); this depends on how as.data.frame()
# named the columns of SentiResult - confirm.
# NOTE(review): 1:length(Result_lst) evaluates to c(1, 0) when the list is
# empty, so the loop body would still run - confirm that case cannot occur.
for(i in 1:length(Result_lst)){
negCount = sum(as.numeric(as.character(Result_lst[[i]]$X2)),as.numeric(as.character(Result_lst[[i]]$X3)))
posCount = sum(as.numeric(as.character(Result_lst[[i]]$X4)),as.numeric(as.character(Result_lst[[i]]$X5)))
# c() puts the text and both counts into one vector, so the counts are stored
# as character strings; they are converted back with as.numeric() below.
mod_lst[[i]] = as.list(c(sentence = as.character(Result_lst[[i]]$X1),NEG = negCount,POS = posCount))
}
# Grow `results` by one row per tweet: text, NEG, POS.
results = data.frame()
for(i in 1:length(mod_lst)){
results =rbind(results,cbind(as.character(mod_lst[[i]]$sentence[1]),mod_lst[[i]][2],mod_lst[[i]][3]))
# `out` (the transposed first column of `results`) is rebuilt on every
# iteration, so after the loop it reflects the complete table.
out =t(as.data.frame(results[,1],optional=TRUE))
# Print the running totals of columns 2 and 3 to the console on each pass.
print(sum(as.numeric(unlist(results[,2]))))
print(sum(as.numeric(unlist(results[,3]))))
}
# Label the columns only after the loop has finished appending rows.
names(results) = c("Tweets","NEG","POS")
# Shiny server: publishes the tweet table and the summed NEG / POS counts
# computed earlier in this file (`out` and `results`).
shinyServer(function(input, output, session) {
  # Timer that fires every 5000 ms; calling it inside a render block makes
  # that block re-run on each tick.
  autoInvalidate <- reactiveTimer(5000, session)

  # Sum one column of `results` and wrap the total in a one-cell data frame
  # whose header is `label`.
  total_table <- function(column, label) {
    total <- as.data.frame(cbind(sum(as.numeric(unlist(results[, column])))))
    names(total) <- c(label)
    total
  }

  output$tweets <- renderTable({
    autoInvalidate()
    # sample(results)
    tweet_table <- out
    names(tweet_table) <- c("Tweets")
    tweet_table
  })

  output$neg <- renderTable({
    total_table(2, "NEG")
  })

  output$pos <- renderTable({
    total_table(3, "POS")
  })
})
library(shiny)
# Shiny UI: a title plus an HTML table whose left cell (spanning two rows)
# shows the tweets, with the POS total on the top right and the NEG total on
# the bottom right.
shinyUI(
  fluidPage(
    titlePanel("What People are talking about @emirates"),
    mainPanel(
      tags$table(
        tags$tr(
          tags$td(rowspan = "2", tableOutput("tweets")),
          tags$td(tableOutput("pos"))
        ),
        tags$tr(
          tags$td(tableOutput("neg"))
        )
      )
    )
  )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment