# sureshgorakala/server.R

## server.R
library(twitteR)
library(tm)
library(stringr)
library(wordcloud)
# Fetch data.
# Restore the saved OAuth credentials object (`twitCred`) from disk, register
# it with twitteR, then search for English-language tweets that mention
# @emirates. The CA bundle path is forwarded to the HTTPS layer via `cainfo`.
load("D:/Suresh R&D/Emirates Airlines SNA/twitteR_credentials")
registerTwitterOAuth(twitCred)
tweets <- searchTwitter(
  "@emirates",
  lang = "en",
  cainfo = "D:/Suresh R&D/Emirates Airlines SNA/cacert.pem"
)

# Remove retweets: keep only statuses whose `isRetweet` flag is not TRUE.
# Filter() handles an empty search result (the former `1:length(tweets)` loop
# iterated over c(1, 0) and indexed past the end of an empty list) and keeps
# every status object intact as one list element.
dm_tweets <- Filter(function(status) !isTRUE(status$isRetweet), tweets)
# `z` used to be the loop's write cursor and was reset to 0 afterwards; the
# final value is kept in case anything else inspects it.
z <- 0

# Extract the text of every remaining tweet. vapply() always yields a
# character vector, even when no tweets are left (sapply() would give list()).
tweets_txt <- vapply(dm_tweets, function(x) x$getText(), character(1))
# Build a tm corpus with one document per tweet text.
corpus <- Corpus(VectorSource(tweets_txt))

# function2 for clean data
# Normalises a tm corpus: strips punctuation, collapses whitespace,
# lower-cases the text and removes English stop words.
#
# Args:
#   corpus: a tm corpus.
# Returns:
#   The transformed corpus.
cleanCorpus <- function(corpus) {
  # tolower() is a plain character function. Newer tm releases require such
  # functions to be wrapped in content_transformer(), otherwise the documents
  # are replaced by bare strings and later tm_map() calls fail. Older tm
  # releases have no content_transformer(), so fall back to the bare function.
  to_lower <- if (exists("content_transformer")) {
    content_transformer(tolower)
  } else {
    tolower
  }
  corpus.tmp <- tm_map(corpus, removePunctuation)
  corpus.tmp <- tm_map(corpus.tmp, stripWhitespace)
  corpus.tmp <- tm_map(corpus.tmp, to_lower)
  corpus.tmp <- tm_map(corpus.tmp, removeWords, stopwords("english"))
  # corpus.tmp <- tm_map(corpus.tmp, stemDocument)
  corpus.tmp
}

# Function to clean data.
# Strips retweet markers, URLs, @mentions, punctuation and digits from tweet
# text so that only whitespace-separated words remain.
#
# Args:
#   tweets: character vector of raw tweet texts.
# Returns:
#   Character vector of the same length; words are separated by single
#   spaces and there is no leading or trailing whitespace.
cleanTweets <- function(tweets) {
  # Drop "RT"/"via" attributions together with the handles that follow them.
  tweets_cl <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", tweets)
  # Drop URLs.
  tweets_cl <- gsub("http[^[:blank:]]+", "", tweets_cl)
  # Drop any remaining @mentions.
  tweets_cl <- gsub("@\\w+", "", tweets_cl)
  # Replace every non-alphanumeric character (punctuation included) by a
  # space so that words on either side stay separated.
  tweets_cl <- gsub("[^[:alnum:]]", " ", tweets_cl)
  # Drop digits.
  tweets_cl <- gsub("\\d+", "", tweets_cl)
  # Collapse whitespace runs into ONE space. Deleting the run entirely (as the
  # earlier version did) glued neighbouring words together, e.g.
  # "good @user day" became "goodday" and could no longer match any term.
  tweets_cl <- gsub("\\s+", " ", tweets_cl)
  # Trim last, after all substitutions that can leave edge whitespace.
  gsub("^\\s+|\\s+$", "", tweets_cl)
}


# Sentiment function.
# Counts, for every sentence, how many of its words appear in each of the
# four term lists.
#
# Args:
#   sentences: character vector (or list) of texts to score.
#   vNegTerms, negTerms, posTerms, vPosTerms: character vectors of very
#     negative, negative, positive and very positive terms.
# Returns:
#   A list with one element per sentence. Each element is a 1 x 5 character
#   matrix: original text, then the very-negative, negative, positive and
#   very-positive match counts.
sentimentScore <- function(sentences, vNegTerms, negTerms, posTerms, vPosTerms) {
  # Empty 0 x 5 character matrix that each sentence's row is bound onto.
  final_scores <- matrix('', 0, 5)

  score_sentence <- function(sentence) {
    # Remove unnecessary characters, lower-case, and split up by word.
    words <- unlist(str_split(tolower(cleanTweets(sentence)), '\\s+'))

    # Number of words that occur in the given term list.
    count_hits <- function(terms) sum(words %in% terms)

    score <- c(
      count_hits(vNegTerms),
      count_hits(negTerms),
      count_hits(posTerms),
      count_hits(vPosTerms)
    )

    # The row keeps the untouched sentence in front of its four counts.
    newrow <- c(sentence, score)
    rbind(final_scores, newrow)
  }

  lapply(sentences, score_sentence)
}
# Convert dataframe to list object.
# `x` is kept as a top-level empty list for compatibility; data_txt() no
# longer reads it.
x <- list()

# Returns the first column of `data` as a list of character strings.
#
# Args:
#   data: a data frame or matrix; only its first column is used.
# Returns:
#   An unnamed list with one character element per row (empty list for zero
#   rows; the former `1:nrow(data)` loop produced list("NA") in that case).
data_txt <- function(data) {
  first_col <- if (is.data.frame(data)) data[[1]] else data[, 1]
  # as.character() turns factor columns into their labels, not integer codes.
  as.list(as.character(first_col))
}

# Load pos,neg words.
# The file is read as two unnamed tab-separated columns, which are then
# labelled `word` and `score`; words are lower-cased for matching.
afinn_list <- read.delim(
  file = 'D:/Suresh R&D/sentiment words/AFINN/AFINN-111.txt',
  header = FALSE,
  stringsAsFactors = FALSE
)
names(afinn_list) <- c('word', 'score')
afinn_list$word <- tolower(afinn_list$word)

# Categorize words as very negative to very positive and add some
# movie-specific words.
# Score bands: -5/-4 very negative, -3..-1 negative, 1..3 positive,
# 4/5 very positive.
vNegTerms <- with(afinn_list, word[score == -5 | score == -4])
negTerms <- c(
  with(afinn_list, word[score == -3 | score == -2 | score == -1]),
  "second-rate", "moronic", "third-rate", "flawed", "juvenile", "boring",
  "distasteful", "ordinary", "disgusting", "senseless", "static", "brutal",
  "confused", "disappointing", "bloody", "silly", "tired", "predictable",
  "stupid", "uninteresting", "trite", "uneven", "outdated", "dreadful",
  "bland"
)
posTerms <- c(
  with(afinn_list, word[score == 3 | score == 2 | score == 1]),
  "first-rate", "insightful", "clever", "charming", "comical",
  "charismatic", "enjoyable", "absorbing", "sensitive", "intriguing",
  "powerful", "pleasant", "surprising", "thought-provoking", "imaginative",
  "unpretentious"
)
vPosTerms <- c(
  with(afinn_list, word[score == 5 | score == 4]),
  "uproarious", "riveting", "fascinating", "dazzling", "legendary"
)


# Calculate score on each tweet.
# sentimentScore() returns one result per tweet; as.data.frame() combines
# them into a single data frame.
SentiResult <- as.data.frame(
  sentimentScore(
    sentences = tweets_txt,
    vNegTerms = vNegTerms,
    negTerms = negTerms,
    posTerms = posTerms,
    vPosTerms = vPosTerms
  )
)

# Top-level list that Convert_toLST() copies and fills; it starts out empty.
new_lst <- list()

# Splits a wide data frame into consecutive groups of five columns.
#
# Args:
#   df: data frame whose columns are taken five at a time.
# Returns:
#   A copy of `new_lst` in which element k is as.list() of the k-th group of
#   five columns. Groups start at columns 1, 6, 11, ... for as long as the
#   start index is strictly below ncol(df).
Convert_toLST <- function(df) {
  total_cols <- ncol(df)
  groups <- new_lst
  group_starts <- if (total_cols > 1) {
    seq(1, total_cols - 1, by = 5)
  } else {
    numeric(0)
  }
  for (k in seq_along(group_starts)) {
    first_col <- group_starts[k]
    groups[[k]] <- as.list(df[first_col:(first_col + 4)])
  }
  groups
}
# Group SentiResult's columns into one five-element list per tweet.
Result_lst <- Convert_toLST(SentiResult)

# Collapse the four category counts of every tweet into NEG and POS totals.
# Each Result_lst element holds, in order: the tweet text, then the
# very-negative, negative, positive and very-positive counts.
mod_lst <- list()
# seq_along() skips the loop when there is nothing to process;
# 1:length(Result_lst) would iterate over c(1, 0) on an empty list.
for (i in seq_along(Result_lst)) {
  # Positional access replaces `$X1`..`$X5`, which depended on the
  # auto-generated column names (and presumably on `$` partial matching for
  # every group after the first — TODO confirm against as.data.frame output).
  negCount <- sum(
    as.numeric(as.character(Result_lst[[i]][[2]])),
    as.numeric(as.character(Result_lst[[i]][[3]]))
  )
  posCount <- sum(
    as.numeric(as.character(Result_lst[[i]][[4]])),
    as.numeric(as.character(Result_lst[[i]][[5]]))
  )
  # c() coerces the counts to character alongside the sentence text.
  mod_lst[[i]] <- as.list(c(
    sentence = as.character(Result_lst[[i]][[1]]),
    NEG = negCount,
    POS = posCount
  ))
}

# Assemble the final per-tweet table from mod_lst.
# Each pass appends one row built from the tweet text (element `sentence`)
# and the second and third elements of mod_lst[[i]] (the NEG and POS entries).
results = data.frame()
# NOTE(review): 1:length(mod_lst) iterates over c(1, 0) when mod_lst is
# empty, so mod_lst[[1]] would fail — confirm an empty result cannot get here.
for(i in 1:length(mod_lst)){
 results =rbind(results,cbind(as.character(mod_lst[[i]]$sentence[1]),mod_lst[[i]][2],mod_lst[[i]][3]))
 # `out` is rebuilt on every pass from the first column accumulated so far;
 # only the value from the last pass survives the loop.
 out =t(as.data.frame(results[,1],optional=TRUE))
  # Print the running totals of columns 2 and 3 after each appended row.
  print(sum(as.numeric(unlist(results[,2]))))
  print(sum(as.numeric(unlist(results[,3]))))
}

# Label the three columns: tweet text, negative count, positive count.
names(results) = c("Tweets","NEG","POS")


# Shiny server logic: publishes the tweet table and the summed NEG / POS
# counts computed above as three table outputs.
shinyServer(function(input, output, session) {
  # Timer that invalidates its dependants every 5000 ms.
  autoInvalidate <- reactiveTimer(5000, session)

  # Sum of one column of `results`, flattened and converted to numeric.
  column_total <- function(col_index) {
    sum(as.numeric(unlist(results[, col_index])))
  }

  output$tweets <- renderTable({
    # Taking a dependency on the timer re-renders this table on every tick.
    autoInvalidate()
    # sample(results)
    # NOTE(review): this sets the names attribute on a local copy of `out`;
    # if a column header was intended, colnames() may be what is wanted.
    names(out) <- c("Tweets")
    out
  })

  output$neg <- renderTable({
    # One-cell table holding the total of column 2 (NEG).
    data.frame(NEG = column_total(2))
  })

  output$pos <- renderTable({
    # One-cell table holding the total of column 3 (POS).
    data.frame(POS = column_total(3))
  })
})

## ui.R
library(shiny)
# Page layout: a title and an HTML table whose left cell (spanning two rows)
# shows the tweets, with the POS total top-right and the NEG total below it.
shinyUI(
  fluidPage(
    titlePanel("What People are talking about @emirates"),
    mainPanel(
      # table(td(tableOutput("tweets")),td(tableOutput("pos")))
      tags$table(
        tags$tr(
          tags$td(rowspan = "2", tableOutput("tweets")),
          tags$td(tableOutput("pos"))
        ),
        tags$tr(
          tags$td(tableOutput("neg"))
        )
      )
      # tableOutput("tweets")
    )
  )
)
	library(twitteR)
	library(tm)
	library(stringr)
	library(wordcloud)
	# Fetch data.
	# Restore the saved OAuth credentials object (`twitCred`) from disk,
	# register it with twitteR, then search for English-language tweets that
	# mention @emirates. The CA bundle path is forwarded via `cainfo`.
	load("D:/Suresh R&D/Emirates Airlines SNA/twitteR_credentials")
	registerTwitterOAuth(twitCred)
	tweets <- searchTwitter(
	  "@emirates",
	  lang = "en",
	  cainfo = "D:/Suresh R&D/Emirates Airlines SNA/cacert.pem"
	)

	# Remove retweets: keep only statuses whose `isRetweet` flag is not TRUE.
	# Filter() handles an empty search result (the former `1:length(tweets)`
	# loop iterated over c(1, 0) and indexed past the end of an empty list)
	# and keeps every status object intact as one list element.
	dm_tweets <- Filter(function(status) !isTRUE(status$isRetweet), tweets)
	# `z` used to be the loop's write cursor and was reset to 0 afterwards;
	# the final value is kept in case anything else inspects it.
	z <- 0

	# Extract the text of every remaining tweet. vapply() always yields a
	# character vector, even when no tweets are left (sapply() gives list()).
	tweets_txt <- vapply(dm_tweets, function(x) x$getText(), character(1))
	# Build a tm corpus with one document per tweet text.
	corpus <- Corpus(VectorSource(tweets_txt))

	# function2 for clean data
	# Normalises a tm corpus: strips punctuation, collapses whitespace,
	# lower-cases the text and removes English stop words.
	#
	# Args:
	#   corpus: a tm corpus.
	# Returns:
	#   The transformed corpus.
	cleanCorpus <- function(corpus) {
	  # tolower() is a plain character function. Newer tm releases require
	  # such functions to be wrapped in content_transformer(), otherwise the
	  # documents are replaced by bare strings and later tm_map() calls fail.
	  # Older tm releases have no content_transformer(), so fall back.
	  to_lower <- if (exists("content_transformer")) {
	    content_transformer(tolower)
	  } else {
	    tolower
	  }
	  corpus.tmp <- tm_map(corpus, removePunctuation)
	  corpus.tmp <- tm_map(corpus.tmp, stripWhitespace)
	  corpus.tmp <- tm_map(corpus.tmp, to_lower)
	  corpus.tmp <- tm_map(corpus.tmp, removeWords, stopwords("english"))
	  # corpus.tmp <- tm_map(corpus.tmp, stemDocument)
	  corpus.tmp
	}

	# Function to clean data.
	# Strips retweet markers, URLs, @mentions, punctuation and digits from
	# tweet text so that only whitespace-separated words remain.
	#
	# Args:
	#   tweets: character vector of raw tweet texts.
	# Returns:
	#   Character vector of the same length; words are separated by single
	#   spaces and there is no leading or trailing whitespace.
	cleanTweets <- function(tweets) {
	  # Drop "RT"/"via" attributions together with the handles that follow.
	  # The alternation is a plain `|`: a backslash in front of it is not a
	  # valid escape inside an R string and stops the file from parsing.
	  tweets_cl <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", tweets)
	  # Drop URLs.
	  tweets_cl <- gsub("http[^[:blank:]]+", "", tweets_cl)
	  # Drop any remaining @mentions.
	  tweets_cl <- gsub("@\\w+", "", tweets_cl)
	  # Replace every non-alphanumeric character (punctuation included) by a
	  # space so that words on either side stay separated.
	  tweets_cl <- gsub("[^[:alnum:]]", " ", tweets_cl)
	  # Drop digits.
	  tweets_cl <- gsub("\\d+", "", tweets_cl)
	  # Collapse whitespace runs into ONE space. Deleting the run entirely
	  # glued neighbouring words together ("good @user day" -> "goodday").
	  tweets_cl <- gsub("\\s+", " ", tweets_cl)
	  # Trim last, after all substitutions that can leave edge whitespace.
	  gsub("^\\s+|\\s+$", "", tweets_cl)
	}


	# Sentiment function.
	# Counts, for every sentence, how many of its words appear in each of the
	# four term lists.
	#
	# Args:
	#   sentences: character vector (or list) of texts to score.
	#   vNegTerms, negTerms, posTerms, vPosTerms: character vectors of very
	#     negative, negative, positive and very positive terms.
	# Returns:
	#   A list with one element per sentence. Each element is a 1 x 5
	#   character matrix: original text, then the very-negative, negative,
	#   positive and very-positive match counts.
	sentimentScore <- function(sentences, vNegTerms, negTerms, posTerms, vPosTerms) {
	  # Empty 0 x 5 character matrix that each sentence's row is bound onto.
	  final_scores <- matrix('', 0, 5)

	  score_sentence <- function(sentence) {
	    # Remove unnecessary characters, lower-case, and split up by word.
	    words <- unlist(str_split(tolower(cleanTweets(sentence)), '\\s+'))

	    # Number of words that occur in the given term list.
	    count_hits <- function(terms) sum(words %in% terms)

	    score <- c(
	      count_hits(vNegTerms),
	      count_hits(negTerms),
	      count_hits(posTerms),
	      count_hits(vPosTerms)
	    )

	    # The row keeps the untouched sentence in front of its four counts.
	    newrow <- c(sentence, score)
	    rbind(final_scores, newrow)
	  }

	  lapply(sentences, score_sentence)
	}
	# Convert dataframe to list object.
	# `x` is kept as a top-level empty list for compatibility; data_txt() no
	# longer reads it.
	x <- list()

	# Returns the first column of `data` as a list of character strings.
	#
	# Args:
	#   data: a data frame or matrix; only its first column is used.
	# Returns:
	#   An unnamed list with one character element per row (empty list for
	#   zero rows; the former `1:nrow(data)` loop produced list("NA") then).
	data_txt <- function(data) {
	  first_col <- if (is.data.frame(data)) data[[1]] else data[, 1]
	  # as.character() turns factor columns into their labels, not codes.
	  as.list(as.character(first_col))
	}

	# Load pos,neg words.
	# The file is read as two unnamed tab-separated columns, which are then
	# labelled `word` and `score`; words are lower-cased for matching.
	afinn_list <- read.delim(
	  file = 'D:/Suresh R&D/sentiment words/AFINN/AFINN-111.txt',
	  header = FALSE,
	  stringsAsFactors = FALSE
	)
	names(afinn_list) <- c('word', 'score')
	afinn_list$word <- tolower(afinn_list$word)

	# Categorize words as very negative to very positive and add some
	# movie-specific words.
	# Score bands: -5/-4 very negative, -3..-1 negative, 1..3 positive,
	# 4/5 very positive. Elementwise OR is the bare `|` operator; a backslash
	# in front of it is a syntax error in R.
	vNegTerms <- with(afinn_list, word[score == -5 | score == -4])
	negTerms <- c(
	  with(afinn_list, word[score == -3 | score == -2 | score == -1]),
	  "second-rate", "moronic", "third-rate", "flawed", "juvenile", "boring",
	  "distasteful", "ordinary", "disgusting", "senseless", "static",
	  "brutal", "confused", "disappointing", "bloody", "silly", "tired",
	  "predictable", "stupid", "uninteresting", "trite", "uneven",
	  "outdated", "dreadful", "bland"
	)
	posTerms <- c(
	  with(afinn_list, word[score == 3 | score == 2 | score == 1]),
	  "first-rate", "insightful", "clever", "charming", "comical",
	  "charismatic", "enjoyable", "absorbing", "sensitive", "intriguing",
	  "powerful", "pleasant", "surprising", "thought-provoking",
	  "imaginative", "unpretentious"
	)
	vPosTerms <- c(
	  with(afinn_list, word[score == 5 | score == 4]),
	  "uproarious", "riveting", "fascinating", "dazzling", "legendary"
	)


	# Calculate score on each tweet.
	# sentimentScore() returns one result per tweet; as.data.frame() combines
	# them into a single data frame.
	SentiResult <- as.data.frame(
	  sentimentScore(
	    sentences = tweets_txt,
	    vNegTerms = vNegTerms,
	    negTerms = negTerms,
	    posTerms = posTerms,
	    vPosTerms = vPosTerms
	  )
	)

	# Top-level list that Convert_toLST() copies and fills; starts out empty.
	new_lst <- list()

	# Splits a wide data frame into consecutive groups of five columns.
	#
	# Args:
	#   df: data frame whose columns are taken five at a time.
	# Returns:
	#   A copy of `new_lst` in which element k is as.list() of the k-th group
	#   of five columns. Groups start at columns 1, 6, 11, ... for as long as
	#   the start index is strictly below ncol(df).
	Convert_toLST <- function(df) {
	  total_cols <- ncol(df)
	  groups <- new_lst
	  group_starts <- if (total_cols > 1) {
	    seq(1, total_cols - 1, by = 5)
	  } else {
	    numeric(0)
	  }
	  for (k in seq_along(group_starts)) {
	    first_col <- group_starts[k]
	    groups[[k]] <- as.list(df[first_col:(first_col + 4)])
	  }
	  groups
	}
	# Group SentiResult's columns into one five-element list per tweet.
	Result_lst <- Convert_toLST(SentiResult)

	# Collapse the four category counts of every tweet into NEG and POS.
	# Each Result_lst element holds, in order: the tweet text, then the
	# very-negative, negative, positive and very-positive counts.
	mod_lst <- list()
	# seq_along() skips the loop when there is nothing to process;
	# 1:length(Result_lst) would iterate over c(1, 0) on an empty list.
	for (i in seq_along(Result_lst)) {
	  # Positional access replaces `$X1`..`$X5`, which depended on the
	  # auto-generated column names (and presumably on `$` partial matching
	  # for every group after the first — TODO confirm).
	  negCount <- sum(
	    as.numeric(as.character(Result_lst[[i]][[2]])),
	    as.numeric(as.character(Result_lst[[i]][[3]]))
	  )
	  posCount <- sum(
	    as.numeric(as.character(Result_lst[[i]][[4]])),
	    as.numeric(as.character(Result_lst[[i]][[5]]))
	  )
	  # c() coerces the counts to character alongside the sentence text.
	  mod_lst[[i]] <- as.list(c(
	    sentence = as.character(Result_lst[[i]][[1]]),
	    NEG = negCount,
	    POS = posCount
	  ))
	}

	# Assemble the final per-tweet table from mod_lst.
	# Each pass appends one row built from the tweet text (element `sentence`)
	# and the second and third elements of mod_lst[[i]] (NEG and POS entries).
	results = data.frame()
	# NOTE(review): 1:length(mod_lst) iterates over c(1, 0) when mod_lst is
	# empty, so mod_lst[[1]] would fail — confirm empty input cannot get here.
	for(i in 1:length(mod_lst)){
	results =rbind(results,cbind(as.character(mod_lst[[i]]$sentence[1]),mod_lst[[i]][2],mod_lst[[i]][3]))
	# `out` is rebuilt on every pass from the first column accumulated so far;
	# only the value from the last pass survives the loop.
	out =t(as.data.frame(results[,1],optional=TRUE))
	# Print the running totals of columns 2 and 3 after each appended row.
	print(sum(as.numeric(unlist(results[,2]))))
	print(sum(as.numeric(unlist(results[,3]))))
	}

	# Label the three columns: tweet text, negative count, positive count.
	names(results) = c("Tweets","NEG","POS")


	# Shiny server logic: publishes the tweet table and the summed NEG / POS
	# counts computed above as three table outputs.
	shinyServer(function(input, output, session) {
	  # Timer that invalidates its dependants every 5000 ms.
	  autoInvalidate <- reactiveTimer(5000, session)

	  # Sum of one column of `results`, flattened and converted to numeric.
	  column_total <- function(col_index) {
	    sum(as.numeric(unlist(results[, col_index])))
	  }

	  output$tweets <- renderTable({
	    # Taking a dependency on the timer re-renders this table every tick.
	    autoInvalidate()
	    # sample(results)
	    # NOTE(review): this sets the names attribute on a local copy of
	    # `out`; if a column header was intended, colnames() may be wanted.
	    names(out) <- c("Tweets")
	    out
	  })

	  output$neg <- renderTable({
	    # One-cell table holding the total of column 2 (NEG).
	    data.frame(NEG = column_total(2))
	  })

	  output$pos <- renderTable({
	    # One-cell table holding the total of column 3 (POS).
	    data.frame(POS = column_total(3))
	  })
	})
	library(shiny)
	# Page layout: a title and an HTML table whose left cell (spanning two
	# rows) shows the tweets, with the POS total top-right and NEG below it.
	shinyUI(
	  fluidPage(
	    titlePanel("What People are talking about @emirates"),
	    mainPanel(
	      # table(td(tableOutput("tweets")),td(tableOutput("pos")))
	      tags$table(
	        tags$tr(
	          tags$td(rowspan = "2", tableOutput("tweets")),
	          tags$td(tableOutput("pos"))
	        ),
	        tags$tr(
	          tags$td(tableOutput("neg"))
	        )
	      )
	      # tableOutput("tweets")
	    )
	  )
	)