# rparatodxs/sentimientoChile.R

## sentimientoChile.R
##-----ESTA ES UNA RÉPLICA DE VARIOS EJEMPLOS DE CÓMO HACER UN ANÁLISIS DE SENTIMIENTO EN R----

#----1. conectar R con Twitter
#----desde tu cuenta de Twitter crea una API en el sitio de Twitter: https://apps.twitter.com/
#----luego hay que hacer la conexión, para lo que hay que obtener las siguientes claves:

library(twitteR)
library(ROAuth)
library(httr)

# Set API keys.
# Each credential is read from an environment variable so that real secrets do
# not have to be written into this file. When a variable is not set, the
# original placeholder string is used and has to be replaced by hand.
api_key <- Sys.getenv("TWITTER_API_KEY", unset = "AQUIPONESLATUTADETUAPI")
api_secret <- Sys.getenv("TWITTER_API_SECRET", unset = "AQUIPONESLATUTADETUAPI")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = "AQUIPONESLATUTADETUAPI")
access_token_secret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET",
                                  unset = "AQUIPONESLATUTADETUAPI")
# Register the four credentials with twitteR for the calls made below
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)


##---diccionarios traducidos con Google Translate... no es lo mejor, pero es lo que hay para hacerlo ahora.
#el diccionario en positivo:
#https://gist.githubusercontent.com/rparatodxs/29b2bd5a5e4fb265654fd67a83e4cdc4/raw/61424b314432781165e98ac4507bbb90c652940e/positive-words.txt

#el diccionario en negativo
#https://gist.githubusercontent.com/rparatodxs/7055224448e170761451d683511a8b2c/raw/ddd5a7c01ad43796d0828a0fdf9c2977c494cbf4/negative-words.txt


# Load the positive and negative word lists from the working directory.
# Everything after a ';' on a line is treated as a comment by scan().

pos <- scan(file = "positive-words.txt", what = character(), comment.char = ";")
neg <- scan(file = "negative-words.txt", what = character(), comment.char = ";")
# Extend both lists with a few extra terms
pos.words <- append(pos, "bkn")
neg.words <- append(neg, c("ctm", "hdp", "chanta", "mierda"))


#--- this function counts the positive and negative words in the tweets

#' Score the sentiment of each text by counting dictionary words
#'
#' score.sentiment() implements a very simple algorithm to estimate
#' sentiment, assigning an integer score by subtracting the number
#' of occurrences of negative words from that of positive words.
#'
#' Before matching, punctuation and digits are removed, control characters
#' (line breaks, tabs, ...) are turned into spaces so that the words around
#' them stay separate, and the text is lower-cased. The cleaned text is then
#' split on runs of whitespace.
#'
#' @param sentences vector of text to score
#' @param pos.words vector of words of positive sentiment
#' @param neg.words vector of words of negative sentiment
#' @param .progress name of the progress bar to show. With the default
#'   "none" the scores are computed with base R only; any other value is
#'   passed on to plyr::laply(), which then has to be installed.
#' @return data.frame with one row per element of `sentences` and the
#'   columns `score` (integer) and `text` (the input text)
#' @author Jefrey Breen <jbreen@cambridge.aero>
score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
  # gsub() and tolower() are vectorised, so all sentences are cleaned at once
  cleaned <- gsub("[[:punct:]]", "", sentences)
  # a control character such as "\n" separates words: replace it with a space
  # instead of deleting it, otherwise "good\nbad" would become "goodbad"
  cleaned <- gsub("[[:cntrl:]]", " ", cleaned)
  cleaned <- gsub("\\d+", "", cleaned)
  cleaned <- tolower(cleaned)

  # one character vector of words per sentence
  word_lists <- strsplit(cleaned, "\\s+")

  # %in% yields TRUE/FALSE per word and sum() counts the TRUEs
  score_words <- function(words) {
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }

  if (identical(.progress, "none")) {
    # vapply() keeps the result an integer vector even for empty input
    scores <- vapply(word_lists, score_words, integer(1), USE.NAMES = FALSE)
  } else {
    # plyr is only needed for its progress bar; `::` fails loudly if it is
    # not installed instead of returning FALSE like require()
    scores <- plyr::laply(word_lists, score_words, .progress = .progress)
  }

  # stringsAsFactors = FALSE keeps `text` as given on every R version
  data.frame(score = scores, text = sentences, stringsAsFactors = FALSE)
}


#
# Grab the latest tweets addressed to Piñera.
# NOTE(review): the since/until window is fixed to 16-18 December 2017; confirm
# that the search API still serves tweets from that period before re-running.
tweets_sanders <- searchTwitter("@sebastianpinera", n = 3500, since = "2017-12-16",
                                until = "2017-12-18")

# Nothing can be scored or plotted without tweets, so stop here with a clear
# message rather than failing later on in hist().
if (length(tweets_sanders) == 0) {
  stop("No tweets were returned for @sebastianpinera", call. = FALSE)
}

# Extract the text of every tweet. vapply() always yields a character vector,
# whatever the number of tweets.
library(plyr)
feed_sanders <- vapply(tweets_sanders, function(t) t$getText(), character(1),
                       USE.NAMES = FALSE)

# Score every tweet; the result is a data frame with the columns score and text
feelthabern <- score.sentiment(feed_sanders, pos.words, neg.words, .progress = "text")
# Same two columns, with the text first
plotdat <- feelthabern[c("text", "score")]
# Optionally remove the neutral scores of 0
#plotdat <- plotdat[plotdat$score != 0, ]

hist(plotdat$score, breaks = 10)
length(plotdat$score)
# Mean score as a string; it is recomputed as a rounded number further down
mean0 <- as.character(mean(plotdat$score))


# Grab the latest tweets addressed to Guillier.
# NOTE(review): the since/until window is fixed to 16-18 December 2017; confirm
# that the search API still serves tweets from that period before re-running.
tweets_sanders1 <- searchTwitter("@guillier", n = 3500, since = "2017-12-16",
                                 until = "2017-12-18")

# Nothing can be scored or plotted without tweets, so stop here with a clear
# message rather than failing later on in hist().
if (length(tweets_sanders1) == 0) {
  stop("No tweets were returned for @guillier", call. = FALSE)
}

# Extract the text of every tweet. vapply() always yields a character vector,
# whatever the number of tweets.
library(plyr)
feed_sanders1 <- vapply(tweets_sanders1, function(t) t$getText(), character(1),
                        USE.NAMES = FALSE)

# Score every tweet; the result is a data frame with the columns score and text
feelthabern1 <- score.sentiment(feed_sanders1, pos.words, neg.words, .progress = "text")
# Same two columns, with the text first
plotdat1 <- feelthabern1[c("text", "score")]
# Optionally remove the neutral scores of 0
#plotdat1 <- plotdat1[plotdat1$score != 0, ]

hist(plotdat1$score, breaks = 10)
length(plotdat1$score)

# Mean score per account, rounded to two decimals for the plot labels.
# round() already fixes the precision that is displayed, so the global
# options(digits = 2) setting is not needed and is no longer changed.
mean1 <- round(mean(plotdat1$score), 2)
mean0 <- round(mean(plotdat$score), 2)


### ---- a quick chart to compare both accounts, stamped with the current time.
# --- the evolution could be followed later by running the monitoring every hour
# Stack the two histograms and remember the previous layout to restore it
old_par <- par(mfrow = c(2, 1))
hist(plotdat1$score, breaks = 10, col = "red",
     main = "Sentimiento a @guillier", sub = Sys.time())
# Put the mean in the top-left corner of the plot; fixed coordinates such as
# (-4, 600) are clipped whenever they fall outside the range of the data
legend("topleft", legend = as.character(mean1), bty = "n")

hist(plotdat$score, breaks = 10, col = "blue",
     main = "Sentimiento a @sebastianpinera", sub = Sys.time())
legend("topleft", legend = as.character(mean0), bty = "n")
par(old_par)


#------ideas alternativas son bienvenidas
	##-----ESTA ES UNA RÉPLICA DE VARIOS EJEMPLOS DE CÓMO HACER UN ANÁLISIS DE SENTIMIENTO EN R----

	#----1. conectar R con Twitter
	#----desde tu cuenta de Twitter crea una API en el sitio de Twitter: https://apps.twitter.com/
	#----luego hay que hacer la conexión, para lo que hay que obtener las siguientes claves:

	library(twitteR)
	library(ROAuth)
	library(httr)

	# Set API keys.
	# Each credential is read from an environment variable so that real secrets do
	# not have to be written into this file. When a variable is not set, the
	# original placeholder string is used and has to be replaced by hand.
	api_key <- Sys.getenv("TWITTER_API_KEY", unset = "AQUIPONESLATUTADETUAPI")
	api_secret <- Sys.getenv("TWITTER_API_SECRET", unset = "AQUIPONESLATUTADETUAPI")
	access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = "AQUIPONESLATUTADETUAPI")
	access_token_secret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET",
	                                  unset = "AQUIPONESLATUTADETUAPI")
	# Register the four credentials with twitteR for the calls made below
	setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)



	##---diccionarios traducidos con Google Translate... no es lo mejor, pero es lo que hay para hacerlo ahora.
	#el diccionario en positivo:
	#https://gist.githubusercontent.com/rparatodxs/29b2bd5a5e4fb265654fd67a83e4cdc4/raw/61424b314432781165e98ac4507bbb90c652940e/positive-words.txt

	#el diccionario en negativo
	#https://gist.githubusercontent.com/rparatodxs/7055224448e170761451d683511a8b2c/raw/ddd5a7c01ad43796d0828a0fdf9c2977c494cbf4/negative-words.txt



	# Load the positive and negative word lists from the working directory.
	# Everything after a ';' on a line is treated as a comment by scan().

	pos <- scan(file = "positive-words.txt", what = character(), comment.char = ";")
	neg <- scan(file = "negative-words.txt", what = character(), comment.char = ";")
	# Extend both lists with a few extra terms
	pos.words <- append(pos, "bkn")
	neg.words <- append(neg, c("ctm", "hdp", "chanta", "mierda"))


	#--- this function counts the positive and negative words in the tweets

	#' Score the sentiment of each text by counting dictionary words
	#'
	#' score.sentiment() implements a very simple algorithm to estimate
	#' sentiment, assigning an integer score by subtracting the number
	#' of occurrences of negative words from that of positive words.
	#'
	#' Before matching, punctuation and digits are removed, control characters
	#' (line breaks, tabs, ...) are turned into spaces so that the words around
	#' them stay separate, and the text is lower-cased. The cleaned text is then
	#' split on runs of whitespace.
	#'
	#' @param sentences vector of text to score
	#' @param pos.words vector of words of positive sentiment
	#' @param neg.words vector of words of negative sentiment
	#' @param .progress name of the progress bar to show. With the default
	#'   "none" the scores are computed with base R only; any other value is
	#'   passed on to plyr::laply(), which then has to be installed.
	#' @return data.frame with one row per element of `sentences` and the
	#'   columns `score` (integer) and `text` (the input text)
	#' @author Jefrey Breen <jbreen@cambridge.aero>
	score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
	  # gsub() and tolower() are vectorised, so all sentences are cleaned at once
	  cleaned <- gsub("[[:punct:]]", "", sentences)
	  # a control character such as "\n" separates words: replace it with a space
	  # instead of deleting it, otherwise "good\nbad" would become "goodbad"
	  cleaned <- gsub("[[:cntrl:]]", " ", cleaned)
	  cleaned <- gsub("\\d+", "", cleaned)
	  cleaned <- tolower(cleaned)

	  # one character vector of words per sentence
	  word_lists <- strsplit(cleaned, "\\s+")

	  # %in% yields TRUE/FALSE per word and sum() counts the TRUEs
	  score_words <- function(words) {
	    sum(words %in% pos.words) - sum(words %in% neg.words)
	  }

	  if (identical(.progress, "none")) {
	    # vapply() keeps the result an integer vector even for empty input
	    scores <- vapply(word_lists, score_words, integer(1), USE.NAMES = FALSE)
	  } else {
	    # plyr is only needed for its progress bar; `::` fails loudly if it is
	    # not installed instead of returning FALSE like require()
	    scores <- plyr::laply(word_lists, score_words, .progress = .progress)
	  }

	  # stringsAsFactors = FALSE keeps `text` as given on every R version
	  data.frame(score = scores, text = sentences, stringsAsFactors = FALSE)
	}




	#
	# Grab the latest tweets addressed to Piñera.
	# NOTE(review): the since/until window is fixed to 16-18 December 2017; confirm
	# that the search API still serves tweets from that period before re-running.
	tweets_sanders <- searchTwitter("@sebastianpinera", n = 3500, since = "2017-12-16",
	                                until = "2017-12-18")

	# Nothing can be scored or plotted without tweets, so stop here with a clear
	# message rather than failing later on in hist().
	if (length(tweets_sanders) == 0) {
	  stop("No tweets were returned for @sebastianpinera", call. = FALSE)
	}

	# Extract the text of every tweet. vapply() always yields a character vector,
	# whatever the number of tweets.
	library(plyr)
	feed_sanders <- vapply(tweets_sanders, function(t) t$getText(), character(1),
	                       USE.NAMES = FALSE)

	# Score every tweet; the result is a data frame with the columns score and text
	feelthabern <- score.sentiment(feed_sanders, pos.words, neg.words, .progress = "text")
	# Same two columns, with the text first
	plotdat <- feelthabern[c("text", "score")]
	# Optionally remove the neutral scores of 0
	#plotdat <- plotdat[plotdat$score != 0, ]

	hist(plotdat$score, breaks = 10)
	length(plotdat$score)
	# Mean score as a string; it is recomputed as a rounded number further down
	mean0 <- as.character(mean(plotdat$score))





	# Grab the latest tweets addressed to Guillier.
	# NOTE(review): the since/until window is fixed to 16-18 December 2017; confirm
	# that the search API still serves tweets from that period before re-running.
	tweets_sanders1 <- searchTwitter("@guillier", n = 3500, since = "2017-12-16",
	                                 until = "2017-12-18")

	# Nothing can be scored or plotted without tweets, so stop here with a clear
	# message rather than failing later on in hist().
	if (length(tweets_sanders1) == 0) {
	  stop("No tweets were returned for @guillier", call. = FALSE)
	}

	# Extract the text of every tweet. vapply() always yields a character vector,
	# whatever the number of tweets.
	library(plyr)
	feed_sanders1 <- vapply(tweets_sanders1, function(t) t$getText(), character(1),
	                        USE.NAMES = FALSE)

	# Score every tweet; the result is a data frame with the columns score and text
	feelthabern1 <- score.sentiment(feed_sanders1, pos.words, neg.words, .progress = "text")
	# Same two columns, with the text first
	plotdat1 <- feelthabern1[c("text", "score")]
	# Optionally remove the neutral scores of 0
	#plotdat1 <- plotdat1[plotdat1$score != 0, ]

	hist(plotdat1$score, breaks = 10)
	length(plotdat1$score)

	# Mean score per account, rounded to two decimals for the plot labels.
	# round() already fixes the precision that is displayed, so the global
	# options(digits = 2) setting is not needed and is no longer changed.
	mean1 <- round(mean(plotdat1$score), 2)
	mean0 <- round(mean(plotdat$score), 2)



	### ---- a quick chart to compare both accounts, stamped with the current time.
	# --- the evolution could be followed later by running the monitoring every hour
	# Stack the two histograms and remember the previous layout to restore it
	old_par <- par(mfrow = c(2, 1))
	hist(plotdat1$score, breaks = 10, col = "red",
	     main = "Sentimiento a @guillier", sub = Sys.time())
	# Put the mean in the top-left corner of the plot; fixed coordinates such as
	# (-4, 600) are clipped whenever they fall outside the range of the data
	legend("topleft", legend = as.character(mean1), bty = "n")

	hist(plotdat$score, breaks = 10, col = "blue",
	     main = "Sentimiento a @sebastianpinera", sub = Sys.time())
	legend("topleft", legend = as.character(mean0), bty = "n")
	par(old_par)


	#------ideas alternativas son bienvenidas