Skip to content

Instantly share code, notes, and snippets.

@rparatodxs
Created December 17, 2017 17:14
Show Gist options
  • Save rparatodxs/ce91597d3abbd79611c7ef9cc18df262 to your computer and use it in GitHub Desktop.
Save rparatodxs/ce91597d3abbd79611c7ef9cc18df262 to your computer and use it in GitHub Desktop.
##----- ESTA ES UNA RÉPLICA DE VARIOS EJEMPLOS DE CÓMO HACER UN ANÁLISIS DE SENTIMIENTO EN R ----
#---- 1. Conectar R con Twitter
#---- Desde tu cuenta de Twitter, crea una API en el sitio de Twitter: https://apps.twitter.com/
#---- Luego hay que hacer la conexión, para lo que hay que obtener:
library(twitteR)
library(ROAuth)
library(httr)
# Set API keys.
# Each credential is taken from an environment variable when that variable is
# set (for example in ~/.Renviron), so real secrets do not have to be saved in
# this file. When the variable is not set, the placeholder string is used and
# must be replaced by hand with the value from your Twitter application.
api_key <- Sys.getenv("TWITTER_API_KEY", unset = "AQUIPONESLATUTADETUAPI")
api_secret <- Sys.getenv("TWITTER_API_SECRET", unset = "AQUIPONESLATUTADETUAPI")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = "AQUIPONESLATUTADETUAPI")
access_token_secret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET",
                                  unset = "AQUIPONESLATUTADETUAPI")
# Register the four credentials with twitteR for the rest of the session.
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
## --- Dictionaries translated with Google Translate: not the best option,
## but it is what is available for now.
# Positive dictionary:
# https://gist.githubusercontent.com/rparatodxs/29b2bd5a5e4fb265654fd67a83e4cdc4/raw/61424b314432781165e98ac4507bbb90c652940e/positive-words.txt
# Negative dictionary:
# https://gist.githubusercontent.com/rparatodxs/7055224448e170761451d683511a8b2c/raw/ddd5a7c01ad43796d0828a0fdf9c2977c494cbf4/negative-words.txt
# Read in the dictionaries of positive and negative words. Both files are
# looked up in the working directory; text after ';' on a line is ignored.
# The words are marked as UTF-8 so that accented entries compare correctly
# with tweet text even when the session's native encoding is not UTF-8.
# NOTE(review): assumes the files were saved as UTF-8 (as served by the URLs
# above); change `encoding` if your local copies use another encoding.
pos <- scan("positive-words.txt", what = "character", comment.char = ";",
            encoding = "UTF-8") # file with the positive dictionary
neg <- scan("negative-words.txt", what = "character", comment.char = ";",
            encoding = "UTF-8") # file with the negative dictionary
# Append a few extra terms to each list.
pos.words <- c(pos, "bkn")
neg.words <- c(neg, "ctm", "hdp", "chanta", "mierda")
# --- This function counts the positive and negative words in the tweets.
#' Score the sentiment of each text by counting dictionary words
#'
#' score.sentiment() implements a very simple algorithm to estimate
#' sentiment: every text is stripped of punctuation, control characters and
#' digits, lower-cased and split on whitespace. Its integer score is the
#' number of tokens found in `pos.words` minus the number of tokens found in
#' `neg.words`.
#'
#' @param sentences vector of text to score
#' @param pos.words vector of words of positive sentiment
#' @param neg.words vector of words of negative sentiment
#' @param .progress progress bar to display. The default "none" needs base R
#'   only; any other value is passed to plyr::laply(), so plyr must be
#'   installed in that case.
#' @return data.frame with one row per input text and two columns: `score`
#'   (integer) and `text` (the original, unmodified input)
#' @author Jeffrey Breen <jbreen@cambridge.aero>
score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
  cleaned <- as.character(sentences)

  # gsub() and tolower() stop with an error on a string that is not valid in
  # its encoding, which would abort the whole batch because of a single
  # text. Drop the offending bytes from just those strings; valid strings are
  # left untouched.
  broken <- which(!validEnc(cleaned))
  if (length(broken) > 0) {
    cleaned[broken] <- iconv(cleaned[broken], from = "UTF-8", to = "UTF-8",
                             sub = "")
  }

  # Clean up with regex substitutions (all vectorized over the texts).
  cleaned <- gsub("[[:punct:]]", "", cleaned)
  cleaned <- gsub("[[:cntrl:]]", "", cleaned)
  cleaned <- gsub("\\d+", "", cleaned)

  # Lower-case and split into words: one character vector per text.
  tokens <- strsplit(tolower(cleaned), "\\s+")

  # Score of one text = matches in the positive list minus matches in the
  # negative list. Empty tokens (left by leading whitespace) are never
  # counted as words.
  score_tokens <- function(words) {
    words <- words[nzchar(words)]
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }

  if (identical(.progress, "none")) {
    scores <- vapply(tokens, score_tokens, integer(1), USE.NAMES = FALSE)
  } else {
    # Progress bars come from plyr; fail with a clear message instead of a
    # "could not find function" error when the package is missing.
    if (!requireNamespace("plyr", quietly = TRUE)) {
      stop("Package 'plyr' is required to show a progress bar; ",
           "install it or use .progress = \"none\".", call. = FALSE)
    }
    scores <- as.integer(
      plyr::laply(tokens, score_tokens, .progress = .progress)
    )
  }

  # stringsAsFactors = FALSE keeps `text` as plain character on every R
  # version (before R 4.0 the default turned it into a factor).
  data.frame(score = scores, text = sentences, stringsAsFactors = FALSE)
}
#
# Grab the latest tweets aimed at PIÑERA (mentions of @sebastianpinera inside
# the date window)
tweets_sanders <- searchTwitter("@sebastianpinera", n = 3500,
                                since = "2017-12-16", until = "2017-12-18")
# Extract the text of every tweet. vapply() guarantees a character vector
# (character(0) when the search returned nothing).
library(plyr)
feed_sanders <- vapply(tweets_sanders, function(status) status$getText(),
                       character(1))
# Call the scoring function; it returns a data frame with "score" and "text"
feelthabern <- score.sentiment(feed_sanders, pos.words, neg.words,
                               .progress = "text")
# Keep both columns, with the text first (nothing is dropped here)
plotdat <- feelthabern[c("text", "score")]
# Optional: remove neutral tweets (score of 0)
#plotdat <- plotdat[!plotdat$score == 0, ]
hist(plotdat$score, breaks = 10)
length(plotdat$score)
# Mean score as a number rounded to 2 decimals, the same form in which it is
# recomputed later for the comparison plot
mean0 <- round(mean(plotdat$score), 2)
# Grab the latest tweets aimed at GUILLIER (mentions of @guillier inside the
# date window)
tweets_sanders1 <- searchTwitter("@guillier", n = 3500,
                                 since = "2017-12-16", until = "2017-12-18")
# Extract the text of every tweet. vapply() guarantees a character vector
# (character(0) when the search returned nothing).
library(plyr)
feed_sanders1 <- vapply(tweets_sanders1, function(status) status$getText(),
                        character(1))
# Call the scoring function; it returns a data frame with "score" and "text"
feelthabern1 <- score.sentiment(feed_sanders1, pos.words, neg.words,
                                .progress = "text")
# Keep both columns, with the text first (nothing is dropped here)
plotdat1 <- feelthabern1[c("text", "score")]
# Optional: remove neutral tweets (score of 0)
#plotdat1 <- plotdat1[!plotdat1$score == 0, ]
hist(plotdat1$score, breaks = 10)
length(plotdat1$score)
# Print numbers with 2 significant digits from here on (session-wide option)
options(digits = 2)
# Mean scores, rounded to 2 decimals, used as labels in the comparison plot
mean1 <- round(mean(plotdat1$score), 2)
mean0 <- round(mean(plotdat$score), 2)
### ---- A quick plot to compare both accounts, stamped with the current time.
# --- Later the evolution over time can be built if this monitoring is run
# every hour.
# Two stacked panels; the previous layout is restored once both are drawn.
old_par <- par(mfrow = c(2, 1))
# One timestamp for both panels, formatted explicitly so the subtitle does not
# depend on how the R version converts a date-time to text.
stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
hist(plotdat1$score, breaks = 10, col = "red",
     main = "Sentimiento a @guillier", sub = stamp)
# Mean score in the top-left corner. legend() positions it relative to the
# plot region, so it stays visible whatever the axis ranges are (fixed x/y
# coordinates can fall outside the axes).
legend("topleft", legend = as.character(mean1), bty = "n")
hist(plotdat$score, breaks = 10, col = "blue",
     main = "Sentimiento a @sebastianpinera", sub = stamp)
legend("topleft", legend = as.character(mean0), bty = "n")
par(old_par)
#------ideas alternativas son bienvenidas
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment