Created
December 17, 2017 17:14
-
-
Save rparatodxs/ce91597d3abbd79611c7ef9cc18df262 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##-----ESTA ES UNA RÉPLICA DE VARIOS EJEMPLOS DE CÓMO HACER UN ANÁLISIS DE SENTIMIENTO EN R----
#----1. Conectar R con Twitter
#----Desde tu cuenta de Twitter crea una aplicación (API) en el sitio de Twitter: https://apps.twitter.com/
#----Luego hay que hacer la conexión, para lo que hay que obtener las claves de la API:
library(twitteR) | |
library(ROAuth) | |
library(httr) | |
# Twitter API credentials.
# Each value is taken from an environment variable so that real secrets never
# have to be written into (or committed with) this script. When a variable is
# not set, the original placeholder is used and must be replaced by hand.
api_key <- Sys.getenv(
  "TWITTER_API_KEY",
  unset = "AQUIPONESLATUTADETUAPI"
)
api_secret <- Sys.getenv(
  "TWITTER_API_SECRET",
  unset = "AQUIPONESLATUTADETUAPI"
)
access_token <- Sys.getenv(
  "TWITTER_ACCESS_TOKEN",
  unset = "AQUIPONESLATUTADETUAPI"
)
access_token_secret <- Sys.getenv(
  "TWITTER_ACCESS_TOKEN_SECRET",
  unset = "AQUIPONESLATUTADETUAPI"
)

# Hand the four credentials to twitteR before any search is issued.
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
##---Diccionarios traducidos con Google Translate... no es lo mejor, pero es lo que hay para hacerlo ahora.
# El diccionario en positivo:
#https://gist.githubusercontent.com/rparatodxs/29b2bd5a5e4fb265654fd67a83e4cdc4/raw/61424b314432781165e98ac4507bbb90c652940e/positive-words.txt
# El diccionario en negativo:
#https://gist.githubusercontent.com/rparatodxs/7055224448e170761451d683511a8b2c/raw/ddd5a7c01ad43796d0828a0fdf9c2977c494cbf4/negative-words.txt
# Read the dictionaries of positive and negative words.
# Both files are looked up in the working directory; scan() treats everything
# after a ';' on a line as a comment and returns the remaining
# whitespace-separated tokens as a character vector.
pos <- scan(file = "positive-words.txt", what = character(), comment.char = ";")
neg <- scan(file = "negative-words.txt", what = character(), comment.char = ";")

# Extend each dictionary with a few extra terms that the files do not contain.
pos.words <- append(pos, "bkn")
neg.words <- append(neg, c("ctm", "hdp", "chanta", "mierda"))
# Counts the positive and negative dictionary words found in each text.
#' Score the sentiment of a vector of texts
#'
#' Implements a very simple algorithm to estimate sentiment: each text gets an
#' integer score equal to the number of its words found in `pos.words` minus
#' the number of its words found in `neg.words`.
#'
#' Before matching, every text is converted to valid UTF-8 (bytes that cannot
#' be converted are dropped), punctuation and digits are removed, control
#' characters are turned into spaces, and the result is lower-cased and split
#' on whitespace. Dictionary entries are matched exactly, so they should be
#' lower-case single words.
#'
#' @param sentences Vector of texts to score.
#' @param pos.words Character vector of words of positive sentiment.
#' @param neg.words Character vector of words of negative sentiment.
#' @param .progress Progress-bar name passed to `plyr::laply()`. It is only
#'   honoured when the plyr package is installed; without plyr the scores are
#'   computed with base R and no progress bar is shown.
#' @return A data.frame with one row per element of `sentences` and two
#'   columns: `score` (the sentiment score) and `text` (the unmodified input).
#' @author Jeffrey Breen <jbreen@cambridge.aero>
score.sentiment <- function(sentences, pos.words, neg.words,
                            .progress = "none") {
  # Tweets frequently contain bytes that are invalid in the session encoding,
  # and gsub()/tolower() fail on such strings. Normalise to UTF-8 first and
  # drop whatever cannot be converted.
  cleaned <- iconv(
    enc2utf8(as.character(sentences)),
    from = "UTF-8", to = "UTF-8", sub = ""
  )

  # The cleaning steps are vectorised, so they run once over the whole input
  # rather than once per sentence.
  cleaned <- gsub("[[:punct:]]", "", cleaned)
  # Control characters include newlines and tabs: replace them with a space
  # so the words on either side are not fused into one token.
  cleaned <- gsub("[[:cntrl:]]", " ", cleaned)
  cleaned <- gsub("\\d+", "", cleaned)
  cleaned <- tolower(cleaned)

  # One character vector of words per input text.
  word_lists <- strsplit(cleaned, "\\s+")

  # Score of a single text: positive hits minus negative hits. Summing the
  # logical vectors counts the TRUE values.
  score_words <- function(words) {
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }

  # Use plyr (for its progress bar) when available, but do not attach it and
  # do not fail silently the way require() does when it is missing.
  if (requireNamespace("plyr", quietly = TRUE)) {
    scores <- plyr::laply(word_lists, score_words, .progress = .progress)
  } else {
    scores <- vapply(word_lists, score_words, integer(1))
  }

  # stringsAsFactors = FALSE keeps `text` as character on every R version.
  data.frame(score = scores, text = sentences, stringsAsFactors = FALSE)
}
# | |
# ---- Fetch, score and plot the tweets mentioning each candidate ----
library(plyr)

# --- Tweets mentioning @sebastianpinera ---
tweets_sanders <- searchTwitter(
  "@sebastianpinera",
  n = 3500, since = "2017-12-16", until = "2017-12-18"
)
# Fail with a clear message instead of a cryptic error later in hist().
if (length(tweets_sanders) == 0) {
  stop("No tweets were returned for @sebastianpinera", call. = FALSE)
}
# Extract the text of every status object. vapply() guarantees a character
# vector with one element per tweet.
feed_sanders <- vapply(
  tweets_sanders,
  function(t) t$getText(),
  character(1),
  USE.NAMES = FALSE
)
# Score every tweet; the result is a data frame with `score` and `text`.
feelthabern <- score.sentiment(
  feed_sanders, pos.words, neg.words,
  .progress = "text"
)
# Same data with the columns reordered (text first, then score).
plotdat <- feelthabern[c("text", "score")]
# Optional: remove neutral values of 0
# plotdat <- plotdat[!plotdat$score == 0, ]
# Quick exploratory look at the score distribution and the sample size.
hist(plotdat$score, breaks = 10)
length(plotdat$score)
# Mean score, rounded for display on the comparison plot.
mean0 <- round(mean(plotdat$score), 2)

# --- Tweets mentioning @guillier ---
tweets_sanders1 <- searchTwitter(
  "@guillier",
  n = 3500, since = "2017-12-16", until = "2017-12-18"
)
if (length(tweets_sanders1) == 0) {
  stop("No tweets were returned for @guillier", call. = FALSE)
}
feed_sanders1 <- vapply(
  tweets_sanders1,
  function(t) t$getText(),
  character(1),
  USE.NAMES = FALSE
)
feelthabern1 <- score.sentiment(
  feed_sanders1, pos.words, neg.words,
  .progress = "text"
)
plotdat1 <- feelthabern1[c("text", "score")]
# Optional: remove neutral values of 0
# plotdat1 <- plotdat1[!plotdat1$score == 0, ]
hist(plotdat1$score, breaks = 10)
length(plotdat1$score)
# round() already fixes the displayed precision, so the global
# options(digits = 2) setting is not needed and is no longer changed.
mean1 <- round(mean(plotdat1$score), 2)

# --- Quick side-by-side comparison, stamped with the current time ---
# Later this could show the evolution over time if the monitoring is repeated
# every hour.
old_par <- par(mfrow = c(2, 1))
# One timestamp shared by both panels.
stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
hist(plotdat1$score,
  breaks = 10, col = "red",
  main = "Sentimiento a @guillier", sub = stamp
)
# Put the mean in a corner of the plot region so it stays visible whatever the
# axis ranges turn out to be.
legend("topleft", legend = as.character(mean1), bty = "n")
hist(plotdat$score,
  breaks = 10, col = "blue",
  main = "Sentimiento a @sebastianpinera", sub = stamp
)
legend("topleft", legend = as.character(mean0), bty = "n")
# Restore the previous plotting layout.
par(old_par)
#------Ideas alternativas son bienvenidas
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment