Last active
May 29, 2017 19:55
-
-
Save ferolo3000/adb3950829ca3c9f712dc9e1717cf828 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#load library | |
library(ROAuth) | |
library(twitteR) | |
library(ROAuth) | |
library(syuzhet)#sentiment analysis | |
library(ggplot2) | |
library(streamR) | |
library(tm) | |
# Parameters configuration ----
# OAuth endpoint URLs; they are passed to OAuthFactory$new() further down when
# the streamR credential is created.
reqURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
options(httr_oauth_cache = TRUE)
# Placeholder values: replace them with the keys of your own Twitter
# application before running the script.
consumer_key <- "xxxxxxxxxxxxxxxxxx"
consumer_secret <- "xxxxxxxxxxxxxxxxxxx"
access_token <- "xxxxxxxxxxxxxxxxxx"
access_secret <- "xxxxxxxxxxxxxxxxxxx"
# twitteR authentication
setup_twitter_oauth(consumer_key, consumer_secret, access_token, access_secret)
# streamR authentication
# The OAuth object is cached on disk: when the cache file is missing, the
# handshake is performed once and `cred` is saved; otherwise the file is
# loaded (it is expected to restore `cred`, as written by save() below).
credentials_file <- "my_oauth.Rdata"
if (!file.exists(credentials_file)) {
  cred <- OAuthFactory$new(
    consumerKey = consumer_key,
    consumerSecret = consumer_secret,
    requestURL = reqURL,
    accessURL = accessURL,
    authURL = authURL
  )
  # Use the CA bundle shipped with RCurl for the handshake.
  cred$handshake(
    cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl")
  )
  save(cred, file = credentials_file)
} else {
  load(credentials_file)
}
# load packages
library(streamR)
# Connect to the Twitter stream and capture messages that match the tracked
# keywords into tweets.json (timeout = 60).
filterStream(
  "tweets.json",
  track = c("Trump", "Putin"),
  timeout = 60,
  oauth = cred
)
# Parse the captured tweets into a data frame.
df <- parseTweets("tweets.json", simplify = TRUE)
# Compute some measures: case-insensitive count of tweets mentioning each name.
show(paste(
  "Number of tweets with #Trump:",
  sum(grepl("Trump", df$text, ignore.case = TRUE))
))
show(paste(
  "Number of tweets with #Putin:",
  sum(grepl("Putin", df$text, ignore.case = TRUE))
))
# Text cleaning ----

# Normalise raw tweet text before sentiment scoring.
#
# txt: character vector of tweet texts.
# Returns a character vector of the same length with non-ASCII characters,
# retweet headers, mentions, punctuation, digits and links removed and
# whitespace tidied.
clean_tweet_text <- function(txt) {
  # sub = "" drops bytes that have no ASCII equivalent (emoji, accents).
  # Without it iconv() returns NA for the whole tweet, so every tweet that
  # contains a single emoji would be lost. Once this has run no non-ASCII
  # character can remain, so no separate emoji/mojibake removal is needed.
  txt <- iconv(txt, "UTF-8", "ASCII", sub = "")
  # "RT @user" / "via @user" headers, then any remaining @mentions.
  txt <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", txt)
  txt <- gsub("@\\w+", "", txt)
  txt <- gsub("[[:punct:]]", "", txt)
  txt <- gsub("[[:digit:]]", "", txt)
  # Punctuation is already gone, so a link is now a single "http..." word;
  # this pattern also covers "https...".
  txt <- gsub("http\\w+", "", txt)
  # Collapse runs of blanks to ONE space; replacing them with "" would glue
  # the neighbouring words together ("foo  bar" -> "foobar").
  txt <- gsub("[ \t]{2,}", " ", txt)
  # Trim leading and trailing whitespace.
  gsub("^\\s+|\\s+$", "", txt)
}

mytxt <- clean_tweet_text(df$text)
# Get sentiment ----
# Score each cleaned tweet and keep the original text next to the scores
# (the text becomes column 1 of `tweets`).
sent <- get_nrc_sentiment(mytxt)
tweets <- cbind(df$text, sent)
# Common emotions in the tweets: column totals of columns 2 to 9
# (presumably the eight NRC emotion columns; verify against syuzhet output).
sentimentTotal <- colSums(tweets[, 2:9])
sentimentTotal <- data.frame(
  sentiment = names(sentimentTotal),
  count = unname(sentimentTotal)
)
# Plot total sentiment: one bar per emotion, legend hidden because the x axis
# already names each bar.
ggplot(sentimentTotal, aes(x = sentiment, y = count)) +
  geom_bar(aes(fill = sentiment), stat = "identity") +
  labs(
    x = "Sentiment",
    y = "Total Count tweets",
    title = "Sentiment Score for All Tweets"
  ) +
  theme(legend.position = "none")
# What source are they using? ----
par(mar = c(3, 3, 3, 2))
# Keep only the text between the first ">" and "</a>" of each source value,
# i.e. the label inside the anchor tag.
df$source <- substr(
  df$source,
  start = regexpr(">", df$source) + 1,
  stop = regexpr("</a>", df$source) - 1
)
# Dot chart of tweet counts per platform, in ascending order.
dotchart(sort(table(df$source)))
mtext("Number of tweets posted by platform")
# Working with the date from JSON ----
# Turns the JSON timestamp "Mon May 29 17:21:58 +0000 2017" into a POSIXct
# value such as 2017-05-29 17:21:58 (GMT).
# NOTE(review): %a and %b are matched against the current locale's names, so
# parsing may yield NA outside an English locale; confirm on the target machine.
format.str <- "%a %b %d %H:%M:%S %z %Y"
df$date <- as.POSIXct(
  strptime(df[, "created_at"], format.str, tz = "GMT"),
  tz = "GMT"
)
# When do they tweet?
# Refer to the column by name inside aes(); geom_bar() counts rows per x value
# by default, so no explicit y mapping is needed.
ggplot(df, aes(x = date)) +
  geom_bar()
# Sentiment by date
# `tweets` has one row per row of `df` but no timestamp, so the date is
# attached here. Each tweet is labelled positive if it has any positive score,
# otherwise negative if it has any negative score, otherwise neutral.
grupo <- cbind(
  tweets,
  date = df$date,
  tweet = ifelse(
    tweets$positive > 0, "positive",
    ifelse(tweets$negative > 0, "negative", "neutral")
  )
)
# Count tweets per (label, timestamp) pair using base R only.
by.tweet <- aggregate(
  list(number = rep(1L, nrow(grupo))),
  by = list(tweet = grupo$tweet, date = grupo$date),
  FUN = sum
)
ggplot(by.tweet, aes(date, number)) +
  geom_line(aes(group = tweet, color = tweet), size = 2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment