Skip to content

Instantly share code, notes, and snippets.

@ferolo3000
Last active May 29, 2017 19:55
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ferolo3000/adb3950829ca3c9f712dc9e1717cf828 to your computer and use it in GitHub Desktop.
Save ferolo3000/adb3950829ca3c9f712dc9e1717cf828 to your computer and use it in GitHub Desktop.
# Load libraries
library(ROAuth)
library(twitteR)
library(syuzhet) # sentiment analysis
library(ggplot2)
library(streamR)
library(tm)
library(dplyr) # mutate(), group_by(), summarise(), n() used for the by-date summary
# Parameters configuration ----
# OAuth endpoints; they are passed to OAuthFactory$new() for the streamR
# handshake further down.
reqURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"
# Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
# NOTE(review): presumably makes httr cache the OAuth token without asking;
# confirm against the httr documentation.
options(httr_oauth_cache = TRUE)
# Placeholder credentials: replace with the keys of your own Twitter app
# before running, and never commit real values.
consumer_key <- "xxxxxxxxxxxxxxxxxx"
consumer_secret <- "xxxxxxxxxxxxxxxxxxx"
access_token <- "xxxxxxxxxxxxxxxxxx"
access_secret <- "xxxxxxxxxxxxxxxxxxx"
# twitteR authentication
setup_twitter_oauth(consumer_key, consumer_secret, access_token, access_secret)
# streamR authentication ----
# The OAuth object is kept on disk between runs. When the file is missing the
# handshake is performed and the result saved; otherwise the saved file is
# loaded (the streaming call later in the script expects it to provide `cred`).
credentials_file <- "my_oauth.Rdata"
if (!file.exists(credentials_file)) {
  cred <- OAuthFactory$new(
    consumerKey = consumer_key,
    consumerSecret = consumer_secret,
    requestURL = reqURL,
    accessURL = accessURL,
    authURL = authURL
  )
  # Certificate bundle is looked up inside the RCurl package installation.
  cred$handshake(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"))
  save(cred, file = credentials_file)
} else {
  load(credentials_file)
}
# load packages
library(streamR)
# Connect to the Twitter stream and collect messages that mention either
# tracked keyword; the raw output goes to tweets.json.
filterStream(
  "tweets.json",
  track = c("Trump", "Putin"),
  timeout = 60,
  oauth = cred
)
# Parse the captured JSON into `df`.
df <- parseTweets("tweets.json", simplify = TRUE)
# Count the tweets whose text mentions each keyword (case-insensitive).
show(paste(
  "Number of tweets with #Trump:",
  sum(grepl("Trump", df$text, ignore.case = TRUE))
))
show(paste(
  "Number of tweets with #Putin:",
  sum(grepl("Putin", df$text, ignore.case = TRUE))
))
# Text cleaning ----

#' Normalise raw tweet text before sentiment scoring
#'
#' @param x Character vector of tweet texts (treated as UTF-8).
#' @return Character vector of the same length as `x`, reduced to ASCII
#'   letters separated by single spaces.
clean_tweet_text <- function(x) {
  # Without `sub`, iconv() returns NA for any element containing a character
  # that has no ASCII equivalent (emoji, accents), which would discard the
  # whole tweet. Replace the offending bytes with spaces instead; the runs of
  # spaces are collapsed below.
  x <- iconv(x, from = "UTF-8", to = "ASCII", sub = " ")
  # Retweet / "via" attributions, then any remaining @mentions.
  x <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", x)
  x <- gsub("@\\w+", "", x)
  x <- gsub("[[:punct:]]", "", x)
  x <- gsub("[[:digit:]]", "", x)
  # Punctuation is already gone, so a URL is now one word starting with
  # "http" (this also covers "https"); drop that word.
  x <- gsub("http\\w+", "", x)
  # Collapse runs of blanks to ONE space. Replacing them with "" would glue
  # neighbouring words together and hide them from the sentiment lexicon.
  x <- gsub("[ \t]{2,}", " ", x)
  # Trim leading and trailing whitespace.
  gsub("^\\s+|\\s+$", "", x)
}

mytxt <- clean_tweet_text(df$text)
# Score every cleaned tweet with the NRC lexicon and keep the original text
# next to its scores (the text is column 1 of `tweets`).
sent <- get_nrc_sentiment(mytxt)
tweets <- cbind(df$text, sent)
# Total per score column for columns 2 to 9.
# NOTE(review): presumably these are the eight NRC emotion columns, with the
# negative/positive polarity columns after them; confirm against syuzhet.
emotion_totals <- colSums(tweets[, 2:9])
# One row per emotion: its name and its summed count, with default row names.
sentimentTotal <- data.frame(
  sentiment = names(emotion_totals),
  count = unname(emotion_totals)
)
# Bar chart of the summed count per sentiment. Bars are coloured by sentiment,
# and the legend is hidden because the x axis already labels them.
ggplot(sentimentTotal, aes(x = sentiment, y = count, fill = sentiment)) +
  geom_bar(stat = "identity") +
  ggtitle("Sentiment Score for All Tweets") +
  xlab("Sentiment") +
  ylab("Total Count tweets") +
  theme(legend.position = "none")
# Which client/platform was each tweet posted from?
par(mar = c(3, 3, 3, 2))
# Keep only the text between the first ">" and the "</a>" of the source field
# (it appears to be an HTML anchor). Note that this overwrites df$source.
df[["source"]] <- substr(
  df[["source"]],
  start = regexpr(">", df[["source"]]) + 1,
  stop = regexpr("</a>", df[["source"]]) - 1
)
# Dot chart of tweet counts per platform, sorted ascending.
dotchart(sort(table(df[["source"]])))
mtext("Number of tweets posted by platform")
# Working with the date from JSON ----
# Converts the JSON timestamp "Mon May 29 17:21:58 +0000 2017" into a POSIXct
# value that prints as 2017-05-29 17:21:58 (GMT).
format.str <- "%a %b %d %H:%M:%S %z %Y"
# %a and %b are matched against the day/month names of the current LC_TIME
# locale, so the English names in the timestamps parse to NA under e.g. a
# Spanish locale. Parse under the "C" locale, then restore the user's setting
# even if parsing fails.
old_lc_time <- Sys.getlocale("LC_TIME")
invisible(Sys.setlocale("LC_TIME", "C"))
df$date <- tryCatch(
  as.POSIXct(strptime(df[, "created_at"], format.str, tz = "GMT"), tz = "GMT"),
  finally = Sys.setlocale("LC_TIME", old_lc_time)
)
# When do they tweet?
# Refer to the column by name inside aes() rather than df$date; geom_bar()
# already counts rows per x value by default, so no explicit y is needed.
ggplot(df, aes(x = date)) +
  geom_bar()
# Sentiment by date ----
# `tweets` holds only the text and the sentiment scores, so the timestamp has
# to be brought over from `df` (same rows, same order) before grouping on it;
# grouping on a `date` column that is not in the data fails.
# A tweet is labelled "positive" if it has any positive score, otherwise
# "negative" if it has any negative score, otherwise "neutral".
grupo <- mutate(
  tweets,
  date = df$date,
  tweet = ifelse(
    positive > 0,
    "positive",
    ifelse(negative > 0, "negative", "neutral")
  )
)
# Number of tweets per label and timestamp.
by.tweet <- group_by(grupo, tweet, date)
by.tweet <- summarise(by.tweet, number = n())
# Drop the leftover grouping so later operations are not silently grouped.
by.tweet <- ungroup(by.tweet)
ggplot(by.tweet, aes(date, number)) +
  geom_line(aes(group = tweet, color = tweet), size = 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment