# ferolo3000/Twitter whit R

## Twitter with R
#load library
library(ROAuth)
library(twitteR)
library(ROAuth)
library(syuzhet)#sentiment analysis
library(ggplot2)
library(streamR)
library(tm)

# Parameters configuration
# Twitter OAuth endpoints; they are handed to OAuthFactory$new() in the
# streamR authentication step further down.
reqURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"

# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
# NOTE(review): this option presumably lets httr cache the OAuth token
# without prompting -- confirm against the twitteR/httr documentation.
options(httr_oauth_cache = TRUE)

# API credentials. Each value is read from an environment variable when one is
# set, so real secrets do not have to be written into this file; the original
# placeholders remain as fallbacks.
consumer_key <- Sys.getenv("TWITTER_CONSUMER_KEY", unset = "xxxxxxxxxxxxxxxxxx")
consumer_secret <- Sys.getenv("TWITTER_CONSUMER_SECRET", unset = "xxxxxxxxxxxxxxxxxxx")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = "xxxxxxxxxxxxxxxxxx")
access_secret <- Sys.getenv("TWITTER_ACCESS_SECRET", unset = "xxxxxxxxxxxxxxxxxxx")

# twitteR authentication: register the four credentials defined above with
# the twitteR package.
setup_twitter_oauth(
  consumer_key = consumer_key,
  consumer_secret = consumer_secret,
  access_token = access_token,
  access_secret = access_secret
)


# streamR authentication
# The OAuth object is cached on disk: when the cache file is missing, a new
# `cred` is created, the handshake is performed and the object is saved;
# otherwise the file is loaded (it is expected to restore `cred`).
credentials_file <- "my_oauth.Rdata"

if (!file.exists(credentials_file)) {
  cred <- OAuthFactory$new(
    consumerKey = consumer_key,
    consumerSecret = consumer_secret,
    requestURL = reqURL,
    accessURL = accessURL,
    authURL = authURL
  )
  # Use the CA bundle shipped with RCurl for the handshake.
  cred$handshake(
    cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl")
  )
  save(cred, file = credentials_file)
} else {
  load(credentials_file)
}

# load packages
library(streamR)

# Connect to the Twitter stream and get the messages that match the tracked
# keywords, writing them to tweets.json (timeout = 60; presumably seconds --
# check the streamR documentation).
filterStream(
  file.name = "tweets.json",
  track = c("Trump", "Putin"),
  timeout = 60,
  oauth = cred
)

# Parse the captured tweets into `df`
df <- parseTweets(tweets = "tweets.json", simplify = TRUE)

# Compute some measures: how many tweets mention each keyword. The labels say
# "#Trump" / "#Putin", but the match is on the bare word, ignoring case.
show(paste(
  "Number of tweets with #Trump:",
  sum(grepl("Trump", df$text, ignore.case = TRUE))
))
show(paste(
  "Number of tweets with #Putin:",
  sum(grepl("Putin", df$text, ignore.case = TRUE))
))

# Text cleaning
#
# clean_tweet_text() normalises raw tweet text before sentiment scoring.
#   txt: character vector of tweets (NA entries stay NA).
#   Returns a character vector of the same length holding only ASCII letters
#   separated by single spaces.
#
# Changes from the previous inline version:
#   * no stringr call (the package is never loaded by this script); the
#     "non-graphical characters become spaces" step now uses base gsub() and
#     its result is actually used;
#   * iconv() gets `sub = ""`, so a tweet containing an emoji or accent loses
#     only those bytes instead of becoming NA as a whole;
#   * runs of blanks collapse to ONE space -- replacing them with "" glued
#     the neighbouring words together;
#   * links are removed before punctuation, while they are still delimited by
#     whitespace (this also covers https, so no second pass is needed);
#   * the trailing replacement of a mojibake emoji sequence is gone: no
#     non-ASCII byte is left after iconv(), so it could never match.
clean_tweet_text <- function(txt) {
  txt <- iconv(txt, from = "UTF-8", to = "ASCII", sub = "")
  # Line breaks, tabs and control characters become plain spaces.
  txt <- gsub("[^[:graph:]]", " ", txt)
  # Retweet / "via" attributions first, then any remaining @mentions.
  txt <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", txt)
  txt <- gsub("@\\w+", "", txt)
  txt <- gsub("http\\S+", "", txt)
  txt <- gsub("[[:punct:]]", "", txt)
  txt <- gsub("[[:digit:]]", "", txt)
  txt <- gsub("[ \t]{2,}", " ", txt)
  # Trim leading and trailing whitespace.
  gsub("^\\s+|\\s+$", "", txt)
}

mytxt <- clean_tweet_text(df$text)

# Get sentiment: score the cleaned text and keep the raw tweet text as the
# first column next to the scores.
sent <- get_nrc_sentiment(mytxt)
tweets <- cbind(df$text, sent)

# Common emotions in the tweets: column totals of columns 2 to 9 of `tweets`
# (the first eight score columns; presumably the NRC emotions), reshaped into
# a two-column table with one row per column name.
sentimentTotal <- colSums(tweets[, 2:9])
sentimentTotal <- data.frame(
  sentiment = names(sentimentTotal),
  count = unname(sentimentTotal)
)

# Plot total sentiment: one bar per sentiment, bar height taken directly from
# `count`, bars coloured by sentiment and the legend hidden.
ggplot(sentimentTotal, aes(x = sentiment, y = count, fill = sentiment)) +
  geom_bar(stat = "identity") +
  labs(
    x = "Sentiment",
    y = "Total Count tweets",
    title = "Sentiment Score for All Tweets"
  ) +
  theme(legend.position = "none")

# Which source (platform) are they using?
par(mar = c(3, 3, 3, 2))

# Keep only the text between the first ">" and the first "</a>" of each
# source value (presumably the client name inside an HTML anchor).
df$source <- substr(
  x = df$source,
  start = regexpr(">", df$source) + 1,
  stop = regexpr("</a>", df$source) - 1
)

# Dot chart of tweet counts per platform, sorted ascending.
dotchart(sort(table(df$source)))
mtext("Number of tweets posted by platform")

# Working with the date from JSON
# Transforms the JSON date from "Mon May 29 17:21:58 +0000 2017" to a POSIXct
# value in GMT (2017-05-29 17:21:58).

format.str <- "%a %b %d %H:%M:%S %z %Y"

# parse_created_at() converts `created_at` strings to POSIXct (GMT).
#   x:   character vector of timestamps in the layout shown above.
#   fmt: strptime() format, defaults to `format.str`.
# `%a` and `%b` are matched against the day and month names of the current
# LC_TIME locale, so English names like the ones in the example parse to NA
# under a non-English locale. Parsing therefore runs under the "C" locale and
# the previous setting is restored on exit.
parse_created_at <- function(x, fmt = format.str) {
  old_lc_time <- Sys.getlocale("LC_TIME")
  on.exit(Sys.setlocale("LC_TIME", old_lc_time), add = TRUE)
  Sys.setlocale("LC_TIME", "C")
  as.POSIXct(strptime(x, fmt, tz = "GMT"), tz = "GMT")
}

df$date <- parse_created_at(df[, "created_at"])

# When do they tweet? Bar chart of the number of tweets per timestamp.
# The column is referenced by name inside aes(): `df$date` bypasses the `data`
# argument (ggplot2 warns about it and the axis gets labelled "df$date").
# geom_bar() counts rows by default, so no explicit count mapping is needed.
ggplot(df, aes(x = date)) +
  geom_bar()


# Sentiment by date
# Label every tweet: "positive" when its positive score is above zero,
# otherwise "negative" when its negative score is above zero, else "neutral".
grupo <- tweets
grupo$tweet <- ifelse(
  tweets$positive > 0, "positive",
  ifelse(tweets$negative > 0, "negative", "neutral")
)
# `tweets` was built row-for-row from `df`, so the parsed timestamps can be
# attached by position; without this column there is nothing to group by.
grupo$date <- df$date

# Number of tweets per (label, timestamp). Base R aggregate() is used because
# this script never loads dplyr (mutate / group_by / summarise / n).
by.tweet <- aggregate(
  list(number = rep(1L, nrow(grupo))),
  by = list(tweet = grupo$tweet, date = grupo$date),
  FUN = sum
)

# One line per label over time.
ggplot(by.tweet, aes(date, number)) +
  geom_line(aes(group = tweet, color = tweet), size = 2)
	#load library
	library(ROAuth)
	library(twitteR)
	library(ROAuth)
	library(syuzhet)#sentiment analysis
	library(ggplot2)
	library(streamR)
	library(tm)

	# Parameters configuration
	# Twitter OAuth endpoints; they are handed to OAuthFactory$new() in the
	# streamR authentication step further down.
	reqURL <- "https://api.twitter.com/oauth/request_token"
	accessURL <- "https://api.twitter.com/oauth/access_token"
	authURL <- "https://api.twitter.com/oauth/authorize"

	# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
	# NOTE(review): this option presumably lets httr cache the OAuth token
	# without prompting -- confirm against the twitteR/httr documentation.
	options(httr_oauth_cache = TRUE)

	# API credentials. Each value is read from an environment variable when one
	# is set, so real secrets do not have to be written into this file; the
	# original placeholders remain as fallbacks.
	consumer_key <- Sys.getenv("TWITTER_CONSUMER_KEY", unset = "xxxxxxxxxxxxxxxxxx")
	consumer_secret <- Sys.getenv("TWITTER_CONSUMER_SECRET", unset = "xxxxxxxxxxxxxxxxxxx")
	access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = "xxxxxxxxxxxxxxxxxx")
	access_secret <- Sys.getenv("TWITTER_ACCESS_SECRET", unset = "xxxxxxxxxxxxxxxxxxx")

	# twitteR authentication: register the four credentials defined above with
	# the twitteR package.
	setup_twitter_oauth(
	  consumer_key = consumer_key,
	  consumer_secret = consumer_secret,
	  access_token = access_token,
	  access_secret = access_secret
	)


	# streamR authentication
	# The OAuth object is cached on disk: when the cache file is missing, a new
	# `cred` is created, the handshake is performed and the object is saved;
	# otherwise the file is loaded (it is expected to restore `cred`).
	credentials_file <- "my_oauth.Rdata"

	if (!file.exists(credentials_file)) {
	  cred <- OAuthFactory$new(
	    consumerKey = consumer_key,
	    consumerSecret = consumer_secret,
	    requestURL = reqURL,
	    accessURL = accessURL,
	    authURL = authURL
	  )
	  # Use the CA bundle shipped with RCurl for the handshake.
	  cred$handshake(
	    cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl")
	  )
	  save(cred, file = credentials_file)
	} else {
	  load(credentials_file)
	}

	# load packages
	library(streamR)

	# Connect to the Twitter stream and get the messages that match the tracked
	# keywords, writing them to tweets.json (timeout = 60; presumably seconds --
	# check the streamR documentation).
	filterStream(
	  file.name = "tweets.json",
	  track = c("Trump", "Putin"),
	  timeout = 60,
	  oauth = cred
	)

	# Parse the captured tweets into `df`
	df <- parseTweets(tweets = "tweets.json", simplify = TRUE)

	# Compute some measures: how many tweets mention each keyword. The labels
	# say "#Trump" / "#Putin", but the match is on the bare word, ignoring case.
	show(paste(
	  "Number of tweets with #Trump:",
	  sum(grepl("Trump", df$text, ignore.case = TRUE))
	))
	show(paste(
	  "Number of tweets with #Putin:",
	  sum(grepl("Putin", df$text, ignore.case = TRUE))
	))

	# Text cleaning
	#
	# clean_tweet_text() normalises raw tweet text before sentiment scoring.
	#   txt: character vector of tweets (NA entries stay NA).
	#   Returns a character vector of the same length holding only ASCII
	#   letters separated by single spaces.
	#
	# Changes from the previous inline version:
	#   * the patterns no longer contain `\|`: that is not a valid escape in an
	#     R string literal and stopped the file from parsing; plain `|` is the
	#     regex alternation that was intended;
	#   * no stringr call (the package is never loaded by this script); the
	#     "non-graphical characters become spaces" step now uses base gsub()
	#     and its result is actually used;
	#   * iconv() gets `sub = ""`, so a tweet containing an emoji or accent
	#     loses only those bytes instead of becoming NA as a whole;
	#   * runs of blanks collapse to ONE space -- replacing them with "" glued
	#     the neighbouring words together;
	#   * links are removed before punctuation, while they are still delimited
	#     by whitespace (this also covers https, so no second pass is needed);
	#   * the trailing replacement of a mojibake emoji sequence is gone: no
	#     non-ASCII byte is left after iconv(), so it could never match.
	clean_tweet_text <- function(txt) {
	  txt <- iconv(txt, from = "UTF-8", to = "ASCII", sub = "")
	  # Line breaks, tabs and control characters become plain spaces.
	  txt <- gsub("[^[:graph:]]", " ", txt)
	  # Retweet / "via" attributions first, then any remaining @mentions.
	  txt <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", txt)
	  txt <- gsub("@\\w+", "", txt)
	  txt <- gsub("http\\S+", "", txt)
	  txt <- gsub("[[:punct:]]", "", txt)
	  txt <- gsub("[[:digit:]]", "", txt)
	  txt <- gsub("[ \t]{2,}", " ", txt)
	  # Trim leading and trailing whitespace.
	  gsub("^\\s+|\\s+$", "", txt)
	}

	mytxt <- clean_tweet_text(df$text)

	# Get sentiment: score the cleaned text and keep the raw tweet text as the
	# first column next to the scores.
	sent <- get_nrc_sentiment(mytxt)
	tweets <- cbind(df$text, sent)

	# Common emotions in the tweets: column totals of columns 2 to 9 of `tweets`
	# (the first eight score columns; presumably the NRC emotions), reshaped
	# into a two-column table with one row per column name.
	sentimentTotal <- colSums(tweets[, 2:9])
	sentimentTotal <- data.frame(
	  sentiment = names(sentimentTotal),
	  count = unname(sentimentTotal)
	)

	# Plot total sentiment: one bar per sentiment, bar height taken directly
	# from `count`, bars coloured by sentiment and the legend hidden.
	ggplot(sentimentTotal, aes(x = sentiment, y = count, fill = sentiment)) +
	  geom_bar(stat = "identity") +
	  labs(
	    x = "Sentiment",
	    y = "Total Count tweets",
	    title = "Sentiment Score for All Tweets"
	  ) +
	  theme(legend.position = "none")

	# Which source (platform) are they using?
	par(mar = c(3, 3, 3, 2))

	# Keep only the text between the first ">" and the first "</a>" of each
	# source value (presumably the client name inside an HTML anchor).
	df$source <- substr(
	  x = df$source,
	  start = regexpr(">", df$source) + 1,
	  stop = regexpr("</a>", df$source) - 1
	)

	# Dot chart of tweet counts per platform, sorted ascending.
	dotchart(sort(table(df$source)))
	mtext("Number of tweets posted by platform")

	# Working with the date from JSON
	# Transforms the JSON date from "Mon May 29 17:21:58 +0000 2017" to a
	# POSIXct value in GMT (2017-05-29 17:21:58).

	format.str <- "%a %b %d %H:%M:%S %z %Y"

	# parse_created_at() converts `created_at` strings to POSIXct (GMT).
	#   x:   character vector of timestamps in the layout shown above.
	#   fmt: strptime() format, defaults to `format.str`.
	# `%a` and `%b` are matched against the day and month names of the current
	# LC_TIME locale, so English names like the ones in the example parse to NA
	# under a non-English locale. Parsing therefore runs under the "C" locale
	# and the previous setting is restored on exit.
	parse_created_at <- function(x, fmt = format.str) {
	  old_lc_time <- Sys.getlocale("LC_TIME")
	  on.exit(Sys.setlocale("LC_TIME", old_lc_time), add = TRUE)
	  Sys.setlocale("LC_TIME", "C")
	  as.POSIXct(strptime(x, fmt, tz = "GMT"), tz = "GMT")
	}

	df$date <- parse_created_at(df[, "created_at"])

	# When do they tweet? Bar chart of the number of tweets per timestamp.
	# The column is referenced by name inside aes(): `df$date` bypasses the
	# `data` argument (ggplot2 warns about it and the axis gets labelled
	# "df$date"). geom_bar() counts rows by default, so no explicit count
	# mapping is needed.
	ggplot(df, aes(x = date)) +
	  geom_bar()


	# Sentiment by date
	# Label every tweet: "positive" when its positive score is above zero,
	# otherwise "negative" when its negative score is above zero, else "neutral".
	grupo <- tweets
	grupo$tweet <- ifelse(
	  tweets$positive > 0, "positive",
	  ifelse(tweets$negative > 0, "negative", "neutral")
	)
	# `tweets` was built row-for-row from `df`, so the parsed timestamps can be
	# attached by position; without this column there is nothing to group by.
	grupo$date <- df$date

	# Number of tweets per (label, timestamp). Base R aggregate() is used
	# because this script never loads dplyr (mutate / group_by / summarise / n).
	by.tweet <- aggregate(
	  list(number = rep(1L, nrow(grupo))),
	  by = list(tweet = grupo$tweet, date = grupo$date),
	  FUN = sum
	)

	# One line per label over time.
	ggplot(by.tweet, aes(date, number)) +
	  geom_line(aes(group = tweet, color = tweet), size = 2)