# loiyumba/Airlines_emotion.R

## Airlines_emotion.R
# To extract tweets from twitter

# Load the required libraries
# library() stops right away when a package is missing; require() would only
# warn and return FALSE, postponing the failure to the first unresolved call.
library(twitteR)
library(stringr)
library(syuzhet)

# Set working directory
# file.path() joins the components with a separator that every platform
# accepts, instead of hard-coding Windows backslashes.
setwd(file.path("..", "tweet_analysis", "Airlines"))

# Load twitter API
# Point RCurl at the CA certificate bundle shipped inside the RCurl package
options(RCurlOptions = list(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl")))
# OAuth endpoint URLs (no call visible in this script reads these three variables)
reqURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"
# Placeholders: fill in the four credentials before running
apiKey <- " " # your twitter api key
apiSecret <- " " # your twitter api secret
accessToken <- " " # your access token
accessSecret <- " " # your access secret
# Register the credentials with twitteR for the searches that follow
setup_twitter_oauth(apiKey, apiSecret, access_token = accessToken, access_secret = accessSecret)

# Extracting Tweets
# Each airline goes through the same four steps: search the hashtag, flatten
# the returned list into a data frame, preview it, and save it as a CSV file.

# Air India
air_india <- searchTwitter(
  searchString = "#AirIndia",
  n = 500,
  since = "2016-03-01",
  until = "2016-03-30"
)
air_india_df <- twListToDF(air_india)
head(air_india_df)
write.csv(air_india_df, file = "air_india.csv", row.names = FALSE) # saving tweets in local disk

# Jet Airways
jet_airways <- searchTwitter(
  searchString = "#JetAirways",
  n = 500,
  since = "2016-03-01",
  until = "2016-03-30"
)
jet_airways_df <- twListToDF(jet_airways)
head(jet_airways_df)
write.csv(jet_airways_df, file = "jet_airways.csv", row.names = FALSE)

# Indigo Airlines ## Not able to retrieve enough tweets. Only 18 tweets returned
indigo <- searchTwitter(
  searchString = "#indigoairlines",
  n = 500,
  since = "2016-03-01",
  until = "2016-03-31"
)
indigo_df <- twListToDF(indigo)
head(indigo_df)
write.csv(indigo_df, file = "indigo.csv", row.names = FALSE)

# Spicejet ## Returned only 165 tweets
spicejet <- searchTwitter(
  searchString = "#SpiceJet",
  n = 500,
  since = "2016-03-01",
  until = "2016-03-30"
)
spicejet_df <- twListToDF(spicejet)
head(spicejet_df)
write.csv(spicejet_df, file = "spicejet.csv", row.names = FALSE)

# Reload the data
# air india
air_india <- read.csv("air_india.csv", stringsAsFactors = FALSE)
tweets <- air_india$text # extract only tweets column

# Cleaning tweets
tweets <- str_replace_all(tweets, "\\#", "") # remove #
tweets <- str_replace_all(tweets, "Retweet", "") # remove Retweet
# Word boundaries limit the match to the stand-alone marker, so capitalised
# words that merely contain "RT" (e.g. "AIRPORT") are left intact
tweets <- str_replace_all(tweets, "\\bRT\\b", "") # remove RT
tweets <- str_replace_all(tweets, ":", "") # remove :
# The optional ";" removes the whole entity instead of leaving a stray ";"
tweets <- str_replace_all(tweets, "&amp;?", "") # remove &amp;
tweets <- str_replace_all(tweets, "\n", " ") # remove \n
tweets <- str_replace_all(tweets, "http[^[:space:]]*", "") # remove URL
tweets <- str_replace_all(tweets, "@\\w+", "") # remove userid
# Match only the whitespace-delimited token that contains the sign; a leading
# ".*" / ".+" would also swallow every character in front of it
tweets <- str_replace_all(tweets, "[^[:space:]]*\\+[^[:space:]]*", "") # remove words with + sign
tweets <- str_replace_all(tweets, "[^[:space:]]*>[^[:space:]]*", "") # remove words with > sign
# " {2,}" collapses a run of any length in one pass; swapping "  " for " "
# once would still leave two spaces where there were three
tweets <- str_replace_all(tweets, " {2,}", " ") # remove repeated spaces
tweets <- str_trim(tweets, side = "both") # remove spaces beginning & end of the sentence
# Drop tweets that are empty, only the hashtag text, or only "-" after cleaning
tweets <- tweets[!tweets %in% c("", "AirIndia", "-")]

# NRC Sentiment on AirIndia
nrc_data <- get_nrc_sentiment(tweets)
# Bar plot
# The first eight columns of the NRC result are rescaled by prop.table() to
# shares of their grand total, summed per column and plotted largest first
barplot(sort(colSums(prop.table(nrc_data[, 1:8])), decreasing = TRUE),
        main = "Twitter Emotions of #AirIndia", ylab = "Percentage",
        col = "#ff3300")

#  ------------------------------------------------------------------------

# Jet Airways
jet <- read.csv("jet_airways.csv", stringsAsFactors = FALSE)
jet_tweets <- jet$text # keep only the tweet text column

# cleaning tweets
jet_tweets <- str_replace_all(jet_tweets, "http[^[:space:]]*", "") # remove URL
jet_tweets <- str_replace_all(jet_tweets, "@\\w+", "") # remove userid
jet_tweets <- str_replace_all(jet_tweets, "[:punct:]", " ") # remove all the punctuation
# Match only the whitespace-delimited token that contains the sign; a leading
# ".*" / ".+" would also swallow every character in front of it
jet_tweets <- str_replace_all(jet_tweets, "[^[:space:]]*\\+[^[:space:]]*", "") # remove words with + sign
jet_tweets <- str_replace_all(jet_tweets, "[^[:space:]]*>[^[:space:]]*", "") # remove words with > sign
jet_tweets <- str_replace_all(jet_tweets, "\n", " ") # remove \n
# Word boundaries limit the match to the stand-alone marker, so capitalised
# words that merely contain "RT" (e.g. "AIRPORT") are left intact
jet_tweets <- str_replace_all(jet_tweets, "\\bRT\\b", "") # remove RT
jet_tweets <- str_replace_all(jet_tweets, "\\$", "") # remove $
jet_tweets <- str_replace_all(jet_tweets, "\\|", "") # remove |
# " {2,}" collapses a run of any length in one pass; swapping "  " for " "
# once would still leave two spaces where there were three
jet_tweets <- str_replace_all(jet_tweets, " {2,}", " ") # remove repeated spaces
jet_tweets <- str_trim(jet_tweets, side = "both") # remove spaces beginning & end of the sentence
# Drop tweets that are empty or reduced to just the hashtag text / "Je"
jet_tweets <- jet_tweets[!jet_tweets %in% c("", "JetAirways", "jetairways", "Je")]

# NRC Sentiment on JetAirways
nrc_jet <- get_nrc_sentiment(jet_tweets)
# Bar plot
# The first eight columns of the NRC result are rescaled by prop.table() to
# shares of their grand total, summed per column and plotted largest first
barplot(sort(colSums(prop.table(nrc_jet[, 1:8])), decreasing = TRUE),
        main = "Twitter Emotions of #JetAirways", ylab = "Percentage",
        col = "#000033")

#  ------------------------------------------------------------------------

# SpiceJet
spice <- read.csv("spicejet.csv", stringsAsFactors = FALSE)
spice_tweets <- spice$text # keep only the tweet text column

# cleaning tweets
spice_tweets <- str_replace_all(spice_tweets, "http[^[:space:]]*", "") # remove URL
spice_tweets <- str_replace_all(spice_tweets, "@\\w+", "") # remove userid
spice_tweets <- str_replace_all(spice_tweets, "[:punct:]", " ") # remove all the punctuation
# Match only the whitespace-delimited token that contains the sign; a leading
# ".*" / ".+" would also swallow every character in front of it
spice_tweets <- str_replace_all(spice_tweets, "[^[:space:]]*\\+[^[:space:]]*", "") # remove words with + sign
spice_tweets <- str_replace_all(spice_tweets, "[^[:space:]]*>[^[:space:]]*", "") # remove words with > sign
spice_tweets <- str_replace_all(spice_tweets, "\n", " ") # remove \n
# Word boundaries limit the match to the stand-alone marker, so capitalised
# words that merely contain "RT" (e.g. "AIRPORT") are left intact
spice_tweets <- str_replace_all(spice_tweets, "\\bRT\\b", "") # remove RT
# " {2,}" collapses a run of any length in one pass; swapping "  " for " "
# once would still leave two spaces where there were three
spice_tweets <- str_replace_all(spice_tweets, " {2,}", " ") # remove repeated spaces
spice_tweets <- str_trim(spice_tweets, side = "both") # remove spaces beginning & end of the sentence
# Drop tweets that are empty or reduced to just "spicejet" after cleaning
spice_tweets <- spice_tweets[!spice_tweets %in% c("spicejet", "")]

# NRC Sentiment on SpiceJets
nrc_spice <- get_nrc_sentiment(spice_tweets)
# Bar plot
# The first eight columns of the NRC result are rescaled by prop.table() to
# shares of their grand total, summed per column and plotted largest first
barplot(sort(colSums(prop.table(nrc_spice[, 1:8])), decreasing = TRUE),
        main = "Twitter Emotions of #SpiceJet", ylab = "Percentage",
        col = "#cc0000")
	# To extract tweets from twitter

	# Load the required libraries
	# library() stops right away when a package is missing; require() would only
	# warn and return FALSE, postponing the failure to the first unresolved call.
	library(twitteR)
	library(stringr)
	library(syuzhet)

	# Set working directory
	# file.path() joins the components with a separator that every platform
	# accepts, instead of hard-coding Windows backslashes.
	setwd(file.path("..", "tweet_analysis", "Airlines"))

	# Load twitter API
	# Point RCurl at the CA certificate bundle shipped inside the RCurl package
	options(RCurlOptions = list(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl")))
	# OAuth endpoint URLs (no call visible in this script reads these three variables)
	reqURL <- "https://api.twitter.com/oauth/request_token"
	accessURL <- "https://api.twitter.com/oauth/access_token"
	authURL <- "https://api.twitter.com/oauth/authorize"
	# Placeholders: fill in the four credentials before running
	apiKey <- " " # your twitter api key
	apiSecret <- " " # your twitter api secret
	accessToken <- " " # your access token
	accessSecret <- " " # your access secret
	# Register the credentials with twitteR for the searches that follow
	setup_twitter_oauth(apiKey, apiSecret, access_token = accessToken, access_secret = accessSecret)

	# Extracting Tweets
	# Each airline goes through the same four steps: search the hashtag, flatten
	# the returned list into a data frame, preview it, and save it as a CSV file.

	# Air India
	air_india <- searchTwitter(
	  searchString = "#AirIndia",
	  n = 500,
	  since = "2016-03-01",
	  until = "2016-03-30"
	)
	air_india_df <- twListToDF(air_india)
	head(air_india_df)
	write.csv(air_india_df, file = "air_india.csv", row.names = FALSE) # saving tweets in local disk

	# Jet Airways
	jet_airways <- searchTwitter(
	  searchString = "#JetAirways",
	  n = 500,
	  since = "2016-03-01",
	  until = "2016-03-30"
	)
	jet_airways_df <- twListToDF(jet_airways)
	head(jet_airways_df)
	write.csv(jet_airways_df, file = "jet_airways.csv", row.names = FALSE)

	# Indigo Airlines ## Not able to retrieve enough tweets. Only 18 tweets returned
	indigo <- searchTwitter(
	  searchString = "#indigoairlines",
	  n = 500,
	  since = "2016-03-01",
	  until = "2016-03-31"
	)
	indigo_df <- twListToDF(indigo)
	head(indigo_df)
	write.csv(indigo_df, file = "indigo.csv", row.names = FALSE)

	# Spicejet ## Returned only 165 tweets
	spicejet <- searchTwitter(
	  searchString = "#SpiceJet",
	  n = 500,
	  since = "2016-03-01",
	  until = "2016-03-30"
	)
	spicejet_df <- twListToDF(spicejet)
	head(spicejet_df)
	write.csv(spicejet_df, file = "spicejet.csv", row.names = FALSE)

	# Reload the data
	# air india
	air_india <- read.csv("air_india.csv", stringsAsFactors = FALSE)
	tweets <- air_india$text # extract only tweets column

	# Cleaning tweets
	tweets <- str_replace_all(tweets, "\\#", "") # remove #
	tweets <- str_replace_all(tweets, "Retweet", "") # remove Retweet
	# Word boundaries limit the match to the stand-alone marker, so capitalised
	# words that merely contain "RT" (e.g. "AIRPORT") are left intact
	tweets <- str_replace_all(tweets, "\\bRT\\b", "") # remove RT
	tweets <- str_replace_all(tweets, ":", "") # remove :
	# The optional ";" removes the whole entity instead of leaving a stray ";"
	tweets <- str_replace_all(tweets, "&amp;?", "") # remove &amp;
	tweets <- str_replace_all(tweets, "\n", " ") # remove \n
	tweets <- str_replace_all(tweets, "http[^[:space:]]*", "") # remove URL
	tweets <- str_replace_all(tweets, "@\\w+", "") # remove userid
	# Match only the whitespace-delimited token that contains the sign; a leading
	# ".*" / ".+" would also swallow every character in front of it
	tweets <- str_replace_all(tweets, "[^[:space:]]*\\+[^[:space:]]*", "") # remove words with + sign
	tweets <- str_replace_all(tweets, "[^[:space:]]*>[^[:space:]]*", "") # remove words with > sign
	# Replacing " " with " " changed nothing; " {2,}" actually collapses runs of
	# two or more spaces down to one
	tweets <- str_replace_all(tweets, " {2,}", " ") # remove repeated spaces
	tweets <- str_trim(tweets, side = "both") # remove spaces beginning & end of the sentence
	# Drop tweets that are empty, only the hashtag text, or only "-" after cleaning
	tweets <- tweets[!tweets %in% c("", "AirIndia", "-")]

	# NRC Sentiment on AirIndia
	nrc_data <- get_nrc_sentiment(tweets)
	# Bar plot
	# The first eight columns of the NRC result are rescaled by prop.table() to
	# shares of their grand total, summed per column and plotted largest first
	barplot(sort(colSums(prop.table(nrc_data[, 1:8])), decreasing = TRUE),
	        main = "Twitter Emotions of #AirIndia", ylab = "Percentage",
	        col = "#ff3300")

	# ------------------------------------------------------------------------

	# Jet Airways
	jet <- read.csv("jet_airways.csv", stringsAsFactors = FALSE)
	jet_tweets <- jet$text # keep only the tweet text column

	# cleaning tweets
	jet_tweets <- str_replace_all(jet_tweets, "http[^[:space:]]*", "") # remove URL
	jet_tweets <- str_replace_all(jet_tweets, "@\\w+", "") # remove userid
	jet_tweets <- str_replace_all(jet_tweets, "[:punct:]", " ") # remove all the punctuation
	# Match only the whitespace-delimited token that contains the sign; a leading
	# ".*" / ".+" would also swallow every character in front of it
	jet_tweets <- str_replace_all(jet_tweets, "[^[:space:]]*\\+[^[:space:]]*", "") # remove words with + sign
	jet_tweets <- str_replace_all(jet_tweets, "[^[:space:]]*>[^[:space:]]*", "") # remove words with > sign
	jet_tweets <- str_replace_all(jet_tweets, "\n", " ") # remove \n
	# Word boundaries limit the match to the stand-alone marker, so capitalised
	# words that merely contain "RT" (e.g. "AIRPORT") are left intact
	jet_tweets <- str_replace_all(jet_tweets, "\\bRT\\b", "") # remove RT
	jet_tweets <- str_replace_all(jet_tweets, "\\$", "") # remove $
	# "\\|" is the regex for a literal pipe; a lone "\|" inside an R string is an
	# unrecognised escape and stops the file from parsing
	jet_tweets <- str_replace_all(jet_tweets, "\\|", "") # remove |
	# Replacing " " with " " changed nothing; " {2,}" actually collapses runs of
	# two or more spaces down to one
	jet_tweets <- str_replace_all(jet_tweets, " {2,}", " ") # remove repeated spaces
	jet_tweets <- str_trim(jet_tweets, side = "both") # remove spaces beginning & end of the sentence
	# Drop tweets that are empty or reduced to just the hashtag text / "Je"
	jet_tweets <- jet_tweets[!jet_tweets %in% c("", "JetAirways", "jetairways", "Je")]

	# NRC Sentiment on JetAirways
	nrc_jet <- get_nrc_sentiment(jet_tweets)
	# Bar plot
	# The first eight columns of the NRC result are rescaled by prop.table() to
	# shares of their grand total, summed per column and plotted largest first
	barplot(sort(colSums(prop.table(nrc_jet[, 1:8])), decreasing = TRUE),
	        main = "Twitter Emotions of #JetAirways", ylab = "Percentage",
	        col = "#000033")

	# ------------------------------------------------------------------------

	# SpiceJet
	spice <- read.csv("spicejet.csv", stringsAsFactors = FALSE)
	spice_tweets <- spice$text # keep only the tweet text column

	# cleaning tweets
	spice_tweets <- str_replace_all(spice_tweets, "http[^[:space:]]*", "") # remove URL
	spice_tweets <- str_replace_all(spice_tweets, "@\\w+", "") # remove userid
	spice_tweets <- str_replace_all(spice_tweets, "[:punct:]", " ") # remove all the punctuation
	# Match only the whitespace-delimited token that contains the sign; a leading
	# ".*" / ".+" would also swallow every character in front of it
	spice_tweets <- str_replace_all(spice_tweets, "[^[:space:]]*\\+[^[:space:]]*", "") # remove words with + sign
	spice_tweets <- str_replace_all(spice_tweets, "[^[:space:]]*>[^[:space:]]*", "") # remove words with > sign
	spice_tweets <- str_replace_all(spice_tweets, "\n", " ") # remove \n
	# Word boundaries limit the match to the stand-alone marker, so capitalised
	# words that merely contain "RT" (e.g. "AIRPORT") are left intact
	spice_tweets <- str_replace_all(spice_tweets, "\\bRT\\b", "") # remove RT
	# Replacing " " with " " changed nothing; " {2,}" actually collapses runs of
	# two or more spaces down to one
	spice_tweets <- str_replace_all(spice_tweets, " {2,}", " ") # remove repeated spaces
	spice_tweets <- str_trim(spice_tweets, side = "both") # remove spaces beginning & end of the sentence
	# Drop tweets that are empty or reduced to just "spicejet" after cleaning
	spice_tweets <- spice_tweets[!spice_tweets %in% c("spicejet", "")]

	# NRC Sentiment on SpiceJets
	nrc_spice <- get_nrc_sentiment(spice_tweets)
	# Bar plot
	# The first eight columns of the NRC result are rescaled by prop.table() to
	# shares of their grand total, summed per column and plotted largest first
	barplot(sort(colSums(prop.table(nrc_spice[, 1:8])), decreasing = TRUE),
	        main = "Twitter Emotions of #SpiceJet", ylab = "Percentage",
	        col = "#cc0000")