Skip to content

Instantly share code, notes, and snippets.

@loiyumba
Created April 1, 2016 15:49
Show Gist options
  • Save loiyumba/605ba5bd44d65b904c93d83d37d7e57f to your computer and use it in GitHub Desktop.
Save loiyumba/605ba5bd44d65b904c93d83d37d7e57f to your computer and use it in GitHub Desktop.
Twitter emotions on major airlines in India
# To extract tweets from twitter
# Load the required libraries
require(twitteR)
require(stringr)
require(syuzhet)
# Set working directory
# file.path() joins the components with a separator that works on every
# platform, so the script is not tied to Windows-style backslashes.
setwd(file.path("..", "tweet_analysis", "Airlines"))
# Load twitter API
# Point RCurl at the CA bundle shipped with the RCurl package.
options(RCurlOptions = list(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl")))
# OAuth endpoint URLs. They are not passed to setup_twitter_oauth() below and
# are not referenced elsewhere in the visible script; kept in case other code
# relies on them.
reqURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"
# Credentials are read from environment variables so that secrets never have
# to be written into this (shareable) source file. When a variable is unset
# the original blank placeholder is used and the check below reports it.
apiKey <- Sys.getenv("TWITTER_API_KEY", unset = " ") # your twitter api key
apiSecret <- Sys.getenv("TWITTER_API_SECRET", unset = " ") # your twitter api secret
accessToken <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = " ") # your access token
accessSecret <- Sys.getenv("TWITTER_ACCESS_SECRET", unset = " ") # your access secret
# Fail early with a clear message instead of attempting to authenticate with
# blank credentials.
twitter_credentials <- c(apiKey, apiSecret, accessToken, accessSecret)
if (any(!nzchar(trimws(twitter_credentials)))) {
  stop(
    "Twitter credentials are missing: set TWITTER_API_KEY, ",
    "TWITTER_API_SECRET, TWITTER_ACCESS_TOKEN and TWITTER_ACCESS_SECRET.",
    call. = FALSE
  )
}
setup_twitter_oauth(apiKey, apiSecret, access_token = accessToken, access_secret = accessSecret)
# Extracting Tweets
# Convert a list of tweets returned by searchTwitter() to a data frame and
# save it as CSV (saving tweets in local disk).
#   statuses: list returned by searchTwitter()
#   csv_file: path of the CSV file to write
# Returns the data frame. When `statuses` is empty, twListToDF() would stop
# with an error and abort the remaining downloads, so in that case a warning
# is raised, nothing is written and an empty data frame is returned.
tweets_to_csv <- function(statuses, csv_file) {
  if (length(statuses) == 0) {
    warning("No tweets to save; '", csv_file, "' was not written.",
            call. = FALSE)
    return(data.frame())
  }
  tweet_df <- twListToDF(statuses)
  write.csv(tweet_df, csv_file, row.names = FALSE)
  tweet_df
}
# Air India
air_india <- searchTwitter("#AirIndia", n = 500, since = "2016-03-01", until = "2016-03-30")
air_india_df <- tweets_to_csv(air_india, "air_india.csv")
head(air_india_df)
# Jet Airways
jet_airways <- searchTwitter("#JetAirways", n = 500, since = "2016-03-01", until = "2016-03-30")
jet_airways_df <- tweets_to_csv(jet_airways, "jet_airways.csv")
head(jet_airways_df)
# Indigo Airlines ## Not able to retrieve enough tweets. Only 18 tweets returned
# NOTE(review): this search ends on 2016-03-31 while the other airlines use
# 2016-03-30 -- confirm whether the different window is intentional.
indigo <- searchTwitter("#indigoairlines", n = 500, since = "2016-03-01", until = "2016-03-31")
indigo_df <- tweets_to_csv(indigo, "indigo.csv")
head(indigo_df)
# Spicejet ## Returned only 165 tweets
spicejet <- searchTwitter("#SpiceJet", n = 500, since = "2016-03-01", until = "2016-03-30")
spicejet_df <- tweets_to_csv(spicejet, "spicejet.csv")
head(spicejet_df)
# Reload the data
# air india
air_india <- read.csv("air_india.csv", stringsAsFactors = FALSE)
tweets <- air_india$text # extract only tweets column
# Cleaning tweets
tweets <- str_replace_all(tweets, "\\#", "") # remove #
tweets <- str_replace_all(tweets, "Retweet", "") # remove Retweet
# Word boundaries keep the letters "RT" inside longer words (e.g. "AIRTEL").
tweets <- str_replace_all(tweets, "\\bRT\\b", "") # remove standalone RT
tweets <- str_replace_all(tweets, ":", "") # remove :
# The optional ';' removes the whole HTML entity instead of leaving a stray ';'.
tweets <- str_replace_all(tweets, "&amp;?", "") # remove &amp;
tweets <- str_replace_all(tweets, "\n", " ") # remove \n
tweets <- str_replace_all(tweets, "http[^[:space:]]*", "") # remove URL
tweets <- str_replace_all(tweets, "@\\w+", "") # remove userid
# Drop only the whitespace-delimited token that contains the sign. The previous
# patterns (".*?\\w+\\+" and ".+\\>") also deleted all text before the sign.
# Presumably these target "<U+XXXX>" character escapes -- TODO confirm.
tweets <- str_replace_all(tweets, "\\S*\\+\\S*", "") # remove words with + sign
tweets <- str_replace_all(tweets, "\\S*>\\S*", "") # remove words with > sign
tweets <- str_replace_all(tweets, "\\s+", " ") # collapse repeated whitespace
tweets <- str_trim(tweets, side = "both") # remove spaces beginning & end of the sentence
# Remove empty tweets and tweets consisting only of "AirIndia" or "-"
tweets <- tweets[!tweets %in% c("", "AirIndia", "-")]
# NRC Sentiment on AirIndia
nrc_data <- get_nrc_sentiment(tweets)
# Bar plot
# Columns 1:8 hold the eight NRC emotion counts. Each emotion's share of all
# emotion counts is scaled to 0-100 so the values match the "Percentage" label.
emotion_totals <- colSums(nrc_data[, 1:8])
emotion_pct <- 100 * emotion_totals / sum(emotion_totals)
barplot(sort(emotion_pct, decreasing = TRUE),
        main = "Twitter Emotions of #AirIndia", ylab = "Percentage",
        col = "#ff3300")
# ------------------------------------------------------------------------
# Jet Airways
jet <- read.csv("jet_airways.csv", stringsAsFactors = FALSE)
jet_tweets <- jet$text
# cleaning tweets
jet_tweets <- str_replace_all(jet_tweets, "http[^[:space:]]*", "") # remove URL
jet_tweets <- str_replace_all(jet_tweets, "@\\w+", "") # remove userid
# Remove the HTML entity before punctuation is stripped; otherwise "&amp;"
# turns into a stray "amp" word.
jet_tweets <- str_replace_all(jet_tweets, "&amp;?", "") # remove &amp;
jet_tweets <- str_replace_all(jet_tweets, "[:punct:]", " ") # remove all the punctuation
# Drop only the whitespace-delimited token that contains the sign. The previous
# patterns (".*?\\w+\\+" and ".+\\>") also deleted all text before the sign.
# Presumably these target "<U+XXXX>" character escapes -- TODO confirm.
jet_tweets <- str_replace_all(jet_tweets, "\\S*\\+\\S*", "") # remove words with + sign
jet_tweets <- str_replace_all(jet_tweets, "\\S*>\\S*", "") # remove words with > sign
jet_tweets <- str_replace_all(jet_tweets, "\n", " ") # remove \n
# Word boundaries keep the letters "RT" inside longer words (e.g. "AIRTEL").
jet_tweets <- str_replace_all(jet_tweets, "\\bRT\\b", "") # remove standalone RT
jet_tweets <- str_replace_all(jet_tweets, "\\$", "") # remove $
jet_tweets <- str_replace_all(jet_tweets, "\\|", "") # remove |
jet_tweets <- str_replace_all(jet_tweets, "\\s+", " ") # collapse repeated whitespace
jet_tweets <- str_trim(jet_tweets, side = "both") # remove spaces beginning & end of the sentence
# Remove empty tweets and tweets consisting only of "JetAirways",
# "jetairways" or "Je"
jet_tweets <- jet_tweets[!jet_tweets %in% c("", "JetAirways", "jetairways", "Je")]
# NRC Sentiment on JetAirways
nrc_jet <- get_nrc_sentiment(jet_tweets)
# Bar plot
# Columns 1:8 hold the eight NRC emotion counts. Each emotion's share of all
# emotion counts is scaled to 0-100 so the values match the "Percentage" label.
jet_emotion_totals <- colSums(nrc_jet[, 1:8])
jet_emotion_pct <- 100 * jet_emotion_totals / sum(jet_emotion_totals)
barplot(sort(jet_emotion_pct, decreasing = TRUE),
        main = "Twitter Emotions of #JetAirways", ylab = "Percentage",
        col = "#000033")
# ------------------------------------------------------------------------
# SpiceJet
spice <- read.csv("spicejet.csv", stringsAsFactors = FALSE)
spice_tweets <- spice$text
# cleaning tweets
spice_tweets <- str_replace_all(spice_tweets, "http[^[:space:]]*", "") # remove URL
spice_tweets <- str_replace_all(spice_tweets, "@\\w+", "") # remove userid
# Remove the HTML entity before punctuation is stripped; otherwise "&amp;"
# turns into a stray "amp" word.
spice_tweets <- str_replace_all(spice_tweets, "&amp;?", "") # remove &amp;
spice_tweets <- str_replace_all(spice_tweets, "[:punct:]", " ") # remove all the punctuation
# Drop only the whitespace-delimited token that contains the sign. The previous
# patterns (".*?\\w+\\+" and ".+\\>") also deleted all text before the sign.
# Presumably these target "<U+XXXX>" character escapes -- TODO confirm.
spice_tweets <- str_replace_all(spice_tweets, "\\S*\\+\\S*", "") # remove words with + sign
spice_tweets <- str_replace_all(spice_tweets, "\\S*>\\S*", "") # remove words with > sign
spice_tweets <- str_replace_all(spice_tweets, "\n", " ") # remove \n
# Word boundaries keep the letters "RT" inside longer words (e.g. "AIRTEL").
spice_tweets <- str_replace_all(spice_tweets, "\\bRT\\b", "") # remove standalone RT
spice_tweets <- str_replace_all(spice_tweets, "\\s+", " ") # collapse repeated whitespace
spice_tweets <- str_trim(spice_tweets, side = "both") # remove spaces beginning & end of the sentence
# Remove empty tweets and tweets consisting only of "spicejet"
spice_tweets <- spice_tweets[!spice_tweets %in% c("", "spicejet")]
# NRC Sentiment on SpiceJets
nrc_spice <- get_nrc_sentiment(spice_tweets)
# Bar plot
# Columns 1:8 hold the eight NRC emotion counts. Each emotion's share of all
# emotion counts is scaled to 0-100 so the values match the "Percentage" label.
spice_emotion_totals <- colSums(nrc_spice[, 1:8])
spice_emotion_pct <- 100 * spice_emotion_totals / sum(spice_emotion_totals)
barplot(sort(spice_emotion_pct, decreasing = TRUE),
        main = "Twitter Emotions of #SpiceJet", ylab = "Percentage",
        col = "#cc0000")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment