Created
April 1, 2016 15:49
-
-
Save loiyumba/605ba5bd44d65b904c93d83d37d7e57f to your computer and use it in GitHub Desktop.
Twitter emotions on major airlines in India
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# To extract tweets from twitter
# Load the required libraries
# library() stops with an error when a package is missing; require() would
# only warn and let the script fail later at the first call into the package.
library(twitteR)
library(stringr)
library(syuzhet)
# Set working directory
# file.path() builds the same relative location with portable separators, so
# the script is no longer tied to Windows-style backslashes.
# NOTE(review): the path is relative to wherever the script is launched from.
setwd(file.path("..", "tweet_analysis", "Airlines"))
# Load twitter API
# Point RCurl at the CA bundle shipped with the RCurl package.
options(RCurlOptions = list(
  cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl")
))
# OAuth endpoint URLs; kept as-is (not referenced by the code visible here).
reqURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"
# Credentials are read from environment variables so secrets do not have to be
# written into the script; when a variable is unset the original " "
# placeholder is used, so the previous edit-in-place workflow still applies.
apiKey <- Sys.getenv("TWITTER_API_KEY", unset = " ") # your twitter api key
apiSecret <- Sys.getenv("TWITTER_API_SECRET", unset = " ") # your twitter api secret
accessToken <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = " ") # your access token
accessSecret <- Sys.getenv("TWITTER_ACCESS_SECRET", unset = " ") # your access secret
setup_twitter_oauth(apiKey, apiSecret,
                    access_token = accessToken, access_secret = accessSecret)
# Extracting Tweets
# For each airline hashtag: search Twitter, convert the result list to a data
# frame, preview it, and save it as a CSV in the working directory.

# Search a hashtag with the settings shared by every airline below
# (up to 500 tweets since 2016-03-01); only the end date varies.
fetch_hashtag_tweets <- function(hashtag, until = "2016-03-30") {
  searchTwitter(hashtag, n = 500, since = "2016-03-01", until = until)
}

# Air India
air_india <- fetch_hashtag_tweets("#AirIndia")
air_india_df <- twListToDF(air_india)
head(air_india_df)
write.csv(air_india_df, "air_india.csv", row.names = FALSE) # saving tweets in local disk

# Jet Airways
jet_airways <- fetch_hashtag_tweets("#JetAirways")
jet_airways_df <- twListToDF(jet_airways)
head(jet_airways_df)
write.csv(jet_airways_df, "jet_airways.csv", row.names = FALSE)

# Indigo Airlines ## Not able to retrieve enough tweets. Only 18 tweets returned
# This search ends on 2016-03-31, one day later than the other airlines.
indigo <- fetch_hashtag_tweets("#indigoairlines", until = "2016-03-31")
indigo_df <- twListToDF(indigo)
head(indigo_df)
write.csv(indigo_df, "indigo.csv", row.names = FALSE)

# Spicejet ## Returned only 165 tweets
spicejet <- fetch_hashtag_tweets("#SpiceJet")
spicejet_df <- twListToDF(spicejet)
head(spicejet_df)
write.csv(spicejet_df, "spicejet.csv", row.names = FALSE)
# Reload the data
# air india
air_india <- read.csv("air_india.csv", stringsAsFactors = FALSE)
tweets <- air_india$text # extract only tweets column

# Cleaning tweets
# The steps run in order; each one works on the output of the previous step.
tweets <- str_replace_all(tweets, "\\#", "") # remove # (the tag word stays)
tweets <- str_replace_all(tweets, "Retweet", "") # remove Retweet
# Whole-word match so letters inside other words (e.g. "AIRTEL") are kept.
tweets <- str_replace_all(tweets, "\\bRT\\b", "") # remove RT
tweets <- str_replace_all(tweets, ":", "") # remove :
# Also strips the HTML-escaped form "&amp;" so no stray "amp;" token is left.
tweets <- str_replace_all(tweets, "&(amp;)?", "") # remove &
tweets <- str_replace_all(tweets, "\n", " ") # replace \n with a space
tweets <- str_replace_all(tweets, "http[^[:space:]]*", "") # remove URL
tweets <- str_replace_all(tweets, "@\\w+", "") # remove userid
# Deletes everything up to and including a word that is followed by "+".
tweets <- str_replace_all(tweets, ".*?\\w+\\+", "") # remove words with + sign
# NOTE(review): ".+" is greedy, so this appears to delete everything up to the
# last ">" on a line rather than a single word - confirm that is intended.
tweets <- str_replace_all(tweets, ".+\\>", "") # remove words with > sign
# Collapse any run of spaces left behind by the removals above.
tweets <- str_replace_all(tweets, " {2,}", " ") # remove double spaces
tweets <- str_trim(tweets, side = "both") # remove spaces beginning & end of the sentence
# Drop tweets that are now empty, just "AirIndia", or just "-".
tweets <- tweets[!tweets %in% c("", "AirIndia", "-")]

# NRC Sentiment on AirIndia
nrc_data <- get_nrc_sentiment(tweets)

# Bar plot
# Share of each of the first eight sentiment columns in the overall total,
# scaled by 100 so the values match the "Percentage" axis label.
# NOTE(review): columns 1:8 are presumably the eight NRC emotions - confirm
# against the syuzhet documentation.
emotion_pct <- 100 * colSums(prop.table(nrc_data[, 1:8]))
barplot(sort(emotion_pct, decreasing = TRUE),
        main = "Twitter Emotions of #AirIndia", ylab = "Percentage",
        col = "#ff3300")
# ------------------------------------------------------------------------
# Jet Airways
jet <- read.csv("jet_airways.csv", stringsAsFactors = FALSE)
jet_tweets <- jet$text # extract only tweets column

# cleaning tweets
# The steps run in order; each one works on the output of the previous step.
jet_tweets <- str_replace_all(jet_tweets, "http[^[:space:]]*", "") # remove URL
jet_tweets <- str_replace_all(jet_tweets, "@\\w+", "") # remove userid
jet_tweets <- str_replace_all(jet_tweets, "[:punct:]", " ") # replace punctuation with a space
# Deletes everything up to and including a word that is followed by "+".
jet_tweets <- str_replace_all(jet_tweets, ".*?\\w+\\+", "") # remove words with + sign
# NOTE(review): ".+" is greedy, so this appears to delete everything up to the
# last ">" on a line rather than a single word - confirm that is intended.
jet_tweets <- str_replace_all(jet_tweets, ".+\\>", "") # remove words with > sign
jet_tweets <- str_replace_all(jet_tweets, "\n", " ") # replace \n with a space
# Whole-word match so letters inside other words (e.g. "AIRTEL") are kept.
jet_tweets <- str_replace_all(jet_tweets, "\\bRT\\b", "") # remove RT
jet_tweets <- str_replace_all(jet_tweets, "\\$", "") # remove $
jet_tweets <- str_replace_all(jet_tweets, "\\|", "") # remove |
# Collapse any run of spaces left behind by the removals above.
jet_tweets <- str_replace_all(jet_tweets, " {2,}", " ") # remove double spaces
jet_tweets <- str_trim(jet_tweets, side = "both") # remove spaces beginning & end of the sentence
# Drop tweets that are now empty or consist only of "JetAirways",
# "jetairways" or "Je".
jet_tweets <- jet_tweets[!jet_tweets %in% c("", "JetAirways", "jetairways", "Je")]

# NRC Sentiment on JetAirways
nrc_jet <- get_nrc_sentiment(jet_tweets)

# Bar plot
# Share of each of the first eight sentiment columns in the overall total,
# scaled by 100 so the values match the "Percentage" axis label.
# NOTE(review): columns 1:8 are presumably the eight NRC emotions - confirm
# against the syuzhet documentation.
jet_emotion_pct <- 100 * colSums(prop.table(nrc_jet[, 1:8]))
barplot(sort(jet_emotion_pct, decreasing = TRUE),
        main = "Twitter Emotions of #JetAirways", ylab = "Percentage",
        col = "#000033")
# ------------------------------------------------------------------------
# SpiceJet
spice <- read.csv("spicejet.csv", stringsAsFactors = FALSE)
spice_tweets <- spice$text # extract only tweets column

# cleaning tweets
# The steps run in order; each one works on the output of the previous step.
spice_tweets <- str_replace_all(spice_tweets, "http[^[:space:]]*", "") # remove URL
spice_tweets <- str_replace_all(spice_tweets, "@\\w+", "") # remove userid
spice_tweets <- str_replace_all(spice_tweets, "[:punct:]", " ") # replace punctuation with a space
# Deletes everything up to and including a word that is followed by "+".
spice_tweets <- str_replace_all(spice_tweets, ".*?\\w+\\+", "") # remove words with + sign
# NOTE(review): ".+" is greedy, so this appears to delete everything up to the
# last ">" on a line rather than a single word - confirm that is intended.
spice_tweets <- str_replace_all(spice_tweets, ".+\\>", "") # remove words with > sign
spice_tweets <- str_replace_all(spice_tweets, "\n", " ") # replace \n with a space
# Whole-word match so letters inside other words (e.g. "AIRTEL") are kept.
spice_tweets <- str_replace_all(spice_tweets, "\\bRT\\b", "") # remove RT
# Collapse any run of spaces left behind by the removals above.
spice_tweets <- str_replace_all(spice_tweets, " {2,}", " ") # remove double spaces
spice_tweets <- str_trim(spice_tweets, side = "both") # remove spaces beginning & end of the sentence
# Drop tweets that are now empty or consist only of "spicejet".
spice_tweets <- spice_tweets[!spice_tweets %in% c("spicejet", "")]

# NRC Sentiment on SpiceJets
nrc_spice <- get_nrc_sentiment(spice_tweets)

# Bar plot
# Share of each of the first eight sentiment columns in the overall total,
# scaled by 100 so the values match the "Percentage" axis label.
# NOTE(review): columns 1:8 are presumably the eight NRC emotions - confirm
# against the syuzhet documentation.
spice_emotion_pct <- 100 * colSums(prop.table(nrc_spice[, 1:8]))
barplot(sort(spice_emotion_pct, decreasing = TRUE),
        main = "Twitter Emotions of #SpiceJet", ylab = "Percentage",
        col = "#cc0000")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment