This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def getFeatureVector(tweet): | |
featureVector = [] | |
#split tweet into words | |
words = tweet.split() | |
for w in words: | |
#replace two or more with two occurrences | |
w = replaceTwoOrMore(w) | |
#strip punctuation | |
w = w.strip('\'"?,.') | |
#check if the word starts with a letter |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk

# Build the training set by applying extract_features to every entry of
# `tweets` through NLTK's apply_features helper.
# NOTE(review): `tweets` is presumably a list of (words, label) pairs built
# elsewhere -- confirm against the code that defines it.
training_set = nltk.classify.util.apply_features(extract_features, tweets)

# Train the Naive Bayes classifier on the training set.
NBClassifier = nltk.NaiveBayesClassifier.train(training_set)


def _classify_tweet(tweet):
    """Return the label NBClassifier assigns to a single tweet.

    The tweet is passed through processTweet2, then getFeatureVector, then
    extract_features, and the resulting features are handed to
    NBClassifier.classify.
    """
    words = getFeatureVector(processTweet2(tweet))
    return NBClassifier.classify(extract_features(words))


# ua is a dataframe containing all the United Airlines tweets; store the
# predicted label for each entry of its 'tweets' column in 'sentiment'.
ua['sentiment'] = ua['tweets'].apply(_classify_tweet)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###Preprocess tweets | |
def processTweet2(tweet): | |
# process the tweets | |
#Convert to lower case | |
tweet = tweet.lower() | |
#Convert www.* or https?://* to URL | |
tweet = re.sub('((www\.[^\s]+)|(https?://[^\s]+))','URL',tweet) | |
#Convert @username to AT_USER | |
tweet = re.sub('@[^\s]+','AT_USER',tweet) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import os

import pandas as pd
import tweepy
# Twitter API credentials.
# Supply them through the environment variables named below so that secrets
# do not have to be written into this file. When a variable is unset the
# value falls back to the empty-string placeholder, which you may replace
# with your own credential instead.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', '')
access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '')

# Create the tweepy OAuth handler from the consumer key/secret pair.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)