vickyqian

## twitter crawler.txt
import csv
import re

import pandas as pd
import tweepy
#### Input your Twitter API credentials here.
# The consumer key/secret pair is what the OAuth handler below is built from.
consumer_key = ''
consumer_secret = ''
# NOTE(review): the access token pair is not used anywhere in this excerpt --
# presumably it is handed to the auth handler further on; confirm.
access_token = ''
access_token_secret = ''

# Create the tweepy OAuth handler from the consumer key and secret.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)

## preprocesstweet.txt
###Preprocess tweets
def processTweet2(tweet):
    """Normalize a raw tweet before feature extraction.

    The text is lower-cased, every link (``www.*`` or ``http(s)://*``) is
    replaced by the token ``URL`` and every ``@username`` mention by the
    token ``AT_USER``.

    Args:
        tweet: Raw tweet text.

    Returns:
        The normalized tweet text.
    """
    # Convert to lower case
    tweet = tweet.lower()
    # Convert www.* or https?://* to URL.
    # Raw strings keep the regex escapes (\. and \s) from being read as
    # (invalid) string escapes.
    tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', tweet)
    # Convert @username to AT_USER
    tweet = re.sub(r'@[^\s]+', 'AT_USER', tweet)
    # Return the cleaned text; without this the function yields None and the
    # result cannot be split into words by the next processing step.
    return tweet

## getfeaturevector
def getFeatureVector(tweet):
    """Split a preprocessed tweet into a list of cleaned words.

    Each whitespace-separated word is passed through ``replaceTwoOrMore``
    and stripped of surrounding quote/punctuation characters. Only words
    whose first character is a letter are kept.

    Args:
        tweet: Tweet text, already normalized.

    Returns:
        List of the cleaned words, in their original order.
    """
    featureVector = []
    # split tweet into words
    for w in tweet.split():
        # replace two or more with two occurrences
        w = replaceTwoOrMore(w)
        # strip punctuation
        w = w.strip('\'"?,.')
        # check if the word starts with a letter; the slice keeps this safe
        # for words that became empty after stripping
        if w[:1].isalpha():
            featureVector.append(w)
    # Return the collected words; previously the list was built but never
    # filled or returned.
    return featureVector

## naivebayes
import nltk

# Build the training set by applying extract_features across `tweets`.
training_set = nltk.classify.util.apply_features(extract_features, tweets)

# Fit a Naive Bayes classifier on that training set.
NBClassifier = nltk.NaiveBayesClassifier.train(training_set)

# `ua` is a dataframe holding all the United Airlines tweets. Each tweet is
# preprocessed, turned into a feature vector and classified; the predicted
# label is stored in the 'sentiment' column.
ua['sentiment'] = ua['tweets'].apply(
    lambda text: NBClassifier.classify(
        extract_features(getFeatureVector(processTweet2(text)))
    )
)
	import tweepy
	import csv
	import pandas as pd
	#### Input your Twitter API credentials here.
	# The consumer key/secret pair is what the OAuth handler below is built from.
	consumer_key = ''
	consumer_secret = ''
	# NOTE(review): the access token pair is not used anywhere in this excerpt --
	# presumably it is handed to the auth handler further on; confirm.
	access_token = ''
	access_token_secret = ''

	# Create the tweepy OAuth handler from the consumer key and secret.
	auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
	###Preprocess tweets
	def processTweet2(tweet):
		"""Normalize a raw tweet before feature extraction.

		The text is lower-cased, every link (``www.*`` or ``http(s)://*``) is
		replaced by the token ``URL`` and every ``@username`` mention by the
		token ``AT_USER``.

		Args:
			tweet: Raw tweet text.

		Returns:
			The normalized tweet text.
		"""
		# Convert to lower case
		tweet = tweet.lower()
		# Convert www.* or https?://* to URL.
		# The alternation must be a bare `|`; an escaped `\|` would match a
		# literal pipe character and no URL would ever be replaced.
		tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', tweet)
		# Convert @username to AT_USER
		tweet = re.sub(r'@[^\s]+', 'AT_USER', tweet)
		# Return the cleaned text; without this the function yields None and
		# the result cannot be split into words by the next processing step.
		return tweet
	def getFeatureVector(tweet):
		"""Split a preprocessed tweet into a list of cleaned words.

		Each whitespace-separated word is passed through ``replaceTwoOrMore``
		and stripped of surrounding quote/punctuation characters. Only words
		whose first character is a letter are kept.

		Args:
			tweet: Tweet text, already normalized.

		Returns:
			List of the cleaned words, in their original order.
		"""
		featureVector = []
		# split tweet into words
		for w in tweet.split():
			# replace two or more with two occurrences
			w = replaceTwoOrMore(w)
			# strip punctuation
			w = w.strip('\'"?,.')
			# check if the word starts with a letter; the slice keeps this
			# safe for words that became empty after stripping
			if w[:1].isalpha():
				featureVector.append(w)
		# Return the collected words; previously the list was built but never
		# filled or returned.
		return featureVector
	import nltk

	# Build the training set by applying extract_features across `tweets`.
	training_set = nltk.classify.util.apply_features(extract_features, tweets)

	# Fit a Naive Bayes classifier on that training set.
	NBClassifier = nltk.NaiveBayesClassifier.train(training_set)

	# `ua` is a dataframe holding all the United Airlines tweets. Each tweet is
	# preprocessed, turned into a feature vector and classified; the predicted
	# label is stored in the 'sentiment' column.
	ua['sentiment'] = ua['tweets'].apply(
		lambda text: NBClassifier.classify(
			extract_features(getFeatureVector(processTweet2(text)))
		)
	)