Skip to content

Instantly share code, notes, and snippets.

@vickyqian
vickyqian / getfeaturevector
Last active May 7, 2019 06:17
Get Feature Vector
def getFeatureVector(tweet):
featureVector = []
#split tweet into words
words = tweet.split()
for w in words:
#replace two or more with two occurrences
w = replaceTwoOrMore(w)
#strip punctuation
w = w.strip('\'"?,.')
#check if the word stats with an alphabet
@vickyqian
vickyqian / naivebayes
Created April 26, 2017 15:29
Naive Bayes Classifier
import nltk
training_set = nltk.classify.util.apply_features(extract_features, tweets)
# Train the classifier Naive Bayes Classifier
NBClassifier = nltk.NaiveBayesClassifier.train(training_set)
#ua is a dataframe containing all the united airline tweets
ua['sentiment'] = ua['tweets'].apply(lambda tweet: NBClassifier.classify(extract_features(getFeatureVector(processTweet2(tweet)))))
@vickyqian
vickyqian / preprocesstweet.txt
Last active April 10, 2020 10:48
Preprocess Tweets
###Preprocess tweets
def processTweet2(tweet):
# process the tweets
#Convert to lower case
tweet = tweet.lower()
#Convert www.* or https?://* to URL
tweet = re.sub('((www\.[^\s]+)|(https?://[^\s]+))','URL',tweet)
#Convert @username to AT_USER
tweet = re.sub('@[^\s]+','AT_USER',tweet)
@vickyqian
vickyqian / twitter crawler.txt
Last active May 11, 2024 16:19
A Python script to download all the tweets of a hashtag into a csv
import tweepy
import csv
import pandas as pd
####input your credentials here
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)