Skip to content

Instantly share code, notes, and snippets.

@yuyasugano
Created September 25, 2020 06:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yuyasugano/2fdaf1f9ce71b6b0e5b9e3e0b6e66155 to your computer and use it in GitHub Desktop.
Save yuyasugano/2fdaf1f9ce71b6b0e5b9e3e0b6e66155 to your computer and use it in GitHub Desktop.
Sentiment analysis with NLTK and Scikit-learn
import re
from functools import lru_cache

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob
# Fetch the NLTK data packages this script relies on: the stop-word corpus
# and WordNet are used by preprocessing(); 'punkt' is presumably required by
# the tokenizer behind TextBlob (TODO confirm).
for _resource in ('punkt', 'wordnet', 'stopwords'):
    nltk.download(_resource)
# Accepts tokens made up solely of non-digit word characters (letters and
# underscores); anything containing a digit or a symbol is rejected.
_ALPHA_TOKEN = re.compile(r'[^\W\d]*$')


@lru_cache(maxsize=1)
def _english_stopwords():
    """Return the NLTK English stop words as a frozenset, loaded only once."""
    # stopwords.words() rebuilds the list on every call, so cache the result
    # and use a set for O(1) membership tests.
    return frozenset(stopwords.words('english'))


def preprocessing(tweet):
    """Turn a raw tweet into a list of cleaned, lemmatized tokens.

    The pipeline is:

    1. Tokenize the tweet with TextBlob (its ``words`` list, which leaves out
       hashtag signs and other punctuation) and re-split on whitespace so the
       tokens are plain strings.
    2. Drop the literal token ``'user'``, every token that contains a digit
       or a non-word character, and every token whose lower-cased form is an
       English stop word.
    3. Lemmatize each remaining token as a verb with WordNet.

    Args:
        tweet: The tweet text.

    Returns:
        A list of lemmatized tokens, in their original order. The list is
        empty when nothing survives the filtering.
    """
    # Join/split keeps the original behavior of working on plain ``str``
    # tokens rather than TextBlob ``Word`` objects.
    tokens = ' '.join(TextBlob(tweet).words).split()

    stop_words = _english_stopwords()
    lemmatizer = WordNetLemmatizer()

    return [
        lemmatizer.lemmatize(token, 'v')
        for token in tokens
        # 'user' is presumably the anonymized @mention placeholder used by
        # the dataset -- verify against the input data.
        if token != 'user'
        and _ALPHA_TOKEN.match(token)
        and token.lower() not in stop_words
    ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment