Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
def individual_tweet_vectorizer(corpus, tweet, index=0, author=''):
    """
    Formats a single tweet as a bag-of-words style vector.

    The returned array has shape ``(1, len(corpus) + 2)`` and integer dtype:

    * positions ``0 .. len(corpus) - 1`` are 1 when the corpus word at that
      position occurs in ``tweet`` and 0 otherwise,
    * position ``-2`` stores ``index``,
    * position ``-1`` is 1 when ``author`` is anything other than the empty
      string and 0 otherwise.

    :param corpus: list of all words in tweets
    :param tweet: tweet to be vectorized; occurrence is tested with ``in``,
        so a ``str`` tweet matches substrings while a list of tokens matches
        whole elements
    :param index: index of tweet in main list of tweets
    :param author: Trump or general; any non-empty value sets the author flag
    :return: Single tweet in vector form
    """
    individual_tweet_vector = np.zeros((1, len(corpus) + 2), dtype=int)
    # Basic indexing returns a view, so writes to ``row`` land in the 2-D
    # array that is returned.
    row = individual_tweet_vector[0]

    for position, word in enumerate(corpus):
        if word in tweet:
            row[position] = 1

    # Keep track of index of tweet for interpretation
    row[-2] = index

    # If author is specified, set the last value of the tweet vector to 1
    if author != '':
        row[-1] = 1

    return individual_tweet_vector
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment