Skip to content

Instantly share code, notes, and snippets.

@kurasaiteja
Created May 28, 2020 19:19
Show Gist options
  • Save kurasaiteja/df25a4c477e6b625c3474f514cb3d0b0 to your computer and use it in GitHub Desktop.
Save kurasaiteja/df25a4c477e6b625c3474f514cb3d0b0 to your computer and use it in GitHub Desktop.
def check_word_in_tweet(word, data):
    """Check whether a word appears in any text field of a tweet dataset.

    Looks at the tweet text, the extended tweet text (140+ character
    tweets), the quoted tweet text and the retweeted tweet text. The match
    is case-insensitive.

    Args:
        word: Pattern to search for. It is handed to ``Series.str.contains``
            with its default settings, so pandas interprets it as a regular
            expression.
        data: Tweet table that must contain the columns ``text``,
            ``extended_tweet-full_text``, ``quoted_status-text`` and
            ``retweeted_status-text``.

    Returns:
        A logical pandas Series, True for every row in which at least one
        of the four text columns contains ``word``.
    """
    text_columns = (
        'text',
        'extended_tweet-full_text',
        'quoted_status-text',
        'retweeted_status-text',
    )

    # na=False: a missing text value (e.g. a tweet that quotes nothing) is
    # "no match" rather than NaN, so the combined mask stays purely boolean.
    contains_column = data[text_columns[0]].str.contains(
        word, case=False, na=False
    )
    for column in text_columns[1:]:
        contains_column |= data[column].str.contains(
            word, case=False, na=False
        )
    return contains_column
# Build one boolean mask per keyword: True where a tweet mentions it.
python = check_word_in_tweet('python', df_tweet)
js = check_word_in_tweet('javascript', df_tweet)

# Report the share of tweets per keyword: matching rows / total rows.
total_tweets = df_tweet.shape[0]
print("Proportion of #python tweets:", np.sum(python) / total_tweets)
print("Proportion of #javascript tweets:", np.sum(js) / total_tweets)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment