Skip to content

Instantly share code, notes, and snippets.

@spdustin
Last active August 29, 2017 21:53
Show Gist options
  • Save spdustin/96bb282dec2a9e6ac4fe77f355f57d96 to your computer and use it in GitHub Desktop.
Save spdustin/96bb282dec2a9e6ac4fe77f355f57d96 to your computer and use it in GitHub Desktop.
def load_tweets_from_mongodb(dbname, collection='tweet'):
    """Load a tweet collection from the local MongoDB and derive ratios.

    The collection is read through ``odo`` into a ``pandas.DataFrame``,
    the ``user_id``, ``usernameTweet`` and ``url`` columns are discarded,
    and every row that contains a zero or a missing value is removed
    before the engagement ratios are computed.

    Args:
        dbname: Name of the MongoDB database on ``localhost``.
        collection: Name of the collection to read; defaults to
            ``'tweet'``.

    Returns:
        A DataFrame indexed by the ``datetime`` column, with these
        columns added to the remaining source columns:

        * ``reply_to_retweet_ratio`` -- ``nbr_reply / nbr_retweet``
        * ``reply_to_like_ratio`` -- ``nbr_reply / nbr_favorite``
        * ``like_to_retweet_ratio`` -- ``nbr_favorite / nbr_retweet``
        * ``controversial`` -- True where ``reply_to_retweet_ratio``
          exceeds 1.25

    Raises:
        KeyError: If any of the dropped or referenced columns is absent
            from the loaded data.
    """
    df = odo('mongodb://localhost/{}::{}'.format(dbname, collection),
             pd.DataFrame)
    df.drop(['user_id', 'usernameTweet', 'url'], axis=1, inplace=True)

    # Turn zeros into NaN so the dropna() below removes them; this keeps
    # zero denominators out of the ratio divisions. ``np.nan`` is the
    # documented spelling -- the upper-case ``np.NAN`` alias is not
    # available in newer NumPy releases.
    # NOTE(review): the replacement applies to every column, not only the
    # nbr_* counters, so a row with a 0 in any field is discarded --
    # confirm that this is intended.
    df = df.replace(0, np.nan)
    df.dropna(how='any', inplace=True)

    df['reply_to_retweet_ratio'] = np.divide(df['nbr_reply'],
                                             df['nbr_retweet'])
    df['reply_to_like_ratio'] = np.divide(df['nbr_reply'],
                                          df['nbr_favorite'])
    df['like_to_retweet_ratio'] = np.divide(df['nbr_favorite'],
                                            df['nbr_retweet'])

    # Flag tweets that drew noticeably more replies than retweets.
    df['controversial'] = df['reply_to_retweet_ratio'] > 1.25

    df.set_index('datetime', inplace=True)
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment