Skip to content

Instantly share code, notes, and snippets.

@karishmadudani
Last active September 4, 2017 16:19
Show Gist options
  • Save karishmadudani/9393e3fb80df49dbec629ad8b4d36f23 to your computer and use it in GitHub Desktop.
Save karishmadudani/9393e3fb80df49dbec629ad8b4d36f23 to your computer and use it in GitHub Desktop.
#Store tweets data in a dataframe
def tweets_df(results):
    """Collect selected fields of each tweet in *results* into a DataFrame.

    Args:
        results: Iterable of tweet-like objects exposing ``id``, ``text``,
            ``created_at``, ``retweet_count``, ``author.screen_name``,
            ``author.followers_count``, ``author.location`` and a mapping
            ``entities`` (presumably tweepy ``Status`` objects -- confirm
            against the caller). May be a one-shot iterator or generator.

    Returns:
        A ``pandas.DataFrame`` with one row per tweet and the columns
        ``id``, ``text``, ``created_at``, ``retweet_count``,
        ``user_screen_name``, ``user_followers_count``, ``user_location``
        and ``Hashtags``, in that order.
    """
    # Materialise the input once: every column below needs its own pass over
    # the tweets, and a generator would be exhausted after the first pass.
    tweets = list(results)

    columns = [
        "id",
        "text",
        "created_at",
        "retweet_count",
        "user_screen_name",
        "user_followers_count",
        "user_location",
        "Hashtags",
    ]
    data = {
        "id": [tweet.id for tweet in tweets],
        "text": [tweet.text for tweet in tweets],
        "created_at": [tweet.created_at for tweet in tweets],
        "retweet_count": [tweet.retweet_count for tweet in tweets],
        "user_screen_name": [tweet.author.screen_name for tweet in tweets],
        "user_followers_count": [tweet.author.followers_count for tweet in tweets],
        "user_location": [tweet.author.location for tweet in tweets],
        # ``.get`` yields None when a tweet's entities have no 'hashtags' key.
        "Hashtags": [tweet.entities.get('hashtags') for tweet in tweets],
    }
    # Pass the column list explicitly so the column order never depends on
    # dict ordering.
    return pd.DataFrame(data, columns=columns)
data_set = tweets_df(results)

# Remove tweets with duplicate text.
# Whitespace-separated tokens starting with 'https:' are dropped before the
# comparison, so tweets that differ only in such a link count as duplicates.
# The cleaned text is assigned as a whole column: the per-row
# DataFrame.set_value call was removed in pandas 1.0, and a column assignment
# also works when the frame is empty.
data_set["text2"] = [
    ' '.join(word for word in tweet_text.split() if not word.startswith('https:'))
    for tweet_text in data_set["text"]
]
data_set.drop_duplicates(subset='text2', inplace=True)
data_set.reset_index(drop=True, inplace=True)
# Replace the raw text column with the cleaned one, keeping the name 'text'.
data_set.drop('text', axis=1, inplace=True)
data_set.rename(columns={'text2': 'text'}, inplace=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment