-
-
Save onmyeoin/b6d99be0216d621788fcbe6bcb34a370 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
# encoding: utf-8 | |
import tweepy | |
import csv | |
def _is_retweet(tweet):
    """Return True when *tweet* is a retweet rather than an original post.

    Native retweets are expected to carry a ``retweeted_status`` attribute
    on the tweepy Status object; old-style manual retweets start with the
    literal prefix ``RT @``. A plain substring test for ``RT`` would also
    discard ordinary tweets that merely contain those letters
    (e.g. "SMART", "ART").
    """
    if getattr(tweet, "retweeted_status", None) is not None:
        return True
    return tweet.full_text.startswith("RT @")


def get_all_tweets(screen_name):
    """Download a user's recent tweets and write the non-retweets to a CSV.

    Pages backwards through the timeline of *screen_name* 200 tweets at a
    time, then writes the id, creation time and full text of every tweet
    that is not a retweet to ``<screen_name>_tweets.csv`` in the current
    working directory.

    Paging stops as soon as the API returns an empty page (the account has
    no older tweets available) or once more than 3200 tweets have been
    collected, whichever comes first.

    Args:
        screen_name: Twitter handle of the account to download, without
            the leading ``@``.

    Returns:
        None. The result is the CSV file written to disk.
    """
    # Twitter API credentials -- fill these in before running the script.
    consumer_key = ""
    consumer_secret = ""
    access_key = ""
    access_secret = ""

    # Upper bound on how many tweets to collect before paging stops.
    max_tweets = 3200

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True)

    # Arguments shared by every timeline request; extended mode is needed
    # to receive the untruncated text in ``full_text``.
    request_args = {
        "screen_name": screen_name,
        "tweet_mode": "extended",
        "count": 200,
    }

    alltweets = []

    # make initial request for most recent tweets
    new_tweets = api.user_timeline(**request_args)

    # An empty page means there is nothing older to fetch. Stopping on it
    # prevents the endless loop on accounts with fewer tweets than the cap
    # and avoids indexing into an empty list when the account has no tweets.
    while new_tweets:
        alltweets.extend(new_tweets)
        print("...{} tweets downloaded so far".format(len(alltweets)))

        if len(alltweets) > max_tweets:
            break

        # all subsequent requests use the max_id param to prevent duplicates
        oldest = alltweets[-1].id - 1
        print("getting tweets before {}".format(oldest))
        new_tweets = api.user_timeline(max_id=oldest, **request_args)

    # keep only original tweets
    rows = [
        [tweet.id_str, tweet.created_at, tweet.full_text]
        for tweet in alltweets
        if not _is_retweet(tweet)
    ]

    # write to csv; newline='' stops the csv module from emitting blank
    # rows on Windows, and UTF-8 keeps emoji / non-ASCII text writable
    # regardless of the platform default encoding.
    filename = '{}_tweets.csv'.format(screen_name)
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(rows)

    print('{}_tweets.csv was successfully created.'.format(screen_name))
if __name__ == "__main__":
    # Script entry point: replace the argument with the screen name of the
    # account whose tweets should be downloaded.
    get_all_tweets("realDonaldTrump")
@jose46moreno
- Add your Twitter credentials to the start of the script (you need to sign up for a free Twitter developer account to get these; there are plenty of videos on YouTube if you're stuck)
consumer_key = " here "
consumer_secret = " here "
access_key = " here "
access_secret = " here "
-
Make sure you have tweepy module installed. You can install this from the command line using
pip3 install tweepy
-
Pass the user's screen name into the get_all_tweets function at the end of the script
-
Run script
python3 tweet_dumper.py
The terminal keeps repeating itself on a loop:
...759 tweets downloaded so far
getting tweets before 102157671012106240
...759 tweets downloaded so far
getting tweets before 102157671012106240
Does the account you are trying to download from only have 759 tweets?
This script will keep running until it reaches the maximum number of tweets allowed by the Twitter API (which is 3240; this script looks for 3200).
If the account you're trying to download from has fewer than 3200 tweets, change the line
while len(alltweets) <= 3200:
To
while len(alltweets) <= 759:
Thanks!
What are the steps once the script is downloaded?
To install, add users and run?
Thank you!