Skip to content

Instantly share code, notes, and snippets.

@krishashok
Created October 3, 2022 08:00
Show Gist options
  • Save krishashok/ebcc3a04aab6784a51ba6bbbd0b7c653 to your computer and use it in GitHub Desktop.
Save krishashok/ebcc3a04aab6784a51ba6bbbd0b7c653 to your computer and use it in GitHub Desktop.
Download all your tweets and pickle them
# This script downloads the logged-in user's own tweets (replies and retweets are excluded) and saves them as a serialized pickle file
import tweepy
import pickle
# Go to developer.twitter.com, apply to use the API, create a project and get the authorisation details below
# API_KEY / API_KEY_SECRET and ACCESS_TOKEN / ACCESS_TOKEN_SECRET are passed to both
# tweepy.Client and tweepy.OAuth1UserHandler inside authorize_twitter().
API_KEY = ''
API_KEY_SECRET = ''
# BEARER_TOKEN is passed only to tweepy.Client in authorize_twitter().
BEARER_TOKEN = ''
# NOTE(review): CLIENT_ID and CLIENT_SECRET are not referenced by any code visible in this script.
CLIENT_ID = ''
CLIENT_SECRET = ''
ACCESS_TOKEN = ''
ACCESS_TOKEN_SECRET = ''
def authorize_twitter():
    """Build the tweepy handles used by this script.

    Reads the module-level credential constants and returns a
    ``(client, api)`` tuple: a ``tweepy.Client`` (API v2) and a
    ``tweepy.API`` (API v1.1) backed by an OAuth 1.0a user handler.
    """
    # The same four OAuth 1.0a values feed both the v2 client and the v1.1 handler.
    oauth1_credentials = (API_KEY, API_KEY_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

    # V2 client: bearer token first, then the OAuth 1.0a user credentials.
    v2_client = tweepy.Client(BEARER_TOKEN, *oauth1_credentials)

    # V1.1 API object wrapping an OAuth 1.0a user-context handler.
    v1_api = tweepy.API(tweepy.OAuth1UserHandler(*oauth1_credentials))

    return v2_client, v1_api
def get_all_tweets_for_user_and_pickle_it(output_path='all_tweets.pkl'):
    """Download the authenticated user's timeline and pickle it.

    Pages backwards through ``api.user_timeline`` (replies and retweets
    excluded, 200 statuses requested per call) using ``max_id`` as the
    cursor, until a request returns no tweets. The accumulated list of
    tweepy status objects is then written with ``pickle.dump``.

    An account with no matching tweets produces a pickle of an empty list
    instead of raising ``IndexError``.

    Args:
        output_path: File the pickled list is written to. Defaults to
            ``'all_tweets.pkl'`` in the current working directory.

    Returns:
        None. The result is the file written at ``output_path``.
    """
    # Only the v1.1 API object is used here; the v2 client is discarded.
    _, api = authorize_twitter()

    all_tweets = []
    oldest = None  # max_id cursor; None means "start from the newest tweet"

    while True:
        page_args = {'exclude_replies': True, 'include_rts': False, 'count': 200}
        if oldest is not None:
            print(f'Getting tweets before {oldest}')
            page_args['max_id'] = oldest

        tweets = api.user_timeline(**page_args)
        if not tweets:
            # An empty page ends the download. Checking before indexing also
            # covers the very first request coming back empty.
            # NOTE(review): the v1.1 endpoint is documented to apply `count`
            # before filtering replies/retweets, so an empty page may not
            # strictly mean the timeline is exhausted - confirm if completeness
            # matters.
            break

        all_tweets.extend(tweets)
        # max_id is inclusive, so step one below the oldest id already fetched
        # to avoid downloading that tweet again on the next page.
        oldest = tweets[-1].id - 1
        print(f"...{len(all_tweets)} tweets downloaded so far")

    # Pickle it. Only unpickle this file from a trusted location: loading a
    # pickle executes arbitrary code embedded in it.
    with open(output_path, 'wb') as f:
        pickle.dump(all_tweets, f)
if __name__ == '__main__':
    # Run the download only when this file is executed as a script, so that
    # importing it does not trigger network requests or write a file.
    get_all_tweets_for_user_and_pickle_it()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment