@KageKirin
Created November 15, 2022 22:58
Python script to get tweets and retweets

This is a super simple, quick & dirty Python script to retrieve all your (recent) tweets and store them as JSON files on your disk, one file per page of up to 100 results (see the merge sketch at the end for combining them into a single file).

It's really basic (no lookup/storage of media links, no thread reconstruction, ...).

Usage

Get your Twitter app credentials by registering as a developer, then export your app's bearer token:

export APP_TOKEN="your bearer token"

Then run with

tw_get_tweets.py -i <your user id>

Note: The user id is a number, not your @ handle.
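If you only know your handle, you can resolve it to the numeric id with the v2 users/by/username endpoint. Here's a minimal sketch using the same APP_TOKEN bearer token; the get_user_id helper and the example handle are illustrations, not part of the gist:

#!/usr/bin/env python3
import os
import requests

def get_user_id(username: str) -> str:
    # GET /2/users/by/username/:username resolves a handle (without the @)
    # to the user object; we only need its numeric "id".
    response = requests.get(
        f"https://api.twitter.com/2/users/by/username/{username}",
        headers={
            "Authorization": f'Bearer {os.environ.get("APP_TOKEN")}',
            "User-Agent": "tw_get_tweets",
        },
    )
    response.raise_for_status()
    return response.json()["data"]["id"]

print(get_user_id("KageKirin"))  # hypothetical example handle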

#!/usr/bin/env python3
import argparse
import json
import os
import time

import colorama
import requests

colorama.init()


def add_authentication(inout_request):
    # requests accepts a callable as `auth`: it receives the prepared
    # request, may modify it, and must return it.
    inout_request.headers["Authorization"] = f'Bearer {os.environ.get("APP_TOKEN")}'
    inout_request.headers["User-Agent"] = "tw_get_tweets"
    return inout_request


def get_tweets(id, start):
    # fetch one page (up to 100) of the user's tweets from the v2 API
    params = {}
    params["expansions"] = ",".join([
        "attachments.poll_ids", "attachments.media_keys", "author_id",
        "edit_history_tweet_ids", "entities.mentions.username", "geo.place_id",
        "in_reply_to_user_id", "referenced_tweets.id",
        "referenced_tweets.id.author_id",
    ])
    params["tweet.fields"] = ",".join([
        "attachments", "author_id", "context_annotations", "conversation_id",
        "created_at", "edit_controls", "entities", "geo", "id",
        "in_reply_to_user_id", "lang", "public_metrics", "possibly_sensitive",
        "referenced_tweets", "reply_settings", "source", "text", "withheld",
    ])
    params["user.fields"] = ",".join([
        "created_at", "description", "entities", "id", "location", "name",
        "pinned_tweet_id", "profile_image_url", "protected", "public_metrics",
        "url", "username", "verified", "withheld",
    ])
    params["media.fields"] = ",".join([
        "duration_ms", "height", "media_key", "preview_image_url", "type",
        "url", "width", "public_metrics", "alt_text", "variants",
    ])
    params["place.fields"] = ",".join([
        "contained_within", "country", "country_code", "full_name", "geo",
        "id", "name", "place_type",
    ])
    params["poll.fields"] = ",".join([
        "duration_minutes", "end_datetime", "id", "options", "voting_status",
    ])
    params["max_results"] = 100
    if start != "":
        params["pagination_token"] = start
    tweets_url = f"https://api.twitter.com/2/users/{id}/tweets"
    return requests.get(tweets_url, auth=add_authentication, params=params)


def main(args):
    assert args.id
    count = 0
    start_token = ""
    while True:
        response = get_tweets(args.id, start_token)
        print(count, response.status_code)
        if response.status_code != 200:
            print("Error!")
            print(response.json())
            break
        json_response = response.json()
        # write each page of results to its own file
        with open(f"tweets_{count}.json", "w") as handler:
            handler.write(json.dumps(json_response, indent=2))
        if "meta" in json_response and "next_token" in json_response["meta"]:
            start_token = json_response["meta"]["next_token"]
        else:
            break  # no next page: we're done
        # rate limit: 300 requests / 15 minutes (900 s) => 1 request / 3 s,
        # so sleep for 3 s between pages to stay under it
        time.sleep(3)
        count = count + 1


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--id", help="user id", type=str, required=True)
    args = parser.parse_args()
    print(args)
    main(args)
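If you'd rather end up with the single combined file, you can merge the per-page dumps afterwards. A minimal sketch, assuming the tweets_*.json files produced above sit in the current directory (the tweets_combined.json output name is my choice); it concatenates the "data" arrays and the expansion lists under "includes", without deduplicating:

#!/usr/bin/env python3
import glob
import json

combined = {"data": [], "includes": {}}
# sort the page files numerically (tweets_0.json, tweets_1.json, ...)
for path in sorted(glob.glob("tweets_[0-9]*.json"),
                   key=lambda p: int(p.split("_")[1].split(".")[0])):
    with open(path) as handler:
        page = json.load(handler)
    combined["data"].extend(page.get("data", []))
    # "includes" holds the expansion lists (users, media, polls, places, tweets)
    for key, values in page.get("includes", {}).items():
        combined["includes"].setdefault(key, []).extend(values)

with open("tweets_combined.json", "w") as handler:
    handler.write(json.dumps(combined, indent=2))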