-
-
Save sparack/cf8591d3ed6e67889c713e842f2cb1c7 to your computer and use it in GitHub Desktop.
Sample code to demonstrate how to page through more than 500 Tweets for full-archive search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
def connect_to_endpoint(bearer_token, query, next_token=None, max_results=500):
    """Request one page of results from the full-archive search endpoint.

    Args:
        bearer_token: Bearer token sent in the ``Authorization`` header.
        query: Search query string. It is passed through ``params`` so that
            ``requests`` URL-encodes it (spaces, ``#``, ``&`` and so on).
        next_token: Pagination token taken from the ``meta`` of a previous
            response, or ``None`` to request the first page.
        max_results: Requested page size. It is lowered to 100 whenever
            ``context_annotations`` is among the requested Tweet fields,
            because the API answers 400 for larger pages in that case.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: with ``(status_code, response_text)`` as arguments when
            the response status is not 200.
    """
    headers = {"Authorization": "Bearer {}".format(bearer_token)}

    # add additional parameters as needed
    tweet_fields = "attachments,author_id,context_annotations,created_at,entities"

    # The API rejects max_results > 100 when context_annotations is requested.
    if "context_annotations" in tweet_fields.split(","):
        max_results = min(max_results, 100)

    params = {
        "query": query,
        "max_results": max_results,
        # replace appropriate start and end times below
        "start_time": "2006-03-31T15:00:00Z",
        "tweet.fields": tweet_fields,
    }
    # Only send next_token when paging; the first request has none.
    if next_token is not None:
        params["next_token"] = next_token

    url = "https://api.twitter.com/2/tweets/search/all"
    response = requests.request("GET", url, params=params, headers=headers)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()
# Pages through full-archive search results and appends every Tweet ID to a
# CSV file, one ID per line, until there are no more pages or the Tweet limit
# below has been reached.

# Replace with your own bearer token from your academic project in developer portal
bearer_token = "REPLACE_ME"
# Query whose matching Tweet IDs are saved.
query = 'from:TwitterDev'
# Replace the value below with the number of Tweets you want to stop at.
# Note: running without this limit will result in getting more Tweets
# that will count towards the Tweet cap
max_tweets = 1000
# Replace with your path below
output_path = '/your/path/tweet_ids.csv'

count = 0
flag = True
# None requests the first page; afterwards this holds the token for the next page.
next_token = None

# Open the output once for the whole run so the handle is always closed.
with open(output_path, 'a') as f:
    while flag and count < max_tweets:
        json_response = connect_to_endpoint(bearer_token, query, next_token)
        meta = json_response.get('meta', {})
        # 'data' is not read unconditionally: fall back to an empty page.
        tweets = json_response.get('data', [])
        if tweets:
            for tweet in tweets:
                f.write(tweet['id'] + "\n")
            count += len(tweets)
            print(count)
        # Carry the token into the next request; a missing token ends the loop
        # after the current (last) page has been saved.
        next_token = meta.get('next_token')
        flag = next_token is not None

print("Total Tweet IDs saved: {}".format(count))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you for sharing this, Suham!
I got the following error while trying to run the code:
Exception: (400, '{"errors":[{"parameters":{"tweet.fields":["attachments,author_id,context_annotations,created_at,entities"],"max_results":["500"]},"message":"when requesting `tweet.fields=context_annotations` `max_results` must be less than or equal to `100`"}],"title":"Invalid Request","detail":"One or more parameters to your request was invalid.","type":"https://api.twitter.com/2/problems/invalid-request"}')
In lines #13 and #15, the 500 should be 100 because that's the max_results allowed, right?