# qqpann/get_tweet_replies.py

## get_tweet_replies.py
# Author: Qiushi Pan (@qqhann)
# This is a code snippet with sample usage, to get replies to a specific tweet.
# =====

import tweepy
import time
import os
from collections import defaultdict
from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file located by find_dotenv().
env = load_dotenv(find_dotenv(), override=True)

# Twitter credentials are read from the environment (keep them in the .env
# file). A variable that is not set comes back as None.
con_key, con_sec, acc_tok, acc_sec = (
    os.environ.get(name)
    for name in (
        'TWITTER_API_KEY',
        'TWITTER_API_SECRET_KEY',
        'TWITTER_ACCESS_TOKEN',
        'TWITTER_ACCESS_TOKEN_SECRET',
    )
)


# Build the OAuth handler from the consumer key pair, attach the access
# token pair, and create the API client used by the rest of the script.
auth = tweepy.OAuthHandler(con_key, con_sec)
auth.set_access_token(acc_tok, acc_sec)

api = tweepy.API(auth)


# Sample tweets.
# Tweet 1 (refer as T1)
# https://twitter.com/bozu_108/status/1090475718335578112
# Tweet 2 (refer as T2)
# https://twitter.com/bozu_108/status/1090501639625072640
# In this sample, we try to collect the replies to T1
# that were posted after T1 and no later than T2.


# Search window and bookkeeping
TARGET_TWEET_ID = 1090475718335578112  # T1: the tweet whose replies are collected
oldest_id = 1090501639625072640  # T2: starting upper bound (max_id) for the search
search_term = '@bozu_108'  # Target tweet's user name, used as the search query

# Labels for the three outcomes a fetched tweet is sorted into
no_match = 'No Match'
not_reply = 'Not Reply'
match = 'Match'

counts = dict.fromkeys((no_match, not_reply, match), 0)  # tweets seen per label
matched_texts = defaultdict(int)  # reply text -> number of times it was seen


# Page backwards through the search results, starting at oldest_id (T2) and
# moving towards TARGET_TWEET_ID (T1). Each request asks for up to 100 tweets,
# so the search is repeated with a shrinking max_id until a page comes back
# empty. The range() bound is only a safety cap; you may want to change it,
# or stop the loop manually (if using a Jupyter notebook).
# NOTE: Tweepy 4.x renamed API.search to API.search_tweets; this snippet uses
# the 3.x name.
for i in range(1000):
    print('loop', i, counts)
    time.sleep(5)  # API Limit: 180req / 15min(900sec)
    result_tweets = api.search(search_term, count=100, since_id=TARGET_TWEET_ID, max_id=oldest_id)
    if not result_tweets:
        # Nothing left in the window: further requests would only sleep and
        # spend rate limit without finding anything new.
        break

    for tweet in result_tweets:
        # Refresh oldest_id. max_id is inclusive, so step to one below the
        # oldest tweet seen; otherwise that tweet would be returned and
        # counted again by the next request.
        oldest_id = min(tweet.id - 1, oldest_id)
        if tweet.in_reply_to_status_id == TARGET_TWEET_ID:
            # Direct reply to the target tweet: count it and tally its text.
            counts[match] += 1
            matched_texts[tweet.text] += 1
        elif tweet.in_reply_to_status_id is None:
            # Matches the search term but is not a reply to anything.
            counts[not_reply] += 1
        else:
            # A reply, but to some other tweet.
            counts[no_match] += 1


# Print every matched reply text with its count, most frequent first
for text, count in sorted(
    matched_texts.items(),
    key=lambda item: item[1],
    reverse=True,
):
    print(f'{count:>4}\t{text}')

# sample results output (top 5) =>
#   8	@bozu_108 じゃんけん
#   7	@bozu_108 スペランカー
#   6	@bozu_108 MOTHERシリーズ
#   6	@bozu_108 Vainglory
#   5	@bozu_108 学園ハンサム
	# Author: Qiushi Pan (@qqhann)
	# This is a code snippet with sample usage, to get replies to a specific tweet.
	# =====

	import tweepy
	import time
	import os
	from collections import defaultdict
	from dotenv import load_dotenv, find_dotenv
	# Load variables from the .env file located by find_dotenv().
	env = load_dotenv(find_dotenv(), override=True)

	# Twitter credentials are read from the environment (keep them in the .env
	# file). A variable that is not set comes back as None.
	con_key, con_sec, acc_tok, acc_sec = (
	    os.environ.get(name)
	    for name in (
	        'TWITTER_API_KEY',
	        'TWITTER_API_SECRET_KEY',
	        'TWITTER_ACCESS_TOKEN',
	        'TWITTER_ACCESS_TOKEN_SECRET',
	    )
	)


	# Build the OAuth handler from the consumer key pair, attach the access
	# token pair, and create the API client used by the rest of the script.
	auth = tweepy.OAuthHandler(con_key, con_sec)
	auth.set_access_token(acc_tok, acc_sec)

	api = tweepy.API(auth)


	# Sample tweets.
	# Tweet 1 (refer as T1)
	# https://twitter.com/bozu_108/status/1090475718335578112
	# Tweet 2 (refer as T2)
	# https://twitter.com/bozu_108/status/1090501639625072640
	# In this sample, we try to collect the replies to T1
	# that were posted after T1 and no later than T2.


	# Search window and bookkeeping
	TARGET_TWEET_ID = 1090475718335578112  # T1: the tweet whose replies are collected
	oldest_id = 1090501639625072640  # T2: starting upper bound (max_id) for the search
	search_term = '@bozu_108'  # Target tweet's user name, used as the search query

	# Labels for the three outcomes a fetched tweet is sorted into
	no_match = 'No Match'
	not_reply = 'Not Reply'
	match = 'Match'

	counts = dict.fromkeys((no_match, not_reply, match), 0)  # tweets seen per label
	matched_texts = defaultdict(int)  # reply text -> number of times it was seen


	# Page backwards through the search results, starting at oldest_id (T2) and
	# moving towards TARGET_TWEET_ID (T1). Each request asks for up to 100 tweets,
	# so the search is repeated with a shrinking max_id until a page comes back
	# empty. The range() bound is only a safety cap; you may want to change it,
	# or stop the loop manually (if using a Jupyter notebook).
	# NOTE: Tweepy 4.x renamed API.search to API.search_tweets; this snippet uses
	# the 3.x name.
	for i in range(1000):
	    print('loop', i, counts)
	    time.sleep(5)  # API Limit: 180req / 15min(900sec)
	    result_tweets = api.search(search_term, count=100, since_id=TARGET_TWEET_ID, max_id=oldest_id)
	    if not result_tweets:
	        # Nothing left in the window: further requests would only sleep and
	        # spend rate limit without finding anything new.
	        break

	    for tweet in result_tweets:
	        # Refresh oldest_id. max_id is inclusive, so step to one below the
	        # oldest tweet seen; otherwise that tweet would be returned and
	        # counted again by the next request.
	        oldest_id = min(tweet.id - 1, oldest_id)
	        if tweet.in_reply_to_status_id == TARGET_TWEET_ID:
	            # Direct reply to the target tweet: count it and tally its text.
	            counts[match] += 1
	            matched_texts[tweet.text] += 1
	        elif tweet.in_reply_to_status_id is None:
	            # Matches the search term but is not a reply to anything.
	            counts[not_reply] += 1
	        else:
	            # A reply, but to some other tweet.
	            counts[no_match] += 1


	# Print every matched reply text with its count, most frequent first
	for text, count in sorted(matched_texts.items(), key=lambda x: x[1], reverse=True):
	    print('{:>4}\t{}'.format(count, text))

	# sample results output (top 5) =>
	# 8 @bozu_108 じゃんけん
	# 7 @bozu_108 スペランカー
	# 6 @bozu_108 MOTHERシリーズ
	# 6 @bozu_108 Vainglory
	# 5 @bozu_108 学園ハンサム