Skip to content

Instantly share code, notes, and snippets.

@cmj
Last active February 18, 2024 18:16
Show Gist options
  • Save cmj/adfd541dde30585d861d28fd58bec9f0 to your computer and use it in GitHub Desktop.
Save cmj/adfd541dde30585d861d28fd58bec9f0 to your computer and use it in GitHub Desktop.
likes page output
import json
import requests
import argparse
import datetime
import time
import re
# Static request parameters, copied as-is from the website. Nothing below is
# computed at runtime except the 'authorization' header value.

# Feature flags sent alongside the user lookup query (compact JSON string).
FEATURES_USER = (
    '{"hidden_profile_likes_enabled":true,'
    '"hidden_profile_subscriptions_enabled":true,'
    '"responsive_web_graphql_exclude_directive_enabled":true,'
    '"verified_phone_label_enabled":false,'
    '"subscriptions_verification_info_is_identity_verified_enabled":true,'
    '"subscriptions_verification_info_verified_since_enabled":true,'
    '"highlights_tweets_tab_ui_enabled":true,'
    '"responsive_web_twitter_article_notes_tab_enabled":false,'
    '"creator_subscriptions_tweet_preview_api_enabled":true,'
    '"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,'
    '"responsive_web_graphql_timeline_navigation_enabled":true}'
)

# Feature flags sent alongside the likes timeline query (compact JSON string).
FEATURES_TWEETS = (
    '{"responsive_web_graphql_exclude_directive_enabled":true,'
    '"verified_phone_label_enabled":false,'
    '"creator_subscriptions_tweet_preview_api_enabled":true,'
    '"responsive_web_graphql_timeline_navigation_enabled":true,'
    '"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,'
    '"c9s_tweet_anatomy_moderator_badge_enabled":true,'
    '"tweetypie_unmention_optimization_enabled":true,'
    '"responsive_web_edit_tweet_api_enabled":true,'
    '"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,'
    '"view_counts_everywhere_api_enabled":true,'
    '"longform_notetweets_consumption_enabled":true,'
    '"responsive_web_twitter_article_tweet_consumption_enabled":true,'
    '"tweet_awards_web_tipping_enabled":false,'
    '"freedom_of_speech_not_reach_fetch_enabled":true,'
    '"standardized_nudges_misinfo":true,'
    '"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,'
    '"rweb_video_timestamps_enabled":true,'
    '"longform_notetweets_rich_text_read_enabled":true,'
    '"longform_notetweets_inline_media_enabled":true,'
    '"responsive_web_media_download_video_enabled":false,'
    '"responsive_web_enhance_cards_enabled":false}'
)

# Bearer token placed in the 'authorization' header of every request.
AUTHORIZATION_TOKEN = 'AAAAAAAAAAAAAAAAAAAAAFXzAwAAAAAAMHCxpeSDG1gLNLghVe8d74hl6k4%3DRUMF4xAQLsbeBhTSRrCiQpJtxoGWeyHrDb5te2jpGskWDFW82F'

# Headers attached to every request. The X-filled values are placeholders.
HEADERS = {
    'authorization': 'Bearer ' + AUTHORIZATION_TOKEN,
    'x-csrf-token': 'XXXXXXXXXXXXXXXXXXXXXXXXXXX',
    'cookie': 'ct0=XXXXXXXXXXXXXXXXXXXXXXXXXXXX; auth_token=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX',
}

# GraphQL endpoints: user lookup by screen name, and the likes timeline.
GET_USER_URL = 'https://twitter.com/i/api/graphql/SAMkL5y_N9pmahSw8yy6gw/UserByScreenName'
GET_TWEETS_URL = 'https://twitter.com/i/api/graphql/G_zHbTiwSqLm0TAK_3sNWQ/Likes'

# Column names for one tweet record.
FIELDNAMES = [
    'id',
    'tweet_url',
    'name',
    'user_id',
    'username',
    'published_at',
    'content',
    'views_count',
    'retweet_count',
    'likes',
    'quote_count',
    'reply_count',
    'bookmarks_count',
    'medias',
]
class TwitterScraper:
    """Small client that dumps the raw likes page of one Twitter account.

    It resolves a screen name to a user id through the ``UserByScreenName``
    GraphQL endpoint, then requests the ``Likes`` endpoint for that id and
    prints the JSON response.
    """

    # Seconds to wait on the server before giving up. Without a timeout,
    # ``requests`` may block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, username):
        """Remember the target account and the headers used for each request.

        Args:
            username: Screen name of the account whose likes are requested.
                Must be truthy.

        Raises:
            AssertionError: ``username`` is empty or ``None``.
        """
        assert username
        self.HEADERS = HEADERS
        self.username = username

    def get_user(self):
        """Look up the account named ``self.username``.

        Returns:
            A dict with three keys: ``id`` (the ``rest_id`` of the user
            result), ``username`` (the screen name that was queried) and
            ``full_name`` (the ``name`` found under ``legacy``). ``id`` and
            ``full_name`` are ``None`` when the response does not carry them.

        Raises:
            requests.exceptions.JSONDecodeError: The response body is not JSON.
            requests.exceptions.Timeout: No answer within ``REQUEST_TIMEOUT``.
        """
        variables = {"screen_name": self.username, "withSafetyModeUserFields": True}
        response = requests.get(
            GET_USER_URL,
            params={
                'variables': json.dumps(variables),
                'features': FEATURES_USER,
            },
            headers=self.HEADERS,
            timeout=self.REQUEST_TIMEOUT,
        )
        payload = response.json()

        # ``or {}`` (rather than a ``.get`` default) also covers keys that are
        # present but hold JSON ``null``, which would otherwise raise
        # AttributeError on the next lookup.
        data = payload.get("data") or {}
        user = data.get("user") or {}
        result = user.get("result") or {}
        legacy = result.get("legacy") or {}
        return {
            "id": result.get("rest_id"),
            "username": self.username,
            "full_name": legacy.get("name"),
        }

    def iter_tweets(self, limit=20):
        """Request the first page of the user's likes and print it as JSON.

        Exactly one request is sent (page size 80, no cursor). The decoded
        response is written to stdout as a single JSON document. Nothing is
        parsed out of the response, so the returned list is always empty.

        Args:
            limit: Maximum number of likes wanted. Accepted for interface
                compatibility; it has no effect because only one page is
                requested and no tweets are collected.

        Returns:
            An empty list.

        Raises:
            NotImplementedError: The user id could not be resolved.
            requests.exceptions.JSONDecodeError: A response body is not JSON.
            requests.exceptions.Timeout: No answer within ``REQUEST_TIMEOUT``.
        """
        user_id = self.get_user().get("id")
        if not user_id:
            print("/!\\ error: no user id found")
            # Exception type kept as-is so existing callers keep working.
            raise NotImplementedError

        variables = {
            "userId": user_id,
            "count": 80,
            # No pagination: the cursor is never advanced, so only the first
            # page is ever requested.
            "cursor": None,
            "includePromotedContent": False,
            "withQuickPromoteEligibilityTweetFields": True,
            "withVoice": True,
            "withV2Timeline": True,
        }
        response = requests.get(
            GET_TWEETS_URL,
            params={
                'variables': json.dumps(variables),
                'features': FEATURES_TWEETS,
            },
            headers=self.HEADERS,
            timeout=self.REQUEST_TIMEOUT,
        )
        # The raw page dump is this script's output.
        print(json.dumps(response.json()))
        return []
def main():
    """Parse the command line and dump the likes page of one account.

    Options:
        --username / -u: user to scrape likes from (default ``elonmusk``).
        --limit / -l: max likes to scrape (default ``100``).

    Exits with status 2 and a usage message when the username is empty or
    the limit is zero.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--username', '-u', type=str, required=False, help='user to scrape likes from', default='elonmusk')
    argparser.add_argument('--limit', '-l', type=int, required=False, help='max likes to scrape', default=100)
    args = argparser.parse_args()

    # Validate explicitly: an ``assert`` would be skipped under ``python -O``
    # and would show the user a bare traceback instead of a usage message.
    if not args.username or not args.limit:
        argparser.error('--username must be non-empty and --limit must be non-zero')

    TwitterScraper(args.username).iter_tweets(limit=args.limit)


if __name__ == '__main__':
    main()
@cmj
Copy link
Author

cmj commented Feb 18, 2024

privacydev-howto

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment