Created
June 14, 2020 01:03
-
-
Save browserdotsys/e4f81a8b7d34e7ff708d70f3e97ebc92 to your computer and use it in GitHub Desktop.
Rank your followers in a Twitter data archive
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3.7 | |
import json | |
import os | |
import sys | |
def load_json(fp):
    """Parse the JSON array embedded in the text read from *fp*.

    Everything before the first ``[`` is discarded (presumably a JavaScript
    assignment prefix in the archive's ``.js`` files -- confirm against a
    real archive), and the remainder is decoded as JSON.

    Args:
        fp: A readable text file object; it is consumed with ``fp.read()``.

    Returns:
        The decoded JSON value starting at the first ``[``.

    Raises:
        ValueError: If the text contains no ``[`` at all, or if the text from
            the first ``[`` onward is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    data = fp.read()
    start = data.find('[')
    if start == -1:
        # str.find() signals "not found" with -1; slicing with it would keep
        # only the last character and could silently parse e.g. "5".
        raise ValueError("no JSON array ('[') found in input")
    return json.loads(data[start:])
# Rank the followers found in an unpacked archive directory by how often the
# archive's tweets reply to them and how often they are retweeted.
#
# Usage: pass the archive directory as the first command-line argument; the
# script reads data/follower.js, data/tweet.js and data/like.js beneath it.
twdir = sys.argv[1]

# Open each file in a `with` block so the handle is closed as soon as it has
# been parsed. The files are assumed to be UTF-8 (TODO confirm); stating the
# encoding avoids depending on the platform's locale default.
with open(os.path.join(twdir, 'data', 'follower.js'), encoding='utf-8') as fp:
    followers = load_json(fp)
with open(os.path.join(twdir, 'data', 'tweet.js'), encoding='utf-8') as fp:
    tweets = load_json(fp)
# NOTE(review): `likes` is loaded but not used by any visible code below.
with open(os.path.join(twdir, 'data', 'like.js'), encoding='utf-8') as fp:
    likes = load_json(fp)

follower_interacts = {}  # follower account id -> number of replies to them
retweets = {}            # follower account id -> number of retweets of them
name_mapping = {}        # mentioned user id -> (screen_name, name)
rev_name_mapping = {}    # upper-cased screen_name -> mentioned user id

# Seed both counters with every follower so that only followers are ranked.
for entry in followers:
    uid = entry['follower']['accountId']
    follower_interacts[uid] = 0
    retweets[uid] = 0

for record in tweets:
    tweet = record['tweet']

    # Count replies that target a follower.
    uid = tweet.get('in_reply_to_user_id')
    if uid and uid in follower_interacts:
        follower_interacts[uid] += 1

    # Remember the names of every mentioned user; the retweet lookup below
    # and the final report both rely on these mappings.
    for mention in tweet['entities'].get('user_mentions', []):
        name_mapping[mention['id']] = (mention['screen_name'], mention['name'])
        rev_name_mapping[mention['screen_name'].upper()] = mention['id']

    # Retweets are recognised by their "RT @<screen_name>:" text prefix.
    text = tweet['full_text']
    if text.startswith("RT @"):
        colon_pos = text.find(':')
        if colon_pos == -1:
            # No colon after the handle: the screen name cannot be extracted.
            continue
        username = text[4:colon_pos]
        # .get() instead of indexing: a retweeted handle that never appeared
        # in any user_mentions list must not abort the run with a KeyError.
        uid = rev_name_mapping.get(username.upper())
        if uid in retweets:
            retweets[uid] += 1

interacts = sum(1 for count in follower_interacts.values() if count)
print(f"Interacted users: {interacts} Followers: {len(followers)}")

# Both rankings share one layout: blank line, heading, then up to ten rows of
# count, user id, screen name and display name.
for heading, counts in (
    ("Top 10 most replied-to users:", follower_interacts),
    ("Top 10 most retweeted users:", retweets),
):
    print()
    print(heading)
    for uid in sorted(counts, key=counts.get, reverse=True)[:10]:
        # Followers never seen in a mention have no recorded name.
        sn, name = name_mapping.get(uid, ("N/A", "N/A"))
        print(f"{counts[uid]:4} {uid:20} {sn:16} {name:50}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment