Skip to content

Instantly share code, notes, and snippets.

@lucahammer
Last active April 17, 2022 06:15
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lucahammer/8e31fa100446c3b7c783a00bd006003a to your computer and use it in GitHub Desktop.
Save lucahammer/8e31fa100446c3b7c783a00bd006003a to your computer and use it in GitHub Desktop.
Python script that generates a GDF file for Gephi to visualize the Mastodon hashtag network: https://vis.social/web/statuses/100634263439065513
from multiprocessing import Pool
import requests
import datetime
import time
# Size of the multiprocessing worker pool created in the __main__ block,
# i.e. the number of Python processes that can run in parallel.
MAX_PROCESSES = 100
# Toot count passed to get_local_timeline() for every instance. Pages are
# requested with limit=40, so use a multiple of 40.
TOOTS_PER_INSTANCE = 4000
# Host names (no scheme) of the instances to examine; each one is handed to
# process_instance().
MASTOVERSE = ["mstdn.jp","pawoo.net","friends.nico","mastodon.social","mimumedon.com"]
def get_local_timeline(instance_url, count=80):
    """Download recent toots from an instance's local public timeline.

    Requests ``/api/v1/timelines/public?local=true&limit=40`` on the given
    host and keeps following the ``next`` link of each response until at
    least *count* toots were collected, no ``next`` link is offered, or a
    page is empty or ends with the same toot id as the page before it.

    Args:
        instance_url: Host name of the instance without scheme,
            e.g. ``"mastodon.social"``.
        count: Number of toots after which paging stops. Whole pages are
            appended, so the result may be longer than *count*.

    Returns:
        The JSON-decoded toots in the order the API returned them.

    Raises:
        requests.HTTPError: The instance answered with a 4xx/5xx status.
        requests.RequestException: Any other transport problem, including
            the 10 second per-request timeout.
    """
    url = 'https://' + instance_url + '/api/v1/timelines/public?local=true&limit=40'
    response = requests.get(url, timeout=10)
    # Fail loudly on an error status instead of returning the error payload
    # as if it were a list of toots.
    response.raise_for_status()
    timeline = response.json()

    # Remember the last id already collected so that a server which keeps
    # serving the same page is detected on the very next request.
    last_id = timeline[-1]['id'] if timeline else None

    while 'next' in response.links and len(timeline) < count:
        response = requests.get(response.links['next']['url'], timeout=10)
        response.raise_for_status()
        page = response.json()
        if not page or page[-1]['id'] == last_id:
            # Nothing new came back: stop paging.
            break
        last_id = page[-1]['id']
        timeline += page
    return timeline
def get_hashtags(timeline):
    """Collect the names of all hashtags used in *timeline*.

    Args:
        timeline: Iterable of toots. Every toot must provide a ``'tags'``
            entry holding a sequence of mappings with a ``'name'`` key.

    Returns:
        A list with one tag name per tag occurrence, in timeline order.
        Duplicates are kept; toots without tags contribute nothing.
    """
    # An empty 'tags' sequence simply yields no items, so no explicit
    # emptiness check is needed.
    return [tag['name'] for toot in timeline for tag in toot['tags']]
# NOTE(review): nothing in the visible script reads or writes this dict;
# it appears to be unused - confirm before removing.
instances = dict()
def process_instance(instance):
    """Fetch one instance's hashtags and append them to the GDF file.

    Downloads up to TOOTS_PER_INSTANCE toots from the instance's local
    timeline, prints ``"<instance>: <number of tags>"`` and appends one
    ``<instance>,<tag>,TRUE`` edge line per tag occurrence to
    ``mastodon-hashtags.gdf``.

    Args:
        instance: Host name of the instance to examine.

    Returns:
        The string ``"Done: <instance>"``.
    """
    hashtags = get_hashtags(get_local_timeline(instance, TOOTS_PER_INSTANCE))
    with open('mastodon-hashtags.gdf', 'a', encoding="utf8") as gdf:
        print('{}: {}'.format(instance, len(hashtags)))
        # One edge per tag occurrence: instance -> hashtag, directed.
        gdf.writelines(instance + ',' + name + ',TRUE\n' for name in hashtags)
    return "Done: {}".format(instance)
if __name__ == '__main__':
    # Start a fresh GDF file: an empty node section followed by the edge
    # section header. The workers append the edge lines themselves.
    with open('mastodon-hashtags.gdf', 'w', encoding="utf8") as f:
        f.write('nodedef>name VARCHAR\n')
        f.write('edgedef>node1 VARCHAR,node2 VARCHAR,directed BOOLEAN\n')

    # More workers than instances would only sit idle; Pool rejects a size
    # of zero, hence the lower bound of one.
    workers = max(1, min(MAX_PROCESSES, len(MASTOVERSE)))
    with Pool(workers) as p:
        # One job per instance so progress can be counted through the public
        # AsyncResult API instead of a private attribute.
        pending = [p.apply_async(process_instance, (instance,))
                   for instance in MASTOVERSE]
        while True:
            unfinished = [job for job in pending if not job.ready()]
            if not unfinished:
                break
            print("# # # # # # # # Instances left: {}".format(len(unfinished)))
            # Report again after 10 seconds, or earlier if this job finishes.
            unfinished[0].wait(10)
        # get() re-raises an exception raised inside a worker.
        print([job.get() for job in pending])
    print("All Done.")
@lucahammer
Copy link
Author

Maybe add https://instances.social/ to automatically populate the mastoverse list.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment