Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Generates a GDF of the Fediverse https://vis.social/@Luca/100606625507856187
'''
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
'''
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Generates a network file of the Fediverse as GDF. #
# (https://gephi.org/users/supported-graph-formats/gdf-format/) #
# Result can be analyzed and visualized with https://gephi.org/ #
# Because the script uses the Mastodon API other platforms like #
# Pleroma, Peertube, Pixelfed, Funkwhale won't have outgoing peers. #
# #
# Tutorial how to use Gephi: #
# lucahammer.com/guide-analyzing-twitter-networks-with-gephi-0-9-1/ #
# #
# GDF from 2018-08-24: #
# https://lucahammer.at/vis/fediverse/2018-08-24-fediverse-GDF.zip #
# Related thread: https://vis.social/@Luca/100606625507856187 #
# GDF by mastodon.social/@Gargron from 2018-01-30 #
# (not generated with this script): #
# https://gist.github.com/Gargron/48e67b1b14723cd178c951fe7f373a38 #
# #
# The script generates three files: #
# - nodes.csv #
# - edges.csv #
# - fediverse.gdf #
# #
# Change SEED to start from a different instance. #
# Change MAX_PROCESSES to change how many requests can run in parallel. #
# (Each process needs about 15MB RAM) #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
SEED = 'mastodon.social'  # instance whose peer list bootstraps the crawl
MAX_PROCESSES = 200  # worker processes / parallel requests (each needs ~15MB RAM)
TIMEOUT = 20  # per-HTTP-request timeout in seconds
NODES_FILENAME = 'fediverse-nodes.csv'  # GDF node section (one row per instance)
EDGES_FILENAME = 'fediverse-edges.csv'  # GDF edge section (instance -> peer)
GDF_FILENAME = 'fediverse.gdf'  # final combined GDF output
import requests
from multiprocessing import Pool
import json
import time
import fuckit
# collect info about instance
def get_instance_info(instance):
    """Fetch the /api/v1/instance metadata (stats, version, ...) of a domain.

    Raises on network errors / timeouts / non-JSON bodies; callers are
    expected to handle those.
    """
    endpoint = 'https://' + instance + '/api/v1/instance'
    response = requests.get(endpoint, timeout=TIMEOUT)
    return response.json()
# collect connected instances
def get_instance_peers(instance):
    """Fetch the list of peer domains from /api/v1/instance/peers.

    Raises on network errors / timeouts / non-JSON bodies; callers are
    expected to handle those.
    """
    endpoint = 'https://' + instance + '/api/v1/instance/peers'
    response = requests.get(endpoint, timeout=TIMEOUT)
    return response.json()
# work instance
def process_instance(instance):
    """Crawl one instance: fetch its metadata and its peer list.

    Returns a dict with 'status' == 'success' (keys: 'instance', 'info',
    'peers') or 'status' == 'failure' (keys: 'instance', 'name').  Any
    error while fetching (timeout, DNS failure, bad JSON, ...) yields a
    failure record instead of propagating, so a Pool worker never dies
    on a single broken instance.
    """
    data = dict()
    print('Processing: ' + str(instance))
    try:
        data['instance'] = instance
        data['info'] = get_instance_info(instance)
        data['peers'] = get_instance_peers(instance)
        data['status'] = 'success'
        return data
    except Exception:
        # Bare `except:` replaced: don't swallow KeyboardInterrupt/SystemExit.
        # Record 'instance' too (the success branch does), keeping the
        # legacy 'name' key for backward compatibility.
        data['instance'] = instance
        data['name'] = instance
        data['status'] = 'failure'
        return data
# save data
def save_data(data):
    """Append one crawl result to the node and edge CSVs (best effort).

    `data` is a record produced by process_instance.  Failure records and
    malformed payloads are skipped or logged instead of aborting the run
    (the original version relied on the `fuckit` decorator to swallow
    every exception, which also hid genuine bugs).

    NOTE: the literal file names below must stay in sync with
    NODES_FILENAME / EDGES_FILENAME, whose headers are written in __main__.
    """
    # instance, status, info, peers
    #todo_bag.update(data['peers'])
    if not data or data.get('status') != 'success':
        return
    #edges[data['instance']] = data['peers']
    #nodes[data['instance']] = data['info']
    try:
        # Build the whole row first so a missing key can never leave a
        # partially-written (corrupt) line in the CSV.
        stats = data['info']['stats']
        row = ','.join([
            data['instance'],
            str(stats['domain_count']),
            str(stats['status_count']),
            str(stats['user_count']),
            data['info']['version'],
        ])
        with open('fediverse-nodes.csv', 'a') as f:
            f.write(row + '\n')
    except Exception as e:
        print('Could not write node row for {}: {}'.format(data['instance'], e))
    try:
        with open('fediverse-edges.csv', 'a') as f:
            for peer in data['peers']:
                f.write(data['instance'] + ',' + peer + ',true\n')
    except Exception as e:
        print('Could not write edge rows for {}: {}'.format(data['instance'], e))
    try:
        done_bag.add(data['instance'])
    except NameError:
        # done_bag is created in the __main__ block; tolerate its absence
        # when this module is imported rather than run as a script.
        pass
if __name__ == '__main__':
    # Seed the two output files with their GDF section headers.  Use the
    # module constants instead of the hardcoded paths the original used,
    # so renaming the outputs needs a change in one place only.
    with open(NODES_FILENAME, 'w') as f:
        f.write('nodedef>name VARCHAR,domain_count INT,status_count INT,user_count INT,version VARCHAR\n')
    with open(EDGES_FILENAME, 'w') as f:
        f.write('edgedef>node1 VARCHAR,node2 VARCHAR,directed BOOLEAN\n')

    todo_bag = set()   # instances still to crawl
    done_bag = set()   # instances already saved (updated by save_data)
    nodes = dict()     # instance -> info dict (unused; kept for the commented code in save_data)
    edges = dict()     # instance -> [edges] (unused; kept for the commented code in save_data)

    todo_bag.update(get_instance_peers(SEED))
    # NOTE(review): nothing refills todo_bag after the seed's peers — the
    # `todo_bag.update(data['peers'])` line in save_data is commented out,
    # so the crawl is effectively one hop from SEED.  Confirm intended.
    while len(todo_bag) > 0:
        # Drain up to MAX_PROCESSES not-yet-done instances into a batch.
        work_bag = []
        while len(todo_bag) > 0 and len(work_bag) < MAX_PROCESSES:
            instance = todo_bag.pop()
            if instance not in done_bag:
                work_bag.append(instance)
        print("work_bag filled: {} todo_bag: {}".format(str(len(work_bag)), str(len(todo_bag))))
        with Pool(MAX_PROCESSES) as p:
            result = p.map_async(process_instance, work_bag)
            while not result.ready():
                # _number_left is a private AsyncResult attribute — fine for a
                # progress display, but may break across Python versions.
                print("Instances left in current bag: {}".format(result._number_left))
                time.sleep(10)
            # get() must run inside the `with` block: leaving the Pool
            # context terminates the workers.
            datalist = result.get()
        for data in datalist:
            save_data(data)

    # combine files: concatenate node and edge sections into the final GDF.
    filenames = [NODES_FILENAME, EDGES_FILENAME]
    with open(GDF_FILENAME, 'w') as outfile:
        for fname in filenames:
            with open(fname, 'r') as infile:
                for line in infile:
                    outfile.write(line)
    print('All done.')
@lucahammer
Copy link
Author

lucahammer commented Aug 25, 2018

To include data of other platforms than Mastodon the following could be tried:

  • Nodeinfo /nodeinfo/1.0
  • Mastodon’s /api/v1/instance
  • Statusnet-like /api/statusnet/config, /api/statusnet/version
  • PeerTube’s /api/v1/config

Inspiration: https://fediverse.network/info

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment