Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save judge2020/79d0a813c29d5996e254ef2d00f86e6b to your computer and use it in GitHub Desktop.
Save judge2020/79d0a813c29d5996e254ef2d00f86e6b to your computer and use it in GitHub Desktop.
Simple script that plots the age and gender Discord's machine-learning models predicted for you, using the events files from your Discord data dump.
import glob
import json
from datetime import datetime
from matplotlib import pyplot
# Names of the per-bucket probability fields read from each prediction event.
age_keys = ["prob_13_17", "prob_18_24", "prob_25_34", "prob_35_over"]
gen_keys = ["prob_male", "prob_female", "prob_non_binary_gender_expansive"]

# One timestamp list per chart; entry i lines up with entry i of every series.
age_ts, gen_ts = [], []

# One (initially empty) value series per probability field, keyed by field name.
age_lists = {field: [] for field in age_keys}
gen_lists = {field: [] for field in gen_keys}
def _parse_day(raw):
    """Parse a ``day_pt`` string into a naive ``datetime``.

    Any literal ``" UTC"`` marker is removed before the value is handed to
    ``datetime.fromisoformat``.
    """
    return datetime.fromisoformat(raw.replace(" UTC", ""))


def _iter_prediction_events(path):
    """Yield the decoded JSON object of each line in *path* that mentions a prediction.

    The file is read one line at a time and each matching line is decoded as
    its own JSON document.
    """
    # JSON text is UTF-8. Without an explicit encoding, open() falls back to the
    # locale's preferred encoding, which can raise UnicodeDecodeError on
    # non-ASCII content (for example on Windows).
    with open(path, "r", encoding="utf-8") as handle:
        for line in handle:
            # Cheap substring pre-filter so only candidate lines are decoded.
            # It matches a key that starts with "predicted_" and directly
            # follows a comma, i.e. compact JSON where the key is not first.
            if ',"predicted_' in line:
                yield json.loads(line)


activity_files = glob.glob("./events-*-*-of-*.json")
for activity_file in activity_files:
    print("Processing", activity_file)
    for event in _iter_prediction_events(activity_file):
        # A single event may carry an age prediction, a gender prediction, or
        # both, so the two checks are independent.
        # NOTE(review): an event without "day_pt" raises AttributeError here,
        # exactly as before - confirm every prediction event carries it.
        if "predicted_age" in event:
            age_ts.append(_parse_day(event.get("day_pt")))
            for key in age_keys:
                # A missing probability field is recorded as None.
                age_lists[key].append(event.get(key))
        if "predicted_gender" in event:
            gen_ts.append(_parse_day(event.get("day_pt")))
            for key in gen_keys:
                gen_lists[key].append(event.get(key))
def sort_data(ts, data):
    """Return ``ts`` and every series in ``data`` reordered by ascending ``ts``.

    Args:
        ts: Sequence of sortable keys (timestamps), one per sample.
        data: Mapping of series name to a sequence of values, where value
            ``i`` of each series belongs to ``ts[i]``.

    Returns:
        A ``(sorted_ts, sorted_data)`` tuple of new objects: a list of the
        keys in ascending order and a dict with the same series names, each
        value list rearranged the same way. Samples with equal keys keep
        their original relative order.
    """
    # Build one row per sample (key first, then one value per series) and
    # order the rows by the key alone, never by the values.
    rows = sorted(zip(ts, *data.values()), key=lambda row: row[0])

    ordered_ts = [row[0] for row in rows]

    # Column 0 of each row is the key, so series number n sits in column n + 1.
    ordered_data = {}
    for column, name in enumerate(data, start=1):
        ordered_data[name] = [row[column] for row in rows]

    return ordered_ts, ordered_data
def _show_chart(title, timestamps, series, keys, labels):
    """Plot one marked line per key in ``keys`` against ``timestamps`` and show it.

    ``labels`` supplies the legend entries, in the same order as ``keys``.
    """
    pyplot.title(title)
    for key in keys:
        pyplot.plot(timestamps, series[key], marker="o")
    pyplot.legend(labels)
    pyplot.show()


# Age chart: only drawn when at least one age prediction was collected.
if age_ts:
    age_ts, age_lists = sort_data(age_ts, age_lists)
    _show_chart(
        "Discord predicted age",
        age_ts,
        age_lists,
        age_keys,
        ["13-17", "18-24", "25-34", "35+"],
    )

# Gender chart: only drawn when at least one gender prediction was collected.
if gen_ts:
    gen_ts, gen_lists = sort_data(gen_ts, gen_lists)
    _show_chart(
        "Discord predicted gender",
        gen_ts,
        gen_lists,
        gen_keys,
        ["male", "female", "non-binary"],
    )

# input() doubles as "print the message and wait for Enter" so the text stays
# on screen until the user acknowledges it.
if not activity_files:
    input(
        "Couldn't find your activity file! Make sure you have a file named "
        "events-YEAR-XXXXX-of-XXXXX.json in the current directory. "
        "It should be in activity/analytics/ in your Discord data dump."
    )
elif not (age_ts or gen_ts):
    input("No predicted data found for you :(")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment