Last active
September 3, 2024 14:30
-
-
Save rebane2001/b0256bde6b999d9477938d5a968ded5c to your computer and use it in GitHub Desktop.
Simple script to plot your Discord machine-learning predicted age and gender from the data dump.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Simple script to plot your Discord machine-learning predicted age and gender from the data dump. | |
To use, simply put this script in your activity/analytics/ folder of your Discord data dump and run it. | |
You may need to install matplotlib first: | |
pip install -U matplotlib | |
thrown together by rebane2001 | |
changelog: | |
2024-05-18T14:40:00Z | |
- add warning for wrong directory | |
- add sorting to the plot | |
2024-05-30 | |
- force UTF-8 encoding (for windows) | |
""" | |
import os | |
import glob | |
import json | |
from datetime import datetime | |
from matplotlib import pyplot | |
# Timestamps and per-key value series collected from "predicted_age" events.
# age_ts[i] lines up with age_lists[key][i] for every key in age_keys.
age_ts = []
age_keys = ["prob_13_17", "prob_18_24", "prob_25_34", "prob_35_over"]
age_lists = {k: [] for k in age_keys}

# Same layout for "predicted_gender" events.
gen_ts = []
gen_keys = ["prob_male", "prob_female", "prob_non_binary_gender_expansive"]
gen_lists = {k: [] for k in gen_keys}


def _plot_predictions(title, timestamps, series_by_key, keys, labels):
    """Show one figure with a line per key, ordered chronologically.

    Args:
        title: Figure title.
        timestamps: Non-empty list of datetimes, one per collected event.
        series_by_key: Maps each key to a list of values parallel to
            ``timestamps``.
        keys: Keys to draw, in legend order.
        labels: Legend labels, parallel to ``keys``.
    """
    pyplot.title(title)
    for key in keys:
        # Events are read in file order, which is not chronological; without
        # sorting, the line would jump back and forth across the plot.
        # Sort on the timestamp only: comparing whole (timestamp, value)
        # tuples would fall through to the values on equal timestamps and
        # raise TypeError if one of them is None.
        points = sorted(
            zip(timestamps, series_by_key[key]),
            key=lambda point: point[0],
        )
        xs, ys = zip(*points)
        pyplot.plot(xs, ys, marker="o")
    pyplot.legend(labels)
    pyplot.show()


activity_files = glob.glob("./events-*-*-of-*.json")

# Normalise Windows separators before checking the directory suffix.
if not os.getcwd().replace("\\", "/").endswith("activity/analytics"):
    print("Warning: you don't seem to be in the activity/analytics/ directory!")

for activity_file in activity_files:
    print("Processing", activity_file)
    # Explicit UTF-8: the platform default encoding on Windows fails on
    # these files.
    with open(activity_file, "r", encoding="UTF-8") as f:
        for line in f:
            # Cheap substring test so only candidate lines are JSON-parsed.
            if ',"predicted_' not in line:
                continue
            event = json.loads(line)
            has_age = "predicted_age" in event
            has_gender = "predicted_gender" in event
            if not (has_age or has_gender):
                continue
            day = event.get("day_pt")
            if not isinstance(day, str):
                # No usable date: the sample cannot be placed on the time
                # axis, so skip it instead of crashing on None.replace().
                continue
            # Strip the " UTC" suffix before handing the value to
            # fromisoformat.
            timestamp = datetime.fromisoformat(day.replace(" UTC", ""))
            if has_age:
                age_ts.append(timestamp)
                for key in age_keys:
                    age_lists[key].append(event.get(key))
            if has_gender:
                gen_ts.append(timestamp)
                for key in gen_keys:
                    gen_lists[key].append(event.get(key))

if age_ts:
    _plot_predictions(
        "Discord predicted age",
        age_ts,
        age_lists,
        age_keys,
        ["13-17", "18-24", "25-34", "35+"],
    )

if gen_ts:
    _plot_predictions(
        "Discord predicted gender",
        gen_ts,
        gen_lists,
        gen_keys,
        ["male", "female", "non-binary"],
    )

# input() rather than print() so the message stays on screen until the user
# presses Enter.
if not activity_files:
    input("Couldn't find your activity file! Make sure you have a file named events-YEAR-XXXXX-of-XXXXX.json in the current directory. It should be in activity/analytics/ in your Discord data dump.")
elif not (age_ts or gen_ts):
    input("No predicted data found for you :(")
I had an issue with the text encoding; the fix was just changing open(activity_file, "r")
to open(activity_file, "r", encoding="utf8").
I encountered the same issue as PatheticMustan on Windows 11 Home 23H2, and their change fixes it. I also recommend adding os.chdir(os.path.dirname(os.path.abspath(__file__)))
so that problems caused by the working directory not being the script's directory (for example, after right-clicking the file and choosing "Open with Python") are easily avoided.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I had an issue where the first and last points were connected; ChatGPT fixed it by sorting the points:
https://gist.github.com/judge2020/79d0a813c29d5996e254ef2d00f86e6b/revisions