Skip to content

Instantly share code, notes, and snippets.

@ashwinvis
Last active July 13, 2019 18:44
Show Gist options
  • Save ashwinvis/c94eb4f6bab60cc633627575d507fda8 to your computer and use it in GitHub Desktop.
Save ashwinvis/c94eb4f6bab60cc633627575d507fda8 to your computer and use it in GitHub Desktop.
View statistics and organize your Mastodon following into lists
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Prepare data as a pandas dataframe"
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"Before you start, install all dependencies and get an archive\n",
"\n",
"```sh\n",
"pip install -r requirements.txt\n",
"mastodon-archive archive --with-following ashwinvis@mastodon.acc.sunet.se \n",
"```"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from mastodon_archive import core\n",
"\n",
"username = \"ashwinvis\"\n",
"domain = \"mastodon.acc.sunet.se\"\n",
"\n",
"\n",
"def get_data():\n",
" status_file = domain + '.user.' + username + '.json'\n",
" data = core.load(status_file, required = True, quiet = True)\n",
" return data\n",
"\n",
"\n",
"def age_in_days(timestamp):\n",
" age = datetime.utcnow() - timestamp\n",
" return age.days"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"df_init = pd.DataFrame(get_data()[\"following\"])\n",
"\n",
"# Convert strings to timestamps\n",
"df_init.created_at = df_init.created_at.apply(pd.Timestamp).dt.tz_convert(None)\n",
"\n",
"# New column: age\n",
"df_init[\"age\"] = df_init.created_at.apply(age_in_days)\n",
"\n",
"# New column: statuses per day to calculate volume\n",
"df_init[\"statuses_per_day\"] = df_init.statuses_count / df_init.age\n",
"\n",
"# Don't put bots in lists\n",
"df_init = df_init.query(\"not bot\") \n",
"df = df_init.set_index('username')\n",
"df.head()"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"# Moved users"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"for user in df.moved.dropna():\n",
" print(user[\"acct\"])"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"# Filter users by no. of statuses per day"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"df.statuses_per_day.plot.hist(bins=100)\n",
"plt.xlabel(\"No. of statuses per day\")\n",
"plt.ylabel(\"No. of users\")"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"from math import inf\n",
"\n",
"def filter_statuses_per_day(low=0, high=inf):\n",
" _ = df.sort_values('statuses_per_day', ascending=False)\n",
" _ = _.query(f'({low} <= statuses_per_day) & (statuses_per_day < {high})')\n",
" return _.filter(items=[\"id\", \"statuses_per_day\"])"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"df.statuses_per_day.describe()"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"## Classify by volume"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"from enum import Enum\n",
"\n",
"level = [0.1, 1, 10]\n",
"\n",
"class Volumes(Enum):\n",
" High = (level[2], inf)\n",
" Mid = level[1:]\n",
" Low = level[0:2]\n",
" Inactive = (0, level[0])"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"for v in Volumes:\n",
" v.users = filter_statuses_per_day(*v.value)"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"Volumes.High.users"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"Volumes.Mid.users"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"Volumes.Low.users"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"Volumes.Inactive.users"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"# Organize into lists"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"from util import login, get_list, list_accounts_addrm\n",
"\n",
"mastodon = login('rw')\n",
"\n",
"for v in (Volumes.High, Volumes.Mid, Volumes.Low):\n",
" list_id = get_list(mastodon, v.name)[\"id\"]\n",
" list_accounts_addrm(mastodon, list_id, v.users.id)"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"# Unfollow some inactive users"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"excluded = ('postmarketOS', 'gokuldas2', 'the_compiler', 'ashwinvis', 'gnome', 'mozilla')\n",
"unfollow = Volumes.Inactive.users.query(f\"username not in {excluded}\")\n",
"unfollow"
],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"unfollow.id.apply(mastodon.account_unfollow)"
],
"outputs": [],
"execution_count": null,
"metadata": {}
}
],
"metadata": {
"kernelspec": {
"name": "py-mastodon-bqk7dv3r",
"language": "python",
"display_name": "py-mastodon-bqK7dv3R"
},
"language_info": {
"name": "python",
"version": "3.7.3",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"kernel_info": {
"name": "py-mastodon-bqk7dv3r"
},
"nteract": {
"version": "0.14.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
mastodon-archive
ipykernel
pandas
matplotlib
#
# This file is autogenerated by pip-compile
# To update, run:
#
# pip-compile
#
asn1crypto==0.24.0 # via cryptography
backcall==0.1.0 # via ipython
certifi==2018.10.15 # via requests
cffi==1.11.5 # via cryptography
chardet==3.0.4 # via requests
cryptography==2.3.1 # via http-ece, mastodon.py
cycler==0.10.0 # via matplotlib
decorator==4.3.0 # via ipython, mastodon.py, traitlets
html2text==2018.1.9 # via mastodon-archive
http-ece==1.0.5 # via mastodon.py
idna==2.7 # via cryptography, requests
ipykernel==5.1.1
ipython-genutils==0.2.0 # via traitlets
ipython==7.6.1 # via ipykernel
jedi==0.14.0 # via ipython
jupyter-client==5.3.1 # via ipykernel
jupyter-core==4.5.0 # via jupyter-client
kiwisolver==1.1.0 # via matplotlib
mastodon-archive==1.1.0
mastodon.py==1.3.1 # via mastodon-archive
matplotlib==3.1.1
numpy==1.16.4 # via matplotlib, pandas
pandas==0.24.2
parso==0.5.0 # via jedi
pexpect==4.7.0 # via ipython
pickleshare==0.7.5 # via ipython
progress==1.4 # via mastodon-archive
prompt-toolkit==2.0.9 # via ipython
ptyprocess==0.6.0 # via pexpect
pycparser==2.19 # via cffi
pygments==2.4.2 # via ipython
pyparsing==2.4.0 # via matplotlib
python-dateutil==2.7.3 # via jupyter-client, mastodon.py, matplotlib, pandas
pytz==2018.5 # via mastodon.py, pandas
pyzmq==18.0.2 # via jupyter-client
requests==2.19.1 # via mastodon.py
six==1.11.0 # via cryptography, cycler, mastodon.py, prompt-toolkit, python-dateutil, traitlets
tornado==6.0.3 # via ipykernel, jupyter-client
traitlets==4.3.2 # via ipykernel, ipython, jupyter-client, jupyter-core
urllib3==1.23 # via requests
wcwidth==0.1.7 # via prompt-toolkit
# The following packages are considered to be unsafe in a requirements file:
# setuptools==41.0.1 # via ipython, kiwisolver
# -*- coding: utf-8 -*-
"""Bin followers by frecency
Part of this code is copied as-is from mastodon_archive.core.login, with
minor modifications to the *.secret filenames. Therefore:
Copyright (C) 2019 Ashwin Vishnu Mohanan <ashwinvis+gh@protonmail.com>
Copyright (C) 2017-2018 Alex Schroeder <alex@gnu.org>
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""
import os
import sys
from dataclasses import dataclass
from mastodon import Mastodon
from mastodon_archive import core
# FIXME: Change username and domain url
# Account this module operates on; both values feed the file names and the
# ``user@domain`` address below.
username = "ashwinvis"
domain = "mastodon.acc.sunet.se"


@dataclass
class args:
    """Stand-in for the argument namespace of the mastodon-archive CLI.

    The attributes carry no annotations, so ``dataclass`` registers no fields
    and they stay plain class attributes; ``login`` reads them directly from
    the class.
    """

    # Account address in ``user@domain`` form, built from the settings above.
    user = username + "@" + domain
    # When true, ``login`` creates the client with ``ratelimit_method="pace"``.
    pace = False
def login(scope="r"):
    """Log in to Mastodon and return an API client.

    Parameters
    ----------
    scope : str
        ``"r"`` delegates to ``mastodon_archive.core.read(args)``. Any other
        value runs the flow below, which requests the ``read``, ``write`` and
        ``follow`` scopes and stores its credentials in ``*.rw.*`` secret
        files in the current directory.

    Returns
    -------
    Whatever ``mastodon_archive.core.read`` returns for ``"r"``; otherwise a
    ``Mastodon`` instance.

    Notes
    -----
    Rebinds the module-level ``username`` and ``domain`` from ``args.user``.
    On first use the read/write flow is interactive: it prints an
    authorization URL and reads the token (and, as a fallback, e-mail and
    password) from standard input.
    """
    global username
    global domain

    # Fixed: the original tested ``len(scope) == "r"``; an int never equals a
    # str, so the read-only branch could never be taken.
    if scope == "r":
        from mastodon_archive.core import read

        return read(args)

    # File names carry an extra ".rw" component compared with the upstream
    # "<domain>.client.secret" / "<domain>.user.<username>.secret" names.
    # NOTE(review): presumably so they do not clash with the read-only
    # secrets written by mastodon-archive itself -- confirm.
    client_secret = domain + ".rw.client.secret"
    user_secret = domain + ".rw.user." + username + ".secret"
    scopes = ["read", "write", "follow"]

    pace = hasattr(args, "pace") and args.pace
    (username, domain) = args.user.split("@")
    url = "https://" + domain
    mastodon = None

    # Register the application once; the client credentials land in
    # ``client_secret``.
    if not os.path.isfile(client_secret):
        print("Registering app")
        Mastodon.create_app(
            "mastodon-organize",
            api_base_url=url,
            scopes=scopes,
            to_file=client_secret,
        )

    if not os.path.isfile(user_secret):
        # No stored user token yet: walk the user through authorization.
        print("This app needs access to your Mastodon account.")
        mastodon = Mastodon(client_id=client_secret, api_base_url=url)
        # ``url`` is reused for the authorization URL; the client above has
        # already been constructed with the instance base URL.
        url = mastodon.auth_request_url(
            client_id=client_secret, scopes=scopes
        )
        print("Visit the following URL and authorize the app:")
        print(url)
        print("Then paste the access token here:")
        token = sys.stdin.readline().rstrip()
        try:
            # on the very first login, --pace has no effect
            mastodon.log_in(code=token, to_file=user_secret, scopes=scopes)
        except Exception:
            # Token login failed: fall back to e-mail/password login.
            print(
                "Sadly, that did not work. On some sites, this login mechanism"
            )
            print(
                "(namely OAuth) seems to be broken. There is an alternative"
            )
            print(
                "if you are willing to trust us with your password just this"
            )
            print("once. We need it just this once in order to get an access")
            print(
                "token. We won't save it. If you don't want to try this, use"
            )
            print(
                "Ctrl+C to quit. If you want to try it, please provide your"
            )
            print("login details.")
            sys.stdout.write("Email: ")
            sys.stdout.flush()
            email = sys.stdin.readline().rstrip()
            sys.stdout.write("Password: ")
            sys.stdout.flush()
            password = sys.stdin.readline().rstrip()
            # on the very first login, --pace has no effect
            mastodon.log_in(
                username=email,
                password=password,
                to_file=user_secret,
                scopes=scopes,
            )
    else:
        if pace:
            # in case the user kept running into a General API problem
            mastodon = Mastodon(
                client_id=client_secret,
                access_token=user_secret,
                api_base_url=url,
                ratelimit_method="pace",
                ratelimit_pacefactor=0.9,
                request_timeout=300,
            )
        else:
            # the defaults are ratelimit_method='wait',
            # ratelimit_pacefactor=1.1, request_timeout=300
            mastodon = Mastodon(
                client_id=client_secret,
                access_token=user_secret,
                api_base_url=url,
            )

    return mastodon
def get_data():
    """Load the archive for the configured account.

    The file name is ``<domain>.user.<username>.json``, built from the
    module-level ``domain`` and ``username``. It is read with
    ``core.load(..., required=True, quiet=True)`` and the result is returned
    unchanged.
    """
    archive_name = f"{domain}.user.{username}.json"
    return core.load(archive_name, required=True, quiet=True)
def get_list(mastodon, title):
    """Return the list called ``title``, creating it if none exists.

    Every entry of ``mastodon.lists()`` whose ``"title"`` equals ``title`` is
    considered; when several match, the last one is returned. When none
    match, the result of ``mastodon.list_create(title)`` is returned instead.
    """
    matches = []
    for entry in mastodon.lists():
        if entry["title"] == title:
            matches.append(entry)

    if not matches:
        return mastodon.list_create(title)
    return matches[-1]
def list_accounts_addrm(mastodon, list_id, account_ids_new):
    """Make a list contain the prescribed accounts.

    Accounts in ``account_ids_new`` that are not yet in the list are added;
    accounts reported as present but absent from ``account_ids_new`` are
    removed.

    Parameters
    ----------
    mastodon
        Client providing ``list_accounts``, ``list_accounts_delete`` and
        ``list_accounts_add``.
    list_id
        Identifier of the list to update.
    account_ids_new : iterable
        Account ids the list should contain; duplicates are collapsed.

    Returns
    -------
    None
    """
    # NOTE(review): only the accounts returned by this single call are taken
    # into account; if the API paginates the result, members beyond the first
    # page would be missed -- confirm.
    accounts_present = mastodon.list_accounts(list_id)
    ids_present = {acct["id"] for acct in accounts_present}
    ids_wanted = set(account_ids_new)

    ids_to_rm = ids_present - ids_wanted
    ids_to_add = ids_wanted - ids_present

    # Pass real lists to the client. Mastodon.py (1.3.x) wraps any non-list
    # argument as a single account id, so handing it a set would send the
    # whole set as one bogus id instead of one id per account.
    if ids_to_rm:
        print("Deleting from list", list_id, ids_to_rm)
        mastodon.list_accounts_delete(list_id, list(ids_to_rm))
    if ids_to_add:
        print("Adding to list", list_id, ids_to_add)
        mastodon.list_accounts_add(list_id, list(ids_to_add))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment