Skip to content

Instantly share code, notes, and snippets.

@minrk
Created October 4, 2024 09:57
Show Gist options
  • Save minrk/b768971cd1158ea1ef3809d01ebb9db2 to your computer and use it in GitHub Desktop.
Save minrk/b768971cd1158ea1ef3809d01ebb9db2 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Compare slug changes for kubespawner 7
To be run in an active jupyterhub deployment, via:
cat compare_slugs.py | kubectl exec -it $(kubectl get pod -l component=hub -o name) -- python3 -
"""
import argparse
import asyncio
import json
import sys
from dataclasses import dataclass
from difflib import unified_diff
from pathlib import Path
from subprocess import check_call, check_output
import yaml
import jupyterhub
from jupyterhub.utils import maybe_future
from packaging.version import parse as _v
from jupyterhub import orm
from jupyterhub.app import JupyterHub
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
@dataclass
class MockUser:
    """Lightweight stand-in for a user object, passed as ``user=`` to a Spawner.

    Holds a name and a numeric id (0 unless given) and derives ``url`` from
    the name.
    """

    name: str
    id: int = 0

    @property
    def url(self) -> str:
        """URL prefix for this user.

        Not really used, but it is accessed, so it must be defined.
        """
        return f"/users/{self.name}/"
async def _check_pvc_exists(self):
    """Decide whether a spawner should keep its current pvc name or a legacy one.

    ``self`` is a spawner instance (this is a module-level function, not a
    method). When legacy-name handling applies and the legacy-scheme name
    differs from the current one, the current name is probed first and the
    legacy name second; the first one found sets ``self._pvc_exists`` and,
    for the legacy name, also replaces ``self.pvc_name``.

    Returns:
        None. The outcome is recorded on ``self``.
    """
    # extract handling of existing pvcs from kubespawner._start
    # should we make this a method, just to enable this script?
    legacy_lookup_applies = (
        self.handle_legacy_names
        and self.remember_pvc_name
        and not self._pvc_exists
        and self._state_kubespawner_version == "unknown"
    )
    if not legacy_lookup_applies:
        return

    # pvc name wasn't reliably persisted before kubespawner 7,
    # so if the name changed check if a pvc with the legacy name exists and use it.
    # This will be persisted in state on next launch in the future,
    # so the comparison below will be False for launches after the first.
    # this check will only work if pvc_name_template itself has not changed across the upgrade.
    legacy_pvc_name = self._expand_user_properties(
        self.pvc_name_template, slug_scheme="escape"
    )
    if legacy_pvc_name == self.pvc_name:
        # name is unaffected by the scheme change: nothing to look up
        return

    self.log.debug(
        f"Checking for legacy-named pvc {legacy_pvc_name} for {self.user.name}"
    )
    if await self._check_pvc_exists(self.pvc_name, self.namespace):
        # if current name exists: use it
        self._pvc_exists = True
        return

    # current name doesn't exist, check if legacy name exists
    if await self._check_pvc_exists(legacy_pvc_name, self.namespace):
        # legacy name exists, use it to avoid data loss
        self.log.warning(f"Using legacy pvc {legacy_pvc_name} for {self.user.name}")
        self.pvc_name = legacy_pvc_name
        self._pvc_exists = True
async def compare_user(username, server_name, spawner_state, config):
    """Compare templated fields for a given Spawner.

    Builds two mock KubeSpawners for the same user/server, one with
    ``slug_scheme="escape"`` and one with ``slug_scheme="safe"``, loads the
    same persisted state into both, and prints every difference found in the
    templated attributes and in the generated pod manifests.

    Args:
        username: name of the user owning the server.
        server_name: name of the server ("" for the default server).
        spawner_state: persisted spawner state; falsy means there is none.
        config: configuration object handed to both spawners.

    Returns:
        True if anything differed between the two schemes, else False.
    """
    # imported here rather than at module level, so it resolves against
    # whatever sys.path looks like by the time the comparison runs
    from kubespawner import KubeSpawner

    # the default server has an empty name: drop the dangling "/" so the
    # label reads "user" instead of "user/"
    user_server = f"{username}/{server_name}".rstrip("/")

    def make_spawner(slug_scheme):
        # identical spawners apart from the slug scheme under comparison
        return KubeSpawner(
            _mock=True,
            user=MockUser(name=username),
            name=server_name,
            config=config,
            slug_scheme=slug_scheme,
        )

    async def build_pod(spawner):
        # pod manifest as it would be submitted, including modify_pod_hook
        pod = await spawner.get_pod_manifest()
        if spawner.modify_pod_hook:
            pod = await maybe_future(spawner.modify_pod_hook(spawner, pod))
        return pod

    changed = False

    def report_header():
        # print the user/server heading once, ahead of the first difference
        nonlocal changed
        if not changed:
            print(user_server, "(no state)" if not spawner_state else "")
            changed = True

    before = make_spawner("escape")
    after = make_spawner("safe")

    if spawner_state:
        before.load_state(spawner_state)
        after.load_state(spawner_state)
    if after.storage_pvc_ensure:
        # handle pvc name change
        await _check_pvc_exists(before)
        await _check_pvc_exists(after)

    to_check = ["pod_name", "namespace", "working_dir"]
    if after.storage_pvc_ensure:
        to_check.append("pvc_name")

    for attr in to_check:
        before_attr = getattr(before, attr)
        after_attr = getattr(after, attr)
        if after_attr == before_attr:
            continue
        report_header()
        # two-space indent matches the len(attr) + 2 padding on the next
        # line, so "before:" and " after:" line up
        print("  " + attr, "before:", before_attr)
        print(" " * (len(attr) + 2), " after:", after_attr)

    # next, diff the actual pods themselves
    before_pod = await build_pod(before)
    after_pod = await build_pod(after)
    if before_pod != after_pod:
        report_header()
        # only serialize when there is actually a diff to show
        bp = yaml.dump(before_pod.to_dict())
        ap = yaml.dump(after_pod.to_dict())
        print(
            "".join(
                unified_diff(
                    bp.splitlines(True),
                    ap.splitlines(True),
                    f"{user_server} before",
                    f"{user_server} after",
                )
            )
        )
    return changed
def make_env():
    """Create an env and inject it on sys.path.

    Ensures a virtualenv exists at ``/tmp/test-env`` with
    ``jupyterhub-kubespawner>=7.0.0b2`` installed, then prepends that env's
    ``sys.path`` entries to this process's ``sys.path``.

    Returns:
        Path to the env's python executable.

    Raises:
        subprocess.CalledProcessError: if creating the env, installing into
            it, or querying its ``sys.path`` fails.
    """
    env_path = Path("/tmp/test-env")
    env_py = env_path / "bin" / "python"
    # key on the interpreter rather than the directory, so a directory left
    # behind by an interrupted run still gets a working venv
    if not env_py.exists():
        check_call([sys.executable, "-m", "venv", str(env_path)], stdin=None)
    # install unconditionally: if an earlier run created the venv but failed
    # before (or during) the install, skipping here would leave the env
    # permanently without kubespawner. pip is a no-op when already satisfied.
    check_call(
        [
            str(env_py),
            "-m",
            "pip",
            "install",
            "-q",
            "jupyterhub-kubespawner>=7.0.0b2",
        ],
        stdin=None,
    )
    # inject kubespawner env to front of sys.path
    # this allows us to continue without launching a new process
    # which helps when piping this script via `kubectl exec`
    venv_path = check_output(
        [str(env_py), "-c", "import sys, json; print(json.dumps(sys.path))"], text=True
    )
    sys.path[:0] = json.loads(venv_path)
    return env_py
def compare_users(config_file):
    """Report changes in user pods.

    Loads real jupyterhub config and users from the database.
    For each user/server, compare differences between 'escape' slug scheme
    and new 'safe' slug scheme, then print how many user pods changed.

    Does not actually compare with the old kubespawner version.

    Args:
        config_file: path of the jupyterhub config file to load.
    """
    hub = JupyterHub()
    hub.load_config_file(config_file)
    engine = create_engine(hub.db_url, **hub.db_kwargs)
    db = sessionmaker(engine)()

    def no_commit():
        raise RuntimeError("this should be read only")

    # make any attempt to commit through this session fail loudly
    db.commit = no_commit

    total_count = changed_count = 0
    # outer join: users without any Spawner row still produce one result,
    # with server name and state both None
    user_servers = db.query(
        orm.User.name, orm.Spawner.name, orm.Spawner.state
    ).outerjoin(orm.Spawner, orm.Spawner.user_id == orm.User.id)
    try:
        for username, server_name, spawner_state in user_servers:
            if server_name is None:
                server_name = ""
            changed = asyncio.run(
                compare_user(username, server_name, spawner_state or {}, hub.config)
            )
            total_count += 1
            changed_count += changed
    finally:
        # release the database connection even if a comparison raises
        db.close()
        engine.dispose()
    print(f"{changed_count}/{total_count} user pods changed")
def _installed_kubespawner_version():
    """Return the installed kubespawner version, or None if it cannot be determined.

    Reads distribution metadata instead of importing ``kubespawner``: an
    imported module stays cached in ``sys.modules``, so importing an old
    version here would shadow a newer one added to ``sys.path`` afterwards.
    """
    try:
        from importlib.metadata import PackageNotFoundError, version
    except ImportError:
        # importlib.metadata needs Python 3.8+
        return None
    try:
        return _v(version("jupyterhub-kubespawner"))
    except PackageNotFoundError:
        return None


def main():
    """Command-line entry point.

    Accepts ``-f CONFIG_FILE`` (default ``jupyterhub_config.py``). If
    kubespawner 7 is not already installed, a venv providing it is created
    first; then all users found via that config are compared.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", dest="config_file", default="jupyterhub_config.py")
    args = parser.parse_args()
    config_file = Path(args.config_file).resolve()

    # the requirement is on kubespawner, not on jupyterhub itself
    kubespawner_version = _installed_kubespawner_version()
    if kubespawner_version is None or kubespawner_version < _v("7.0"):
        print("Creating env for kubespawner 7")
        # add kubespawner 7 in a venv
        make_env()
    print(f"Comparing users for {config_file}")
    compare_users(config_file)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment