Created
October 4, 2024 09:57
-
-
Save minrk/b768971cd1158ea1ef3809d01ebb9db2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Compare slug changes for kubespawner 7 | |
To be run in an active jupyterhub deployment, via: | |
cat compare_slugs.py | kubectl exec -it $(kubectl get pod -l component=hub -o name) -- python3 - | |
""" | |
import argparse | |
import asyncio | |
import json | |
import sys | |
from dataclasses import dataclass | |
from difflib import unified_diff | |
from pathlib import Path | |
from subprocess import check_call, check_output | |
import yaml | |
import jupyterhub | |
from jupyterhub.utils import maybe_future | |
from packaging.version import parse as _v | |
from jupyterhub import orm | |
from jupyterhub.app import JupyterHub | |
from sqlalchemy import create_engine | |
from sqlalchemy.orm import sessionmaker | |
@dataclass
class MockUser:
    """Minimal stand-in for a JupyterHub user.

    Holds only what this script sets or reads when building spawners:
    a user name, a numeric id (0 unless given) and a ``url`` property.
    """

    name: str
    id: int = 0

    @property
    def url(self):
        """Return the URL path for this user, ``/users/<name>/``."""
        # not really used, but accessed so must be defined
        user_path = f"/users/{self.name}/"
        return user_path
async def _check_pvc_exists(self):
    """Pick the pvc name a spawner should use when a legacy-named pvc may exist.

    Extracted from the handling of existing pvcs in kubespawner's ``_start``
    so this script can apply it to a spawner without starting anything.
    ``self`` is the spawner to inspect; on a match its ``pvc_name`` and
    ``_pvc_exists`` attributes are updated in place. Nothing is returned.
    """
    # should we make this a method, just to enable this script?
    legacy_check_applies = (
        self.handle_legacy_names
        and self.remember_pvc_name
        and not self._pvc_exists
        and self._state_kubespawner_version == "unknown"
    )
    if not legacy_check_applies:
        return

    # pvc name wasn't reliably persisted before kubespawner 7,
    # so if the name changed check if a pvc with the legacy name exists and use it.
    # This will be persisted in state on next launch in the future,
    # so the comparison below will be False for launches after the first.
    # this check will only work if pvc_name_template itself has not changed
    # across the upgrade.
    legacy_pvc_name = self._expand_user_properties(
        self.pvc_name_template, slug_scheme="escape"
    )
    if legacy_pvc_name == self.pvc_name:
        # both schemes produce the same name: nothing to look up
        return

    self.log.debug(
        f"Checking for legacy-named pvc {legacy_pvc_name} for {self.user.name}"
    )

    # if current name exists: use it
    if await self._check_pvc_exists(self.pvc_name, self.namespace):
        self._pvc_exists = True
        return

    # current name doesn't exist, check if legacy name exists
    if await self._check_pvc_exists(legacy_pvc_name, self.namespace):
        # legacy name exists, use it to avoid data loss
        self.log.warning(f"Using legacy pvc {legacy_pvc_name} for {self.user.name}")
        self.pvc_name = legacy_pvc_name
        self._pvc_exists = True
async def compare_user(username, server_name, spawner_state, config):
    """Compare templated fields for a given Spawner.

    Builds two mock KubeSpawners for the same user/server, one with the
    ``escape`` slug scheme ("before") and one with the ``safe`` scheme
    ("after"), then prints every difference found in the templated
    attributes and in the generated pod manifests.

    Args:
        username: name of the user owning the server.
        server_name: name of the server; may be the empty string.
        spawner_state: persisted spawner state loaded into both spawners;
            a falsy value means no state is loaded and the report is
            tagged "(no state)".
        config: configuration object handed to both spawners.

    Returns:
        True if any compared attribute or the pod manifest differs,
        False otherwise.
    """
    from kubespawner import KubeSpawner

    # An empty server name would otherwise leave a dangling "/" in the label;
    # strip it so the entry reads "user" instead of "user/".
    user_server = f"{username}/{server_name}".rstrip("/")
    changed = False

    def announce_change():
        """Print the user/server header once, before the first difference."""
        nonlocal changed
        if not changed:
            print(user_server, "(no state)" if not spawner_state else "")
            changed = True

    def make_spawner(slug_scheme):
        """Build a mock spawner for this user/server with the given scheme."""
        return KubeSpawner(
            _mock=True,
            user=MockUser(name=username),
            name=server_name,
            config=config,
            slug_scheme=slug_scheme,
        )

    async def build_pod(spawner):
        """Return the spawner's pod manifest after applying modify_pod_hook."""
        pod = await spawner.get_pod_manifest()
        if spawner.modify_pod_hook:
            pod = await maybe_future(spawner.modify_pod_hook(spawner, pod))
        return pod

    before = make_spawner("escape")
    after = make_spawner("safe")

    if spawner_state:
        before.load_state(spawner_state)
        after.load_state(spawner_state)

    if after.storage_pvc_ensure:
        # handle pvc name change
        await _check_pvc_exists(before)
        await _check_pvc_exists(after)

    to_check = ["pod_name", "namespace", "working_dir"]
    if after.storage_pvc_ensure:
        to_check.append("pvc_name")

    for attr in to_check:
        before_attr = getattr(before, attr)
        after_attr = getattr(after, attr)
        if after_attr == before_attr:
            continue
        announce_change()
        print(" " + attr, "before:", before_attr)
        # pad so that "after:" lines up under "before:"
        print(" " * (len(attr) + 2), " after:", after_attr)

    # next, diff the actual pods themselves
    before_pod = await build_pod(before)
    after_pod = await build_pod(after)
    if before_pod != after_pod:
        announce_change()
        # only serialize the manifests when there is a diff to show
        bp = yaml.dump(before_pod.to_dict())
        ap = yaml.dump(after_pod.to_dict())
        print(
            "".join(
                unified_diff(
                    bp.splitlines(True),
                    ap.splitlines(True),
                    f"{user_server} before",
                    f"{user_server} after",
                )
            )
        )
    return changed
def make_env():
    """Create an env and inject it on sys.path.

    If ``/tmp/test-env`` does not exist yet, a venv is created there with
    the running interpreter and ``jupyterhub-kubespawner>=7.0.0b2`` is
    pip-installed into it. The venv interpreter's ``sys.path`` is then
    prepended to this process's ``sys.path``.

    Returns:
        Path to the venv's python executable.
    """
    env_path = Path("/tmp/test-env")
    env_py = env_path / "bin" / "python"
    venv_python = str(env_py)

    if not env_path.exists():
        create_cmd = [sys.executable, "-m", "venv", str(env_path)]
        check_call(create_cmd, stdin=None)

        install_cmd = [
            venv_python,
            "-m",
            "pip",
            "install",
            "-q",
            "jupyterhub-kubespawner>=7.0.0b2",
        ]
        check_call(install_cmd, stdin=None)

    # inject kubespawner env to front of sys.path
    # this allows us to continue without launching a new process
    # which helps when piping this script via `kubectl exec`
    dump_sys_path = "import sys, json; print(json.dumps(sys.path))"
    venv_path = check_output([venv_python, "-c", dump_sys_path], text=True)
    sys.path[:0] = json.loads(venv_path)
    return env_py
def compare_users(config_file):
    """Report changes in user pods

    Loads real jupyterhub config and users from the database

    For each user/server, compare differences between 'escape' slug scheme
    and new 'safe' slug scheme.

    Does not actually compare with the old kubespawner version

    Args:
        config_file: path of the jupyterhub config file to load.

    Prints a per-user report (via ``compare_user``) followed by a summary
    line with the number of changed entries; returns nothing.
    """
    hub = JupyterHub()
    hub.load_config_file(config_file)

    engine = create_engine(hub.db_url, **hub.db_kwargs)
    db = sessionmaker(engine)()

    def no_commit():
        raise RuntimeError("this should be read only")

    # make any attempt to commit through this session fail loudly
    db.commit = no_commit

    total_count = changed_count = 0
    try:
        # outer join: users without any spawner row are still listed,
        # with server_name and spawner_state both None
        rows = db.query(
            orm.User.name, orm.Spawner.name, orm.Spawner.state
        ).outerjoin(orm.Spawner, orm.Spawner.user_id == orm.User.id)
        for username, server_name, spawner_state in rows:
            if server_name is None:
                server_name = ""
            changed = asyncio.run(
                compare_user(username, server_name, spawner_state or {}, hub.config)
            )
            total_count += 1
            changed_count += changed
    finally:
        # release the session and its connections even if a comparison fails
        db.close()
        engine.dispose()

    print(f"{changed_count}/{total_count} user pods changed")
def main():
    """Command-line entry point.

    Parses ``-f <config_file>`` (default ``jupyterhub_config.py``), makes
    sure kubespawner 7 is importable, creating a venv for it when the
    installed version is missing or older, and then runs the comparison
    for every user.
    """
    # Look up the installed distribution's metadata instead of importing
    # kubespawner: importing an older copy here would leave it cached in
    # sys.modules, ahead of the venv that make_env() puts on sys.path.
    from importlib.metadata import PackageNotFoundError, version

    parser = argparse.ArgumentParser()
    parser.add_argument("-f", dest="config_file", default="jupyterhub_config.py")
    args = parser.parse_args()
    config_file = Path(args.config_file).resolve()

    try:
        kubespawner_version = _v(version("jupyterhub-kubespawner"))
    except PackageNotFoundError:
        kubespawner_version = None

    # The requirement is on kubespawner, not on jupyterhub itself.
    if kubespawner_version is None or kubespawner_version < _v("7.0"):
        print("Creating env for kubespawner 7")
        # add kubespawner 7 in a venv
        make_env()

    print(f"Comparing users for {config_file}")
    compare_users(config_file)
if __name__ == "__main__":
    # run the comparison only when executed as a script (including when
    # piped to `python3 -`), not when imported
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment