Skip to content

Instantly share code, notes, and snippets.

@consideRatio
Last active September 29, 2023 14:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save consideRatio/7b5b8e65f0e90b3c56b5eff3a4038560 to your computer and use it in GitHub Desktop.
Save consideRatio/7b5b8e65f0e90b3c56b5eff3a4038560 to your computer and use it in GitHub Desktop.
# This is a JupyterHub Helm chart (z2jh) configuration file that:
#
# - injects a Python script to /tmp/cleanup-orphaned-pods.py via hub.extraFiles
# - defines a managed JupyterHub service to run the Python script with
# permissions to ask the JupyterHub REST API about what users' servers are active
#
# It was developed to help clean up user server pods that could end up orphaned
# by JupyterHub using KubeSpawner 5.0-6.0 or z2jh version 3.0. For more
# information, visit this forum post:
# https://discourse.jupyter.org/t/how-to-cleanup-orphaned-user-pods-after-bug-in-z2jh-3-0-and-kubespawner-6-0/21677
#
hub:
# The extraConfig entry below is a Python snippet that registers the cleanup
# script as a JupyterHub service and assigns that service a role.
extraConfig:
cleanup-orphaned-pods: |
import os
import sys
# Run the injected script with the same Python interpreter that evaluates
# this config, passing along the environment variables listed below.
# POD_NAMESPACE and HELM_RELEASE_NAME are read by the script itself; the
# KUBERNETES_SERVICE_* variables are presumably needed by the in-cluster
# Kubernetes client configuration - confirm.
c.JupyterHub.services.append({
"name": "cleanup-orphaned-pods",
"command": [sys.executable, "/tmp/cleanup-orphaned-pods.py"],
"environment": {
"POD_NAMESPACE": os.environ["POD_NAMESPACE"],
"HELM_RELEASE_NAME": os.environ["HELM_RELEASE_NAME"],
"KUBERNETES_SERVICE_HOST": os.environ["KUBERNETES_SERVICE_HOST"],
"KUBERNETES_SERVICE_PORT": os.environ["KUBERNETES_SERVICE_PORT"],
},
})
# Role listing the scopes assigned to the service; the script uses the
# JupyterHub REST API to list users together with their servers.
c.JupyterHub.load_roles.append({
"name": "cleanup-orphaned-pods",
"scopes": ["list:users", "read:servers"],
"services": ["cleanup-orphaned-pods"],
})
# Provides the script at the path referenced by the service command above.
extraFiles:
cleanup-orphaned-pods:
mountPath: /tmp/cleanup-orphaned-pods.py
stringData: |
"""
Cleanup orphaned user server pods
Compares JupyterHub API list of running servers to list of running pods
in kubernetes in order to identify discrepancies.
This script is to be used once as a managed JupyterHub service by a z2jh
deployment of version 3.1 or later, as could be needed if the z2jh deployment
has at some point been running version 3.0 - this could have led to orphaned
user server pods.
More information, including how to run this, is available at
https://discourse.jupyter.org/t/how-to-cleanup-orphaned-user-pods-after-bug-in-z2jh-3-0-and-kubespawner-6-0/21677
"""
import asyncio
import json
import os
import logging
from urllib.parse import urlencode
from tornado.httpclient import AsyncHTTPClient
from kubernetes_asyncio import client, config
# Configure the root logger at INFO level so the progress messages logged
# below via log.info are emitted.
logging.basicConfig(level=logging.INFO)
# Module-wide logger, named after this script's file path.
log = logging.getLogger(__file__)
async def get_running_servers(api_url, api_token):
    """Get users' running servers using JupyterHub's REST API.

    Requests the ``/users`` endpoint with ``state=active`` page by page and
    collects every server listed on the returned users.

    Args:
        api_url: Base URL of the JupyterHub REST API; a trailing slash is
            tolerated.
        api_token: Token sent as a ``Bearer`` Authorization header.

    Returns:
        A dict mapping ``"<username>/<servername>"`` to the server model
        returned by the API. This is the same key format that
        ``get_user_pods`` uses, so the two results can be compared directly.
    """
    AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
    http_client = AsyncHTTPClient()

    users_url = api_url.rstrip("/") + "/users"
    headers = {
        "Authorization": f"Bearer {api_token}",
        # Ask for the paginated response format ("items" + "_pagination").
        "Accept": "application/jupyterhub-pagination+json",
    }

    running = {}
    params = {"state": "active", "limit": 200, "offset": 0}
    while True:
        url = users_url + "?" + urlencode(params)
        r = await http_client.fetch(url, headers=headers)
        page = json.loads(r.body)

        for user in page["items"]:
            for server_name, server in user["servers"].items():
                running[f"{user['name']}/{server_name}"] = server

        next_page = page["_pagination"]["next"]
        if not next_page:
            break

        # Carry over only the paging fields. The "next" object also holds a
        # ready-made "url"; merging the whole object into the query (as was
        # done before) sent a spurious, ever-growing "url=" parameter with
        # each subsequent request.
        params["offset"] = next_page["offset"]
        if "limit" in next_page:
            params["limit"] = next_page["limit"]

    return running
async def get_user_pods(api_client, namespace, helm_release_name):
    """Get users' server pods running in Kubernetes.

    Args:
        api_client: Object with an awaitable ``list_namespaced_pod`` method;
            ``main`` passes a ``kubernetes_asyncio`` ``CoreV1Api`` instance.
        namespace: Namespace to list pods in.
        helm_release_name: Value of the ``release`` label the pods must have.

    Returns:
        A dict mapping ``"<username>/<servername>"`` (servername is ``""``
        when the pod has no servername annotation) to the pod as a plain
        dict decoded from the API's JSON response. Pods without a
        ``hub.jupyter.org/username`` annotation are skipped with a warning,
        because they cannot be matched to a JupyterHub server.
    """
    label_selector = f"release={helm_release_name},component=singleuser-server"
    kwargs = {
        "label_selector": label_selector,
        # Get the raw response so it can be decoded as plain JSON below
        # instead of being turned into client model objects.
        "_preload_content": False,
    }
    r = await api_client.list_namespaced_pod(namespace, **kwargs)
    pods = json.loads(await r.read())["items"]

    user_pods = {}
    for pod in pods:
        metadata = pod["metadata"]
        # "annotations" may be absent or null on a pod; treat both as empty
        # rather than letting one odd pod abort the whole listing.
        annotations = metadata.get("annotations") or {}
        username = annotations.get("hub.jupyter.org/username")
        if username is None:
            # Same logger name as the module-level ``log``.
            logging.getLogger(__file__).warning(
                "Ignoring pod %s: it has no hub.jupyter.org/username annotation",
                metadata.get("name"),
            )
            continue
        servername = annotations.get("hub.jupyter.org/servername", "")
        user_pods[f"{username}/{servername}"] = pod
    return user_pods
async def main():
    """Delete user server pods that JupyterHub has no matching server for.

    Reads POD_NAMESPACE, HELM_RELEASE_NAME, JUPYTERHUB_API_URL and
    JUPYTERHUB_API_TOKEN from the environment, lists the user server pods in
    Kubernetes and the active servers known to JupyterHub, and deletes every
    pod whose "<username>/<servername>" key has no matching server. A failed
    deletion is logged and does not stop the remaining deletions.

    Raises:
        KeyError: If one of the required environment variables is not set.
    """
    namespace = os.environ["POD_NAMESPACE"]
    helm_release_name = os.environ["HELM_RELEASE_NAME"]
    api_url = os.environ["JUPYTERHUB_API_URL"]
    api_token = os.environ["JUPYTERHUB_API_TOKEN"]

    config.load_incluster_config()
    k8s_api_client = client.CoreV1Api()
    try:
        # Pods are listed before servers: a server that starts in between
        # the two calls then has no pod in `pods`, so it cannot be reported
        # as an orphan.
        pods = await get_user_pods(k8s_api_client, namespace, helm_release_name)
        servers = await get_running_servers(api_url, api_token)
        orphaned_pods = set(pods).difference(servers)

        log.info(f"Found {len(servers)} active user servers according to JupyterHub")
        log.info(f"Found {len(pods)} active user server pods according to Kubernetes")
        log.info(f"{len(orphaned_pods)} user server pods are orphaned")

        # Report every orphan first, then delete, so the full list is in the
        # log even if deletion is interrupted. Sorted for stable log output.
        pod_names = []
        for server_name in sorted(orphaned_pods):
            pod_name = pods[server_name]["metadata"]["name"]
            pod_names.append(pod_name)
            log.info(f"Found orphaned pod {pod_name} for {server_name}")

        for pod in pod_names:
            try:
                await k8s_api_client.delete_namespaced_pod(pod, namespace)
            except Exception:
                # Best effort: record why this pod could not be deleted and
                # carry on. Cancellation and KeyboardInterrupt are not
                # caught here and propagate.
                log.warning(f"Failed to delete orphaned pod {pod}", exc_info=True)
            else:
                log.info(f"Successfully deleted orphaned pod {pod}")

        log.info("Cleanup of orphaned pods complete.")
    finally:
        # Close the client's HTTP session even when listing or deleting fails.
        await k8s_api_client.api_client.close()
if __name__ == "__main__":
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # Keep a reference to the task: the event loop itself only holds weak
    # references to tasks, so an unreferenced task may be garbage collected
    # before it has finished.
    main_task = loop.create_task(main())
    # The loop keeps running after main() has finished, so the process does
    # not exit on its own. NOTE(review): presumably intended so that
    # JupyterHub does not see the managed service exit and restart it -
    # confirm.
    loop.run_forever()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment