Skip to content

Instantly share code, notes, and snippets.

@ndricca
Last active July 5, 2022 09:35
Show Gist options
  • Save ndricca/f967b50af2450d61f48a341d97f61a93 to your computer and use it in GitHub Desktop.
Save ndricca/f967b50af2450d61f48a341d97f61a93 to your computer and use it in GitHub Desktop.
Find which jupyter notebook is using more memory on server

How to find out which jupyter notebook is using most of memory on a server

UPDATE 2022-07: see python script attached

  1. Find the PIDs responsible for the most memory usage: ps aux --sort=-%mem | head

  2. Look at the name of the JSON file used in the Jupyter kernel launcher command (usually kernel-${kernel_id}.json) and extract `${kernel_id}`

  3. Go to the Jupyter server endpoint /api/sessions

  4. The endpoint exposes a JSON array of structures: look for the structure containing the ${kernel_id} found in step 2. The same structure also contains the name of the corresponding notebook:

[
  ...,
  {
    ...,
    "kernel": {
      "id": ${kernel_id},
      ...
      },
    "notebook": {
      "path": ${HERE_IS_THE_PATH_TO_FILE_NAME_OF_THE_NOTEBOOK},
      ...,
      },
  ...
  }
  ...
]
import os
import json
from notebook.notebookapp import list_running_servers
import psutil
import requests
# Number of kernel processes (ranked by memory usage) shown in the report.
TOP_N_MEMORY=5
def is_jupyter_proc(cmdline_list):
    """Tell whether a process command line belongs to an IPython kernel.

    Args:
        cmdline_list: The process arguments as a list of strings. ``None``
            and an empty list are accepted and mean "not a kernel".

    Returns:
        True when the space-joined command line contains
        ``-m ipykernel_launcher``, False otherwise.
    """
    # psutil may hand back None instead of a list when the command line of a
    # process cannot be read, so guard before joining.
    if not cmdline_list:
        return False
    return "-m ipykernel_launcher" in " ".join(cmdline_list)
def get_kernel_id(cmdline_list):
    """Extract the kernel id from a kernel process command line.

    The connection file is expected to be named ``kernel-<kernel_id>.json``.
    The value following the ``-f`` flag is used when present, so extra
    arguments after the connection file do not confuse the lookup; otherwise
    the last argument is used.

    Args:
        cmdline_list: The process arguments as a non-empty list of strings.

    Returns:
        The connection file name with ``kernel-`` and ``.json`` removed.

    Raises:
        IndexError: If ``cmdline_list`` is empty.
    """
    connection_file = cmdline_list[-1]
    # Pair every argument with its successor to find the value given to "-f".
    for flag, value in zip(cmdline_list, cmdline_list[1:]):
        if flag == "-f":
            connection_file = value
            break
    file_name = os.path.basename(connection_file)
    return file_name.replace('kernel-', '').replace('.json', '')
def get_sessions():
    """Collect the sessions of every running Jupyter server.

    Each server returned by ``list_running_servers()`` is asked for its
    ``api/sessions`` listing, authenticating with the server token when one
    is available. Servers that cannot be queried are skipped with a warning
    so that one unreachable server does not hide the sessions of the others.

    Returns:
        A list with the session structures of all servers that answered;
        empty when no server is running or none could be queried.
    """
    import warnings

    sessions = []
    for server in list_running_servers():
        url = requests.compat.urljoin(server["url"], "api/sessions")
        try:
            # The timeout keeps a hung server from blocking the report.
            res = requests.get(url,
                               params={"token": server.get("token", "")},
                               timeout=10)
            # Fail on 4xx/5xx instead of parsing an error payload as sessions.
            res.raise_for_status()
            sessions.extend(json.loads(res.text))
        except (requests.RequestException, ValueError) as err:
            warnings.warn("Could not read sessions from {}: {}".format(url, err))
    return sessions
def kernel_id_to_notebook_name(kernel_id, sessions):
    """Return the notebook path of the session running the given kernel.

    Args:
        kernel_id: Kernel id to look for.
        sessions: Iterable of session dicts; each is expected to carry the
            kernel id under ``['kernel']['id']``.

    Returns:
        The ``['notebook']['path']`` value of the first matching session.
        When the session has no ``notebook`` entry, its top-level ``path``
        is returned instead (presumably the case for non-notebook sessions
        such as consoles -- confirm against the server's sessions API).

    Raises:
        LookupError: If no session with a usable path matches ``kernel_id``.
    """
    for session in sessions:
        kernel = session.get('kernel') or {}
        if kernel.get('id') != kernel_id:
            continue
        notebook = session.get('notebook') or {}
        path = notebook.get('path') or session.get('path')
        if path is not None:
            return path
    # An explicit error is clearer than the StopIteration a bare next() leaks.
    raise LookupError("no session found for kernel id {!r}".format(kernel_id))
def main():
    """Print the kernel processes using the most memory and their notebooks.

    Scans all processes, keeps those recognised by ``is_jupyter_proc``, ranks
    them by ``memory_percent`` and prints the top ``TOP_N_MEMORY`` together
    with the notebook resolved through the running server sessions.
    """
    print("Jupyter Notebooks with largest memory usage (top {})".format(TOP_N_MEMORY))
    attrs = ['pid', 'cmdline', 'memory_percent']
    # "or []" shields the filter from processes whose command line psutil
    # could not read and reported as None.
    jupyter_procs = {
        proc.pid: proc.info
        for proc in psutil.process_iter(attrs)
        if is_jupyter_proc(proc.info['cmdline'] or [])
    }
    # A missing memory figure sorts as 0 instead of breaking the comparison.
    top_n_procs = sorted(
        jupyter_procs.items(),
        key=lambda item: item[1]['memory_percent'] or 0.0,
        reverse=True,
    )[:TOP_N_MEMORY]
    sessions = get_sessions()
    for pid, infos in top_n_procs:
        try:
            name = kernel_id_to_notebook_name(get_kernel_id(infos['cmdline']), sessions)
        except (StopIteration, LookupError):
            # A kernel without a matching session (e.g. one owned by another
            # server) must not abort the report for the remaining kernels.
            name = "<unknown>"
        print("PID: {} - MEM%: {}% - NAME: {}".format(
            pid,
            round(infos['memory_percent'] or 0.0, 2),
            name
        ))


# FIND PROCESSES WITH LARGEST MEMORY USAGE
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment