Skip to content

Instantly share code, notes, and snippets.

@tsvikas
Last active January 18, 2023 21:54
Show Gist options
  • Save tsvikas/378edcaea2610489d40ee3f3e2f7e06e to your computer and use it in GitHub Desktop.
Save tsvikas/378edcaea2610489d40ee3f3e2f7e06e to your computer and use it in GitHub Desktop.
manage jupyter kernels using poetry
#!/usr/bin/env python3
import getpass
import json
import re
import shutil
import socket
import subprocess
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import List
# TODO: change click to typer
import click
from git import Repo, Actor, InvalidGitRepositoryError
# TODO: update to ipykernel ^6
# Version constraint used for the ipykernel dev-dependency that `create`
# passes to `poetry init` for every new kernel.
IPYKERNEL_VERSION = "^5.0.0"
@dataclass
class Settings:
    """Configuration shared by all sub-commands.

    Raises:
        ValueError: from ``__post_init__`` when ``kernel_basedir`` or
            ``jupyter_dir`` does not exist, or when ``name`` is empty/None.
    """

    # Directory whose sub-directories are the individual kernel projects.
    kernel_basedir: Path
    # Directory used as the working directory for `poetry run jupyter ...`.
    jupyter_dir: Path
    # Name and e-mail used to build the git author for generated commits.
    name: str = field(repr=False)
    email: str = field(repr=False)
    # Built in __post_init__ from name and email.
    author: Actor = field(init=False)
    # Sub-directory names of kernel_basedir that are never treated as kernels.
    subdirs_to_ignore: List[str] = field(default_factory=lambda: ["bin"])

    def __post_init__(self):
        # Explicit raises rather than ``assert``: assertions are stripped when
        # Python runs with -O, which would silently skip this validation.
        if not self.kernel_basedir.exists():
            raise ValueError(f"kernel_basedir {self.kernel_basedir} does not exist")
        if not self.jupyter_dir.exists():
            raise ValueError(f"jupyter_dir {self.jupyter_dir} does not exist")
        # TODO: check that jupyter_dir can "poetry run jupyter/ipython"
        # TODO: check that poetry version >= 1
        if not self.name:
            raise ValueError("name must not be empty")
        self.author = Actor(self.name, self.email)

    @property
    def all_kernel_dirs(self):
        """Return the sorted list of kernel directories under kernel_basedir.

        A sub-directory is skipped when its name is listed in
        ``subdirs_to_ignore`` or when it contains an entry named
        ``do-not-update``.
        """
        return [
            kernel_dir
            for kernel_dir in sorted(self.kernel_basedir.glob("*/"))
            if kernel_dir.is_dir()
            and (kernel_dir.name not in self.subdirs_to_ignore)
            and (kernel_dir != self.kernel_basedir)
            and (not (kernel_dir / "do-not-update").exists())
        ]
@click.group()
@click.option(
    "kernel_basedir",
    "-k",
    type=click.Path(exists=True, file_okay=False, writable=True),
    default=Path.home() / "kernels",
)
@click.option(
    "jupyter_dir",
    "-j",
    type=click.Path(exists=True, file_okay=False),
    default=Path.home() / "jupyter-venv",
)
@click.option("--name", "-n", envvar="USER")
@click.option("--email", "-e", default=f"{getpass.getuser()}@{socket.gethostname()}")
@click.pass_context
def kernel(
    ctx,
    kernel_basedir: Path,
    jupyter_dir: Path,
    name: str,
    email: str,
):
    """manage jupyter kernels using poetry"""
    # Collect the group-level options into one Settings object and store it on
    # the click context, where the sub-commands pick it up.
    settings = Settings(
        kernel_basedir=Path(kernel_basedir),
        jupyter_dir=Path(jupyter_dir),
        name=name,
        email=email,
    )
    ctx.obj = settings
# Decorator that passes the Settings object stored on the click context
# as the first argument of a sub-command.
pass_settings = click.make_pass_decorator(Settings)
@kernel.command()
@click.argument("kernel-name", type=str)
@click.option(
    "packages",
    "-a",
    "--add",
    type=str,
    multiple=True,
    metavar="PACKAGE",
    help=(
        "Package to require with an optional version constraint, "
        "e.g. requests:^2.10.0 or requests=2.11.1 "
        "(multiple values allowed)"
    ),
)
@pass_settings
def create(settings: Settings, kernel_name: str, packages: List[str]):
    """create a new kernel.
    create a new kernel named KERNEL_NAME, manage it with git & poetry,
    and register it with jupyter
    """
    # Guard clauses: refuse an empty name or an already existing directory.
    if not kernel_name:
        raise click.ClickException("empty kernel_name")
    kernel_dir = settings.kernel_basedir / kernel_name
    if kernel_dir.exists():
        raise click.ClickException(f"{kernel_dir} exists")

    # New directory holding a fresh git repository.
    kernel_dir.mkdir(exist_ok=False)
    repo = Repo.init(kernel_dir)

    # Write pyproject.toml with the requested packages plus ipykernel.
    dependency_args = [
        arg for package in packages for arg in ("--dependency", package)
    ]
    init_cmd = [
        "poetry",
        "init",
        "--name",
        kernel_dir.name,
        "--author",
        settings.author.name,
        "--description",
        "ipython-kernel [auto-generated]",
        *dependency_args,
        f"--dev-dependency=ipykernel:{IPYKERNEL_VERSION}",
        "--no-interaction",
        "--quiet",
    ]
    subprocess.run(init_cmd, check=True, cwd=kernel_dir)
    repo.index.add(["pyproject.toml"])
    repo.index.commit("poetry init [auto-generated]", author=settings.author)

    # Install the project and commit the resulting lock file.
    # TODO: "pip install -U pip wheel setuptools"
    #       after creating the venv but before installing anything
    #       (is it really needed?)
    subprocess.run(["poetry", "install"], check=True, cwd=kernel_dir)
    repo.index.add(["poetry.lock"])
    repo.index.commit("poetry install [auto-generated]", author=settings.author)

    # Register the kernel spec for the current user.
    register_cmd = [
        "poetry",
        "run",
        "ipython",
        "kernel",
        "install",
        "--user",
        "--name",
        kernel_dir.name,
        "--display-name",
        f"{kernel_dir.name} (Poetry)",
    ]
    subprocess.run(register_cmd, check=True, cwd=kernel_dir)

    # Everything generated above should have been committed by now.
    if repo.is_dirty():
        raise RuntimeError("Dirty git repo after create")
@kernel.command()
@click.argument("kernel-names", type=str, nargs=-1)
@click.option("all_kernels", "--all", is_flag=True, help="update all kernels")
@click.option("--show-epilog/--no-show-epilog", help="show outdated epilog per kernel")
@pass_settings
def update(
    settings: Settings, kernel_names: List[str], all_kernels: bool, show_epilog: bool
):
    """auto-update kernels."""
    # --all and explicit names are mutually exclusive.
    if all_kernels and kernel_names:
        raise click.BadArgumentUsage("can't specify kernel-name when using --all")
    if all_kernels:
        kernel_names = [found.name for found in settings.all_kernel_dirs]

    outcomes = {}
    for position, kernel_name in enumerate(kernel_names):
        # Separate the output of consecutive kernels.
        if position > 0:
            click.echo("\n")

        try:
            kernel_dir = get_kernel_dir(kernel_name, settings.kernel_basedir)
        except ValueError as lookup_error:
            # With --all the reason is not echoed; the summary still records it.
            if not all_kernels:
                click.echo(str(lookup_error), err=True)
            outcomes[kernel_name] = "Not found"
            continue

        click.echo(f"{kernel_dir.name}")
        try:
            changed = update_kernel(kernel_dir, author=settings.author)
        except (RuntimeError, subprocess.CalledProcessError) as update_error:
            click.echo(str(update_error), err=True)
            outcomes[kernel_name] = "Error"
        else:
            outcomes[kernel_name] = "Updated" if changed else "No changes"

        # Optionally list what is still outdated for this kernel.
        # NOTE(review): assumed to run for every kernel that was found, also
        # after a failed update - confirm against the intended nesting.
        if show_epilog:
            outdated_cmd = ["poetry", "show", "--outdated"]
            subprocess.run(outdated_cmd, check=True, cwd=kernel_dir)

    # One summary line per requested kernel.
    click.echo("\n\nSummary:\n--------")
    for kernel_name, outcome in outcomes.items():
        click.echo(f"{kernel_name:20} {outcome}")
def get_kernel_dir(kernel_name: str, kernel_basedir: Path) -> Path:
    """Return the directory of kernel *kernel_name* after validating it.

    Args:
        kernel_name: name of the kernel, i.e. the sub-directory name.
        kernel_basedir: directory that contains the kernel directories.

    Returns:
        ``kernel_basedir / kernel_name``.

    Raises:
        ValueError: if the name is empty, the path is not an existing
            directory, or the directory is not a valid git repository.
    """
    if kernel_name == "":
        raise ValueError("empty kernel_name")
    kernel_dir = kernel_basedir / kernel_name
    # is_dir() rather than exists(): a regular file with that name is not a
    # kernel directory and must be reported as such.
    if not kernel_dir.is_dir():
        raise ValueError(f"{kernel_dir} is not an existing directory")
    try:
        # Only opened to validate the repository; the object itself is unused.
        Repo(kernel_dir)
    except InvalidGitRepositoryError as err:
        raise ValueError(f"{kernel_dir} is not a valid git repository") from err
    return kernel_dir
def update_kernel(kernel_dir: Path, author: Actor):
    """Run ``poetry update`` in *kernel_dir*, commit the result and re-register.

    Returns:
        True when the update left changes that were committed, else False.

    Raises:
        RuntimeError: if the repository is dirty before or after the update,
            if the directory is not poetry-managed, or if ``poetry update``
            fails on every attempt.
        subprocess.CalledProcessError: if registering the kernel fails.
    """
    repo = Repo(kernel_dir)
    if repo.is_dirty():
        raise RuntimeError(f"{kernel_dir} not a clean git repository")

    # `poetry env info -p` (poetry >= 1.0) fails when there is no poetry env.
    env_info = subprocess.run(
        ["poetry", "env", "info", "-p"], cwd=kernel_dir, stdout=subprocess.PIPE
    )
    if env_info.returncode != 0:
        raise RuntimeError(f"{kernel_dir} not a poetry installed directory")

    # TODO: decide whether upgrading pip/wheel/setuptools inside the venv
    #       (poetry run pip install --upgrade ...) is needed before updating.

    # Attempt the update up to two times; its output goes to the terminal.
    max_attempts = 2
    succeeded = False
    for _ in range(max_attempts):
        attempt = subprocess.run(["poetry", "update"], cwd=kernel_dir)
        if attempt.returncode == 0:
            succeeded = True
            break
    if not succeeded:
        # Record whatever state the failed update left in the lock file.
        repo.index.add(["poetry.lock"])
        repo.index.commit(
            "poetry update, partial/failed [auto-generated]", author=author
        )
        raise RuntimeError("failed poetry update")

    # Commit the lock file only when the update changed something.
    updated = repo.is_dirty()
    if updated:
        click.echo("Commiting changes")
        repo.index.add(["poetry.lock"])
        repo.index.commit("poetry update [auto-generated]", author=author)

    # Re-register the kernel spec; the command prints its own output.
    # TODO: only check if registered
    register_cmd = [
        "poetry",
        "run",
        "ipython",
        "kernel",
        "install",
        "--user",
        "--name",
        kernel_dir.name,
        "--display-name",
        f"{kernel_dir.name} (Poetry)",
    ]
    subprocess.run(register_cmd, check=True, cwd=kernel_dir)

    if repo.is_dirty():
        raise RuntimeError("Dirty git repo after update")
    return updated
@kernel.command()
@click.argument("kernel-name", type=str)
@pass_settings
def remove(settings: Settings, kernel_name: str):
    """remove an existing kernel."""
    if not kernel_name:
        raise click.ClickException("empty kernel_name")
    kernel_dir = settings.kernel_basedir / kernel_name
    if not kernel_dir.exists():
        raise click.ClickException(f"{kernel_dir} not exists")

    def run_cmd(cmd_line: List[str], cwd: Path, capture_output: bool = False):
        # Best-effort runner: a failing command is reported, not raised.
        finished = subprocess.run(
            cmd_line, cwd=cwd, capture_output=capture_output, check=False
        )
        if finished.returncode != 0:
            click.echo(f"{' '.join(cmd_line)!r} failed on {cwd}")
        return finished

    # Unregister the kernel spec, running jupyter from the jupyter directory.
    uninstall_cmd = ["poetry", "run", "jupyter", "kernelspec", "uninstall"]
    run_cmd(uninstall_cmd + ["-f", kernel_dir.name], cwd=settings.jupyter_dir)

    # Drop the poetry virtualenv of the kernel (its output is captured).
    run_cmd(["poetry", "env", "remove"], cwd=kernel_dir, capture_output=True)

    # Keep pyproject.toml as "<name>.toml.old" next to the kernels, then
    # delete the kernel directory itself.
    pyproject = kernel_dir / "pyproject.toml"
    if pyproject.exists():
        backup_name = kernel_dir.name + ".toml.old"
        pyproject.rename(settings.kernel_basedir / backup_name)
    shutil.rmtree(kernel_dir)
@kernel.command()
@pass_settings
def ls(settings: Settings):
    """list all registered kernels."""
    # Each data row is split into its first whitespace-delimited token (the
    # kernel name) and the rest of the line (the kernel spec directory).
    kernel_pattern = re.compile(r"\s*(\S*)\s*(.*)")
    cp = subprocess.run(
        ["poetry", "run"] + ["jupyter", "kernelspec", "list"],
        check=True,
        stdout=subprocess.PIPE,
        universal_newlines=True,
        cwd=settings.jupyter_dir,
    )
    # The first output line is skipped (presumably the header printed by
    # `jupyter kernelspec list`).
    for line in cp.stdout.splitlines()[1:]:
        # Blank lines carry no kernel and would otherwise print an empty row.
        if not line.strip():
            continue
        kernel_match = kernel_pattern.fullmatch(line)
        kernel_name = kernel_match.group(1)
        kernel_dir = Path(kernel_match.group(2))
        kernel_json = kernel_dir / "kernel.json"
        try:
            with kernel_json.open() as f:
                kernel_json_dict = json.load(f)
            # kernel.json also holds a "display_name"; it is not shown for now.
            python_bin = Path(kernel_json_dict["argv"][0])
            # A bare "python" is reported as living in /usr; otherwise the venv
            # is two levels above the interpreter (<venv>/bin/python).
            venv_dir = (
                "/usr" if (python_bin == Path("python")) else python_bin.parents[1]
            )
        except (
            OSError,
            json.decoder.JSONDecodeError,
            KeyError,
            IndexError,
            TypeError,
        ):
            # Unreadable/malformed kernel.json, missing "argv", or an
            # interpreter path too short to have a venv directory (e.g. a bare
            # "python3"): still list the kernel, with an empty venv column.
            venv_dir = ""
        click.echo(f"{kernel_name:20} {str(kernel_dir):62} {str(venv_dir):62}")
def _spawn_outdated(kernel_dir: Path) -> subprocess.Popen:
    """Start ``poetry show --outdated --ansi`` in *kernel_dir* with piped output."""
    return subprocess.Popen(
        ["poetry", "show", "--outdated", "--ansi"],
        cwd=kernel_dir,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )


def _collect_outdated(process: subprocess.Popen) -> subprocess.CompletedProcess:
    """Wait for *process* to finish and wrap its output as a CompletedProcess."""
    # No input is sent: stdin is not piped. (The previous code passed the
    # builtin ``input`` function here by mistake.)
    stdout, stderr = process.communicate()
    return subprocess.CompletedProcess(process.args, process.poll(), stdout, stderr)


@kernel.command()
@pass_settings
def outdated(settings: Settings):
    """list kernels and outdated packages."""
    processes = {}
    completed_processes = {}
    try:
        # Launch one poetry process per kernel so that they run concurrently.
        for kernel_dir in settings.all_kernel_dirs:
            processes[kernel_dir] = _spawn_outdated(kernel_dir)
        try:
            for kernel_dir, process in processes.items():
                completed_processes[kernel_dir] = _collect_outdated(process)
            # Kernels whose command failed are retried exactly once. Their
            # first process has already been fully collected (pipes closed,
            # process reaped) by communicate(), so it can simply be replaced.
            second_pass = [
                kd for (kd, cp) in completed_processes.items() if cp.returncode
            ]
            for kernel_dir in second_pass:
                print(f"DEBUG: rerun {kernel_dir}")
                processes[kernel_dir] = _spawn_outdated(kernel_dir)
            for kernel_dir in second_pass:
                completed_processes[kernel_dir] = _collect_outdated(
                    processes[kernel_dir]
                )
        except BaseException:  # including KeyboardInterrupt
            # Stop every child before propagating; the finally clause below
            # closes the pipes and waits for them.
            for process in processes.values():
                process.kill()
            raise
    finally:
        for process in processes.values():
            process.stdout.close()
            process.stderr.close()
            process.wait()

    for index, (kernel_dir, cp) in enumerate(completed_processes.items()):
        # Blank line between kernels, then the kernel name as a header.
        if index:
            click.echo()
        click.echo(kernel_dir.name)
        print(cp.stdout, end="")
        print(cp.stderr, file=sys.stderr, end="")
        # Raise CalledProcessError if this kernel's command still failed.
        # NOTE(review): assumed to be checked per kernel inside the loop, so
        # the first failure stops the listing - confirm the intended nesting.
        cp.check_returncode()
@kernel.command()
@pass_settings
def outdated_jupyter(settings: Settings):
    """list outdated packages in the jupyter venv."""
    # Runs in the jupyter directory; a non-zero exit raises CalledProcessError.
    show_outdated_cmd = ["poetry", "show", "--outdated"]
    subprocess.run(show_outdated_cmd, cwd=settings.jupyter_dir, check=True)
@kernel.command()
@pass_settings
def dirty(settings: Settings):
    """list kernels with dirty git repo."""
    for kernel_dir in settings.all_kernel_dirs:
        # Clean repositories are skipped; only dirty ones are printed.
        if not Repo(kernel_dir).is_dirty():
            continue
        click.echo(kernel_dir.name)
@kernel.command()
@click.option("--build/--no-build", help='perform "jupyter lab build"')
@pass_settings
def update_jupyter(settings: Settings, build: bool):
    """auto-update jupyter."""
    jupyter_dir = settings.jupyter_dir

    def run_checked(*cmd_line):
        # Run a command in the jupyter directory; a failure raises.
        subprocess.run(list(cmd_line), check=True, cwd=jupyter_dir)

    # The jupyter directory must be a clean git repository before updating.
    try:
        repo = Repo(jupyter_dir)
    except InvalidGitRepositoryError:
        raise click.ClickException(f"{settings.jupyter_dir} not a valid git repository")
    if repo.is_dirty():
        raise click.ClickException(f"{settings.jupyter_dir} not a clean git repository")

    # TODO: decide whether upgrading pip inside the venv is needed first.

    # Update the venv (the command prints its own output) and commit the
    # lock file when it changed.
    run_checked("poetry", "update")
    if repo.is_dirty():
        click.echo("Commiting changes")
        repo.index.add(["poetry.lock"])
        repo.index.commit("poetry update [auto-generated]", author=settings.author)

    # Show what is still outdated after the update.
    run_checked("poetry", "show", "--outdated")

    # Update the lab extensions and optionally rebuild jupyter lab.
    run_checked("poetry", "run", "jupyter", "labextension", "update", "--all")
    if build:
        run_checked("poetry", "run", "jupyter", "lab", "build")

    if repo.is_dirty():
        raise RuntimeError("Dirty git repo after jupyter-update")
# Script entry point: run the click command group when executed directly.
if __name__ == "__main__":
    kernel()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment