Last active
January 18, 2023 21:54
-
-
Save tsvikas/378edcaea2610489d40ee3f3e2f7e06e to your computer and use it in GitHub Desktop.
manage jupyter kernels using poetry
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import getpass | |
import json | |
import re | |
import shutil | |
import socket | |
import subprocess | |
import sys | |
from dataclasses import dataclass, field | |
from pathlib import Path | |
from typing import List | |
# TODO: change click to typer | |
import click | |
from git import Repo, Actor, InvalidGitRepositoryError | |
# TODO: update to ipykernel ^6
# Version constraint used for the ipykernel dev-dependency that `create`
# passes to `poetry init` for every new kernel project.
IPYKERNEL_VERSION = "^5.0.0"
@dataclass
class Settings:
    """Configuration shared by all sub-commands via the click context.

    Attributes are validated in ``__post_init__``; ``author`` is derived from
    ``name`` and ``email`` and is therefore not an ``__init__`` argument.
    """

    # directory whose sub-directories are the individual kernel projects
    kernel_basedir: Path
    # directory in which ``poetry run jupyter ...`` commands are executed
    jupyter_dir: Path
    # identity used for the auto-generated git commits (hidden from repr)
    name: str = field(repr=False)
    email: str = field(repr=False)
    author: Actor = field(init=False)
    # sub-directory names of kernel_basedir that are never treated as kernels
    subdirs_to_ignore: List[str] = field(default_factory=lambda: ["bin"])

    def __post_init__(self):
        """Validate the directories and the author name, then build ``author``.

        Raises:
            ValueError: if either directory does not exist or ``name`` is empty.
        """
        # Explicit raises instead of ``assert`` so the checks survive ``python -O``.
        if not self.kernel_basedir.exists():
            raise ValueError(f"kernel_basedir {self.kernel_basedir} does not exist")
        if not self.jupyter_dir.exists():
            raise ValueError(f"jupyter_dir {self.jupyter_dir} does not exist")
        # TODO: check that jupyter_dir can "poetry run jupyter/ipython"
        # TODO: check that poetry version >= 1
        if not self.name:
            raise ValueError("author name must not be empty")
        self.author = Actor(self.name, self.email)

    @property
    def all_kernel_dirs(self):
        """Return the sorted kernel directories directly below ``kernel_basedir``.

        Entries are skipped when they are not directories, when their name is
        listed in ``subdirs_to_ignore``, or when they contain a
        ``do-not-update`` marker.
        """
        return [
            kernel_dir
            for kernel_dir in sorted(self.kernel_basedir.glob("*/"))
            if kernel_dir.is_dir()
            and kernel_dir.name not in self.subdirs_to_ignore
            and kernel_dir != self.kernel_basedir
            and not (kernel_dir / "do-not-update").exists()
        ]
@click.group()
@click.option(
    "kernel_basedir",
    "-k",
    type=click.Path(exists=True, file_okay=False, writable=True),
    default=Path.home() / "kernels",
)
@click.option(
    "jupyter_dir",
    "-j",
    type=click.Path(exists=True, file_okay=False),
    default=Path.home() / "jupyter-venv",
)
@click.option("--name", "-n", envvar="USER")
@click.option("--email", "-e", default=f"{getpass.getuser()}@{socket.gethostname()}")
@click.pass_context
def kernel(ctx, kernel_basedir: Path, jupyter_dir: Path, name: str, email: str):
    """manage jupyter kernels using poetry"""
    # Bundle the group-level options into one Settings object; sub-commands
    # receive it through the ``pass_settings`` decorator below.
    settings = Settings(
        name=name,
        email=email,
        jupyter_dir=Path(jupyter_dir),
        kernel_basedir=Path(kernel_basedir),
    )
    ctx.obj = settings


# Decorator that injects the Settings instance stored on the click context
# as the first argument of a sub-command.
pass_settings = click.make_pass_decorator(Settings)
@kernel.command()
@click.argument("kernel-name", type=str)
@click.option(
    "packages",
    "-a",
    "--add",
    type=str,
    multiple=True,
    metavar="PACKAGE",
    help="Package to require with an optional version constraint, "
    "e.g. requests:^2.10.0 or requests=2.11.1 "
    "(multiple values allowed)",
)
@pass_settings
def create(settings: Settings, kernel_name: str, packages: List[str]):
    """create a new kernel.

    create a new kernel named KERNEL_NAME, manage it with git & poetry,
    and register it with jupyter
    """
    if kernel_name == "":
        raise click.ClickException("empty kernel_name")
    kernel_dir = settings.kernel_basedir / kernel_name
    if kernel_dir.exists():
        raise click.ClickException(f"{kernel_dir} exists")

    # create directory and an empty git repository inside it
    kernel_dir.mkdir(exist_ok=False)
    repo = Repo.init(kernel_dir)

    # init poetry; every requested package becomes a "--dependency PACKAGE" pair
    dependency_args = [
        arg for package in packages for arg in ("--dependency", package)
    ]
    subprocess.run(
        ["poetry", "init"]
        + ["--name", kernel_dir.name]
        + ["--author", settings.author.name]
        + ["--description", "ipython-kernel [auto-generated]"]
        + dependency_args
        + [f"--dev-dependency=ipykernel:{IPYKERNEL_VERSION}"]
        + ["--no-interaction", "--quiet"],
        check=True,
        cwd=kernel_dir,
    )
    repo.index.add(["pyproject.toml"])
    repo.index.commit("poetry init [auto-generated]", author=settings.author)

    # create venv
    # TODO: "pip install -U pip wheel setuptools"
    #  after creating the venv but before installing anything
    #  (is it really needed?)
    subprocess.run(["poetry", "install"], check=True, cwd=kernel_dir)
    repo.index.add(["poetry.lock"])
    repo.index.commit("poetry install [auto-generated]", author=settings.author)

    # register kernel with jupyter, from inside the kernel's own environment
    subprocess.run(
        ["poetry", "run"]
        + ["ipython", "kernel", "install"]
        + [
            "--user",
            "--name",
            kernel_dir.name,
            "--display-name",
            f"{kernel_dir.name} (Poetry)",
        ],
        check=True,
        cwd=kernel_dir,
    )

    # everything generated above should have been committed
    if repo.is_dirty():
        raise RuntimeError("Dirty git repo after create")
@kernel.command()
@click.argument("kernel-names", type=str, nargs=-1)
@click.option("all_kernels", "--all", is_flag=True, help="update all kernels")
@click.option("--show-epilog/--no-show-epilog", help="show outdated epilog per kernel")
@pass_settings
def update(
    settings: Settings, kernel_names: List[str], all_kernels: bool, show_epilog: bool
):
    """auto-update kernels."""
    # --all and explicit names are mutually exclusive
    if all_kernels and kernel_names:
        raise click.BadArgumentUsage("can't specify kernel-name when using --all")
    if all_kernels:
        kernel_names = [found.name for found in settings.all_kernel_dirs]

    results = {}
    for position, kernel_name in enumerate(kernel_names):
        # separate the output of consecutive kernels
        if position > 0:
            click.echo("\n")

        try:
            kernel_dir = get_kernel_dir(kernel_name, settings.kernel_basedir)
        except ValueError as lookup_error:
            # with --all the lookup failure is only recorded, not printed
            if not all_kernels:
                click.echo(str(lookup_error), err=True)
            results[kernel_name] = "Not found"
            continue

        click.echo(f"{kernel_dir.name}")
        try:
            # do the actual work
            updated = update_kernel(kernel_dir, author=settings.author)
        except (RuntimeError, subprocess.CalledProcessError) as update_error:
            click.echo(str(update_error), err=True)
            outcome = "Error"
        else:
            if updated:
                outcome = "Updated"
            else:
                outcome = "No changes"
        results[kernel_name] = outcome

        # show currently outdated packages
        # NOTE(review): the original indentation was lost; this step is assumed
        # to run for every kernel that was found - confirm.
        if show_epilog:
            subprocess.run(
                ["poetry", "show", "--outdated"], check=True, cwd=kernel_dir
            )

    click.echo("\n\nSummary:\n--------")
    for kernel_name, result in results.items():
        click.echo(f"{kernel_name:20} {result}")
def get_kernel_dir(kernel_name: str, kernel_basedir: Path) -> Path:
    """Return the directory of kernel *kernel_name* below *kernel_basedir*.

    Raises:
        ValueError: if *kernel_name* is empty, the resulting path is not an
            existing directory, or the directory is not a git repository.
    """
    if kernel_name == "":
        raise ValueError("empty kernel_name")
    kernel_dir = kernel_basedir / kernel_name
    # is_dir() rather than exists(): a plain file is not a kernel directory
    if not kernel_dir.is_dir():
        raise ValueError(f"{kernel_dir} is not an existing directory")
    try:
        # opened only to validate the repository; the object itself is not needed
        Repo(kernel_dir)
    except InvalidGitRepositoryError as err:
        raise ValueError(f"{kernel_dir} is not a valid git repository") from err
    return kernel_dir
def update_kernel(kernel_dir: Path, author: Actor):
    """Run ``poetry update`` in *kernel_dir*, commit the lock file, re-register.

    Returns whether the update left changes that were committed. Raises
    RuntimeError when the repository is dirty before or after the update,
    when poetry reports no environment for the directory, or when
    ``poetry update`` fails on every attempt.
    """
    repo = Repo(kernel_dir)

    # refuse to work on top of uncommitted changes
    if repo.is_dirty():
        raise RuntimeError(f"{kernel_dir} not a clean git repository")

    # requires poetry 1.0
    # verify that it's a poetry managed folder (output is captured, not shown)
    env_info = subprocess.run(
        ["poetry", "env", "info", "-p"], cwd=kernel_dir, stdout=subprocess.PIPE
    )
    if env_info.returncode != 0:
        raise RuntimeError(f"{kernel_dir} not a poetry installed directory")

    # TODO: decide whether pip/wheel/setuptools should be upgraded inside the
    #  venv before updating

    # update venv [the command will output]; any() stops at the first success
    attempts = 2
    update_succeeded = any(
        subprocess.run(["poetry", "update"], cwd=kernel_dir).returncode == 0
        for _ in range(attempts)
    )
    if not update_succeeded:
        # record whatever state the failed update left behind, then give up
        repo.index.add(["poetry.lock"])
        repo.index.commit(
            "poetry update, partial/failed [auto-generated]", author=author
        )
        raise RuntimeError("failed poetry update")

    # git commit
    updated = repo.is_dirty()
    if updated:
        click.echo("Commiting changes")
        repo.index.add(["poetry.lock"])
        repo.index.commit("poetry update [auto-generated]", author=author)

    # register kernel [the command will output]
    # TODO: only check if registered
    install_command = [
        "poetry",
        "run",
        "ipython",
        "kernel",
        "install",
        "--user",
        "--name",
        kernel_dir.name,
        "--display-name",
        f"{kernel_dir.name} (Poetry)",
    ]
    subprocess.run(install_command, check=True, cwd=kernel_dir)

    # check git is clean
    if repo.is_dirty():
        raise RuntimeError("Dirty git repo after update")
    return updated
@kernel.command()
@click.argument("kernel-name", type=str)
@pass_settings
def remove(settings: Settings, kernel_name: str):
    """remove an existing kernel."""
    if kernel_name == "":
        raise click.ClickException("empty kernel_name")
    kernel_dir = settings.kernel_basedir / kernel_name
    # The directory is deleted recursively below, so only accept a direct
    # child of kernel_basedir: names such as "..", ".", "/" or "a/b" would
    # otherwise point outside of (or at) the base directory itself.
    if kernel_dir.parent != settings.kernel_basedir or kernel_dir.name in ("", ".."):
        raise click.ClickException(f"invalid kernel_name {kernel_name!r}")
    if not kernel_dir.exists():
        raise click.ClickException(f"{kernel_dir} not exists")

    def run_cmd(cmd_line: List[str], cwd: Path, capture_output: bool = False):
        # Best-effort step: a failing command is reported but does not abort
        # the removal.
        cp = subprocess.run(
            cmd_line,
            cwd=cwd,
            capture_output=capture_output,
            check=False,
        )
        if cp.returncode:
            click.echo(f"{' '.join(cmd_line)!r} failed on {cwd}")
        return cp

    # uninstall with ipykernel
    run_cmd(
        ["poetry", "run"]
        + ["jupyter", "kernelspec", "uninstall"]
        + ["-f", kernel_dir.name],
        cwd=settings.jupyter_dir,
    )

    # delete poetry venv
    run_cmd(["poetry", "env", "remove"], cwd=kernel_dir, capture_output=True)

    # keep a copy of pyproject.toml next to the kernels, then delete directory
    toml_file = kernel_dir / "pyproject.toml"
    if toml_file.exists():
        toml_file.rename(settings.kernel_basedir / (kernel_dir.name + ".toml.old"))
    shutil.rmtree(kernel_dir)
@kernel.command()
@pass_settings
def ls(settings: Settings):
    """list all registered kernels."""
    kernel_pattern = re.compile(r"\s*(\S*)\s*(.*)")
    cp = subprocess.run(
        ["poetry", "run"] + ["jupyter", "kernelspec", "list"],
        check=True,
        stdout=subprocess.PIPE,
        universal_newlines=True,
        cwd=settings.jupyter_dir,
    )
    # the first output line is skipped (treated as a header)
    for line in cp.stdout.splitlines()[1:]:
        if not line.strip():
            # nothing to report for an empty line
            continue
        kernel_match = kernel_pattern.fullmatch(line)
        kernel_name = kernel_match.group(1)
        kernel_dir = Path(kernel_match.group(2))
        kernel_json = kernel_dir / "kernel.json"
        try:
            with kernel_json.open() as f:
                python_bin = Path(json.load(f)["argv"][0])
            # a bare "python" is reported as "/usr"; otherwise the venv is
            # taken to be two levels above the interpreter (<venv>/bin/python)
            venv_dir = (
                "/usr" if (python_bin == Path("python")) else python_bin.parents[1]
            )
        except (
            FileNotFoundError,
            json.decoder.JSONDecodeError,
            KeyError,  # kernel.json without an "argv" entry
            IndexError,  # empty argv, or interpreter path too short for a venv
        ):
            venv_dir = ""
        click.echo(f"{kernel_name:20} {str(kernel_dir):62} {str(venv_dir):62}")
@kernel.command()
@pass_settings
def outdated(settings: Settings):
    """list kernels and outdated packages."""
    command = ["poetry", "show", "--outdated", "--ansi"]
    processes = {}
    completed_processes = {}

    def spawn(kernel_dir):
        # Start one "poetry show" in kernel_dir with both streams captured.
        return subprocess.Popen(
            command,
            cwd=kernel_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

    def collect(kernel_dir):
        # Wait for the current process of kernel_dir and record its result.
        process = processes[kernel_dir]
        # no stdin pipe is opened, so there is no input to send
        stdout, stderr = process.communicate()
        completed_processes[kernel_dir] = subprocess.CompletedProcess(
            process.args, process.poll(), stdout, stderr
        )

    try:
        # launch all checks first so that they run concurrently
        for kernel_dir in settings.all_kernel_dirs:
            processes[kernel_dir] = spawn(kernel_dir)
        try:
            # wait on each process
            for kernel_dir in processes:
                collect(kernel_dir)

            # try again once for every kernel whose command failed
            second_pass = [
                kd for (kd, cp) in completed_processes.items() if cp.returncode
            ]
            for kernel_dir in second_pass:
                print(f"DEBUG: rerun {kernel_dir}")
                old_process = processes[kernel_dir]
                old_process.stdout.close()
                old_process.stderr.close()
                old_process.wait()
                processes[kernel_dir] = spawn(kernel_dir)

            # wait on each new process
            for kernel_dir in second_pass:
                collect(kernel_dir)
        except BaseException:  # including KeyboardInterrupt; always re-raised
            for process in processes.values():
                process.kill()
            # We don't call process.wait() as finally does that for us.
            raise
    finally:
        for process in processes.values():
            process.stdout.close()
            process.stderr.close()
            process.wait()

    for index, (kernel_dir, cp) in enumerate(completed_processes.items()):
        # print header, with a blank line between kernels
        if index:
            click.echo()
        click.echo(kernel_dir.name)
        # print stdout, stderr
        print(cp.stdout, end="")
        print(cp.stderr, file=sys.stderr, end="")
        # raise CalledProcessError if the (possibly retried) command failed
        cp.check_returncode()
@kernel.command()
@pass_settings
def outdated_jupyter(settings: Settings):
    """list outdated packages in the jupyter venv."""
    # poetry prints the listing itself; a non-zero exit raises CalledProcessError
    show_outdated = ["poetry", "show", "--outdated"]
    subprocess.run(show_outdated, cwd=settings.jupyter_dir, check=True)
@kernel.command()
@pass_settings
def dirty(settings: Settings):
    """list kernels with dirty git repo."""
    # lazy generator: each repository is checked right before its name is printed
    dirty_names = (
        candidate.name
        for candidate in settings.all_kernel_dirs
        if Repo(candidate).is_dirty()
    )
    for dirty_name in dirty_names:
        click.echo(dirty_name)
@kernel.command()
@click.option("--build/--no-build", help='perform "jupyter lab build"')
@pass_settings
def update_jupyter(settings: Settings, build: bool):
    """auto-update jupyter."""
    jupyter_dir = settings.jupyter_dir

    def poetry(*arguments):
        # run one poetry sub-command inside the jupyter directory; fail loudly
        subprocess.run(["poetry", *arguments], check=True, cwd=jupyter_dir)

    # the jupyter directory must be a git repository without pending changes
    try:
        repo = Repo(jupyter_dir)
    except InvalidGitRepositoryError:
        raise click.ClickException(f"{settings.jupyter_dir} not a valid git repository")
    if repo.is_dirty():
        raise click.ClickException(f"{settings.jupyter_dir} not a clean git repository")

    # TODO: decide whether pip should be upgraded inside the venv first

    # update venv [the command will output]
    poetry("update")

    # git commit
    if repo.is_dirty():
        click.echo("Commiting changes")
        repo.index.add(["poetry.lock"])
        repo.index.commit("poetry update [auto-generated]", author=settings.author)

    # show currently outdated packages
    poetry("show", "--outdated")

    # update extensions & rebuild
    poetry("run", "jupyter", "labextension", "update", "--all")
    if build:
        poetry("run", "jupyter", "lab", "build")

    # check git is clean
    if repo.is_dirty():
        raise RuntimeError("Dirty git repo after jupyter-update")
if __name__ == "__main__":
    # Executed as a script: hand control to the click command group.
    kernel()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment