Skip to content

Instantly share code, notes, and snippets.

@alexeldeib
alexeldeib / test.sh
Created August 22, 2023 17:02
nvidia cgroupv2 repro test
# install nvidia device plugin (without env var)
kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/main/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml
# ssh OR nsenter node using node-shell + privileged pod
# tried both to rule out any container mount issues;
# the behavior is the same either way.
# https://github.com/kvaps/kubectl-node-shell
kubectl node-shell aks-nca100-36400834-vmss000000
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
name: exclude-all-pods-http-proxy
spec:
mutateExistingOnPolicyUpdate: false
rules:
- name: pod-ns
match:
any:
@alexeldeib
alexeldeib / deploy.yaml
Created July 25, 2023 23:34
nvidia ds with time slicing aks
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: nvidia-device-plugin-daemonset
namespace: kube-system
spec:
selector:
matchLabels:
name: nvidia-device-plugin-ds
updateStrategy:
@alexeldeib
alexeldeib / extract.py
Created June 19, 2023 17:16
Azure Wireserver Extension config extraction
import http.client
import xml.etree.ElementTree as ET
from urllib.parse import urlparse
import json
from subprocess import Popen, PIPE, STDOUT
import base64
try:
# request goalstate from wireserver
wireserver = "168.63.129.16"
@alexeldeib
alexeldeib / README.md
Last active June 10, 2023 00:44
Rebuild AKS node kernel for iolatency support

AKS iolatency rebuild

NODE_NAME="$(kubectl get node -o jsonpath="{.items[0].metadata.name}")"
kubectl node-shell $NODE_NAME

Copy rebuild_kernel.sh to /opt/rebuild_kernel.sh (or a similar path), then run `bash rebuild_kernel.sh`.

If successful, it will reboot into the new kernel.

@alexeldeib
alexeldeib / demo.sh
Last active August 3, 2023 20:59
AKS Nvidia cgroupv2 repro
export GROUP=ace-mig
export NAME=ace-mig
export LOCATION=eastus
az group create -g "${GROUP}" -l ${LOCATION}
# create a cluster with a default pool with some typical parameters.
# not really relevant.
# the only key piece is using k8s version >= 1.25.0 for Ubuntu 22.04 with cgroupv2.
@alexeldeib
alexeldeib / cni.sh
Last active May 29, 2023 21:44
CNI fiddling with 6rd/teredo/6to4
# n.b.: nodes are in 172.18.0.0/16
# hash subnet range with sha256 -> first 10 hex digits for prefix
hash_prefix=$(ip r | grep -E "\/[0-9]+ dev eth0" | cut -d' ' -f1 | sha256sum | head -c 10)
# prepend with fd for unique local address prefix for 6rd routing
rd_prefix="fd${hash_prefix}"
# add colons between each 4 hex chars
rd_prefix_formatted=$(echo "${rd_prefix}" | fold -w4 | paste -sd:)
local_addr=$(ip a show dev eth0 | grep -E 'inet ' | cut -d' ' -f6 | cut -d'/' -f1)
# get local IPv4 subnet as XX.XX.XX.XX/XX
@alexeldeib
alexeldeib / deploy.yaml
Created May 18, 2023 18:55
kubelet 30s kill pod
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: &name kubelet-killer-30sec
labels:
app: *name
spec:
selector:
matchLabels:
app: *name
@alexeldeib
alexeldeib / debug.sh
Created May 18, 2023 16:18
poking at nvidia/canonical package sources
# for context: this is a Hetzner machine upgraded from Ubuntu 20.04 to 22.04, possibly running a kernel I rebuilt (I don't remember).
root@Ubuntu-2004-focal-64-minimal ~ # cat /etc/os-release
PRETTY_NAME="Ubuntu 22.04.2 LTS"
NAME="Ubuntu"
VERSION_ID="22.04"
VERSION="22.04.2 LTS (Jammy Jellyfish)"
VERSION_CODENAME=jammy
ID=ubuntu
ID_LIKE=debian
HOME_URL="https://www.ubuntu.com/"
@alexeldeib
alexeldeib / images.json
Created May 18, 2023 14:54
agentbaker cli image output
{
"aks-cblmariner-v1": {
"ResourceGroup": "AKS-CBLMariner",
"Gallery": "AKSCBLMariner",
"Definition": "V1",
"Version": "202305.15.0",
"SubscriptionID": "109a5e88-712a-48ae-9078-9ca8b3c81345"
},
"aks-cblmariner-v2": {
"ResourceGroup": "AKS-CBLMariner",