Skip to content

Instantly share code, notes, and snippets.

@acerbetti
Created June 13, 2025 19:58
Show Gist options
  • Save acerbetti/fc7011f598a5c0099b47ddfbedf2c40c to your computer and use it in GitHub Desktop.
Save acerbetti/fc7011f598a5c0099b47ddfbedf2c40c to your computer and use it in GitHub Desktop.
Jetson Nano Cluster Join Script
#!/bin/bash
# Jetson Nano Cluster Join Script
# Based on https://github.com/siderolabs/talos/issues/3990
#
# Interactive: prompts for the Jetson's SSH address/user and the address of a
# Talos node, falling back to the bracketed default when a prompt is left empty.

# Abort on the first unhandled command failure.
set -e

# Colors for output (ANSI escape sequences, interpreted by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

echo -e "${GREEN}πŸš€ Starting Jetson Nano cluster join process...${NC}"

# Configuration - User input
# `-r` keeps backslashes literal; without it `read` would silently drop or
# reinterpret them in whatever the user typed.
echo -e "${YELLOW}πŸ“ Please provide connection details:${NC}"
read -r -p "Jetson IP address [192.168.1.102]: " JETSON_IP
read -r -p "Jetson username [jetson]: " JETSON_USER
read -r -p "Talos endpoint [192.168.1.100]: " TALOS_ENDPOINT

# Set defaults if empty
JETSON_IP=${JETSON_IP:-"192.168.1.102"}
JETSON_USER=${JETSON_USER:-"jetson"}
TALOS_ENDPOINT=${TALOS_ENDPOINT:-"192.168.1.100"}

echo -e "${YELLOW}πŸ”— Using: ${JETSON_USER}@${JETSON_IP} β†’ Talos ${TALOS_ENDPOINT}${NC}"

# talosconfig file passed explicitly to every talosctl call below
TALOS_CONFIG="${HOME}/.talos/config"
# Check prerequisites: both CLIs are installed, the Jetson accepts SSH, and the
# Talos API answers when queried with the configured talosconfig.
echo -e "${YELLOW}πŸ“‹ Checking prerequisites...${NC}"

if ! command -v talosctl &> /dev/null; then
  echo -e "${RED}❌ talosctl not found. Please install Talos CLI.${NC}"
  exit 1
fi

if ! command -v kubectl &> /dev/null; then
  echo -e "${RED}❌ kubectl not found. Please install kubectl.${NC}"
  exit 1
fi

if ! ssh -o ConnectTimeout=5 "${JETSON_USER}@${JETSON_IP}" 'exit 0' 2>/dev/null; then
  echo -e "${RED}❌ Cannot SSH to Jetson Nano at ${JETSON_IP}${NC}"
  exit 1
fi

# Flags shared by both Talos probes. Each value is quoted so the user-typed
# endpoint can never be word-split or glob-expanded into extra arguments.
talos_probe_args=(
  --talosconfig "${TALOS_CONFIG}"
  --endpoints "${TALOS_ENDPOINT}"
  --nodes "${TALOS_ENDPOINT}"
)

if ! talosctl "${talos_probe_args[@]}" get machineconfig &>/dev/null; then
  echo -e "${RED}❌ Cannot connect to Talos cluster at ${TALOS_ENDPOINT}${NC}"
  echo "Trying to diagnose the issue..."
  echo "Talos config file: ${TALOS_CONFIG}"
  ls -la "${TALOS_CONFIG}" 2>/dev/null || echo "Config file not found!"
  echo "Testing connection:"
  # Re-run the probe with its output visible; `|| true` keeps `set -e` from
  # exiting before the explicit `exit 1` below.
  talosctl "${talos_probe_args[@]}" get machineconfig || true
  exit 1
fi

echo -e "${GREEN}βœ… All prerequisites met${NC}"
# Create temporary directory used to stage the files destined for the Jetson.
# NOTE(review): TMPDIR is also the environment variable that mktemp and other
# tools consult for their own temp files; the name is kept because the rest of
# the script refers to it.
TMPDIR=$(mktemp -d)

# The directory ends up holding kubeconfigs copied from the cluster, so remove
# it on every exit path (declined confirmation, `set -e` abort, normal end).
# `:?` refuses to run `rm -rf` with an empty path.
trap 'rm -rf -- "${TMPDIR:?}"' EXIT

echo -e "${YELLOW}πŸ“ Using temporary directory: ${TMPDIR}${NC}"

# Get cluster configuration from Talos
echo -e "${YELLOW}πŸ“Š Getting cluster configuration from Talos...${NC}"

# Quoted so the user-supplied endpoint is always passed as a single argument.
talos_query_args=(
  --talosconfig "${TALOS_CONFIG}"
  --endpoints "${TALOS_ENDPOINT}"
  --nodes "${TALOS_ENDPOINT}"
)

# Both values are read from the node's kubeletconfig resource and are later
# written into the kubelet configuration generated for the Jetson.
CLUSTER_DOMAIN=$(talosctl "${talos_query_args[@]}" get kubeletconfig -o jsonpath="{.spec.clusterDomain}")
CLUSTER_DNS=$(talosctl "${talos_query_args[@]}" get kubeletconfig -o jsonpath="{.spec.clusterDNS}")

echo "Cluster Domain: ${CLUSTER_DOMAIN}"
echo "Cluster DNS: ${CLUSTER_DNS}"
# Ask for explicit consent before anything destructive happens.
echo -e "${YELLOW}⚠️ WARNING: This will wipe the existing Kubernetes installation on Jetson Nano and cause a reboot!${NC}"
# -n 1 returns after a single character; the bare echo then ends the prompt line.
read -r -n 1 -p "Do you want to continue? [y/N] "
echo
case "${REPLY}" in
  y | Y)
    # Confirmed: fall through to the steps below.
    ;;
  *)
    echo "Aborted."
    exit 1
    ;;
esac
# Step 1: Clean existing installation on Jetson
echo -e "${YELLOW}🧹 Cleaning existing Kubernetes installation on Jetson...${NC}"

# If kubelet/containerd are active: disable them and reboot. The trailing
# `sleep 10` presumably just keeps the session open until the reboot drops it.
# `|| true` absorbs both that dropped connection and the "nothing active" case
# so `set -e` does not abort here.
ssh -t "${JETSON_USER}@${JETSON_IP}" 'systemctl is-active kubelet.service containerd.service 1>/dev/null && sudo systemctl disable kubelet.service containerd.service && sudo reboot && sleep 10' || true

echo "Waiting for Jetson to reboot..."
# Bound the wait so a Jetson that never comes back cannot hang the script.
# Override the limit (in seconds) by exporting REBOOT_WAIT_TIMEOUT.
REBOOT_WAIT_TIMEOUT=${REBOOT_WAIT_TIMEOUT:-600}
reboot_deadline=$((SECONDS + REBOOT_WAIT_TIMEOUT))
until ssh -o ConnectTimeout=1 "${JETSON_USER}@${JETSON_IP}" 'exit 0' 2>/dev/null; do
  if ((SECONDS >= reboot_deadline)); then
    echo -e "\n${RED}❌ Jetson did not come back online within ${REBOOT_WAIT_TIMEOUT} seconds${NC}"
    exit 1
  fi
  sleep 5
  echo -n "."
done
echo -e "\n${GREEN}βœ… Jetson is back online${NC}"

# Remove the previous Kubernetes, CNI, containerd, kubelet and etcd state.
ssh -t "${JETSON_USER}@${JETSON_IP}" 'sudo rm -rf /etc/kubernetes /etc/cni /var/lib/cni /opt/cni /var/lib/containerd /var/lib/kubelet /var/lib/etcd'
# Step 2: Copy Kubernetes files from Talos cluster
echo -e "${YELLOW}πŸ“‹ Copying Kubernetes configuration from Talos cluster...${NC}"

# Quoted so the user-supplied endpoint is always passed as a single argument.
talos_read_args=(
  --talosconfig "${TALOS_CONFIG}"
  --endpoints "${TALOS_ENDPOINT}"
  --nodes "${TALOS_ENDPOINT}"
)

# Read the kubelet kubeconfig, bootstrap kubeconfig and cluster CA from the
# Talos node into the local staging directory.
talosctl "${talos_read_args[@]}" cat /etc/kubernetes/kubeconfig-kubelet > "${TMPDIR}/kubelet.conf"
talosctl "${talos_read_args[@]}" cat /etc/kubernetes/bootstrap-kubeconfig > "${TMPDIR}/bootstrap-kubelet.conf"
talosctl "${talos_read_args[@]}" cat /etc/kubernetes/pki/ca.crt > "${TMPDIR}/ca.crt"

# Step 3: Update server endpoints
echo -e "${YELLOW}πŸ”§ Updating server endpoints...${NC}"

# Point both kubeconfigs at the API server on the Talos endpoint. The
# `-i.bak` spelling is accepted by both GNU and BSD sed.
sed -i.bak "s|server:.*|server: https://${TALOS_ENDPOINT}:6443|g" "${TMPDIR}/kubelet.conf" "${TMPDIR}/bootstrap-kubelet.conf"

# Drop sed's backups: they are copies of the kubeconfigs and would otherwise
# be uploaded to the Jetson together with the rest of the staging directory.
rm -f -- "${TMPDIR}/kubelet.conf.bak" "${TMPDIR}/bootstrap-kubelet.conf.bak"
# Step 4: Create kubelet configuration
echo -e "${YELLOW}βš™οΈ Creating kubelet configuration...${NC}"

# The heredoc is unquoted on purpose: ${CLUSTER_DOMAIN} and ${CLUSTER_DNS} are
# expanded locally. The indentation is part of the file content: it nests
# anonymous/webhook/x509 under `authentication` and `mode` under
# `authorization`; flattened, those keys would all land at the top level.
cat > "${TMPDIR}/var-lib-kubelet-config.yaml" <<EOT
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
authentication:
  anonymous:
    enabled: false
  webhook:
    enabled: true
  x509:
    clientCAFile: /etc/kubernetes/pki/ca.crt
authorization:
  mode: Webhook
clusterDomain: "${CLUSTER_DOMAIN}"
clusterDNS: ${CLUSTER_DNS}
runtimeRequestTimeout: "0s"
cgroupDriver: systemd
containerRuntimeEndpoint: unix:///var/run/containerd/containerd.sock
failSwapOn: false
EOT
# Step 5: Create kubelet service override
# Two-line systemd drop-in that sets KUBELET_ARGS; it contains nothing to
# expand, so each line is emitted verbatim from a single-quoted string.
printf '%s\n' \
  '[Service]' \
  'Environment="KUBELET_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf --config=/var/lib/kubelet/config.yaml --fail-swap-on=false --ignore-preflight-errors=SystemVerification"' \
  > "${TMPDIR}/kubelet.service.override.conf"
# Step 6: Create HAProxy configuration for KubePrism compatibility
# The proxy listens on 127.0.0.1:7445 and forwards TCP to port 6443 on the
# Talos endpoint, with a second server marked `backup`.

# Address of that backup server. It used to be fixed at 192.168.1.101, which
# is still the default; export TALOS_BACKUP_ENDPOINT to use a different node.
TALOS_BACKUP_ENDPOINT=${TALOS_BACKUP_ENDPOINT:-"192.168.1.101"}

# Unquoted heredoc: both endpoint variables are expanded locally.
cat > "${TMPDIR}/haproxy.cfg" <<EOT
defaults
timeout client 10s
timeout connect 5s
timeout server 10s
frontend kubeprism
mode tcp
bind 127.0.0.1:7445
default_backend k8s_api
backend k8s_api
mode tcp
server talos-master ${TALOS_ENDPOINT}:6443 check
server talos-worker ${TALOS_BACKUP_ENDPOINT}:6443 check backup
EOT
# Step 7: Copy files to Jetson Nano
echo -e "${YELLOW}πŸ“€ Copying configuration files to Jetson Nano...${NC}"

# SSH destination reused by both commands in this section.
jetson_target="${JETSON_USER}@${JETSON_IP}"

# Remote staging path; the epoch-seconds suffix separates one run from the next.
REMOTE_TMPDIR="/tmp/k8s-join-$(date +%s)"

# Upload the whole local staging directory under that remote name.
scp -r "${TMPDIR}" "${jetson_target}:${REMOTE_TMPDIR}"

# Step 8: Install HAProxy and configure services on Jetson Nano
echo -e "${YELLOW}πŸ“¦ Installing HAProxy on Jetson Nano...${NC}"
ssh -t "${jetson_target}" 'sudo apt-get update && sudo apt-get install -y haproxy'
# Step 9: Install CNI plugins and configure networking
echo -e "${YELLOW}🌐 Installing CNI plugins and configuring networking...${NC}"

# `-f` makes curl exit non-zero on an HTTP error instead of saving the error
# page under the target name (which would later be unpacked or installed).
ssh "${JETSON_USER}@${JETSON_IP}" 'curl -fL https://github.com/containernetworking/plugins/releases/download/v1.3.0/cni-plugins-linux-arm64-v1.3.0.tgz -o /tmp/cni-plugins.tgz'
ssh -t "${JETSON_USER}@${JETSON_IP}" 'sudo mkdir -p /opt/cni/bin && sudo tar -xzf /tmp/cni-plugins.tgz -C /opt/cni/bin && sudo chmod +x /opt/cni/bin/*'

echo -e "${YELLOW}πŸ”Œ Installing Flannel CNI plugin...${NC}"
ssh "${JETSON_USER}@${JETSON_IP}" 'curl -fL https://github.com/flannel-io/cni-plugin/releases/download/v1.2.0/flannel-arm64 -o /tmp/flannel'
ssh -t "${JETSON_USER}@${JETSON_IP}" 'sudo mv /tmp/flannel /opt/cni/bin/flannel && sudo chmod +x /opt/cni/bin/flannel'

echo -e "${YELLOW}πŸ”— Configuring bridge netfilter for Flannel...${NC}"
# Load the module now and list it in /etc/modules. The grep guard appends the
# line only when it is missing, so re-running the script does not pile up
# duplicate entries.
ssh -t "${JETSON_USER}@${JETSON_IP}" 'sudo modprobe br_netfilter && { grep -qxF "br_netfilter" /etc/modules 2>/dev/null || echo "br_netfilter" | sudo tee -a /etc/modules; }'

# Apply the bridge sysctls to the running kernel...
ssh -t "${JETSON_USER}@${JETSON_IP}" 'sudo sysctl net.bridge.bridge-nf-call-iptables=1 && sudo sysctl net.bridge.bridge-nf-call-ip6tables=1'

# ...and record them in /etc/sysctl.conf, again only when not already present.
ssh -t "${JETSON_USER}@${JETSON_IP}" '{ grep -qxF "net.bridge.bridge-nf-call-iptables=1" /etc/sysctl.conf 2>/dev/null || echo "net.bridge.bridge-nf-call-iptables=1" | sudo tee -a /etc/sysctl.conf; } && { grep -qxF "net.bridge.bridge-nf-call-ip6tables=1" /etc/sysctl.conf 2>/dev/null || echo "net.bridge.bridge-nf-call-ip6tables=1" | sudo tee -a /etc/sysctl.conf; }'
# Step 10: Configure and start services
echo -e "${YELLOW}πŸ”§ Installing and configuring services on Jetson Nano...${NC}"

# One remote `&&` chain, so the first failing step stops everything after it.
# ${REMOTE_TMPDIR} is expanded locally before the command is sent.
# After the staged files are moved into place, the staging directory is
# removed so nothing that was uploaded with it lingers in /tmp on the Jetson;
# `:?` aborts locally rather than sending an `rm -rf` with an empty path.
ssh -t "${JETSON_USER}@${JETSON_IP}" "sudo mkdir -p /etc/kubernetes/pki /var/lib/kubelet /etc/systemd/system/kubelet.service.d/ /etc/haproxy &&
  sudo mv ${REMOTE_TMPDIR}/kubelet.conf /etc/kubernetes/kubelet.conf &&
  sudo mv ${REMOTE_TMPDIR}/bootstrap-kubelet.conf /etc/kubernetes/bootstrap-kubelet.conf &&
  sudo mv ${REMOTE_TMPDIR}/ca.crt /etc/kubernetes/pki/ca.crt &&
  sudo mv ${REMOTE_TMPDIR}/var-lib-kubelet-config.yaml /var/lib/kubelet/config.yaml &&
  sudo mv ${REMOTE_TMPDIR}/kubelet.service.override.conf /etc/systemd/system/kubelet.service.d/override.conf &&
  sudo mv ${REMOTE_TMPDIR}/haproxy.cfg /etc/haproxy/haproxy.cfg &&
  sudo rm -rf ${REMOTE_TMPDIR:?} &&
  sudo systemctl daemon-reload &&
  sudo systemctl enable --now haproxy.service containerd.service kubelet.service"
# Step 11: Verify node joined successfully
echo -e "${YELLOW}⏳ Waiting for node to join cluster...${NC}"
sleep 30

echo -e "${YELLOW}πŸ” Verifying Jetson Nano joined the cluster...${NC}"
JETSON_HOSTNAME=$(ssh "${JETSON_USER}@${JETSON_IP}" 'hostname')
echo "Looking for node with hostname: ${JETSON_HOSTNAME}"

# Wait up to 5 minutes for the node to report Ready
TIMEOUT=300
ELAPSED=0
NODE_FOUND=false
while [ "${ELAPSED}" -lt "${TIMEOUT}" ]; do
  # Compare the NAME column exactly and require STATUS to be "Ready"
  # (optionally followed by ",..."). A plain `grep "<host>.*Ready"` also
  # accepts "NotReady" and any node whose name merely starts with the hostname.
  # awk reads all of kubectl's output before deciding.
  if kubectl get nodes --no-headers \
    | awk -v node="${JETSON_HOSTNAME}" '
        $1 == node && $2 ~ /^Ready(,|$)/ { ready = 1 }
        END { exit !ready }
      '; then
    NODE_FOUND=true
    break
  fi
  echo -n "."
  sleep 10
  ELAPSED=$((ELAPSED + 10))
done
# End the line of progress dots printed while polling.
echo

# Failure path first: show diagnostics, clean up and stop with a non-zero status.
if [[ "${NODE_FOUND}" != true ]]; then
  echo -e "${RED}❌ Jetson Nano failed to join the cluster within ${TIMEOUT} seconds${NC}"
  echo "Current cluster nodes:"
  kubectl get nodes
  echo
  echo "Checking kubelet status on Jetson:"
  # Diagnostics are best-effort: `|| true` keeps `set -e` from cutting them short.
  ssh "${JETSON_USER}@${JETSON_IP}" 'sudo systemctl status kubelet --no-pager' || true
  echo
  echo "Checking kubelet logs on Jetson:"
  ssh "${JETSON_USER}@${JETSON_IP}" 'sudo journalctl -u kubelet --no-pager -n 20' || true
  # Cleanup
  rm -rf "${TMPDIR}"
  echo -e "${RED}πŸ’₯ Jetson Nano cluster join process FAILED!${NC}"
  exit 1
fi

# Success path: list the nodes, clean up, then offer to label the new node.
echo -e "${GREEN}βœ… Jetson Nano successfully joined the cluster!${NC}"
kubectl get nodes
# Cleanup
rm -rf "${TMPDIR}"
echo -e "${GREEN}πŸŽ‰ Jetson Nano cluster join process completed successfully!${NC}"
echo -e "${YELLOW}πŸ’‘ You can now label the node for GPU workloads:${NC}"
echo "kubectl label node ${JETSON_HOSTNAME} node-type=gpu"
echo "kubectl label node ${JETSON_HOSTNAME} gpu=tegra210"
echo
read -r -n 1 -p "Would you like to apply these GPU labels now? [y/N] "
echo
case "${REPLY}" in
  y | Y)
    echo -e "${YELLOW}🏷️ Applying GPU labels...${NC}"
    kubectl label node "${JETSON_HOSTNAME}" node-type=gpu
    kubectl label node "${JETSON_HOSTNAME}" gpu=tegra210
    echo -e "${GREEN}βœ… GPU labels applied successfully!${NC}"
    ;;
esac
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment