Skip to content

Instantly share code, notes, and snippets.

@mattmattox
Created January 4, 2024 22:03
Show Gist options
  • Save mattmattox/c2bd37b5f5c49ef97f648acf6587861c to your computer and use it in GitHub Desktop.
Save mattmattox/c2bd37b5f5c49ef97f648acf6587861c to your computer and use it in GitHub Desktop.
#!/bin/bash
# Rolling patching driver: selects a cluster kubeconfig from the -c option,
# then (further down in this script) processes the cluster's nodes one by one.
#
# Options:
#   -c <cluster>  kubeconfig file name under ~/.kube/mattox/ (required)
#   -h            print usage and exit

# Print usage. The name `help` is kept on purpose: without this definition the
# calls below would run the bash builtin `help`, which prints the list of
# shell builtins instead of anything about this script.
help() {
  cat <<USAGE
Usage: ${0##*/} -c <cluster> [-h]
  -c <cluster>  Cluster to patch; kubeconfig is read from ~/.kube/mattox/<cluster>
  -h            Show this help and exit
USAGE
}

# The leading ':' puts getopts in silent mode, which is what makes the ':'
# (missing argument) arm reachable; unknown options arrive as '?'.
while getopts ":c:h" opt; do
  case "${opt}" in
    c)
      CLUSTER="${OPTARG}"
      ;;
    h)
      help
      exit 0
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      exit 1
      ;;
    *)
      # An invalid option is a usage error, so report it with a failing status.
      echo "Unknown option -${OPTARG}." >&2
      help >&2
      exit 1
      ;;
  esac
done

if [[ -z "${CLUSTER}" ]]; then
  echo "Please specify a cluster name."
  exit 1
fi

# KUBECONFIG is exported for kubectl calls that do not pass --kubeconfig;
# the lowercase copy is used by the calls that pass it explicitly.
export KUBECONFIG="${HOME}/.kube/mattox/${CLUSTER}"
kubeconfig="${KUBECONFIG}"

# Nothing visible in this script relies on the working directory, so a failed
# cd is reported but does not abort the run.
cd "${HOME}/scripts/rolling-patching/" \
  || echo "Warning: could not cd to ${HOME}/scripts/rolling-patching/" >&2
#######################################
# Block until root SSH to a host succeeds (runs `uptime` remotely).
# Retries once per second with no upper bound.
# Globals:   server (read) - used when no argument is given
# Arguments: $1 - host to probe (optional; defaults to ${server})
# Outputs:   progress messages on stdout; remote stdout is discarded
# Returns:   0 once the remote command succeeds
#######################################
check_ssh() {
  local host=${1:-${server}}
  echo "Checking ${host}"
  # ConnectTimeout bounds each probe so a host that is mid-reboot cannot
  # stall a single attempt for the full TCP timeout.
  until ssh -q -o ConnectTimeout=5 -o UserKnownHostsFile=/dev/null \
    -o StrictHostKeyChecking=no "root@${host}" 'uptime' > /dev/null; do
    echo "Trying again..."
    sleep 1
  done
}
echo "Starting patching..."
# Process the cluster one node at a time: drain, patch, release-upgrade,
# reboot, then uncordon once SSH is back.
# Word splitting of the command substitution is intentional: kubectl prints
# one "node/<name>" per line and awk keeps the part after the slash.
for server in $(kubectl --insecure-skip-tls-verify get nodes -o name | awk -F '/' '{print $2}'); do
  if ping -c 1 "${server}"; then
    echo "Server is pingable..."
    echo "Draining node..."
    #kubectl --insecure-skip-tls-verify --kubeconfig ${kubeconfig} cordon ${server}
    # NOTE(review): the drain result is not checked, so a failed drain still
    # proceeds to the upgrade - confirm whether that is intended.
    kubectl --kubeconfig "${kubeconfig}" drain --delete-emptydir-data --ignore-daemonsets "${server}"
    echo "Running apt update and upgrade"
    ~/scripts/prep-a-server "${server}"
    echo "Sleeping for 60 seconds..."
    sleep 60
    check_ssh
    echo "Running do-release-upgrade"
    # sed needs -i here: without it the edited text only goes to stdout and
    # /etc/update-manager/release-upgrades is left unchanged.
    ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no "root@${server}" \
      'sed -i "s/Prompt=.*/Prompt=normal/g" /etc/update-manager/release-upgrades; export DEBIAN_FRONTEND=noninteractive; do-release-upgrade -f DistUpgradeViewNonInteractive; reboot'
    echo "Sleeping for 60 seconds..."
    sleep 60
    check_ssh
    echo "Uncordon node..,"
    kubectl --insecure-skip-tls-verify --kubeconfig "${kubeconfig}" uncordon "${server}"
  else
    echo "Skipping..."
  fi
done
#!/bin/bash
#######################################
# Poll a host until root SSH (key ~/.ssh/id_rsa) succeeds or retries run out.
# Arguments:
#   $1 - host name or IP
#   $2 - SSH port (default 22)
#   $3 - maximum number of attempts (default 900)
#   $4 - seconds between attempts, also used as ConnectTimeout (default 1)
# Outputs:   progress messages on stdout; ssh output is discarded
# Returns:   0 when a connection succeeds, 1 after the last failed attempt
#######################################
wait_for_ssh() {
  local host=$1
  local port=${2:-22}
  local retries=${3:-900}
  local interval=${4:-1}
  # Only used for the timeout message; it ignores time spent inside ssh.
  local timeout=$(( retries * interval ))
  local attempt

  for (( attempt = 0; attempt < retries; attempt++ )); do
    echo "Trying to connect to root@$host..."
    # -p makes the port argument effective; it was previously accepted but
    # never handed to ssh.
    if ssh -q -p "$port" -o ConnectTimeout="$interval" \
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -i ~/.ssh/id_rsa "root@$host" exit >/dev/null 2>&1; then
      echo "SSH is now available for root@$host."
      return 0
    fi
    sleep "$interval"
  done

  echo "Timed out waiting for SSH to become available on $host after ${timeout}s."
  return 1
}
#######################################
# Replace the remote resolver setup: disable and stop systemd-resolved and
# write a static /etc/resolv.conf (search domain plus two nameservers).
# Arguments:
#   $1 - host name or IP
#   $2 - SSH port (default 22)
#   $3 - remote user (default root)
# Outputs:   progress on stdout; a warning on stderr if the ssh session fails
# Returns:   always 0 (best effort; callers do not check the result)
#######################################
fix_dns() {
  local host=$1
  local port=${2:-22}
  local user=${3:-root}
  echo "Configuring DNS settings..."
  # The quoted delimiter sends the script verbatim, with no local expansion.
  # A single `rm -f` removes the file or symlink and stays quiet if it is
  # already gone.
  if ! ssh -o "StrictHostKeyChecking=no" -o "UserKnownHostsFile=/dev/null" -p "$port" "$user@$host" /bin/bash <<'EOF'
systemctl disable systemd-resolved.service
systemctl stop systemd-resolved
rm -f /etc/resolv.conf
echo "search support.tools" > /etc/resolv.conf
echo "nameserver 1.1.1.1" >> /etc/resolv.conf
echo "nameserver 1.0.0.1" >> /etc/resolv.conf
EOF
  then
    echo "Warning: DNS configuration on $host did not complete cleanly." >&2
  fi
  return 0
}
#######################################
# Make sure key-based SSH works for the given user; if it does not, log in as
# "ubuntu" with the same key and copy ubuntu's authorized_keys to root.
# Arguments:
#   $1 - host name or IP
#   $2 - user to try first
#   $3 - private key file passed to ssh -i (omitted from ssh when empty)
# Outputs:   progress messages on stdout; copy failure on stderr
# Returns:   0 if the user can log in or the key was copied, 1 otherwise
#######################################
check_and_copy_ssh_key() {
  local host=$1
  local user=$2
  local keyfile=$3
  local -a ssh_opts=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null)
  if [[ -n "$keyfile" ]]; then
    ssh_opts+=(-i "$keyfile")
  fi

  echo "Checking SSH availability for $user@$host..."
  if ssh -q -o ConnectTimeout=10 "${ssh_opts[@]}" "$user@$host" exit >/dev/null 2>&1; then
    echo "SSH is available for $user@$host."
    return 0
  fi

  echo "SSH authentication failed for $user@$host. Trying with ubuntu user..."
  if ! ssh -q -o ConnectTimeout=10 "${ssh_opts[@]}" "ubuntu@$host" exit >/dev/null 2>&1; then
    echo "SSH authentication failed for both root and ubuntu users on $host."
    return 1
  fi

  echo "SSH authentication succeeded with ubuntu user. Copying SSH key to root..."
  # Use the same key as the probe above (the copy step previously dropped -i)
  # and create /root/.ssh first in case it does not exist yet.
  if ssh "${ssh_opts[@]}" "ubuntu@$host" \
    "sudo mkdir -p /root/.ssh && sudo cp /home/ubuntu/.ssh/authorized_keys /root/.ssh/ && sudo chown root:root /root/.ssh/authorized_keys"; then
    echo "SSH key copied to root user."
    return 0
  fi

  echo "Failed to copy SSH key to root on $host." >&2
  return 1
}
# ---------------------------------------------------------------------------
# Main: prepare the node whose address is given as $1.
# Steps: wait for SSH, regenerate a known SSH host key if present, configure
# DNS, ensure root key access, copy timesyncd.conf, upgrade packages, set the
# time zone, install standard packages, write sysctl settings, turn off
# checksum offloading, reboot, then wait for SSH and configure DNS again.
# Arguments: $1 - node IP or host name (required)
#            $2 - any non-empty value prints the "Skipping" notice below
# ---------------------------------------------------------------------------
if [[ -z "${1:-}" ]]; then
  echo "Missing node IP."
  exit 1
fi
node=$1

if [[ -n "${2:-}" ]]; then
  echo "Skipping do-release-upgrade"
  # NOTE(review): nothing in the visible script reads `skip`; kept in case an
  # external consumer relies on it.
  skip=1
fi

# Options shared by every ssh/scp call below: host keys are neither checked
# nor recorded.
ssh_opts=(-o "StrictHostKeyChecking=no" -o "UserKnownHostsFile=/dev/null")

# Run the script supplied on stdin with /bin/bash as root on the node.
run_remote_script() {
  ssh "${ssh_opts[@]}" "root@${node}" /bin/bash
}

echo "Preparing node $node"
echo "Waiting for SSH..."
# Stop here if the node never becomes reachable; every later step needs SSH.
wait_for_ssh "$node" || exit 1

echo "Resetting SSH host keys if needed..."
# The delimiter is quoted so the md5sum check is evaluated on the NODE. With
# an unquoted delimiter the $(...) was expanded locally and compared this
# machine's host key instead.
# NOTE(review): when the remote side reboots here, the following steps start
# immediately without waiting for the node to come back - confirm.
run_remote_script <<'EOF'
if [[ "$(md5sum /etc/ssh/ssh_host_rsa_key.pub | awk '{print $1}')" == "a94dbf9ac63ed41c4acee41dc920998a" ]]; then
echo "Needed to recreate SSH host keys"
rm /etc/ssh/ssh_host_*
ssh-keygen -A
reboot
fi
EOF

fix_dns "$node"
check_and_copy_ssh_key "$node" root ~/.ssh/id_rsa

echo "Copying over timesyncd.conf..."
scp "${ssh_opts[@]}" ~/scripts/timesyncd.conf "root@${node}:/etc/systemd/timesyncd.conf"

echo "Updating Server..."
# rm -f: the apt proxy drop-in may not exist on every node.
run_remote_script <<'EOF'
rm -f /etc/apt/apt.conf.d/00aptproxy
sed -i -e 's/Prompt=.*/Prompt=normal/g' /etc/update-manager/release-upgrades
export DEBIAN_FRONTEND=noninteractive
apt-get update
apt-get -o Dpkg::Options::="--force-confold" -o Dpkg::Options::=--force-confdef upgrade -q -y --allow-downgrades --allow-remove-essential --allow-change-held-packages
apt-get -o Dpkg::Options::="--force-confold" -o Dpkg::Options::=--force-confdef dist-upgrade -q -y --allow-downgrades --allow-remove-essential --allow-change-held-packages
EOF

echo "Syncing time..."
run_remote_script <<'EOF'
apt install ntpdate -y
timedatectl set-ntp on
timedatectl set-timezone America/Chicago
EOF

echo "Installing standard packages..."
run_remote_script <<'EOF'
apt install -y \
apt-transport-https \
ca-certificates \
curl \
wget \
software-properties-common \
git \
htop \
iotop \
iftop \
nload \
sysstat \
nmon \
nfs-common \
open-iscsi \
net-tools \
tcpdump \
dnsutils \
ceph-common
EOF

echo "Setting up sysctl..."
# Writes the whole file in one go; the resulting content is the same list of
# settings in the same order.
# NOTE(review): net.ipv4.tcp_tw_recycle is believed to be absent from newer
# kernels (4.12+), where sysctl would report it as unknown - confirm.
run_remote_script <<'EOF'
cat > /etc/sysctl.d/20-inotify.conf <<'SYSCTL'
fs.inotify.max_user_instances=8192
fs.inotify.max_user_watches=524288
fs.inotify.max_queued_events=524288
net.ipv4.tcp_max_syn_backlog=2048
net.ipv4.tcp_syncookies=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_fin_timeout=30
net.ipv4.tcp_keepalive_time=1200
net.ipv4.ip_local_port_range=1024 65535
net.ipv4.tcp_max_tw_buckets=1440000
net.ipv4.tcp_max_orphans=3276800
net.ipv4.tcp_mem=786432 1048576 26777216
net.ipv4.tcp_rmem=4096 87380 4194304
net.ipv4.tcp_wmem=4096 65536 4194304
net.core.somaxconn=4096
net.core.netdev_max_backlog=2500
net.core.rmem_max=4194304
net.core.wmem_max=4194304
net.core.rmem_default=262144
net.core.wmem_default=262144
net.ipv4.tcp_slow_start_after_idle=0
net.ipv4.tcp_timestamps=0
net.ipv4.tcp_synack_retries=2
net.ipv4.tcp_syn_retries=2
net.ipv4.tcp_tw_recycle=1
SYSCTL
EOF

echo "Disabling ip checksum offloading..."
# $nic is expanded on the node (quoted delimiter). Interfaces that do not
# exist there just make ethtool print an error, as before.
run_remote_script <<'EOF'
for nic in eth0 ens160 ens192 ens224 flannel.0 flannel.1; do
ethtool -K "$nic" tx-checksum-ip-generic off
ethtool -K "$nic" tx off rx off
done
EOF

echo "Rebooting..."
ssh "${ssh_opts[@]}" "root@${node}" 'reboot'
echo "Sleeping..."
sleep 60
echo "Waiting for SSH..."
wait_for_ssh "$node" || exit 1
fix_dns "$node"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment