Skip to content

Instantly share code, notes, and snippets.

@c4milo
Created March 19, 2024 19:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save c4milo/9c34c302b3e1403a3a7d20b60609397d to your computer and use it in GitHub Desktop.
Save c4milo/9c34c302b3e1403a3a7d20b60609397d to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# Bootstraps local NVMe SSD storage for a Redpanda node: RAID0 assembly,
# XFS formatting, systemd mount unit, and rpk kernel/disk tuning.
# Before contributing to this file, please make sure your IDE has
# the Shellcheck plugin installed.
# It is also important to keep this script under 1MB since we store it
# in a k8s configmap.
set -o errexit
set -o nounset
set -o pipefail
set -x
# Export so child apt-get processes actually see it; a plain assignment
# only sets a shell variable and never reaches subprocess environments.
export DEBIAN_FRONTEND=noninteractive
readonly MOUNT_POINT=/var/lib/redpanda
# Escaped systemd unit name for MOUNT_POINT, e.g. var-lib-redpanda.mount.
MOUNT_UNIT=$(/usr/bin/systemd-escape -p --suffix=mount "$MOUNT_POINT")
readonly MOUNT_UNIT
readonly RAID_CHUNK_SIZE=4 #kibibytes
readonly RAID_DEVICE=/dev/md0
# Ensure required packages are installed.
# The script uses only packages already installed in the OS image
# to avoid running apt-get update here to make the bootstrap faster.
function check_installed_packages() {
echo "Checking required packages are installed."
# curl: repo setup script; mdadm: RAID; util-linux: lsblk/blkid/wipefs;
# xfsprogs: mkfs.xfs/xfs_info. All are expected to be preinstalled in the
# OS image, so this should be a fast no-op (no apt-get update beforehand).
apt-get install -y curl mdadm util-linux xfsprogs
# TODO: remove comments once Redpanda's Debian repo is fixed.
# See: https://redpandadata.slack.com/archives/C02LZGSS66M/p1709590279579899
# echo "Installing redpanda APT package."
# NOTE(review): piping curl into bash executes remote code as root; under
# 'set -o pipefail' a failed download aborts the script rather than running
# a truncated installer.
curl -1sLf 'https://dl.redpanda.com/public/redpanda/setup.deb.sh' | bash
apt-get update -y
apt-get install -y redpanda
}
# Creates, enables, and starts a systemd mount unit mounting a block device
# at the given mount point as XFS.
# Arguments:
#   $1 - block device (e.g. /dev/md0 or a single NVMe device)
#   $2 - mount point directory (e.g. /var/lib/redpanda)
# Globals:
#   MOUNT_UNIT (read) - escaped unit name derived from MOUNT_POINT
function setup_mount_unit() {
local device="$1"
local mount_point="$2"
local device_uuid=""
# Mount by UUID so the unit keeps working if the kernel renames the device
# across reboots or host migrations.
device_uuid=$(blkid --match-tag UUID --output value "$device")
echo "Creating systemd mount unit $MOUNT_UNIT to mount $device at ${mount_point}."
tee "/etc/systemd/system/${MOUNT_UNIT}" <<-EOF
[Unit]
Description=Redpanda Data Directory
Before=kubelet.service
After=local-fs.target
DefaultDependencies=no
[Mount]
What=/dev/disk/by-uuid/${device_uuid}
Where=${mount_point}
Type=xfs
Options=defaults,noatime,nodiratime
[Install]
WantedBy=local-fs.target
EOF
systemctl daemon-reload
# 'enable --now' both enables and starts the unit, so no separate
# 'systemctl start' is needed afterwards.
systemctl enable --now "${MOUNT_UNIT}"
}
# Runs the Redpanda production tuners against the data mount, then removes
# the redpanda package (only rpk's host-tuning side effects are wanted here).
# Globals:
#   MOUNT_POINT (read) - data directory the disk tuners operate on
function tune_node() {
echo "Running redpanda autotuner."
# Quote the mount point so the path cannot word-split or glob (SC2086).
rpk mode prod && rpk redpanda tune all -r "$MOUNT_POINT"
apt purge -y redpanda
# Since rpk does not tune inotify yet
# ref: https://github.com/redpanda-data/redpanda/issues/14588
# Some of these may be also set by AKS in /etc/sysctl.d/60-CIS.conf
# and /etc/sysctl.d/999-sysctl-aks.conf. So, we want to make sure ours
# have the highest priority.
tee /etc/sysctl.d/9999-redpanda.conf <<-EOF
fs.inotify.max_queued_events = 16384
fs.inotify.max_user_instances = 1024
fs.inotify.max_user_watches = 1048576
EOF
systemctl restart systemd-sysctl.service
}
# Formats the given block device as XFS with a 4K block size, unless it
# already carries an XFS filesystem, in which case it is left untouched.
function maybe_format_xfs() {
local device="$1"
# xfs_info succeeds only when the device already holds an XFS filesystem.
if ! xfs_info "$device" >/dev/null 2>&1; then
echo "Formatting device $device as XFS using 4K block size..."
mkfs.xfs -b size=4096 -K "$device"
else
echo "Device $device is already formatted as XFS. Skipping."
fi
}
# Assembles all detected local SSDs into a RAID0 array at RAID_DEVICE,
# discarding and wiping each member first. No-op if the array already
# exists (idempotent re-runs).
# Globals:
#   RAID_DEVICE, RAID_CHUNK_SIZE (read)
#   SSD_LIST, SSD_COUNT (read) - populated by main()
function maybe_setup_raid() {
local device discard
if mdadm --detail "$RAID_DEVICE" >/dev/null 2>&1; then
echo "$RAID_DEVICE device already exists. Skipping."
return 0
fi
for device in "${SSD_LIST[@]}"; do
discard=$(cat "/sys/block/$(basename "${device}")/queue/discard_granularity")
# A non-zero granularity means the device supports TRIM/discard.
if [[ $discard -ne 0 ]]; then
echo "Discarding $device blocks..."
blkdiscard "$device"
fi
echo "Wiping partition-table signatures from $device..."
wipefs -a "$device"
done
udevadm settle
echo "Creating RAID0 array $RAID_DEVICE from ${SSD_LIST[*]}."
# Pass members as a quoted array expansion instead of relying on
# word-splitting of ${SSD_LIST[*]} (fixes SC2048/SC2086, so the
# shellcheck disable is no longer needed).
mdadm --create "$RAID_DEVICE" --verbose --force --run \
--level=0 \
--homehost any \
--chunk "${RAID_CHUNK_SIZE}" \
--raid-devices="$SSD_COUNT" "${SSD_LIST[@]}"
echo "RAID0 array $RAID_DEVICE has been created using ${SSD_LIST[*]}."
echo "Wiping partition-table signatures from $RAID_DEVICE..."
wipefs -a "$RAID_DEVICE"
udevadm settle
# update mdadm.conf and initramfs so the RAID device is correctly
# re-assembled on machine reboots or cloud provider host migrations.
echo "Updating initramfs image..."
mdadm --detail --scan >>/etc/mdadm/mdadm.conf
update-initramfs -u
}
# Entry point: detects local NVMe SSDs, builds a RAID0 array when more than
# one is present, formats the resulting device as XFS, mounts it at
# MOUNT_POINT via a systemd unit, and runs the Redpanda tuners.
# Exits 0 early when the mount unit is already active (idempotent re-run);
# exits 1 when no local NVMe SSD is found.
function main() {
if systemctl is-active --quiet "$MOUNT_UNIT"; then
echo "$MOUNT_POINT is properly mounted. No disk setup required."
# Quote the path so find cannot word-split or glob it (SC2086).
echo -e "\n$(find "$MOUNT_POINT" -maxdepth 1 -ls)\n"
exit 0
fi
# Check if provisioning already happened and Local SSD(s) are mounted.
check_installed_packages
udevadm settle
# Azure local NVMe disks report the model "Microsoft NVMe Direct Disk".
# '|| true' keeps errexit+pipefail from aborting when grep matches nothing;
# the zero-match case is handled by the SSD_COUNT check below.
mapfile -t SSD_LIST < <(lsblk -lpo NAME,MODEL | grep "Microsoft NVMe Direct Disk" | cut -d " " -f 1 || true)
SSD_COUNT=${#SSD_LIST[@]}
case $SSD_COUNT in
"0")
echo "Local NVMe SSD disk(s) not found. Exiting."
exit 1
;;
"1")
DEVICE="${SSD_LIST[0]}"
echo "Found single Local NVMe SSD: $DEVICE."
;;
*)
DEVICE=$RAID_DEVICE
echo "Found $SSD_COUNT Local NVMe SSD disks: ${SSD_LIST[*]}."
maybe_setup_raid
;;
esac
udevadm settle
maybe_format_xfs "$DEVICE"
udevadm settle
setup_mount_unit "$DEVICE" "$MOUNT_POINT"
# Finally tune the node using rpk tuners
tune_node
}
# Execution flow starts here. Forward any script arguments to main so
# positional parameters are available if main ever needs them.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment