#!/bin/bash -ue
#
# This script attempts to detect any ephemeral drives on an EC2 node and create a RAID-0 stripe
# across them. It should be run once to set up the RAID; the array then mounts automatically on
# reboot without further intervention.
#
# Beware: this script is NOT fully idempotent.
#
mount_point=${1:-"/mnt/storage"}
METADATA_URL_BASE="http://169.254.169.254/latest"
# Not used below, but handy for logging/debugging
INSTANCEID=$(curl --silent ${METADATA_URL_BASE}/meta-data/instance-id)
# Configure RAID - take into account xvdb or sdb naming
root_drive=$(df -h | awk 'NR==2{print $1}')
if [ "${root_drive}" == "/dev/xvda1" ]; then
  echo "Detected 'xvd' drive naming scheme (root: ${root_drive})"
  DRIVE_SCHEME='xvd'
else
  echo "Detected 'sd' drive naming scheme (root: ${root_drive})"
  DRIVE_SCHEME='sd'
fi
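# Optional cross-check (not part of the original flow): lsblk lists the kernel's
# block devices directly, which confirms the naming scheme guessed above:
#   lsblk -d -o NAME,SIZE,TYPE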
# figure out how many ephemerals we have by querying the metadata API, and then:
# - convert the drive name returned from the API to the host's DRIVE_SCHEME, if necessary
# - verify a matching device is available in /dev/
drives=""
ephemeral_count=0
ephemerals=$(curl --silent ${METADATA_URL_BASE}/meta-data/block-device-mapping/ | grep ephemeral)
for e in ${ephemerals}; do
  echo "Probing ${e} .."
  device_name=$(curl --silent ${METADATA_URL_BASE}/meta-data/block-device-mapping/${e})
  # might have to convert 'sdb' -> 'xvdb'
  device_name=$(echo ${device_name} | sed "s/sd/$DRIVE_SCHEME/")
  device_path="/dev/${device_name}"
  echo "Mapped ${e} to ${device_path}"
  # test that the device actually exists, since you can request more ephemeral drives than are
  # available for an instance type, and the metadata API will happily tell you a drive exists
  # when it really does not.
  if [ -b ${device_path} ]; then
    echo "Detected ephemeral disk: ${device_path}"
    drives="$drives ${device_path}"
    ephemeral_count=$((ephemeral_count + 1))
  else
    echo "Ephemeral disk ${e}, ${device_path} is not present. Skipping."
  fi
done
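# For illustration only: on an instance type with two ephemeral volumes, the loop
# above typically resolves something like
#   ephemeral0 -> sdb -> /dev/xvdb
#   ephemeral1 -> sdc -> /dev/xvdc
# (device letters vary by instance type and AMI; treat this as a sketch)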
# need at least one disk
if [ "${ephemeral_count}" = 0 ]; then
  echo "No ephemeral disk detected. Exiting."
  exit 0
fi
# check for an existing RAID array
if [[ -e /dev/md127 ]]; then
  echo "Existing RAID array detected. Exiting."
  exit 0
fi
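# To inspect an already-assembled array before deciding to re-run this script:
#   mdadm --detail /dev/md127
#   cat /proc/mdstat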
# unmount any mounted ephemeral disks
for drive in ${drives}; do
  if grep -qs ${drive} /proc/mounts; then
    umount ${drive}
  fi
done
# unmount the mount point if mounted
if grep -qs ${mount_point} /proc/mounts; then
  umount ${mount_point}
fi
# create the mount point if needed (mkdir -p is a no-op if it already exists)
mkdir -p ${mount_point}
# overwrite the first few blocks of each drive in case there is a filesystem on it,
# otherwise mdadm will prompt for input
for drive in ${drives}; do
  dd if=/dev/zero of=${drive} bs=4096 count=1024
done
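# Where util-linux's wipefs is available, it is a more targeted alternative that
# erases only the filesystem signatures (an optional substitution, not part of
# the original script):
#   for drive in ${drives}; do wipefs -a ${drive}; done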
# Create /dev/md127
partprobe || true
mdadm --create --verbose /dev/md127 --level=0 -c256 --force --raid-devices=${ephemeral_count} ${drives}
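# -c256 sets a 256 KiB stripe chunk size; --force tells mdadm to accept the
# requested geometry without question.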
echo DEVICE $drives | tee /etc/mdadm.conf
mdadm --detail --scan | tee -a /etc/mdadm.conf
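# Note: some distributions (e.g. Debian/Ubuntu) read /etc/mdadm/mdadm.conf rather
# than /etc/mdadm.conf; adjust the path above if the array fails to assemble on boot.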
blockdev --setra 65536 /dev/md127
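# The read-ahead value above is in 512-byte sectors: 65536 * 512 B = 32 MiB.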
# Tuning for ec2 instance-store vols: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html#disk-performance
#
# "When configuring software-based RAID, make sure to change the minimum reconstruction speed"
echo $((30*1024)) > /proc/sys/dev/raid/speed_limit_min
#
# "Because of the way that Amazon EC2 virtualizes disks, the first write to any location on a standard instance store
# volume performs more slowly than subsequent writes. For most applications, amortizing this cost over the lifetime of
# the instance is acceptable. However, if you require high disk performance, we recommend that you pre-warm your drives
# by writing once to every drive location before production use."
#
# "Note: The I2 high I/O instance type uses direct-attached solid state drives that provide maximum performance at launch
# time, without pre-warming."
#
# Pre-warm instance-store vols
# - This is VERY SLOW
# - Repeat until per-block write performance stabilizes across runs (?)
# - Try multiple instances to detect and throw out bad apples...
#time pv -terb /dev/zero -s "`blockdev --getsize64 /dev/md127`" | dd of=/dev/md127 bs=1M || true
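# If pv is not installed, plain dd performs the same pre-warm without a progress bar:
#   time dd if=/dev/zero of=/dev/md127 bs=1M || true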
# Create + mount fs
mkfs -t ext4 /dev/md127
# enable writeback mode; this mode will typically provide the best ext4 performance
tune2fs -o journal_data_writeback /dev/md127
# remove the has_journal option, i.e. run ext4 without a journal entirely
# (acceptable here because instance-store data does not survive the instance anyway)
tune2fs -O ^has_journal /dev/md127
# fsck is required after changing journal options (-p: auto-repair, -f: force a check)
e2fsck -p -f /dev/md127
# inspect fs options if desired:
#   dumpe2fs /dev/md127 | less
mount -t ext4 -o noatime /dev/md127 ${mount_point}
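# Quick verification that the array is mounted (optional):
#   df -h ${mount_point}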
# Remove the old /mnt (xvdb/sdb) entry from fstab
# (no chmod needed: this script must run as root, and root can already edit fstab;
# leaving /etc/fstab at its default 0644 avoids making it world-writable)
sed -i "/${DRIVE_SCHEME}b/d" /etc/fstab
# get the UUID of the new RAID array
uuid=$(blkid -s UUID -o value /dev/md127)
# Make the RAID array appear on reboot
# only insert the entry into fstab once
fstab_entry="UUID=${uuid} ${mount_point} ext4 defaults,noatime,nofail 0 0"
if ! grep -q "${fstab_entry}" /etc/fstab; then
  echo "${fstab_entry}" | tee -a /etc/fstab
fi
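# Optional sanity check: "mount -a" mounts every fstab entry that is not already
# mounted, so a malformed entry surfaces now rather than on the next reboot:
#   umount ${mount_point} && mount -a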