-
-
Save hrzbrg/708ce96ff613988b9afc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# this script will attempt to detect any ephemeral drives on an EC2 node and create a RAID-0 stripe.
# It should be run once to setup the RAID, and automatically mount on reboot without further intervention.
#
# Beware, This script is NOT fully idempotent.
#
# Strict mode belongs in the body, not the shebang: options given on the
# shebang line (`#!/bin/bash -ue`) are silently dropped when the script is
# invoked as `bash script.sh`.
set -eu

# Mount point for the new RAID filesystem; overridable via the first argument.
mount_point=${1:-"/mnt/storage"}

# EC2 instance metadata service (link-local address, reachable only from the
# instance itself, no credentials required).
METADATA_URL_BASE="http://169.254.169.254/latest"
# NOTE(review): INSTANCEID is not referenced later in this script — presumably
# kept for operator visibility / downstream use; confirm before removing.
INSTANCEID=$(curl --silent "${METADATA_URL_BASE}/meta-data/instance-id")

# Configure Raid - take into account xvdb or sdb.
# The root device name reveals the kernel's drive naming scheme: HVM instances
# expose /dev/xvdX while paravirtual instances expose /dev/sdX.
# (The original `grep -v grep` stage was a no-op — df output never contains
# "grep" — so the pipeline is just df | awk.)
root_drive=$(df -h | awk 'NR==2 {print $1}')

if [ "${root_drive}" = "/dev/xvda1" ]; then
  echo "Detected 'xvd' drive naming scheme (root: ${root_drive})"
  DRIVE_SCHEME='xvd'
else
  echo "Detected 'sd' drive naming scheme (root: ${root_drive})"
  DRIVE_SCHEME='sd'
fi
# figure out how many ephemerals we have by querying the metadata API, and then:
#  - convert the drive name returned from the API to the hosts DRIVE_SCHEME, if necessary
#  - verify a matching device is available in /dev/
drives=""
ephemeral_count=0
ephemerals=$(curl --silent "${METADATA_URL_BASE}/meta-data/block-device-mapping/" | grep ephemeral)

# ${ephemerals} is intentionally unquoted: we want word-splitting on the
# newline-separated mapping names (ephemeral0, ephemeral1, ...).
for e in ${ephemerals}; do
  echo "Probing ${e} .."
  device_name=$(curl --silent "${METADATA_URL_BASE}/meta-data/block-device-mapping/${e}")
  # might have to convert 'sdb' -> 'xvdb'
  device_name=$(echo "${device_name}" | sed "s/sd/${DRIVE_SCHEME}/")
  device_path="/dev/${device_name}"
  # (The old log line here claimed to convert "from ${device_name}" after the
  # variable had already been rewritten, printing the converted name twice.)
  echo "Mapped ${e} to device path ${device_path}"
  # test that the device actually exists since you can request more ephemeral drives than are available
  # for an instance type and the meta-data API will happily tell you it exists when it really does not.
  if [ -b "${device_path}" ]; then
    echo "Detected ephemeral disk: ${device_path}"
    drives="${drives} ${device_path}"
    ephemeral_count=$((ephemeral_count + 1))
  else
    echo "Ephemeral disk ${e}, ${device_path} is not present. skipping"
  fi
done

# need at least one disk to build an array; exiting 0 keeps cloud-init happy.
if [ "${ephemeral_count}" -eq 0 ]; then
  echo "No ephemeral disk detected. exiting"
  exit 0
fi
# check for existing RAID array: bail out rather than clobber one that a
# previous run (or reboot reassembly) already created.
if [ -e /dev/md127 ]; then
  echo "Existing RAID array detected. exiting"
  exit 0
fi

# unmount any mounted ephemeral disks (some AMIs auto-mount ephemeral0 on /mnt).
# ${drives} unquoted on purpose — it is a space-separated device list.
for drive in ${drives}; do
  if grep -q "${drive}" /proc/mounts; then
    umount "${drive}"
  fi
done

# umount mount point if mounted
if grep -q "${mount_point}" /proc/mounts; then
  umount "${mount_point}"
fi

# create the new mount point if needed (mkdir -p is idempotent, so no
# existence check is required)
mkdir -p "${mount_point}"

# overwrite first few blocks in case there is a filesystem, otherwise mdadm will prompt for input
for drive in ${drives}; do
  dd if=/dev/zero of="${drive}" bs=4096 count=1024
done
# Create /dev/md127 — re-read partition tables first so the kernel sees the
# freshly-zeroed devices (best-effort, hence `|| true`).
partprobe || true
# ${drives} must stay unquoted here: mdadm needs each device as its own argument.
mdadm --create --verbose /dev/md127 --level=0 -c256 --force --raid-devices="${ephemeral_count}" ${drives}

# Persist the array definition so mdadm can reassemble it on reboot.
echo DEVICE $drives | tee /etc/mdadm.conf
mdadm --detail --scan | tee -a /etc/mdadm.conf

# Large read-ahead on the array device (value is in 512-byte sectors).
blockdev --setra 65536 /dev/md127

# Tuning for ec2 instance-store vols: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html#disk-performance
#
# "When configuring software-based RAID, make sure to change the minimum reconstruction speed"
echo $((30*1024)) > /proc/sys/dev/raid/speed_limit_min
#
# "Because of the way that Amazon EC2 virtualizes disks, the first write to any location on a standard instance store
# volume performs more slowly than subsequent writes. For most applications, amortizing this cost over the lifetime of
# the instance is acceptable. However, if you require high disk performance, we recommend that you pre-warm your drives
# by writing once to every drive location before production use."
#
# "Note: The I2 high I/O instance type uses direct-attached solid state drives that provide maximum performance at launch
# time, without pre-warming."
#
# Pre-warm instance-store vols
#  - This is VERY SLOW
#  - Repeat until per-block write performance stabilizes across runs (?)
#  - Try multiple instances to detect and throw out bad apples...
#time pv -terb /dev/zero -s "`blockdev --getsize64 /dev/md127`" | dd of=/dev/md127 bs=1M || true

# Create + mount fs
mkfs -t ext4 /dev/md127
# enable writeback mode, this mode will typically provide the best ext4 performance
tune2fs -o journal_data_writeback /dev/md127
# delete has_journal option (instance-store data is disposable, so trading
# crash-consistency for throughput is acceptable here)
tune2fs -O ^has_journal /dev/md127
# fsck is required after removing the journal feature
e2fsck -p -f /dev/md127
# check fs options
# dumpe2fs /dev/md127 | less
mount -t ext4 -o noatime /dev/md127 "${mount_point}"
# Remove old /mnt (xvdb/sdb) entry from fstab | |
chmod 777 /etc/fstab | |
sed -i "/${DRIVE_SCHEME}b/d" /etc/fstab | |
# get udid of new raid | |
udid=$(blkid | grep md127 | awk -F ' ' '{print $2}' | tr -d \") | |
# Make raid appear on reboot | |
# only insert once into fstab | |
fstab_entry="${udid} ${mount_point} ext4 defaults,noatime,nofail 0 0" | |
if ! grep -q "${fstab_entry}" /etc/fstab | |
then | |
echo "${fstab_entry}" | tee -a /etc/fstab | |
fi |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment