-
-
Save andrew-blake/9739a3b054fe1e733edf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# this script will attempt to detect any ephemeral drives on an EC2 node and create a RAID-0 stripe.
# It should be run once to setup the RAID, and automatically mount on reboot without further intervention.
#
# Beware, This script is NOT fully idempotent.
#
# Strict mode lives in 'set', not the shebang: options on the shebang line
# are silently dropped when the script is invoked as 'bash script.sh'.
set -ue

# Where to mount the RAID-0 array (first positional arg, default /mnt).
mount_point=${1:-"/mnt"}
METADATA_URL_BASE="http://169.254.169.254/latest"

# Non-interactive install of required tools (mdadm for RAID, xfsprogs for mkfs.xfs).
DEBIAN_FRONTEND=noninteractive apt-get -y install mdadm curl xfsprogs
# Configure Raid - take into account xvdb or sdb
# Ask df for the filesystem backing / directly instead of taking line 2 of the
# full listing (which only works if root happens to be listed first); the old
# 'grep -v grep' was a no-op since nothing in df output matches 'grep'.
root_drive=$(df / | awk 'NR==2 {print $1}')

if [ "${root_drive}" == "/dev/xvda1" ]; then
  echo "Detected 'xvd' drive naming scheme (root: ${root_drive})"
  DRIVE_SCHEME='xvd'
else
  echo "Detected 'sd' drive naming scheme (root: ${root_drive})"
  DRIVE_SCHEME='sd'
fi
# figure out how many ephemerals we have by querying the metadata API, and then:
#  - convert the drive name returned from the API to the hosts DRIVE_SCHEME, if necessary
#  - verify a matching device is available in /dev/
drives=""
ephemeral_count=0
# '|| true' is required: when the instance has no ephemeral mappings, grep
# exits 1 and 'set -e' would abort the whole script with status 1 before the
# graceful "No ephemeral disk detected" exit-0 path below could ever run.
ephemerals=$(curl --silent "${METADATA_URL_BASE}/meta-data/block-device-mapping/" | grep ephemeral || true)

for e in ${ephemerals}; do
  echo "Probing ${e} .."
  device_name=$(curl --silent "${METADATA_URL_BASE}/meta-data/block-device-mapping/${e}")
  # might have to convert 'sdb' -> 'xvdb'
  device_name=$(echo "${device_name}" | sed "s/sd/${DRIVE_SCHEME}/")
  device_path="/dev/${device_name}"
  echo "Mapped ${e} to ${device_path}"
  # test that the device actually exists since you can request more ephemeral drives than are available
  # for an instance type and the meta-data API will happily tell you it exists when it really does not.
  if [ -b "${device_path}" ]; then
    echo "Detected ephemeral disk: ${device_path}"
    drives="${drives} ${device_path}"
    ephemeral_count=$((ephemeral_count + 1))
  else
    echo "Ephemeral disk ${e}, ${device_path} is not present. skipping"
  fi
done

# need at least one disk
if [ "${ephemeral_count}" -eq 0 ]; then
  echo "No ephemeral disk detected. exiting"
  exit 0
fi
# check for existing RAID -- bail out rather than clobber it
if [[ -e /dev/md0 ]]; then
  echo "Existing RAID array detected. exiting"
  exit 0
fi

# unmount any mounted ephemeral disks
# 'drives' is intentionally unquoted: it is a space-separated device list.
for drive in ${drives}; do
  # grep -q used directly as the condition replaces 'cat ... | grep' + [[ $() ]]
  if grep -q "${drive}" /proc/mounts; then
    umount "${drive}"
  fi
done

# umount mount point if mounted
if grep -q "${mount_point}" /proc/mounts; then
  umount "${mount_point}"
fi

# create the new mount point if needed (-p makes this a no-op when it exists)
mkdir -p "${mount_point}"

# overwrite first few blocks in case there is a filesystem, otherwise mdadm will prompt for input
for drive in ${drives}; do
  dd if=/dev/zero of="${drive}" bs=4096 count=1024
done
# Prepare mount point
# NOTE(review): /mnt/md0 is a *directory*, distinct from the /dev/md0 block
# device below, and nothing later in the script appears to use it — confirm
# before removing.
if [[ ! -e /mnt/md0 ]]; then
  mkdir /mnt/md0
fi

# Create /dev/md0
# partprobe may fail harmlessly on devices without partition tables.
partprobe || true
# 'drives' is intentionally unquoted: it must word-split into one argument
# per member device.
mdadm --create --verbose /dev/md0 --level=0 -c256 --force --raid-devices="${ephemeral_count}" ${drives}
# Large read-ahead on the array for better sequential throughput.
blockdev --setra 65536 /dev/md0

# configure mdadm.conf so the array is reassembled identically on reboot
/bin/cat <<EOF > /etc/mdadm/mdadm.conf
# mdadm.conf
#
# Please refer to mdadm.conf(5) for information about this file.
#
# by default (built-in), scan all partitions (/proc/partitions) and all
# containers for MD superblocks. alternatively, specify devices to scan, using
# wildcards if desired.
DEVICE partitions containers

# auto-create devices with Debian standard permissions
CREATE owner=root group=disk mode=0660 auto=yes

# automatically tag new arrays as belonging to the local system
HOMEHOST <system>

# instruct the monitoring daemon where to send mail alerts
MAILADDR root@localhost

# definitions of existing MD arrays
EOF
# append the ARRAY line(s) describing the array we just created
mdadm -Es >> /etc/mdadm/mdadm.conf
# Tuning for ec2 instance-store vols: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html#disk-performance
#
# "When configuring software-based RAID, make sure to change the minimum reconstruction speed"
echo $((30*1024)) > /proc/sys/dev/raid/speed_limit_min
#
# "Because of the way that Amazon EC2 virtualizes disks, the first write to any location on a standard instance store
# volume performs more slowly than subsequent writes. For most applications, amortizing this cost over the lifetime of
# the instance is acceptable. However, if you require high disk performance, we recommend that you pre-warm your drives
# by writing once to every drive location before production use."
#
# "Note: The I2 high I/O instance type uses direct-attached solid state drives that provide maximum performance at launch
# time, without pre-warming."
#
# Pre-warm instance-store vols
#  - This is VERY SLOW
#  - Repeat until per-block write performance stabilizes across runs (?)
#  - Try multiple instances to detect and throw out bad apples...
#time pv -terb /dev/zero -s "`blockdev --getsize64 /dev/md0`" | dd of=/dev/md0 bs=1M || true

# Create + mount fs
mkfs -t xfs /dev/md0
#mount -t xfs -o noatime /dev/md0 ${mount_point}

# Remove old /mnt (xvdb/sdb) entry from fstab.
# We run as root, so the old 'chmod 777 /etc/fstab' was both unnecessary and a
# security hole: it left fstab world-writable permanently.
sed -i "/${DRIVE_SCHEME}b/d" /etc/fstab

# Make raid appear on reboot
# only insert once into fstab; -F matches the entry as a fixed string so the
# '/' characters in it are not treated as regex syntax.
# NOTE(review): 'nobootwait' is an Upstart-era option; systemd hosts want
# 'nofail' instead — kept as-is to preserve existing behavior.
fstab_entry="/dev/md0 ${mount_point} xfs defaults,noatime,nobootwait 0 0"
if ! grep -qF -- "${fstab_entry}" /etc/fstab; then
  echo "${fstab_entry}" | tee -a /etc/fstab
fi

mount "${mount_point}"
update-initramfs -u
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment