Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Ceph recovery
#!/bin/bash
# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Select the OSD IDs to process from the name of the host this runs on.
# Only server3 sets OSD_ID_LIST; every other host aborts with exit status 1.
case "$(hostname)" in
  server1)
    # FIXME: lists previously used on this host, kept for reference:
    #   OSD_ID_LIST=30
    #   OSD_ID_LIST="0 2 3 4 5 18 20 28 29 30 31"
    echo "Error: server1 has been finished." >&2
    exit 1
    ;;

  server2)
    # FIXME: list previously used on this host, kept for reference:
    #   OSD_ID_LIST="6 7 8 9 10 11 32 33 34"
    echo "Error: server2 has been finished." >&2
    exit 1
    ;;

  server3)
    # FIXME: alternative single-OSD list, currently disabled:
    #   OSD_ID_LIST="12"
    OSD_ID_LIST="13 14 15 16 17 23 24 25 35 36 37"
    ;;

  *)
    echo "Error: this is an unsupported host." >&2
    exit 1
    ;;
esac
# Refuse to continue unless /srv/backup is a mounted filesystem.
mountpoint /srv/backup >/dev/null 2>&1 || {
  echo "Error: /srv/backup is not a mountpoint." >&2
  exit 1
}

# Directory that receives the partition images: owner root, group synpro,
# mode 770.
recovery_backup_dir=/srv/backup/ceph_recovery
sudo mkdir -p "${recovery_backup_dir}"
sudo chown root:synpro "${recovery_backup_dir}"
sudo chmod 770 "${recovery_backup_dir}"

# Mount point used while each OSD data filesystem is being rebuilt.
sudo mkdir -p /mnt/ceph-recovery
# Rebuild the data partition of every OSD listed in OSD_ID_LIST.
#
# For each OSD ID the loop:
#   1. verifies that ceph reports the OSD as down,
#   2. resolves the OSD's data partition through its UUID,
#   3. collects the auth key and the block-device PARTUUID,
#   4. images the partition into /srv/backup/ceph_recovery (an existing
#      image is never overwritten),
#   5. zeroes the partition and recreates an XFS filesystem with that UUID,
#   6. populates the filesystem from a template OSD directory and then
#      regenerates fsid, whoami, keyring, block and block_uuid,
#   7. unmounts and runs "ceph-volume simple scan" on the partition.
#
# Everything that can be looked up without touching the disk happens before
# step 4, so a failed lookup aborts the script before any data is destroyed.
#
# Environment:
#   TEMPLATE_OSD_DIR  directory of an OSD whose metadata files are used as a
#                     template; this path needs to be adjusted on each server
#                     accordingly (defaults to the historical hard-coded one).

TEMPLATE_OSD_DIR="${TEMPLATE_OSD_DIR:-/mnt/ceph-data/var/lib/ceph/osd/ceph-34}"
BACKUP_DIR=/srv/backup/ceph_recovery
RECOVERY_MNT=/mnt/ceph-recovery

# Checked through sudo because the directory may not be readable by the
# invoking user.
if ! sudo test -d "${TEMPLATE_OSD_DIR}" ; then
  echo "Error: template OSD directory ${TEMPLATE_OSD_DIR} does not exist." >&2
  exit 1
fi

# OSD_ID_LIST is a space-separated list; it is left unquoted on purpose so
# that it is split into individual IDs.
for OSD_ID in ${OSD_ID_LIST} ; do
  echo "Executing for OSD ID ${OSD_ID}"

  # Capture the output before matching: piping straight into `grep -q` can
  # kill the producer with SIGPIPE once grep has found its match, and with
  # `pipefail` that would be misreported as "OSD is not down".
  if ! DOWN_TREE="$(sudo ceph osd tree down)" ; then
    echo "Error: unable to query the ceph OSD tree." >&2
    exit 1
  fi
  if ! grep -Eq "osd\.${OSD_ID} [[:space:]]+down" <<<"${DOWN_TREE}" ; then
    echo "Error: ceph OSD ${OSD_ID} is not down, Exiting." >&2
    exit 1
  fi

  UUID="$(sudo ceph --format json osd dump \
    | jq -r --argjson id "${OSD_ID}" '.osds[] | select(.osd == $id) | .uuid')"
  if [[ -z "${UUID}" || "${UUID}" == "null" ]] ; then
    echo "Error: could not determine the UUID of OSD ${OSD_ID}." >&2
    exit 1
  fi

  RAW_DEVICE="$(readlink -f "/dev/disk/by-partuuid/${UUID}")"
  # Never hand anything but a real block device to dd/mkfs below.
  if ! [[ -b "${RAW_DEVICE}" ]] ; then
    echo "Error: ${RAW_DEVICE} (OSD ${OSD_ID}) is not a block device." >&2
    exit 1
  fi
  BASE_DEV="$(basename "${RAW_DEVICE}")"
  BACKUP_FILE="${BACKUP_DIR}/${BASE_DEV}_${UUID}.dd"

  # Gather everything needed to rebuild the OSD directory *before* the
  # partition is wiped.
  # stderr of `ceph auth get` is discarded, as in the original invocation.
  KEY="$(sudo ceph auth get "osd.${OSD_ID}" -f json 2>/dev/null | jq -r '.[] | .key')"
  if [[ -z "${KEY}" || "${KEY}" == "null" ]] ; then
    echo "Error: could not determine the auth key of OSD ${OSD_ID}." >&2
    exit 1
  fi
  BLOCK_DEVICE="$(sudo ceph osd metadata "${OSD_ID}" -f json \
    | jq -r '.bluestore_bdev_partition_path')"
  if [[ -z "${BLOCK_DEVICE}" || "${BLOCK_DEVICE}" == "null" ]] ; then
    echo "Error: could not determine the block device of OSD ${OSD_ID}." >&2
    exit 1
  fi
  BLK_DEV_UUID="$(sudo blkid "${BLOCK_DEVICE}" -o value -s PARTUUID)"
  if [[ -z "${BLK_DEV_UUID}" ]] ; then
    echo "Error: could not determine the PARTUUID of ${BLOCK_DEVICE}." >&2
    exit 1
  fi

  if [[ -f "${BACKUP_FILE}" ]] ; then
    echo "NOTE: backup file for ${BACKUP_FILE} exists, not overwriting."
  else
    echo "Creating backup file ${BACKUP_FILE}"
    # Write under a temporary name and rename only after dd succeeded, so an
    # interrupted copy can never be mistaken for a complete backup on the
    # next run (which would then skip the backup and wipe the partition).
    sudo dd if="${RAW_DEVICE}" of="${BACKUP_FILE}.partial" bs=4M conv=fsync
    sudo mv "${BACKUP_FILE}.partial" "${BACKUP_FILE}"
  fi
  if ! [[ -r "${BACKUP_FILE}" ]] ; then
    echo "Error: backup file ${BACKUP_FILE} does not exist." >&2
    exit 1
  fi

  # Zero the whole partition. dd is expected to stop with "No space left on
  # device" once the partition is full, hence the deliberate `|| true`.
  sudo dd if=/dev/zero of="${RAW_DEVICE}" bs=4M || true

  echo "Executing mkfs.xfs -f -i size=2048 -d sunit=512 -d swidth=512 -m uuid=${UUID} ${RAW_DEVICE}"
  sudo mkfs.xfs -f -i size=2048 -d sunit=512 -d swidth=512 -m uuid="${UUID}" "${RAW_DEVICE}"
  sudo mount "${RAW_DEVICE}" "${RECOVERY_MNT}/"

  # Seed the new filesystem from the template OSD; the per-OSD files are
  # overwritten with the correct values right after.
  for file in activate.monmap active block_uuid bluefs ceph_fsid fsid keyring \
    kv_backend magic mkfs_done ready require_osd_release systemd type whoami ; do
    sudo cp -p "${TEMPLATE_OSD_DIR}/${file}" "${RECOVERY_MNT}/"
  done

  echo "Generating fsid file:"
  echo "${UUID}" | sudo tee "${RECOVERY_MNT}/fsid"
  echo "Generating whoami file:"
  echo "${OSD_ID}" | sudo tee "${RECOVERY_MNT}/whoami"
  echo "Generating keyring file:"
  echo "[osd.$OSD_ID]" | sudo tee "${RECOVERY_MNT}/keyring"
  echo " key = ${KEY}" | sudo tee -a "${RECOVERY_MNT}/keyring"
  echo "Generating block symlink:"
  sudo ln -s "/dev/disk/by-partuuid/${BLK_DEV_UUID}" "${RECOVERY_MNT}/block"
  echo "Generating block_uuid file:"
  echo "${BLK_DEV_UUID}" | sudo tee "${RECOVERY_MNT}/block_uuid"

  echo "Unmounting /mnt/ceph-recovery/"
  sudo umount "${RECOVERY_MNT}/"

  echo "Executing ceph-volume simple scan ${RAW_DEVICE}"
  # Best effort, as in the original: a failing scan does not stop the
  # remaining OSDs from being processed.
  sudo ceph-volume simple scan "${RAW_DEVICE}" || true
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment