Skip to content

Instantly share code, notes, and snippets.

@fbettag
Last active August 5, 2018 18:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fbettag/59cbec4203bd280d0df25dc9de4f9c83 to your computer and use it in GitHub Desktop.
Save fbettag/59cbec4203bd280d0df25dc9de4f9c83 to your computer and use it in GitHub Desktop.
SmartOS Zone Migration script that takes care of installing the required image on the target compute node. Just keep this script on the headnode.
#!/bin/sh
# Franz Bettag <franz@bett.ag> - August 2018
# This script migrates Zones/KVMs on Joyent SmartOS from one node to another.
# First it creates a new VM on the target from the same config as the original VM.
# It leaves out the nics section as to not run into any conflicts with the currently running machine.
# We then zfs send an initial snapshot of the original VM to the target.
# Then we shutdown the original VM and create another snapshot.
# After the last snapshot has been transferred, we update the new VM with nics and start it up.
# Cleanup of course.
#
# Usage: run on the headnode with the VM's uuid and the target node's IP.
if [ $# -ne 2 ]; then
echo "Usage: $0 <vm-uuid> <target-host-ip>"
exit 1
fi
vm_uuid="$1"
target_ip="$2"
# pool that holds all zone datasets on SmartOS compute nodes
zpool=zones
# NOTE: tilde is not expanded inside double quotes; use ${HOME} explicitly so
# the key path is resolved by the shell (ssh tolerates a literal ~, but this
# is unambiguous). This command only ever runs on the headnode.
ssh_cmd="ssh -o StrictHostKeyChecking=no -i ${HOME}/.ssh/sdc.id_rsa"
ssh_target="${ssh_cmd} ${target_ip}"
# scratch directory (must exist on source and target) for compressed zfs streams
tmp_path=/zones/archive
# first we find out on which compute node (uuid) the vm runs on
# (VMAPI records the server_uuid of the node currently hosting the VM)
source_uuid=$(sdc-vmapi --no-headers /vms/${vm_uuid} | json .server_uuid)
if [ -z "${source_uuid}" ]; then
echo "Source server for VM ${vm_uuid} not found!"
exit 1
fi
echo "Source server VM: ${source_uuid}"
# then we find the ip of the source compute node
# NOTE(review): this scrapes the raw CNAPI JSON with grep/awk rather than the
# `json` tool: it takes the first non-empty ip4addr line and strips the
# surrounding quotes and trailing comma. Fragile if CNAPI output changes.
source_ip=$(sdc-cnapi --no-headers /servers/${source_uuid} | grep ip4addr | grep -v '""' | awk '{gsub(/[\",]/,""); print $2}')
if [ -z "${source_ip}" ]; then
echo "Source server for UUID ${source_uuid} not found!"
exit 1
fi
echo "Source server IP: ${source_ip}"
# all further commands on the source node run through this ssh invocation
ssh_source="${ssh_cmd} ${source_ip}"
# here we look up the target compute node's uuid by asking it for its sysinfo
target_uuid=$(${ssh_target} sysinfo | json .UUID)
if [ -z "${target_uuid}" ]; then
# the old message printed the (empty) ${target_uuid}; name the IP we tried instead
echo "Could not determine server UUID of target ${target_ip}!"
exit 1
fi
echo "Target server UUID: ${target_uuid}"
echo "Target server IP: ${target_ip}"
# find out the image_uuid of the vm (needed to pre-install it on the target)
image_uuid=$(sdc-vmapi --no-headers "/vms/${vm_uuid}" | json .image_uuid)
if [ -z "${image_uuid}" ]; then
echo "Image for VM ${vm_uuid} not found!"
exit 1
fi
# ensure the image gets installed on the target compute node
# re-enable the image in IMGAPI first, in case it was disabled/retired
out=$(sdc-imgadm enable ${image_uuid})
if [ $? -ne 0 ]; then
echo "Failed to find and enable image ${image_uuid}!"
echo $out
exit 1
fi
echo "Ensuring Image ${image_uuid} is on target"
# CNAPI's ensure-image endpoint installs the image on the target if missing
image_import_url="/servers/${target_uuid}/ensure-image?image_uuid=${image_uuid}"
job_id=$(sdc-cnapi --no-headers "${image_import_url}" -X POST | json .id)
if [ -z "${job_id}" ]; then
echo "No task-id for image-import created!"
exit 1
fi
echo "Waiting for image-deployment on new target compute node"
# blocking wait on the CNAPI task; result is raw JSON we scan for keywords
j=$(sdc-cnapi --no-headers "/tasks/${job_id}/wait")
if [ `echo "$j" | grep -c finish` -gt 0 ]; then
# task finished; "error" anywhere in the payload means the import failed
if [ `echo "$j" | grep -c error` -gt 0 ]; then
echo "Failed to import image on target!"
# extract the human-readable message line from the task JSON
sdc-cnapi --no-headers "/tasks/${job_id}" | awk '/"message":/ {print "{"$0"}"}' | json message
exit 1
fi
fi
# Collect the VM's zfs datasets (full paths like zones/<uuid>): plain zones
# expose a top-level zfs_filesystem, KVM instances carry one zfs_filesystem
# per entry in their disks array. Join the two lists with an explicit
# newline: the previous `disks+=` was a bashism under #!/bin/sh and, even in
# bash, appended with no separator, gluing the last path of the first list
# to the first path of the second whenever both were non-empty.
zone_fs=$(${ssh_source} "vmadm get ${vm_uuid} | json disks zfs_filesystem")
kvm_fs=$(${ssh_source} "vmadm get ${vm_uuid} | json disks | json -a zfs_filesystem")
# command substitution strips trailing newlines, so both-empty still yields ""
disks=$(printf '%s\n%s\n' "${zone_fs}" "${kvm_fs}")
if [ -z "${disks}" ]; then
echo "No disks found.. huh?"
exit 1
fi
# Roll the migration back: restart the VM on the source node, destroy every
# snapshot created during the migration, and remove the half-created VM plus
# any transferred dump files from the target. Always exits the script with 1.
# Relies on globals: vm_uuid, disks, temp_prefix, ssh_source, ssh_target.
restoreAndExit() {
echo "Failed to migrate VM ${vm_uuid}, starting it back up on source"
${ssh_source} "vmadm start ${vm_uuid} && rm ${temp_prefix}*"
echo "Removing obsolete data for ${vm_uuid} on target"
for disk in $disks; do
# use ';' rather than '&&' so the @last snapshot is still destroyed even
# when @first was never created (we can fail before either snapshot exists)
${ssh_source} "zfs destroy -Rf ${disk}@first; zfs destroy -Rf ${disk}@last"
${ssh_target} "zfs destroy -Rf ${disk}"
done
${ssh_target} "vmadm delete ${vm_uuid}; rm ${temp_prefix}*"
exit 1
}
# common prefix of all dump files for this VM; used by the cleanup paths
temp_prefix=${tmp_path}/${vm_uuid}
# initial snapshotting and transfer
for disk in $disks; do
# disks holds full dataset paths (e.g. zones/<uuid>); keep only the last part
disk=$(basename ${disk})
temp_name="${tmp_path}/${disk}-first.zfs.gz"
echo "Snapshotting disk ${disk}@first"
# recursive snapshot while the VM is still running (first, bulk copy)
${ssh_source} "zfs snapshot -r ${zpool}/${disk}@first" || restoreAndExit
echo "Saving snapshot to ${temp_name}"
# -R: replicate descendants, -p: preserve properties; gzip'd to a dump file
${ssh_source} "zfs send -Rp ${zpool}/${disk}@first | gzip > ${temp_name}" || restoreAndExit
echo "Copying snapshot to target ${target_ip}:${temp_name}"
# scp runs on the source node, so ~ and the key path resolve there
${ssh_source} "scp -o StrictHostKeyChecking=no -i ~/.ssh/sdc.id_rsa ${temp_name} ${target_ip}:${temp_name}" || restoreAndExit
done
echo "Copying VM configuration to target ${target_ip}"
# Replicate the VM configuration on the target. The nics array is blanked
# out (json -e 'nics = []') so the copy cannot clash on the network with the
# still-running original; nics are ported over after the final sync.
if ! ${ssh_source} vmadm get ${vm_uuid} | json -e 'nics = []' | ${ssh_target} vmadm create; then
exit 1
fi
# Force-stop the freshly created VM and throw away the datasets vmadm
# provisioned for it — they will be replaced by the streamed snapshots.
if ! ${ssh_target} "vmadm stop ${vm_uuid} -F"; then
exit 1
fi
for ds in $disks; do
${ssh_target} "zfs destroy -Rf ${ds}"
done
# initial import
for disk in $disks; do
# strip the pool prefix; the target re-adds its own ${zpool}/ below
disk=$(basename ${disk})
temp_name="${tmp_path}/${disk}-first.zfs.gz"
echo "Restoring snapshot ${disk}@first on target ${target_ip}"
${ssh_target} "gzcat ${temp_name} | zfs receive ${zpool}/${disk}" || restoreAndExit
# somehow smartos modifies the zone later on.. so we lock the zfs set
${ssh_target} "zfs set readonly=on ${zpool}/${disk}" || restoreAndExit
# discard anything written after the receive so the incremental applies cleanly
${ssh_target} "zfs rollback ${zpool}/${disk}@first" || restoreAndExit
done
# port over network config
# build an add_nics update payload from the original VM's nics array
nic_config="{\"add_nics\":$(${ssh_source} vmadm get ${vm_uuid} | json nics)}"
echo "${nic_config}" | ${ssh_target} vmadm update ${vm_uuid} || exit 1
# stop the original vm
echo "Stopping VM ${vm_uuid}"
# retried once on failure; a second stop on an already-stopping VM is harmless
${ssh_source} "vmadm stop ${vm_uuid}" || ${ssh_source} "vmadm stop ${vm_uuid}"
echo "Waiting for VM ${vm_uuid} to stop"
# poll on the source node every 250ms until vmadm reports the VM as stopped
${ssh_source} "while :; do if [ \$(vmadm list -H uuid=${vm_uuid} | grep -c stopped) -gt 0 ]; then break; fi; sleep .25; done"
# snapshot again and transfer deltas
for disk in $disks; do
# disks holds full dataset paths; reduce to the dataset name under ${zpool}
disk=$(basename ${disk})
temp_name="${tmp_path}/${disk}-last.zfs.gz"
echo "Snapshotting disk ${disk}@last"
# VM is stopped now, so @last captures the final consistent state
${ssh_source} "zfs snapshot -r ${zpool}/${disk}@last" || restoreAndExit
echo "Saving snapshot to ${temp_name}"
# -i first: incremental stream containing only changes between @first and @last
${ssh_source} "zfs send -Rpi first ${zpool}/${disk}@last | gzip > ${temp_name}" || restoreAndExit
echo "Copying snapshot to target ${target_ip}:${temp_name}"
# scp runs on the source node, so the ~ key path resolves remotely there
${ssh_source} "scp -i ~/.ssh/sdc.id_rsa ${temp_name} ${target_ip}:${temp_name}" || restoreAndExit
echo "Restoring snapshot ${disk}@last on target ${target_ip}"
${ssh_target} "gzcat ${temp_name} | zfs receive ${zpool}/${disk}" || restoreAndExit
# unlock the dataset again
${ssh_target} "zfs set readonly=off ${zpool}/${disk}" || restoreAndExit
# cleanup
echo "Removing obsolete snapshot ${disk}@first on source ${source_ip}"
${ssh_source} "zfs destroy -Rf ${zpool}/${disk}@first" || restoreAndExit
echo "Removing obsolete snapshot ${disk}@last on source ${source_ip}"
${ssh_source} "zfs destroy -Rf ${zpool}/${disk}@last" || restoreAndExit
echo "Removing obsolete snapshot ${disk}@first on target ${target_ip}"
${ssh_target} "zfs destroy -Rf ${zpool}/${disk}@first" || restoreAndExit
echo "Removing obsolete snapshot ${disk}@last on target ${target_ip}"
${ssh_target} "zfs destroy -Rf ${zpool}/${disk}@last" || restoreAndExit
done
echo "Starting VM ${vm_uuid} on target ${target_ip}"
# last point where a rollback is possible; after this the source VM is deleted
${ssh_target} "vmadm start ${vm_uuid}" || restoreAndExit
echo "Removing obsolete VM-data from source ${source_ip}"
${ssh_source} "vmadm delete ${vm_uuid}; rm ${temp_prefix}*" || exit 1
# drop images on the source that no longer back any VM
${ssh_source} "imgadm vacuum -f" || exit 1
echo "Removing obsolete VM-data from target ${target_ip}"
${ssh_target} "rm ${temp_prefix}*" || exit 1
echo "Done!"
exit 0
@fbettag
Copy link
Author

fbettag commented Aug 3, 2018

latest version gets rid of the while loop and just calls the blocking wait api :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment