Skip to content

Instantly share code, notes, and snippets.

@chirauki
Last active March 29, 2024 15:52
Show Gist options
  • Save chirauki/96ad256e0dbe0d779541126cab9146d9 to your computer and use it in GitHub Desktop.
Save chirauki/96ad256e0dbe0d779541126cab9146d9 to your computer and use it in GitHub Desktop.
KVM migration script
#!/usr/bin/env python
import logging
import argparse
import libvirt
import sys
import os
import subprocess
import paramiko
import xml.etree.ElementTree as ET
from urlparse import urlparse
# Command-line interface: exactly one of --evacuate / --domain has to be
# supplied, and --dest is mandatory in both modes.
parser = argparse.ArgumentParser(description='KVM migration script')

group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('--evacuate', action='store_true', dest='evacuate')
group.add_argument('--domain', help='domain name to migrate', type=str)

# --is-shared is stored as args.sharedds and is False unless given.
parser.add_argument(
    '--is-shared',
    action='store_true',
    dest='sharedds',
    default=False,
)
parser.add_argument(
    '--dest',
    required=True,
    type=str,
    help='qemu destination uri',
)

# Root logging at INFO so the progress messages emitted below are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Module-level default; the functions in this file bind their own local
# ``conn`` and do not reassign this one.
conn = None
def connect_libvirt(uri='qemu:///system'):
    """Open a libvirt connection to *uri* and return it.

    Whatever ``libvirt.open`` produces is handed back unchanged.
    NOTE(review): the Python bindings are documented to raise
    ``libvirt.libvirtError`` on failure rather than return None -- confirm
    before relying on a None result.
    """
    return libvirt.open(uri)
def lookup_domain(domain_name, uri='qemu:///system'):
    """Return the libvirt domain named *domain_name* on the host at *uri*.

    The script cannot continue without the domain, so every failure is
    logged as an error and the process exits with status 1 instead of
    returning.
    """
    # libvirt reports a failed open by raising libvirtError; fold that into
    # the same "no connection" path as a None result so the user gets the
    # intended message instead of a traceback.
    try:
        conn = connect_libvirt(uri)
    except libvirt.libvirtError:
        conn = None
    if conn is None:
        logger.error('Connection to libvirt at %s failed!' % uri)
        sys.exit(1)

    try:
        return conn.lookupByName(domain_name)
    except libvirt.libvirtError:
        # Only libvirt failures are expected here; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        logger.error('Failed to lookup domain "%s" on "%s" Does it exist?' % (domain_name, uri))
        sys.exit(1)
def list_domains(uri='qemu:///system'):
    """Return the domains reported by ``listAllDomains(0)`` on *uri*.

    Any failure to connect or to list is logged as an error and terminates
    the process with status 1.
    """
    # libvirt reports a failed open by raising libvirtError; treat it the
    # same as a None connection so the intended message is shown.
    try:
        conn = connect_libvirt(uri)
    except libvirt.libvirtError:
        conn = None
    if conn is None:
        logger.error('Connection to libvirt at %s failed!' % uri)
        sys.exit(1)

    try:
        return conn.listAllDomains(0)
    except libvirt.libvirtError:
        # Narrowed from a bare except so Ctrl-C and SystemExit pass through.
        logger.error('Failed to list domains in %s' % uri)
        sys.exit(1)
def create_missing_diks_on_dest(domain, dest_uri):
    """Ensure every file-backed disk of *domain* exists on the destination.

    For each ``<disk>`` in the domain XML that has a ``<source file=...>``,
    check over SSH whether that path exists on the host named in *dest_uri*
    and, if it does not, create it there through ``create_missing_disk``.

    Returns a list with the info dict of every disk that had to be created
    (empty when nothing was missing), or None as soon as one creation fails.
    """
    xml = ET.fromstring(domain.XMLDesc(0))

    # Collect only disks backed by a file. A <disk> may have no <source>
    # at all (e.g. an empty CD-ROM drive) or a non-file source; those have
    # nothing to pre-create and previously crashed or probed "None".
    diskfiles = []
    for disk in xml.findall('./devices/disk'):
        source = disk.find('source')
        diskfile = source.get('file') if source is not None else None
        if diskfile:
            diskfiles.append(diskfile)
        else:
            logger.info('VM %s has a disk without a backing file, skipping it' % domain.name())
    if not diskfiles:
        return []

    ssh_host = urlparse(dest_uri).hostname

    # One SSH session serves all disks and is always closed, instead of
    # opening (and leaking) a new session per disk.
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    ssh.connect(ssh_host)
    try:
        disk_info = []
        for diskfile in diskfiles:
            logger.info('VM %s has disk %s' % (domain.name(), diskfile))

            # Check if file exists in remote
            ssh_stdin, ssh_stdout, ssh_stderr = ssh.exec_command('test -f %s' % diskfile)
            if ssh_stdout.channel.recv_exit_status() == 0:
                logger.info('Disk "%s" already exists in destination' % diskfile)
                continue

            inf = create_missing_disk(ssh, diskfile)
            if inf is None:
                logger.error('Failed to create disk %s at %s!' % (diskfile, ssh_host))
                return None
            logger.info('Created disk %s at destination host %s' % (diskfile, ssh_host))
            disk_info.append(inf)
        return disk_info
    finally:
        ssh.close()
def copy_disks_to_dest(domain, dest_uri):
    """Copy every file-backed disk of *domain* to the destination host.

    Each ``<source file=...>`` path found in the domain XML is uploaded via
    SFTP to the same path on the host named in *dest_uri*.

    Returns the list of copied paths, or None as soon as one upload fails.
    """
    xml = ET.fromstring(domain.XMLDesc(0))

    # Only disks backed by a file can be copied; a <disk> without <source>
    # (e.g. an empty CD-ROM drive) or with a non-file source is skipped
    # instead of crashing on a None path.
    diskfiles = []
    for disk in xml.findall('./devices/disk'):
        source = disk.find('source')
        diskfile = source.get('file') if source is not None else None
        if diskfile:
            diskfiles.append(diskfile)
        else:
            logger.info('VM %s has a disk without a backing file, skipping it' % domain.name())
    if not diskfiles:
        return []

    ssh_host = urlparse(dest_uri).hostname

    # A single SSH/SFTP session is shared by all uploads and always closed,
    # instead of opening (and leaking) one per disk.
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    ssh.connect(ssh_host)
    try:
        sftp = paramiko.SFTPClient.from_transport(ssh.get_transport())
        try:
            disk_info = []
            for diskfile in diskfiles:
                logger.info('VM %s has disk %s' % (domain.name(), diskfile))
                try:
                    sftp.put(diskfile, diskfile)
                except Exception:
                    logger.error('Could not copy disk %s to host %s' % (diskfile, ssh_host), exc_info=True)
                    return None
                disk_info.append(diskfile)
            return disk_info
        finally:
            sftp.close()
    finally:
        ssh.close()
def create_missing_disk(ssh_client, disk_path):
    """Create an image at *disk_path* on the remote host if it is absent.

    Size and format are taken from ``get_disk_info(disk_path)`` and the
    image is created remotely with ``qemu-img create``.

    Returns the info dict on success. Returns None when the remote file
    already exists, when no disk info could be obtained, or when the
    ``qemu-img`` command exits non-zero.
    """
    chan_in, chan_out, chan_err = ssh_client.exec_command('test -f %s' % disk_path)
    if chan_out.channel.recv_exit_status() == 0:
        # Remote file is already present: nothing to create.
        return None

    vol_info = get_disk_info(disk_path)
    if vol_info is None:
        return None

    disk_size, disk_format = vol_info['size'], vol_info['format']
    logger.info('Disk %s has size %s and format %s' % (disk_path, disk_size, disk_format))

    create_cmd = 'qemu-img create %s -f %s %s' % (disk_path, disk_format, disk_size)
    chan_in, chan_out, chan_err = ssh_client.exec_command(create_cmd)
    if chan_out.channel.recv_exit_status() != 0:
        return None
    return vol_info
def _find_dir_pool(conn, disk_dir, disk_vol):
    """Return an existing "dir" pool whose target path is *disk_dir*, else None."""
    for candidate in conn.listAllStoragePools(0):
        poolxml = ET.fromstring(candidate.XMLDesc(0))
        # The parsed root *is* the <pool> element, so its type is read
        # directly; find('pool') would search the children and yield None.
        if poolxml.get('type') != 'dir':
            logger.info('Skipping pool %s as it is not of type "dir"' % candidate.name())
            continue
        if poolxml.findtext('target/path') == disk_dir:
            logger.info('Pool %s points to the same dir as disk %s. Using it.' % (candidate.name(), disk_vol))
            return candidate
    return None


def _remove_pool(pool):
    """Best-effort stop and undefine of *pool*; failures are only logged."""
    try:
        # destroy() fails on a pool that is not running, so only stop
        # active pools before removing the definition.
        if pool.isActive():
            pool.destroy()
        pool.undefine()
    except libvirt.libvirtError:
        logger.warning('Could not remove storage pool %s' % pool.name(), exc_info=True)


def _create_temp_pool(conn, disk_dir):
    """Define and start the temporary "migscripttmp" pool; None on failure."""
    pool_xml = """<pool type='dir'>
<name>migscripttmp</name>
<target>
<path>%s</path>
</target>
</pool>""" % disk_dir

    # Drop a leftover pool from an earlier run so the name can be reused.
    try:
        stale = conn.storagePoolLookupByName('migscripttmp')
    except libvirt.libvirtError:
        logger.info('Storage pool migscripttmp not found. Creating.')
    else:
        _remove_pool(stale)

    try:
        pool = conn.storagePoolDefineXML(pool_xml, 0)
    except libvirt.libvirtError:
        return None
    if pool is None:
        return None
    try:
        pool.create(0)
    except libvirt.libvirtError:
        # Do not leave a defined-but-unstartable pool behind.
        _remove_pool(pool)
        return None
    return pool


def get_disk_info(disk_path, uri='qemu:///system'):
    """Return size and format of the disk image at *disk_path*.

    The image is inspected through a libvirt storage pool on *uri*: an
    existing "dir" pool whose target path equals the image's directory is
    used when there is one; otherwise a temporary pool named
    "migscripttmp" is defined for that directory and removed afterwards.

    Returns a dict with keys 'size' (the volume's ``<capacity>`` text),
    'format' (the ``type`` attribute of ``<target><format>``) and 'path',
    or None when no pool could be set up or the volume cannot be read.
    """
    disk_dir = os.path.dirname(disk_path)
    disk_vol = os.path.basename(disk_path)

    conn = connect_libvirt(uri)
    temp_pool = None
    try:
        # Check if a pool exists holding the volume. The search result is
        # kept separate from the loop variable so "nothing matched" is
        # really None rather than the last pool that was inspected.
        pool = _find_dir_pool(conn, disk_dir, disk_vol)
        if pool is None:
            logger.info('No usable pool found. Creating temporary pool.')
            pool = temp_pool = _create_temp_pool(conn, disk_dir)
            if pool is None:
                logger.error('Could not create migscripttmp pool!')
                return None

        try:
            pool.refresh(0)
            # A missing volume is reported by libvirtError, not by None.
            vol = pool.storageVolLookupByName(disk_vol)
            volxml = ET.fromstring(vol.XMLDesc(0))
        except libvirt.libvirtError:
            logger.error('Could not find volume %s in pool %s' % (disk_vol, pool.name()))
            return None

        disk_size = volxml.find('capacity').text
        disk_format = volxml.find('target').find('format').attrib.get('type')
        return {'size': disk_size, 'format': disk_format, 'path': disk_path}
    finally:
        # Release what this call created: the temporary pool definition
        # (the image files themselves are untouched) and the connection.
        if temp_pool is not None:
            _remove_pool(temp_pool)
        conn.close()
def migrate_live_domain(domain, dest_uri, shared):
    """Live-migrate the running *domain* to the libvirt host at *dest_uri*.

    When *shared* is false the missing disk images are first created on the
    destination and the migration is asked to copy non-shared disks. If
    that preparation fails the function logs the problem and returns
    without migrating.

    If the live migration itself raises, the domain is destroyed (powered
    off) and an offline migration with power-on is attempted instead.
    Always returns None.
    """
    flags = (libvirt.VIR_MIGRATE_LIVE
             | libvirt.VIR_MIGRATE_PERSIST_DEST
             | libvirt.VIR_MIGRATE_UNDEFINE_SOURCE)

    if not shared:
        # Get the disks
        disk_info = create_missing_diks_on_dest(domain, dest_uri)
        if disk_info is None:
            logger.error('Some disks failed to migrate for domain %s' % domain.name())
            # This message has no placeholder; formatting it with an
            # argument raised TypeError and masked the real failure.
            logger.error('Check for cleanup on disks.')
            return None
        flags |= libvirt.VIR_MIGRATE_NON_SHARED_DISK

    dest_con = connect_libvirt(dest_uri)
    try:
        domain.migrate(dest_con, flags, None, None, 0)
    except Exception:
        logger.error('Domain %s failed to migrate to %s' % (domain.name(), dest_uri), exc_info=True)
        logger.info('Retrying offline migration of domain %s' % domain.name())
        domain.destroy()
        migrate_offline_domain(domain, dest_uri, shared, True)
    else:
        logger.info('Domain %s successfully migrated to %s' % (domain.name(), dest_uri))
def migrate_offline_domain(domain, dest_uri, shared, poweron=False):
    """Move a stopped *domain* to the libvirt host at *dest_uri*.

    Unless *shared* is true, the disks are copied first with
    ``copy_disks_to_dest``; a failed copy is logged and exits the process
    with status 1. The domain XML is then defined on the destination. On
    success the source definition is removed and, when *poweron* is true,
    the new domain is started.
    """
    if not shared and copy_disks_to_dest(domain, dest_uri) is None:
        logger.error('Could not copy disks for domain %s to host %s' % (domain.name(), dest_uri))
        sys.exit(1)

    definition = domain.XMLDesc(0)
    remote = connect_libvirt(dest_uri)
    new_domain = remote.defineXML(definition)

    if new_domain is None:
        logger.error('Could not create domain %s in libvirt uri %s' % (domain.name(), dest_uri))
        return

    # The destination now owns the definition; drop the local one.
    domain.undefine()
    if poweron:
        new_domain.create()
def migrate_domain(domain_name, dest_uri, shared):
    """Migrate the domain called *domain_name* to *dest_uri*.

    The domain is looked up on the default local URI; a running domain is
    migrated live, a stopped one offline.
    """
    domain = lookup_domain(domain_name)
    # Pick the strategy from the domain's current run state.
    migrate = migrate_live_domain if domain.isActive() else migrate_offline_domain
    migrate(domain, dest_uri, shared)
def evacuate_host(dest_uri, shared):
    """Migrate every domain returned by ``list_domains()`` to *dest_uri*.

    Domains are handled one after another through ``migrate_domain``; a
    closing message is logged once the loop is done.
    """
    for domain in list_domains():
        name = domain.name()
        logger.info('Starting migration for domain %s' % name)
        migrate_domain(name, dest_uri, shared)
    logger.info('Finished host evacuation.')
if __name__ == "__main__":
    # Script entry point: parse the command line, then either evacuate the
    # whole host or migrate the single domain that was named.
    args = parser.parse_args()
    if args.evacuate:
        # Evacuate
        evacuate_host(args.dest, args.sharedds)
    else:
        # Migrate one domain.
        migrate_domain(args.domain, args.dest, args.sharedds)
    # sys.exit() rather than quit(): quit is a helper the site module adds
    # for interactive sessions and is missing when Python runs with -S.
    sys.exit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment