# Patch for nova/virt/libvirt/driver.py
# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# All Rights Reserved.
# Copyright (c) 2010 Citrix Systems, Inc.
# Copyright (c) 2011 Piston Cloud Computing, Inc
# Copyright (c) 2012 University Of Minho
# (c) Copyright 2013 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""
A connection to a hypervisor through libvirt.

Supports KVM, LXC, QEMU, UML, XEN and Parallels.
"""
import binascii
import collections
from collections import deque
import contextlib
import errno
import functools
import glob
import itertools
import operator
import os
import pwd
import shutil
import tempfile
import time
import uuid

from castellan import key_manager
import eventlet
from eventlet import greenthread
from eventlet import tpool
from lxml import etree
from os_brick import encryptors
from os_brick.encryptors import luks as luks_encryptor
from os_brick import exception as brick_exception
from os_brick.initiator import connector
from oslo_concurrency import processutils
from oslo_log import log as logging
from oslo_serialization import jsonutils
from oslo_service import loopingcall
from oslo_utils import encodeutils
from oslo_utils import excutils
from oslo_utils import fileutils
from oslo_utils import importutils
from oslo_utils import strutils
from oslo_utils import timeutils
from oslo_utils import units
from oslo_utils import uuidutils
import six
from six.moves import range

from nova.api.metadata import base as instance_metadata
from nova import block_device
from nova.compute import power_state
from nova.compute import task_states
from nova.compute import utils as compute_utils
import nova.conf
from nova.console import serial as serial_console
from nova.console import type as ctype
from nova import context as nova_context
from nova import exception
from nova.i18n import _
from nova import image
from nova.network import model as network_model
from nova import objects
from nova.objects import diagnostics as diagnostics_obj
from nova.objects import fields
from nova.objects import migrate_data as migrate_data_obj
from nova.pci import manager as pci_manager
from nova.pci import utils as pci_utils
import nova.privsep.libvirt
import nova.privsep.path
from nova import utils
from nova import version
from nova.virt import block_device as driver_block_device
from nova.virt import configdrive
from nova.virt.disk import api as disk_api
from nova.virt.disk.vfs import guestfs
from nova.virt import driver
from nova.virt import firewall
from nova.virt import hardware
from nova.virt.image import model as imgmodel
from nova.virt import images
from nova.virt.libvirt import blockinfo
from nova.virt.libvirt import config as vconfig
from nova.virt.libvirt import firewall as libvirt_firewall
from nova.virt.libvirt import guest as libvirt_guest
from nova.virt.libvirt import host
from nova.virt.libvirt import imagebackend
from nova.virt.libvirt import imagecache
from nova.virt.libvirt import instancejobtracker
from nova.virt.libvirt import migration as libvirt_migrate
from nova.virt.libvirt.storage import dmcrypt
from nova.virt.libvirt.storage import lvm
from nova.virt.libvirt.storage import rbd_utils
from nova.virt.libvirt import utils as libvirt_utils
from nova.virt.libvirt import vif as libvirt_vif
from nova.virt.libvirt.volume import mount
from nova.virt.libvirt.volume import remotefs
from nova.virt import netutils
from nova.volume import cinder
libvirt = None

uefi_logged = False

LOG = logging.getLogger(__name__)

CONF = nova.conf.CONF

DEFAULT_FIREWALL_DRIVER = "%s.%s" % (
    libvirt_firewall.__name__,
    libvirt_firewall.IptablesFirewallDriver.__name__)

DEFAULT_UEFI_LOADER_PATH = {
    "x86_64": "/usr/share/OVMF/OVMF_CODE.fd",
    "aarch64": "/usr/share/AAVMF/AAVMF_CODE.fd"
}
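# Default UEFI firmware images used when a guest is booted with UEFI, keyed
# by host architecture: OVMF for x86_64 and AAVMF for aarch64.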
MAX_CONSOLE_BYTES = 100 * units.Ki

# The libvirt driver will prefix any disable reason codes with this string.
DISABLE_PREFIX = 'AUTO: '
# Disable reason used when a service was enabled or disabled without a reason
DISABLE_REASON_UNDEFINED = None

# Guest config console string
CONSOLE = "console=tty0 console=ttyS0 console=hvc0"
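# Lightweight containers: GuestNumaConfig carries the per-guest CPU/NUMA
# placement settings, InjectionInfo the data needed for file, network and
# admin-password injection.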
GuestNumaConfig = collections.namedtuple(
    'GuestNumaConfig', ['cpuset', 'cputune', 'numaconfig', 'numatune'])

InjectionInfo = collections.namedtuple(
    'InjectionInfo', ['network_info', 'files', 'admin_pass'])
libvirt_volume_drivers = [
    'iscsi=nova.virt.libvirt.volume.iscsi.LibvirtISCSIVolumeDriver',
    'iser=nova.virt.libvirt.volume.iser.LibvirtISERVolumeDriver',
    'local=nova.virt.libvirt.volume.volume.LibvirtVolumeDriver',
    #'drbd=nova.virt.libvirt.volume.drbd.LibvirtDRBDVolumeDriver',
    'fake=nova.virt.libvirt.volume.volume.LibvirtFakeVolumeDriver',
    'rbd=nova.virt.libvirt.volume.net.LibvirtNetVolumeDriver',
    'sheepdog=nova.virt.libvirt.volume.net.LibvirtNetVolumeDriver',
    'nfs=nova.virt.libvirt.volume.nfs.LibvirtNFSVolumeDriver',
    'smbfs=nova.virt.libvirt.volume.smbfs.LibvirtSMBFSVolumeDriver',
    'aoe=nova.virt.libvirt.volume.aoe.LibvirtAOEVolumeDriver',
    'fibre_channel='
    'nova.virt.libvirt.volume.fibrechannel.'
    'LibvirtFibreChannelVolumeDriver',
    'gpfs=nova.virt.libvirt.volume.gpfs.LibvirtGPFSVolumeDriver',
    'quobyte=nova.virt.libvirt.volume.quobyte.LibvirtQuobyteVolumeDriver',
    'hgst=nova.virt.libvirt.volume.hgst.LibvirtHGSTVolumeDriver',
    'scaleio=nova.virt.libvirt.volume.scaleio.LibvirtScaleIOVolumeDriver',
    'disco=nova.virt.libvirt.volume.disco.LibvirtDISCOVolumeDriver',
    'vzstorage='
    'nova.virt.libvirt.volume.vzstorage.LibvirtVZStorageVolumeDriver',
    'veritas_hyperscale='
    'nova.virt.libvirt.volume.vrtshyperscale.'
    'LibvirtHyperScaleVolumeDriver',
    'storpool=nova.virt.libvirt.volume.storpool.LibvirtStorPoolVolumeDriver',
]
def patch_tpool_proxy():
    """eventlet.tpool.Proxy doesn't work with old-style class in __str__()
    or __repr__() calls. See bug #962840 for details.

    We perform a monkey patch to replace those two instance methods.
    """
    def str_method(self):
        return str(self._obj)

    def repr_method(self):
        return repr(self._obj)

    tpool.Proxy.__str__ = str_method
    tpool.Proxy.__repr__ = repr_method


patch_tpool_proxy()
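# The patch is applied at import time so that every tpool.Proxy created by
# this module picks up the fixed __str__/__repr__ methods.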
# For information about when MIN_LIBVIRT_VERSION and
# NEXT_MIN_LIBVIRT_VERSION can be changed, consult
#
# https://wiki.openstack.org/wiki/LibvirtDistroSupportMatrix
#
# Currently this is effectively the min version for i686/x86_64
# + KVM/QEMU, as other architectures/hypervisors require newer
# versions. Over time, this will become a common min version
# for all architectures/hypervisors, as this value rises to
# meet them.
MIN_LIBVIRT_VERSION = (1, 2, 9)
MIN_QEMU_VERSION = (2, 1, 0)

# TODO(berrange): Re-evaluate this at start of each release cycle
# to decide if we want to plan a future min version bump.
# MIN_LIBVIRT_VERSION can be updated to match this after
# NEXT_MIN_LIBVIRT_VERSION has been at a higher value for
# one cycle
NEXT_MIN_LIBVIRT_VERSION = (1, 3, 1)
NEXT_MIN_QEMU_VERSION = (2, 5, 0)

# When the above version matches/exceeds this version
# delete it & corresponding code using it
# Libvirt version 1.2.17 is required for successful block live migration
# of a VM booted from an image with attached devices
MIN_LIBVIRT_BLOCK_LM_WITH_VOLUMES_VERSION = (1, 2, 17)

# PowerPC based hosts that support NUMA using libvirt
MIN_LIBVIRT_NUMA_VERSION_PPC = (1, 2, 19)

# Versions of libvirt with known NUMA topology issues
# See bug #1449028
BAD_LIBVIRT_NUMA_VERSIONS = [(1, 2, 9, 2)]

# Versions of libvirt with broken cpu pinning support. This excludes
# versions of libvirt with broken NUMA support since pinning needs
# NUMA
# See bug #1438226
BAD_LIBVIRT_CPU_POLICY_VERSIONS = [(1, 2, 10)]

# Virtuozzo driver support
MIN_VIRTUOZZO_VERSION = (7, 0, 0)
MIN_LIBVIRT_VIRTUOZZO_VERSION = (1, 2, 12)

# Ability to set the user guest password with QEMU
MIN_LIBVIRT_SET_ADMIN_PASSWD = (1, 2, 16)

# Ability to set the user guest password with Parallels
MIN_LIBVIRT_PARALLELS_SET_ADMIN_PASSWD = (2, 0, 0)

# s/390 & s/390x architectures with KVM
MIN_LIBVIRT_KVM_S390_VERSION = (1, 2, 13)
MIN_QEMU_S390_VERSION = (2, 3, 0)

# libvirt < 1.3 reported virt_functions capability
# only when VFs are enabled.
# libvirt 1.3 fix f391889f4e942e22b9ef8ecca492de05106ce41e
MIN_LIBVIRT_PF_WITH_NO_VFS_CAP_VERSION = (1, 3, 0)

# Use the "logd" backend for handling stdout/stderr from QEMU processes.
MIN_LIBVIRT_VIRTLOGD = (1, 3, 3)
MIN_QEMU_VIRTLOGD = (2, 7, 0)

# ppc64/ppc64le architectures with KVM
# NOTE(rfolco): Same levels for Libvirt/Qemu on Big Endian and Little
# Endian giving the nuance around guest vs host architectures
MIN_LIBVIRT_KVM_PPC64_VERSION = (1, 2, 12)

# Names of the types that do not get compressed during migration
NO_COMPRESSION_TYPES = ('qcow2',)
# Limit on the number of QEMU serial consoles
QEMU_MAX_SERIAL_PORTS = 4
# QEMU supports 4 serial consoles; we subtract 1 because of the PTY console
# that is already defined
ALLOWED_QEMU_SERIAL_PORTS = QEMU_MAX_SERIAL_PORTS - 1
# realtime support
MIN_LIBVIRT_REALTIME_VERSION = (1, 2, 13)

# libvirt postcopy support
MIN_LIBVIRT_POSTCOPY_VERSION = (1, 3, 3)
# qemu postcopy support
MIN_QEMU_POSTCOPY_VERSION = (2, 5, 0)

MIN_LIBVIRT_OTHER_ARCH = {
    fields.Architecture.S390: MIN_LIBVIRT_KVM_S390_VERSION,
    fields.Architecture.S390X: MIN_LIBVIRT_KVM_S390_VERSION,
    fields.Architecture.PPC: MIN_LIBVIRT_KVM_PPC64_VERSION,
    fields.Architecture.PPC64: MIN_LIBVIRT_KVM_PPC64_VERSION,
    fields.Architecture.PPC64LE: MIN_LIBVIRT_KVM_PPC64_VERSION,
}

MIN_QEMU_OTHER_ARCH = {
    fields.Architecture.S390: MIN_QEMU_S390_VERSION,
    fields.Architecture.S390X: MIN_QEMU_S390_VERSION,
}
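# The per-architecture minimums above are enforced in init_host();
# architectures missing from these maps only need to satisfy
# MIN_LIBVIRT_VERSION / MIN_QEMU_VERSION.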
# perf events support
MIN_LIBVIRT_PERF_VERSION = (2, 0, 0)
LIBVIRT_PERF_EVENT_PREFIX = 'VIR_PERF_PARAM_'

PERF_EVENTS_CPU_FLAG_MAPPING = {'cmt': 'cmt',
                                'mbml': 'mbm_local',
                                'mbmt': 'mbm_total',
                                }
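# Events listed above are only enabled when the host CPU advertises the
# matching flag, e.g. the 'mbmt' perf event requires the 'mbm_total' CPU flag.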
# Mediated devices support
MIN_LIBVIRT_MDEV_SUPPORT = (3, 4, 0)

# libvirt>=3.10 is required for volume multiattach if qemu<2.10.
# See https://bugzilla.redhat.com/show_bug.cgi?id=1378242
# for details.
MIN_LIBVIRT_MULTIATTACH = (3, 10, 0)

MIN_LIBVIRT_LUKS_VERSION = (2, 2, 0)
MIN_QEMU_LUKS_VERSION = (2, 6, 0)

VGPU_RESOURCE_SEMAPHORE = "vgpu_resources"
class LibvirtDriver(driver.ComputeDriver): | |
capabilities = { | |
"has_imagecache": True, | |
"supports_recreate": True, | |
"supports_migrate_to_same_host": False, | |
"supports_attach_interface": True, | |
"supports_device_tagging": True, | |
"supports_tagged_attach_interface": True, | |
"supports_tagged_attach_volume": True, | |
"supports_extend_volume": True, | |
# Multiattach support is conditional on qemu and libvirt versions | |
# determined in init_host. | |
"supports_multiattach": False | |
} | |
def __init__(self, virtapi, read_only=False): | |
super(LibvirtDriver, self).__init__(virtapi) | |
global libvirt | |
if libvirt is None: | |
libvirt = importutils.import_module('libvirt') | |
libvirt_migrate.libvirt = libvirt | |
self._host = host.Host(self._uri(), read_only, | |
lifecycle_event_handler=self.emit_event, | |
conn_event_handler=self._handle_conn_event) | |
self._initiator = None | |
self._fc_wwnns = None | |
self._fc_wwpns = None | |
self._caps = None | |
self._supported_perf_events = [] | |
self.firewall_driver = firewall.load_driver( | |
DEFAULT_FIREWALL_DRIVER, | |
host=self._host) | |
self.vif_driver = libvirt_vif.LibvirtGenericVIFDriver() | |
# TODO(mriedem): Long-term we should load up the volume drivers on | |
# demand as needed rather than doing this on startup, as there might | |
# be unsupported volume drivers in this list based on the underlying | |
# platform. | |
self.volume_drivers = self._get_volume_drivers() | |
self._disk_cachemode = None | |
self.image_cache_manager = imagecache.ImageCacheManager() | |
self.image_backend = imagebackend.Backend(CONF.use_cow_images) | |
self.disk_cachemodes = {} | |
self.valid_cachemodes = ["default", | |
"none", | |
"writethrough", | |
"writeback", | |
"directsync", | |
"unsafe", | |
] | |
self._conn_supports_start_paused = CONF.libvirt.virt_type in ('kvm', | |
'qemu') | |
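        # Each entry in CONF.libvirt.disk_cachemodes has the form
        # '<source type>=<cache mode>', e.g. 'file=writeback' or 'block=none';
        # the resulting map is consulted by _set_cache_mode() below.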
for mode_str in CONF.libvirt.disk_cachemodes: | |
disk_type, sep, cache_mode = mode_str.partition('=') | |
if cache_mode not in self.valid_cachemodes: | |
LOG.warning('Invalid cachemode %(cache_mode)s specified ' | |
'for disk type %(disk_type)s.', | |
{'cache_mode': cache_mode, 'disk_type': disk_type}) | |
continue | |
self.disk_cachemodes[disk_type] = cache_mode | |
self._volume_api = cinder.API() | |
self._image_api = image.API() | |
sysinfo_serial_funcs = { | |
'none': lambda: None, | |
'hardware': self._get_host_sysinfo_serial_hardware, | |
'os': self._get_host_sysinfo_serial_os, | |
'auto': self._get_host_sysinfo_serial_auto, | |
} | |
self._sysinfo_serial_func = sysinfo_serial_funcs.get( | |
CONF.libvirt.sysinfo_serial) | |
self.job_tracker = instancejobtracker.InstanceJobTracker() | |
self._remotefs = remotefs.RemoteFilesystem() | |
self._live_migration_flags = self._block_migration_flags = 0 | |
self.active_migrations = {} | |
# Compute reserved hugepages from conf file at the very | |
# beginning to ensure any syntax error will be reported and | |
# avoid any re-calculation when computing resources. | |
self._reserved_hugepages = hardware.numa_get_reserved_huge_pages() | |
def _get_volume_drivers(self): | |
driver_registry = dict() | |
for driver_str in libvirt_volume_drivers: | |
driver_type, _sep, driver = driver_str.partition('=') | |
driver_class = importutils.import_class(driver) | |
try: | |
driver_registry[driver_type] = driver_class(self._host) | |
except brick_exception.InvalidConnectorProtocol: | |
LOG.debug('Unable to load volume driver %s. It is not ' | |
'supported on this host.', driver) | |
return driver_registry | |
@property | |
def disk_cachemode(self): | |
if self._disk_cachemode is None: | |
# We prefer 'none' for consistent performance, host crash | |
# safety & migration correctness by avoiding host page cache. | |
# Some filesystems don't support O_DIRECT though. For those we | |
# fallback to 'writethrough' which gives host crash safety, and | |
# is safe for migration provided the filesystem is cache coherent | |
# (cluster filesystems typically are, but things like NFS are not). | |
self._disk_cachemode = "none" | |
if not utils.supports_direct_io(CONF.instances_path): | |
self._disk_cachemode = "writethrough" | |
return self._disk_cachemode | |
def _set_cache_mode(self, conf): | |
"""Set cache mode on LibvirtConfigGuestDisk object.""" | |
try: | |
source_type = conf.source_type | |
driver_cache = conf.driver_cache | |
except AttributeError: | |
return | |
# Shareable disks like for a multi-attach volume need to have the | |
# driver cache disabled. | |
if getattr(conf, 'shareable', False): | |
conf.driver_cache = 'none' | |
else: | |
cache_mode = self.disk_cachemodes.get(source_type, | |
driver_cache) | |
conf.driver_cache = cache_mode | |
def _do_quality_warnings(self): | |
"""Warn about untested driver configurations. | |
This will log a warning message about untested driver or host arch | |
configurations to indicate to administrators that the quality is | |
unknown. Currently, only qemu or kvm on intel 32- or 64-bit systems | |
is tested upstream. | |
""" | |
caps = self._host.get_capabilities() | |
hostarch = caps.host.cpu.arch | |
if (CONF.libvirt.virt_type not in ('qemu', 'kvm') or | |
hostarch not in (fields.Architecture.I686, | |
fields.Architecture.X86_64)): | |
LOG.warning('The libvirt driver is not tested on ' | |
'%(type)s/%(arch)s by the OpenStack project and ' | |
'thus its quality can not be ensured. For more ' | |
'information, see: https://docs.openstack.org/' | |
'nova/latest/user/support-matrix.html', | |
{'type': CONF.libvirt.virt_type, 'arch': hostarch}) | |
def _handle_conn_event(self, enabled, reason): | |
LOG.info("Connection event '%(enabled)d' reason '%(reason)s'", | |
{'enabled': enabled, 'reason': reason}) | |
self._set_host_enabled(enabled, reason) | |
def _version_to_string(self, version): | |
return '.'.join([str(x) for x in version]) | |
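    # e.g. _version_to_string((1, 2, 9)) returns '1.2.9'.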
def init_host(self, host): | |
self._host.initialize() | |
self._do_quality_warnings() | |
self._parse_migration_flags() | |
self._supported_perf_events = self._get_supported_perf_events() | |
self._set_multiattach_support() | |
if (CONF.libvirt.virt_type == 'lxc' and | |
not (CONF.libvirt.uid_maps and CONF.libvirt.gid_maps)): | |
LOG.warning("Running libvirt-lxc without user namespaces is " | |
"dangerous. Containers spawned by Nova will be run " | |
"as the host's root user. It is highly suggested " | |
"that user namespaces be used in a public or " | |
"multi-tenant environment.") | |
        # Stop libguestfs from using KVM unless we're also configured to use
        # it. This solves the problem where people need to stop Nova from
        # using KVM because nested virt is broken.
        if CONF.libvirt.virt_type != "kvm":
            guestfs.force_tcg()
if not self._host.has_min_version(MIN_LIBVIRT_VERSION): | |
raise exception.InternalError( | |
_('Nova requires libvirt version %s or greater.') % | |
self._version_to_string(MIN_LIBVIRT_VERSION)) | |
if CONF.libvirt.virt_type in ("qemu", "kvm"): | |
if self._host.has_min_version(hv_ver=MIN_QEMU_VERSION): | |
# "qemu-img info" calls are version dependent, so we need to | |
# store the version in the images module. | |
images.QEMU_VERSION = self._host.get_connection().getVersion() | |
else: | |
raise exception.InternalError( | |
_('Nova requires QEMU version %s or greater.') % | |
self._version_to_string(MIN_QEMU_VERSION)) | |
if CONF.libvirt.virt_type == 'parallels': | |
if not self._host.has_min_version(hv_ver=MIN_VIRTUOZZO_VERSION): | |
raise exception.InternalError( | |
_('Nova requires Virtuozzo version %s or greater.') % | |
self._version_to_string(MIN_VIRTUOZZO_VERSION)) | |
if not self._host.has_min_version(MIN_LIBVIRT_VIRTUOZZO_VERSION): | |
raise exception.InternalError( | |
_('Running Nova with parallels virt_type requires ' | |
'libvirt version %s') % | |
self._version_to_string(MIN_LIBVIRT_VIRTUOZZO_VERSION)) | |
# Give the cloud admin a heads up if we are intending to | |
# change the MIN_LIBVIRT_VERSION in the next release. | |
if not self._host.has_min_version(NEXT_MIN_LIBVIRT_VERSION): | |
LOG.warning('Running Nova with a libvirt version less than ' | |
'%(version)s is deprecated. The required minimum ' | |
'version of libvirt will be raised to %(version)s ' | |
'in the next release.', | |
{'version': self._version_to_string( | |
NEXT_MIN_LIBVIRT_VERSION)}) | |
if (CONF.libvirt.virt_type in ("qemu", "kvm") and | |
not self._host.has_min_version(hv_ver=NEXT_MIN_QEMU_VERSION)): | |
LOG.warning('Running Nova with a QEMU version less than ' | |
'%(version)s is deprecated. The required minimum ' | |
'version of QEMU will be raised to %(version)s ' | |
'in the next release.', | |
{'version': self._version_to_string( | |
NEXT_MIN_QEMU_VERSION)}) | |
kvm_arch = fields.Architecture.from_host() | |
if (CONF.libvirt.virt_type in ('kvm', 'qemu') and | |
kvm_arch in MIN_LIBVIRT_OTHER_ARCH and | |
not self._host.has_min_version( | |
MIN_LIBVIRT_OTHER_ARCH.get(kvm_arch), | |
MIN_QEMU_OTHER_ARCH.get(kvm_arch))): | |
if MIN_QEMU_OTHER_ARCH.get(kvm_arch): | |
raise exception.InternalError( | |
_('Running Nova with qemu/kvm virt_type on %(arch)s ' | |
'requires libvirt version %(libvirt_ver)s and ' | |
'qemu version %(qemu_ver)s, or greater') % | |
{'arch': kvm_arch, | |
'libvirt_ver': self._version_to_string( | |
MIN_LIBVIRT_OTHER_ARCH.get(kvm_arch)), | |
'qemu_ver': self._version_to_string( | |
MIN_QEMU_OTHER_ARCH.get(kvm_arch))}) | |
# no qemu version in the error message | |
raise exception.InternalError( | |
_('Running Nova with qemu/kvm virt_type on %(arch)s ' | |
'requires libvirt version %(libvirt_ver)s or greater') % | |
{'arch': kvm_arch, | |
'libvirt_ver': self._version_to_string( | |
MIN_LIBVIRT_OTHER_ARCH.get(kvm_arch))}) | |
# TODO(sbauza): Remove this code once mediated devices are persisted | |
# across reboots. | |
if self._host.has_min_version(MIN_LIBVIRT_MDEV_SUPPORT): | |
self._recreate_assigned_mediated_devices() | |
@staticmethod | |
def _is_existing_mdev(uuid): | |
        # FIXME(sbauza): Some kernels can have a uevent race, meaning the
        # libvirt daemon won't know that a mediated device was created unless
        # the daemon is restarted. Until none of the kernels we support have
        # that race, check sysfs directly instead of asking the libvirt API.
        # See https://bugzilla.redhat.com/show_bug.cgi?id=1376907 for ref.
        return os.path.exists('/sys/bus/mdev/devices/{0}'.format(uuid))
def _recreate_assigned_mediated_devices(self): | |
"""Recreate assigned mdevs that could have disappeared if we reboot | |
the host. | |
""" | |
mdevs = self._get_all_assigned_mediated_devices() | |
requested_types = self._get_supported_vgpu_types() | |
for (mdev_uuid, instance_uuid) in six.iteritems(mdevs): | |
if not self._is_existing_mdev(mdev_uuid): | |
self._create_new_mediated_device(requested_types, mdev_uuid) | |
def _set_multiattach_support(self): | |
# Check to see if multiattach is supported. Based on bugzilla | |
# https://bugzilla.redhat.com/show_bug.cgi?id=1378242 and related | |
# clones, the shareable flag on a disk device will only work with | |
# qemu<2.10 or libvirt>=3.10. So check those versions here and set | |
# the capability appropriately. | |
if (self._host.has_min_version(lv_ver=MIN_LIBVIRT_MULTIATTACH) or | |
not self._host.has_min_version(hv_ver=(2, 10, 0))): | |
self.capabilities['supports_multiattach'] = True | |
else: | |
LOG.debug('Volume multiattach is not supported based on current ' | |
'versions of QEMU and libvirt. QEMU must be less than ' | |
'2.10 or libvirt must be greater than or equal to 3.10.') | |
def _prepare_migration_flags(self): | |
migration_flags = 0 | |
migration_flags |= libvirt.VIR_MIGRATE_LIVE | |
# Adding p2p flag only if xen is not in use, because xen does not | |
# support p2p migrations | |
if CONF.libvirt.virt_type != 'xen': | |
migration_flags |= libvirt.VIR_MIGRATE_PEER2PEER | |
# Adding VIR_MIGRATE_UNDEFINE_SOURCE because, without it, migrated | |
# instance will remain defined on the source host | |
migration_flags |= libvirt.VIR_MIGRATE_UNDEFINE_SOURCE | |
# Adding VIR_MIGRATE_PERSIST_DEST to persist the VM on the | |
# destination host | |
migration_flags |= libvirt.VIR_MIGRATE_PERSIST_DEST | |
live_migration_flags = block_migration_flags = migration_flags | |
# Adding VIR_MIGRATE_NON_SHARED_INC, otherwise all block-migrations | |
# will be live-migrations instead | |
block_migration_flags |= libvirt.VIR_MIGRATE_NON_SHARED_INC | |
return (live_migration_flags, block_migration_flags) | |
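    # The flag sets returned above share LIVE | UNDEFINE_SOURCE | PERSIST_DEST
    # (plus PEER2PEER unless virt_type is 'xen'); only the block-migration set
    # additionally carries NON_SHARED_INC.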
def _handle_live_migration_tunnelled(self, migration_flags): | |
if (CONF.libvirt.live_migration_tunnelled is None or | |
CONF.libvirt.live_migration_tunnelled): | |
migration_flags |= libvirt.VIR_MIGRATE_TUNNELLED | |
return migration_flags | |
def _is_post_copy_available(self): | |
if self._host.has_min_version(lv_ver=MIN_LIBVIRT_POSTCOPY_VERSION, | |
hv_ver=MIN_QEMU_POSTCOPY_VERSION): | |
return True | |
return False | |
def _is_virtlogd_available(self): | |
return self._host.has_min_version(MIN_LIBVIRT_VIRTLOGD, | |
MIN_QEMU_VIRTLOGD) | |
def _is_native_luks_available(self): | |
return self._host.has_min_version(MIN_LIBVIRT_LUKS_VERSION, | |
MIN_QEMU_LUKS_VERSION) | |
def _handle_live_migration_post_copy(self, migration_flags): | |
if CONF.libvirt.live_migration_permit_post_copy: | |
if self._is_post_copy_available(): | |
migration_flags |= libvirt.VIR_MIGRATE_POSTCOPY | |
else: | |
LOG.info('The live_migration_permit_post_copy is set ' | |
'to True, but it is not supported.') | |
return migration_flags | |
def _handle_live_migration_auto_converge(self, migration_flags): | |
if (self._is_post_copy_available() and | |
(migration_flags & libvirt.VIR_MIGRATE_POSTCOPY) != 0): | |
LOG.info('The live_migration_permit_post_copy is set to ' | |
'True and post copy live migration is available ' | |
'so auto-converge will not be in use.') | |
elif CONF.libvirt.live_migration_permit_auto_converge: | |
migration_flags |= libvirt.VIR_MIGRATE_AUTO_CONVERGE | |
return migration_flags | |
def _parse_migration_flags(self): | |
(live_migration_flags, | |
block_migration_flags) = self._prepare_migration_flags() | |
live_migration_flags = self._handle_live_migration_tunnelled( | |
live_migration_flags) | |
block_migration_flags = self._handle_live_migration_tunnelled( | |
block_migration_flags) | |
live_migration_flags = self._handle_live_migration_post_copy( | |
live_migration_flags) | |
block_migration_flags = self._handle_live_migration_post_copy( | |
block_migration_flags) | |
live_migration_flags = self._handle_live_migration_auto_converge( | |
live_migration_flags) | |
block_migration_flags = self._handle_live_migration_auto_converge( | |
block_migration_flags) | |
self._live_migration_flags = live_migration_flags | |
self._block_migration_flags = block_migration_flags | |
# TODO(sahid): This method is targeted for removal when the tests | |
# have been updated to avoid its use | |
# | |
# All libvirt API calls on the libvirt.Connect object should be | |
# encapsulated by methods on the nova.virt.libvirt.host.Host | |
# object, rather than directly invoking the libvirt APIs. The goal | |
# is to avoid a direct dependency on the libvirt API from the | |
# driver.py file. | |
def _get_connection(self): | |
return self._host.get_connection() | |
_conn = property(_get_connection) | |
@staticmethod | |
def _uri(): | |
if CONF.libvirt.virt_type == 'uml': | |
uri = CONF.libvirt.connection_uri or 'uml:///system' | |
elif CONF.libvirt.virt_type == 'xen': | |
uri = CONF.libvirt.connection_uri or 'xen:///' | |
elif CONF.libvirt.virt_type == 'lxc': | |
uri = CONF.libvirt.connection_uri or 'lxc:///' | |
elif CONF.libvirt.virt_type == 'parallels': | |
uri = CONF.libvirt.connection_uri or 'parallels:///system' | |
else: | |
uri = CONF.libvirt.connection_uri or 'qemu:///system' | |
return uri | |
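    # e.g. with virt_type 'kvm' and no connection_uri override, _uri()
    # returns 'qemu:///system'.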
@staticmethod | |
def _live_migration_uri(dest): | |
uris = { | |
'kvm': 'qemu+%s://%s/system', | |
'qemu': 'qemu+%s://%s/system', | |
'xen': 'xenmigr://%s/system', | |
'parallels': 'parallels+tcp://%s/system', | |
} | |
virt_type = CONF.libvirt.virt_type | |
# TODO(pkoniszewski): Remove fetching live_migration_uri in Pike | |
uri = CONF.libvirt.live_migration_uri | |
if uri: | |
return uri % dest | |
uri = uris.get(virt_type) | |
if uri is None: | |
raise exception.LiveMigrationURINotAvailable(virt_type=virt_type) | |
str_format = (dest,) | |
if virt_type in ('kvm', 'qemu'): | |
scheme = CONF.libvirt.live_migration_scheme or 'tcp' | |
str_format = (scheme, dest) | |
return uris.get(virt_type) % str_format | |
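    # e.g. virt_type 'kvm', dest 'dest-host' and the default 'tcp' scheme
    # yield 'qemu+tcp://dest-host/system'.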
@staticmethod | |
def _migrate_uri(dest): | |
uri = None | |
        # Only QEMU live migration supports the migrate-uri parameter
        virt_type = CONF.libvirt.virt_type
        if virt_type in ('qemu', 'kvm'):
            # QEMU accepts two schemes: tcp and rdma. By default libvirt
            # builds the URI using the remote hostname and the tcp scheme.
            uri = 'tcp://%s' % dest
# Because dest might be of type unicode, here we might return value of | |
# type unicode as well which is not acceptable by libvirt python | |
# binding when Python 2.7 is in use, so let's convert it explicitly | |
# back to string. When Python 3.x is in use, libvirt python binding | |
# accepts unicode type so it is completely fine to do a no-op str(uri) | |
# conversion which will return value of type unicode. | |
return uri and str(uri) | |
def instance_exists(self, instance): | |
"""Efficient override of base instance_exists method.""" | |
try: | |
self._host.get_guest(instance) | |
return True | |
except (exception.InternalError, exception.InstanceNotFound): | |
return False | |
def estimate_instance_overhead(self, instance_info): | |
overhead = super(LibvirtDriver, self).estimate_instance_overhead( | |
instance_info) | |
if isinstance(instance_info, objects.Flavor): | |
            # A flavor object is passed in the migrate case.
            # TODO(sahid): We do not have any way to retrieve the image meta
            # related to the instance, so if cpu_policy has been set in the
            # image meta we would get an exception. Until we fix that, we
            # explicitly set cpu_policy to dedicated in a stub ImageMeta
            # object so that nothing fails if isolated emulator threads have
            # been requested.
image_meta = objects.ImageMeta.from_dict({"properties": { | |
"hw_cpu_policy": fields.CPUAllocationPolicy.DEDICATED, | |
}}) | |
if (hardware.get_emulator_threads_constraint( | |
instance_info, image_meta) | |
== fields.CPUEmulatorThreadsPolicy.ISOLATE): | |
overhead['vcpus'] += 1 | |
        else:
            # An instance object is passed in the spawn case, and a dict is
            # passed when computing resources for an instance
numa_topology = hardware.instance_topology_from_instance( | |
instance_info) | |
if numa_topology and numa_topology.emulator_threads_isolated: | |
overhead['vcpus'] += 1 | |
return overhead | |
def list_instances(self): | |
names = [] | |
for guest in self._host.list_guests(only_running=False): | |
names.append(guest.name) | |
return names | |
def list_instance_uuids(self): | |
uuids = [] | |
for guest in self._host.list_guests(only_running=False): | |
uuids.append(guest.uuid) | |
return uuids | |
def plug_vifs(self, instance, network_info): | |
"""Plug VIFs into networks.""" | |
for vif in network_info: | |
self.vif_driver.plug(instance, vif) | |
def _unplug_vifs(self, instance, network_info, ignore_errors): | |
"""Unplug VIFs from networks.""" | |
for vif in network_info: | |
try: | |
self.vif_driver.unplug(instance, vif) | |
except exception.NovaException: | |
if not ignore_errors: | |
raise | |
def unplug_vifs(self, instance, network_info): | |
self._unplug_vifs(instance, network_info, False) | |
def _teardown_container(self, instance): | |
inst_path = libvirt_utils.get_instance_path(instance) | |
container_dir = os.path.join(inst_path, 'rootfs') | |
rootfs_dev = instance.system_metadata.get('rootfs_device_name') | |
LOG.debug('Attempting to teardown container at path %(dir)s with ' | |
'root device: %(rootfs_dev)s', | |
{'dir': container_dir, 'rootfs_dev': rootfs_dev}, | |
instance=instance) | |
disk_api.teardown_container(container_dir, rootfs_dev) | |
def _destroy(self, instance, attempt=1): | |
try: | |
guest = self._host.get_guest(instance) | |
if CONF.serial_console.enabled: | |
# This method is called for several events: destroy, | |
# rebuild, hard-reboot, power-off - For all of these | |
# events we want to release the serial ports acquired | |
# for the guest before destroying it. | |
serials = self._get_serial_ports_from_guest(guest) | |
for hostname, port in serials: | |
serial_console.release_port(host=hostname, port=port) | |
except exception.InstanceNotFound: | |
guest = None | |
        # If the instance is already terminated, we're still happy.
        # Otherwise, destroy it.
old_domid = -1 | |
if guest is not None: | |
try: | |
old_domid = guest.id | |
guest.poweroff() | |
except libvirt.libvirtError as e: | |
is_okay = False | |
errcode = e.get_error_code() | |
if errcode == libvirt.VIR_ERR_NO_DOMAIN: | |
# Domain already gone. This can safely be ignored. | |
is_okay = True | |
elif errcode == libvirt.VIR_ERR_OPERATION_INVALID: | |
# If the instance is already shut off, we get this: | |
# Code=55 Error=Requested operation is not valid: | |
# domain is not running | |
state = guest.get_power_state(self._host) | |
if state == power_state.SHUTDOWN: | |
is_okay = True | |
elif errcode == libvirt.VIR_ERR_INTERNAL_ERROR: | |
errmsg = e.get_error_message() | |
if (CONF.libvirt.virt_type == 'lxc' and | |
errmsg == 'internal error: ' | |
'Some processes refused to die'): | |
# Some processes in the container didn't die | |
# fast enough for libvirt. The container will | |
# eventually die. For now, move on and let | |
# the wait_for_destroy logic take over. | |
is_okay = True | |
elif errcode == libvirt.VIR_ERR_OPERATION_TIMEOUT: | |
LOG.warning("Cannot destroy instance, operation time out", | |
instance=instance) | |
reason = _("operation time out") | |
raise exception.InstancePowerOffFailure(reason=reason) | |
elif errcode == libvirt.VIR_ERR_SYSTEM_ERROR: | |
if e.get_int1() == errno.EBUSY: | |
# NOTE(danpb): When libvirt kills a process it sends it | |
# SIGTERM first and waits 10 seconds. If it hasn't gone | |
# it sends SIGKILL and waits another 5 seconds. If it | |
# still hasn't gone then you get this EBUSY error. | |
# Usually when a QEMU process fails to go away upon | |
# SIGKILL it is because it is stuck in an | |
# uninterruptible kernel sleep waiting on I/O from | |
# some non-responsive server. | |
                    # Given the CPU load of the gate tests though, it is
                    # conceivable that the 15 second timeout is too short,
                    # particularly if the VM running tempest has a high
                    # steal time from the cloud host, i.e. 15 wallclock
                    # seconds may have passed while the VM only had a few
                    # seconds of scheduled run time.
LOG.warning('Error from libvirt during destroy. ' | |
'Code=%(errcode)s Error=%(e)s; ' | |
'attempt %(attempt)d of 3', | |
{'errcode': errcode, 'e': e, | |
'attempt': attempt}, | |
instance=instance) | |
with excutils.save_and_reraise_exception() as ctxt: | |
# Try up to 3 times before giving up. | |
if attempt < 3: | |
ctxt.reraise = False | |
self._destroy(instance, attempt + 1) | |
return | |
if not is_okay: | |
with excutils.save_and_reraise_exception(): | |
LOG.error('Error from libvirt during destroy. ' | |
'Code=%(errcode)s Error=%(e)s', | |
{'errcode': errcode, 'e': e}, | |
instance=instance) | |
def _wait_for_destroy(expected_domid): | |
"""Called at an interval until the VM is gone.""" | |
# NOTE(vish): If the instance disappears during the destroy | |
# we ignore it so the cleanup can still be | |
# attempted because we would prefer destroy to | |
# never fail. | |
try: | |
dom_info = self.get_info(instance) | |
state = dom_info.state | |
new_domid = dom_info.internal_id | |
except exception.InstanceNotFound: | |
LOG.debug("During wait destroy, instance disappeared.", | |
instance=instance) | |
state = power_state.SHUTDOWN | |
if state == power_state.SHUTDOWN: | |
LOG.info("Instance destroyed successfully.", instance=instance) | |
raise loopingcall.LoopingCallDone() | |
# NOTE(wangpan): If the instance was booted again after destroy, | |
# this may be an endless loop, so check the id of | |
# domain here, if it changed and the instance is | |
# still running, we should destroy it again. | |
# see https://bugs.launchpad.net/nova/+bug/1111213 for more details | |
if new_domid != expected_domid: | |
LOG.info("Instance may be started again.", instance=instance) | |
kwargs['is_running'] = True | |
raise loopingcall.LoopingCallDone() | |
kwargs = {'is_running': False} | |
timer = loopingcall.FixedIntervalLoopingCall(_wait_for_destroy, | |
old_domid) | |
timer.start(interval=0.5).wait() | |
if kwargs['is_running']: | |
LOG.info("Going to destroy instance again.", instance=instance) | |
self._destroy(instance) | |
else: | |
# NOTE(GuanQiang): teardown container to avoid resource leak | |
if CONF.libvirt.virt_type == 'lxc': | |
self._teardown_container(instance) | |
def destroy(self, context, instance, network_info, block_device_info=None, | |
destroy_disks=True): | |
self._destroy(instance) | |
self.cleanup(context, instance, network_info, block_device_info, | |
destroy_disks) | |
def _undefine_domain(self, instance): | |
try: | |
guest = self._host.get_guest(instance) | |
try: | |
support_uefi = self._has_uefi_support() | |
guest.delete_configuration(support_uefi) | |
except libvirt.libvirtError as e: | |
with excutils.save_and_reraise_exception() as ctxt: | |
errcode = e.get_error_code() | |
if errcode == libvirt.VIR_ERR_NO_DOMAIN: | |
LOG.debug("Called undefine, but domain already gone.", | |
instance=instance) | |
ctxt.reraise = False | |
else: | |
LOG.error('Error from libvirt during undefine. ' | |
'Code=%(errcode)s Error=%(e)s', | |
{'errcode': errcode, | |
'e': encodeutils.exception_to_unicode(e)}, | |
instance=instance) | |
except exception.InstanceNotFound: | |
pass | |
def cleanup(self, context, instance, network_info, block_device_info=None, | |
destroy_disks=True, migrate_data=None, destroy_vifs=True): | |
if destroy_vifs: | |
self._unplug_vifs(instance, network_info, True) | |
# Continue attempting to remove firewall filters for the instance | |
# until it's done or there is a failure to remove the filters. If | |
# unfilter fails because the instance is not yet shutdown, try to | |
# destroy the guest again and then retry the unfilter. | |
while True: | |
try: | |
self.unfilter_instance(instance, network_info) | |
break | |
except libvirt.libvirtError as e: | |
try: | |
state = self.get_info(instance).state | |
except exception.InstanceNotFound: | |
state = power_state.SHUTDOWN | |
if state != power_state.SHUTDOWN: | |
LOG.warning("Instance may be still running, destroy " | |
"it again.", instance=instance) | |
self._destroy(instance) | |
else: | |
errcode = e.get_error_code() | |
LOG.exception(_('Error from libvirt during unfilter. ' | |
'Code=%(errcode)s Error=%(e)s'), | |
{'errcode': errcode, 'e': e}, | |
instance=instance) | |
reason = _("Error unfiltering instance.") | |
raise exception.InstanceTerminationFailure(reason=reason) | |
except Exception: | |
raise | |
# FIXME(wangpan): if the instance is booted again here, such as the | |
# soft reboot operation boot it here, it will become | |
# "running deleted", should we check and destroy it | |
# at the end of this method? | |
# NOTE(vish): we disconnect from volumes regardless | |
block_device_mapping = driver.block_device_info_get_mapping( | |
block_device_info) | |
for vol in block_device_mapping: | |
connection_info = vol['connection_info'] | |
disk_dev = vol['mount_device'] | |
if disk_dev is not None: | |
disk_dev = disk_dev.rpartition("/")[2] | |
try: | |
self._disconnect_volume(context, connection_info, instance) | |
except Exception as exc: | |
with excutils.save_and_reraise_exception() as ctxt: | |
if destroy_disks: | |
# Don't block on Volume errors if we're trying to | |
# delete the instance as we may be partially created | |
# or deleted | |
ctxt.reraise = False | |
LOG.warning( | |
"Ignoring Volume Error on vol %(vol_id)s " | |
"during delete %(exc)s", | |
{'vol_id': vol.get('volume_id'), | |
'exc': encodeutils.exception_to_unicode(exc)}, | |
instance=instance) | |
if destroy_disks: | |
# NOTE(haomai): destroy volumes if needed | |
if CONF.libvirt.images_type == 'lvm': | |
self._cleanup_lvm(instance, block_device_info) | |
if CONF.libvirt.images_type == 'rbd': | |
self._cleanup_rbd(instance) | |
is_shared_block_storage = False | |
if migrate_data and 'is_shared_block_storage' in migrate_data: | |
is_shared_block_storage = migrate_data.is_shared_block_storage | |
if destroy_disks or is_shared_block_storage: | |
attempts = int(instance.system_metadata.get('clean_attempts', | |
'0')) | |
success = self.delete_instance_files(instance) | |
# NOTE(mriedem): This is used in the _run_pending_deletes periodic | |
# task in the compute manager. The tight coupling is not great... | |
instance.system_metadata['clean_attempts'] = str(attempts + 1) | |
if success: | |
instance.cleaned = True | |
instance.save() | |
self._undefine_domain(instance) | |
def _detach_encrypted_volumes(self, instance, block_device_info): | |
"""Detaches encrypted volumes attached to instance.""" | |
disks = self._get_instance_disk_info(instance, block_device_info) | |
encrypted_volumes = filter(dmcrypt.is_encrypted, | |
[disk['path'] for disk in disks]) | |
for path in encrypted_volumes: | |
dmcrypt.delete_volume(path) | |
def _get_serial_ports_from_guest(self, guest, mode=None): | |
"""Returns an iterator over serial port(s) configured on guest. | |
:param mode: Should be a value in (None, bind, connect) | |
""" | |
xml = guest.get_xml_desc() | |
tree = etree.fromstring(xml) | |
# The 'serial' device is the base for x86 platforms. Other platforms | |
# (e.g. kvm on system z = S390X) can only use 'console' devices. | |
xpath_mode = "[@mode='%s']" % mode if mode else "" | |
serial_tcp = "./devices/serial[@type='tcp']/source" + xpath_mode | |
console_tcp = "./devices/console[@type='tcp']/source" + xpath_mode | |
tcp_devices = tree.findall(serial_tcp) | |
if len(tcp_devices) == 0: | |
tcp_devices = tree.findall(console_tcp) | |
for source in tcp_devices: | |
yield (source.get("host"), int(source.get("service"))) | |
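        # Each yielded tuple is (host, port), e.g. ('127.0.0.1', 10000) for a
        # <source host='127.0.0.1' service='10000'/> element.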
def _get_scsi_controller_max_unit(self, guest): | |
"""Returns the max disk unit used by scsi controller""" | |
xml = guest.get_xml_desc() | |
tree = etree.fromstring(xml) | |
addrs = "./devices/disk[@device='disk']/address[@type='drive']" | |
ret = [] | |
for obj in tree.findall(addrs): | |
ret.append(int(obj.get('unit', 0))) | |
return max(ret) | |
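    # e.g. returns 2 when the guest's disks use <address type='drive' .../>
    # elements with unit attributes 0, 1 and 2.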
@staticmethod | |
def _get_rbd_driver(): | |
return rbd_utils.RBDDriver( | |
pool=CONF.libvirt.images_rbd_pool, | |
ceph_conf=CONF.libvirt.images_rbd_ceph_conf, | |
rbd_user=CONF.libvirt.rbd_user) | |
def _cleanup_rbd(self, instance): | |
# NOTE(nic): On revert_resize, the cleanup steps for the root | |
# volume are handled with an "rbd snap rollback" command, | |
# and none of this is needed (and is, in fact, harmful) so | |
# filter out non-ephemerals from the list | |
if instance.task_state == task_states.RESIZE_REVERTING: | |
filter_fn = lambda disk: (disk.startswith(instance.uuid) and | |
disk.endswith('disk.local')) | |
else: | |
filter_fn = lambda disk: disk.startswith(instance.uuid) | |
LibvirtDriver._get_rbd_driver().cleanup_volumes(filter_fn) | |
def _cleanup_lvm(self, instance, block_device_info): | |
"""Delete all LVM disks for given instance object.""" | |
if instance.get('ephemeral_key_uuid') is not None: | |
self._detach_encrypted_volumes(instance, block_device_info) | |
disks = self._lvm_disks(instance) | |
if disks: | |
lvm.remove_volumes(disks) | |
def _lvm_disks(self, instance): | |
"""Returns all LVM disks for given instance object.""" | |
if CONF.libvirt.images_volume_group: | |
vg = os.path.join('/dev', CONF.libvirt.images_volume_group) | |
if not os.path.exists(vg): | |
return [] | |
pattern = '%s_' % instance.uuid | |
def belongs_to_instance(disk): | |
return disk.startswith(pattern) | |
def fullpath(name): | |
return os.path.join(vg, name) | |
logical_volumes = lvm.list_volumes(vg) | |
disks = [fullpath(disk) for disk in logical_volumes | |
if belongs_to_instance(disk)] | |
return disks | |
return [] | |
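    # i.e. every logical volume under /dev/<images_volume_group> whose name
    # starts with '<instance uuid>_'.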
def get_volume_connector(self, instance): | |
root_helper = utils.get_root_helper() | |
return connector.get_connector_properties( | |
root_helper, CONF.my_block_storage_ip, | |
CONF.libvirt.volume_use_multipath, | |
enforce_multipath=True, | |
host=CONF.host) | |
def _cleanup_resize(self, context, instance, network_info): | |
inst_base = libvirt_utils.get_instance_path(instance) | |
target = inst_base + '_resize' | |
if os.path.exists(target): | |
            # Deletion can fail over NFS, so retry the deletion as required.
            # Set the maximum number of attempts to 5; in most cases the
            # removal succeeds by the second attempt.
utils.execute('rm', '-rf', target, delay_on_retry=True, | |
attempts=5) | |
root_disk = self.image_backend.by_name(instance, 'disk') | |
# TODO(nic): Set ignore_errors=False in a future release. | |
# It is set to True here to avoid any upgrade issues surrounding | |
# instances being in pending resize state when the software is updated; | |
# in that case there will be no snapshot to remove. Once it can be | |
# reasonably assumed that no such instances exist in the wild | |
# anymore, it should be set back to False (the default) so it will | |
# throw errors, like it should. | |
if root_disk.exists(): | |
root_disk.remove_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME, | |
ignore_errors=True) | |
        # NOTE(mjozefcz):
        # For some backends self.image_backend.image recreates the instance
        # directory and the image disk.info file - remove them here if they
        # exist.
# Do not remove inst_base for volume-backed instances since that | |
# could potentially remove the files on the destination host | |
# if using shared storage. | |
if (os.path.exists(inst_base) and not root_disk.exists() and | |
not compute_utils.is_volume_backed_instance( | |
context, instance)): | |
try: | |
shutil.rmtree(inst_base) | |
except OSError as e: | |
if e.errno != errno.ENOENT: | |
raise | |
if instance.host != CONF.host: | |
self._undefine_domain(instance) | |
self.unplug_vifs(instance, network_info) | |
self.unfilter_instance(instance, network_info) | |
def _get_volume_driver(self, connection_info): | |
driver_type = connection_info.get('driver_volume_type') | |
if driver_type not in self.volume_drivers: | |
raise exception.VolumeDriverNotFound(driver_type=driver_type) | |
return self.volume_drivers[driver_type] | |
def _connect_volume(self, context, connection_info, instance, | |
encryption=None, allow_native_luks=True): | |
vol_driver = self._get_volume_driver(connection_info) | |
vol_driver.connect_volume(connection_info, instance) | |
self._attach_encryptor(context, connection_info, encryption, | |
allow_native_luks) | |
def _disconnect_volume(self, context, connection_info, instance, | |
encryption=None): | |
self._detach_encryptor(context, connection_info, encryption=encryption) | |
vol_driver = self._get_volume_driver(connection_info) | |
vol_driver.disconnect_volume(connection_info, instance) | |
def _extend_volume(self, connection_info, instance): | |
vol_driver = self._get_volume_driver(connection_info) | |
return vol_driver.extend_volume(connection_info, instance) | |
def _use_native_luks(self, encryption=None): | |
"""Is LUKS the required provider and native QEMU LUKS available | |
""" | |
provider = None | |
if encryption: | |
provider = encryption.get('provider', None) | |
if provider in encryptors.LEGACY_PROVIDER_CLASS_TO_FORMAT_MAP: | |
provider = encryptors.LEGACY_PROVIDER_CLASS_TO_FORMAT_MAP[provider] | |
return provider == encryptors.LUKS and self._is_native_luks_available() | |
def _get_volume_config(self, connection_info, disk_info): | |
vol_driver = self._get_volume_driver(connection_info) | |
conf = vol_driver.get_config(connection_info, disk_info) | |
self._set_cache_mode(conf) | |
return conf | |
def _get_volume_encryptor(self, connection_info, encryption): | |
root_helper = utils.get_root_helper() | |
return encryptors.get_volume_encryptor(root_helper=root_helper, | |
keymgr=key_manager.API(CONF), | |
connection_info=connection_info, | |
**encryption) | |
def _get_volume_encryption(self, context, connection_info): | |
"""Get the encryption metadata dict if it is not provided | |
""" | |
encryption = {} | |
volume_id = driver_block_device.get_volume_id(connection_info) | |
if volume_id: | |
encryption = encryptors.get_encryption_metadata(context, | |
self._volume_api, volume_id, connection_info) | |
return encryption | |
def _attach_encryptor(self, context, connection_info, encryption, | |
allow_native_luks): | |
"""Attach the frontend encryptor if one is required by the volume. | |
The request context is only used when an encryption metadata dict is | |
not provided. The encryption metadata dict being populated is then used | |
to determine if an attempt to attach the encryptor should be made. | |
If native LUKS decryption is enabled then create a Libvirt volume | |
secret containing the LUKS passphrase for the volume. | |
""" | |
if encryption is None: | |
encryption = self._get_volume_encryption(context, connection_info) | |
if (encryption and allow_native_luks and | |
self._use_native_luks(encryption)): | |
# NOTE(lyarwood): Fetch the associated key for the volume and | |
# decode the passphrase from the key. | |
# FIXME(lyarwood): c-vol currently creates symmetric keys for use | |
# with volumes, leading to the binary to hex to string conversion | |
# below. | |
keymgr = key_manager.API(CONF) | |
key = keymgr.get(context, encryption['encryption_key_id']) | |
key_encoded = key.get_encoded() | |
passphrase = binascii.hexlify(key_encoded).decode('utf-8') | |
# NOTE(lyarwood): Retain the behaviour of the original os-brick | |
# encryptors and format any volume that does not identify as | |
# encrypted with LUKS. | |
# FIXME(lyarwood): Remove this once c-vol correctly formats | |
# encrypted volumes during their initial creation: | |
# https://bugs.launchpad.net/cinder/+bug/1739442 | |
device_path = connection_info.get('data').get('device_path') | |
if device_path: | |
root_helper = utils.get_root_helper() | |
if not luks_encryptor.is_luks(root_helper, device_path): | |
encryptor = self._get_volume_encryptor(connection_info, | |
encryption) | |
encryptor._format_volume(passphrase, **encryption) | |
# NOTE(lyarwood): Store the passphrase as a libvirt secret locally | |
# on the compute node. This secret is used later when generating | |
# the volume config. | |
volume_id = driver_block_device.get_volume_id(connection_info) | |
self._host.create_secret('volume', volume_id, password=passphrase) | |
elif encryption: | |
encryptor = self._get_volume_encryptor(connection_info, | |
encryption) | |
encryptor.attach_volume(context, **encryption) | |
def _detach_encryptor(self, context, connection_info, encryption): | |
"""Detach the frontend encryptor if one is required by the volume. | |
The request context is only used when an encryption metadata dict is | |
not provided. The encryption metadata dict being populated is then used | |
to determine if an attempt to detach the encryptor should be made. | |
If native LUKS decryption is enabled then delete previously created | |
Libvirt volume secret from the host. | |
""" | |
volume_id = driver_block_device.get_volume_id(connection_info) | |
if volume_id and self._host.find_secret('volume', volume_id): | |
return self._host.delete_secret('volume', volume_id) | |
if encryption is None: | |
encryption = self._get_volume_encryption(context, connection_info) | |
if encryption: | |
encryptor = self._get_volume_encryptor(connection_info, | |
encryption) | |
encryptor.detach_volume(**encryption) | |
def _check_discard_for_attach_volume(self, conf, instance): | |
"""Perform some checks for volumes configured for discard support. | |
If discard is configured for the volume, and the guest is using a | |
configuration known to not work, we will log a message explaining | |
the reason why. | |
""" | |
if conf.driver_discard == 'unmap' and conf.target_bus == 'virtio': | |
LOG.debug('Attempting to attach volume %(id)s with discard ' | |
'support enabled to an instance using an ' | |
'unsupported configuration. target_bus = ' | |
'%(bus)s. Trim commands will not be issued to ' | |
'the storage device.', | |
{'bus': conf.target_bus, | |
'id': conf.serial}, | |
instance=instance) | |
def attach_volume(self, context, connection_info, instance, mountpoint, | |
disk_bus=None, device_type=None, encryption=None): | |
guest = self._host.get_guest(instance) | |
disk_dev = mountpoint.rpartition("/")[2] | |
bdm = { | |
'device_name': disk_dev, | |
'disk_bus': disk_bus, | |
'device_type': device_type} | |
        # Note(cfb): If the volume has a custom block size, check that we are
        #            using QEMU/KVM and libvirt >= 0.10.2. The presence of a
        #            block size is considered mandatory by cinder, so we fail
        #            if we can't honor the request.
data = {} | |
if ('data' in connection_info): | |
data = connection_info['data'] | |
if ('logical_block_size' in data or 'physical_block_size' in data): | |
if ((CONF.libvirt.virt_type != "kvm" and | |
CONF.libvirt.virt_type != "qemu")): | |
msg = _("Volume sets block size, but the current " | |
"libvirt hypervisor '%s' does not support custom " | |
"block size") % CONF.libvirt.virt_type | |
raise exception.InvalidHypervisorType(msg) | |
self._connect_volume(context, connection_info, instance, | |
encryption=encryption) | |
disk_info = blockinfo.get_info_from_bdm( | |
instance, CONF.libvirt.virt_type, instance.image_meta, bdm) | |
if disk_info['bus'] == 'scsi': | |
disk_info['unit'] = self._get_scsi_controller_max_unit(guest) + 1 | |
conf = self._get_volume_config(connection_info, disk_info) | |
self._check_discard_for_attach_volume(conf, instance) | |
try: | |
state = guest.get_power_state(self._host) | |
live = state in (power_state.RUNNING, power_state.PAUSED) | |
guest.attach_device(conf, persistent=True, live=live) | |
# NOTE(artom) If we're attaching with a device role tag, we need to | |
# rebuild device_metadata. If we're attaching without a role | |
            # tag, we're rebuilding it here needlessly anyway. This isn't a
# massive deal, and it helps reduce code complexity by not having | |
# to indicate to the virt driver that the attach is tagged. The | |
# really important optimization of not calling the database unless | |
# device_metadata has actually changed is done for us by | |
# instance.save(). | |
instance.device_metadata = self._build_device_metadata( | |
context, instance) | |
instance.save() | |
except Exception: | |
LOG.exception(_('Failed to attach volume at mountpoint: %s'), | |
mountpoint, instance=instance) | |
with excutils.save_and_reraise_exception(): | |
self._disconnect_volume(context, connection_info, instance, | |
encryption=encryption) | |
def _swap_volume(self, guest, disk_path, conf, resize_to): | |
"""Swap existing disk with a new block device.""" | |
dev = guest.get_block_device(disk_path) | |
# Save a copy of the domain's persistent XML file. We'll use this | |
# to redefine the domain if anything fails during the volume swap. | |
xml = guest.get_xml_desc(dump_inactive=True, dump_sensitive=True) | |
# Abort is an idempotent operation, so make sure any block | |
# jobs which may have failed are ended. | |
try: | |
dev.abort_job() | |
except Exception: | |
pass | |
try: | |
# NOTE (rmk): blockRebase cannot be executed on persistent | |
# domains, so we need to temporarily undefine it. | |
# If any part of this block fails, the domain is | |
# re-defined regardless. | |
if guest.has_persistent_configuration(): | |
support_uefi = self._has_uefi_support() | |
guest.delete_configuration(support_uefi) | |
try: | |
# Start copy with VIR_DOMAIN_BLOCK_REBASE_REUSE_EXT flag to | |
# allow writing to existing external volume file. Use | |
# VIR_DOMAIN_BLOCK_REBASE_COPY_DEV if it's a block device to | |
# make sure XML is generated correctly (bug 1691195) | |
copy_dev = conf.source_type == 'block' | |
dev.rebase(conf.source_path, copy=True, reuse_ext=True, | |
copy_dev=copy_dev) | |
while not dev.is_job_complete(): | |
time.sleep(0.5) | |
dev.abort_job(pivot=True) | |
except Exception as exc: | |
LOG.exception("Failure rebasing volume %(new_path)s on " | |
"%(old_path)s.", {'new_path': conf.source_path, | |
'old_path': disk_path}) | |
raise exception.VolumeRebaseFailed(reason=six.text_type(exc)) | |
if resize_to: | |
dev.resize(resize_to * units.Gi / units.Ki) | |
# Make sure we will redefine the domain using the updated | |
# configuration after the volume was swapped. The dump_inactive | |
# keyword arg controls whether we pull the inactive (persistent) | |
# or active (live) config from the domain. We want to pull the | |
# live config after the volume was updated to use when we redefine | |
# the domain. | |
xml = guest.get_xml_desc(dump_inactive=False, dump_sensitive=True) | |
finally: | |
self._host.write_instance_config(xml) | |
def swap_volume(self, context, old_connection_info, | |
new_connection_info, instance, mountpoint, resize_to): | |
# NOTE(lyarwood): https://bugzilla.redhat.com/show_bug.cgi?id=760547 | |
encryption = self._get_volume_encryption(context, old_connection_info) | |
if encryption and self._use_native_luks(encryption): | |
            raise NotImplementedError(_("Swap volume is not supported for " | |
                "encrypted volumes when native LUKS decryption is enabled.")) | |
guest = self._host.get_guest(instance) | |
disk_dev = mountpoint.rpartition("/")[2] | |
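        # For example, a mountpoint of '/dev/vdb' yields a disk_dev of 'vdb', | |
        # the name the disk is looked up by in the guest definition below. | |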
if not guest.get_disk(disk_dev): | |
raise exception.DiskNotFound(location=disk_dev) | |
disk_info = { | |
'dev': disk_dev, | |
'bus': blockinfo.get_disk_bus_for_disk_dev( | |
CONF.libvirt.virt_type, disk_dev), | |
'type': 'disk', | |
} | |
# NOTE (lyarwood): new_connection_info will be modified by the | |
# following _connect_volume call down into the volume drivers. The | |
# majority of the volume drivers will add a device_path that is in turn | |
# used by _get_volume_config to set the source_path of the | |
# LibvirtConfigGuestDisk object it returns. We do not explicitly save | |
# this to the BDM here as the upper compute swap_volume method will | |
# eventually do this for us. | |
self._connect_volume(context, new_connection_info, instance) | |
conf = self._get_volume_config(new_connection_info, disk_info) | |
if not conf.source_path: | |
self._disconnect_volume(context, new_connection_info, instance) | |
raise NotImplementedError(_("Swap only supports host devices")) | |
try: | |
self._swap_volume(guest, disk_dev, conf, resize_to) | |
except exception.VolumeRebaseFailed: | |
with excutils.save_and_reraise_exception(): | |
self._disconnect_volume(context, new_connection_info, instance) | |
self._disconnect_volume(context, old_connection_info, instance) | |
def _get_existing_domain_xml(self, instance, network_info, | |
block_device_info=None): | |
try: | |
guest = self._host.get_guest(instance) | |
xml = guest.get_xml_desc() | |
except exception.InstanceNotFound: | |
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type, | |
instance, | |
instance.image_meta, | |
block_device_info) | |
xml = self._get_guest_xml(nova_context.get_admin_context(), | |
instance, network_info, disk_info, | |
instance.image_meta, | |
block_device_info=block_device_info) | |
return xml | |
def detach_volume(self, connection_info, instance, mountpoint, | |
encryption=None): | |
disk_dev = mountpoint.rpartition("/")[2] | |
try: | |
guest = self._host.get_guest(instance) | |
state = guest.get_power_state(self._host) | |
live = state in (power_state.RUNNING, power_state.PAUSED) | |
# NOTE(lyarwood): The volume must be detached from the VM before | |
# detaching any attached encryptors or disconnecting the underlying | |
# volume in _disconnect_volume. Otherwise, the encryptor or volume | |
# driver may report that the volume is still in use. | |
wait_for_detach = guest.detach_device_with_retry(guest.get_disk, | |
disk_dev, | |
live=live) | |
wait_for_detach() | |
except exception.InstanceNotFound: | |
# NOTE(zhaoqin): If the instance does not exist, _lookup_by_name() | |
# will throw InstanceNotFound exception. Need to | |
# disconnect volume under this circumstance. | |
LOG.warning("During detach_volume, instance disappeared.", | |
instance=instance) | |
except exception.DeviceNotFound: | |
raise exception.DiskNotFound(location=disk_dev) | |
except libvirt.libvirtError as ex: | |
# NOTE(vish): This is called to cleanup volumes after live | |
# migration, so we should still disconnect even if | |
# the instance doesn't exist here anymore. | |
error_code = ex.get_error_code() | |
if error_code == libvirt.VIR_ERR_NO_DOMAIN: | |
LOG.warning("During detach_volume, instance disappeared.", | |
instance=instance) | |
else: | |
raise | |
# NOTE(lyarwood): We can provide None as the request context here as we | |
# already have the encryption metadata dict from the compute layer. | |
# This avoids the need to add the request context to the signature of | |
# detach_volume requiring changes across all drivers. | |
self._disconnect_volume(None, connection_info, instance, | |
encryption=encryption) | |
def extend_volume(self, connection_info, instance): | |
try: | |
new_size = self._extend_volume(connection_info, instance) | |
except NotImplementedError: | |
raise exception.ExtendVolumeNotSupported() | |
# Resize the device in QEMU so its size is updated and | |
# detected by the instance without rebooting. | |
try: | |
guest = self._host.get_guest(instance) | |
state = guest.get_power_state(self._host) | |
active_state = state in (power_state.RUNNING, power_state.PAUSED) | |
if active_state: | |
disk_path = connection_info['data']['device_path'] | |
                LOG.debug('resizing block device %(dev)s to %(size)u bytes', | |
                          {'dev': disk_path, 'size': new_size}) | |
dev = guest.get_block_device(disk_path) | |
dev.resize(new_size // units.Ki) | |
else: | |
LOG.debug('Skipping block device resize, guest is not running', | |
instance=instance) | |
except exception.InstanceNotFound: | |
with excutils.save_and_reraise_exception(): | |
LOG.warning('During extend_volume, instance disappeared.', | |
instance=instance) | |
except libvirt.libvirtError: | |
with excutils.save_and_reraise_exception(): | |
LOG.exception('resizing block device failed.', | |
instance=instance) | |
def attach_interface(self, context, instance, image_meta, vif): | |
guest = self._host.get_guest(instance) | |
self.vif_driver.plug(instance, vif) | |
self.firewall_driver.setup_basic_filtering(instance, [vif]) | |
cfg = self.vif_driver.get_config(instance, vif, image_meta, | |
instance.flavor, | |
CONF.libvirt.virt_type, | |
self._host) | |
try: | |
state = guest.get_power_state(self._host) | |
live = state in (power_state.RUNNING, power_state.PAUSED) | |
guest.attach_device(cfg, persistent=True, live=live) | |
except libvirt.libvirtError: | |
LOG.error('attaching network adapter failed.', | |
instance=instance, exc_info=True) | |
self.vif_driver.unplug(instance, vif) | |
raise exception.InterfaceAttachFailed( | |
instance_uuid=instance.uuid) | |
try: | |
# NOTE(artom) If we're attaching with a device role tag, we need to | |
# rebuild device_metadata. If we're attaching without a role | |
            # tag, we're rebuilding it here needlessly anyway. This isn't a | |
# massive deal, and it helps reduce code complexity by not having | |
# to indicate to the virt driver that the attach is tagged. The | |
# really important optimization of not calling the database unless | |
# device_metadata has actually changed is done for us by | |
# instance.save(). | |
instance.device_metadata = self._build_device_metadata( | |
context, instance) | |
instance.save() | |
except Exception: | |
# NOTE(artom) If we fail here it means the interface attached | |
# successfully but building and/or saving the device metadata | |
# failed. Just unplugging the vif is therefore not enough cleanup, | |
# we need to detach the interface. | |
with excutils.save_and_reraise_exception(reraise=False): | |
LOG.error('Interface attached successfully but building ' | |
'and/or saving device metadata failed.', | |
instance=instance, exc_info=True) | |
self.detach_interface(context, instance, vif) | |
raise exception.InterfaceAttachFailed( | |
instance_uuid=instance.uuid) | |
def detach_interface(self, context, instance, vif): | |
guest = self._host.get_guest(instance) | |
cfg = self.vif_driver.get_config(instance, vif, | |
instance.image_meta, | |
instance.flavor, | |
CONF.libvirt.virt_type, self._host) | |
interface = guest.get_interface_by_cfg(cfg) | |
try: | |
self.vif_driver.unplug(instance, vif) | |
# NOTE(mriedem): When deleting an instance and using Neutron, | |
# we can be racing against Neutron deleting the port and | |
# sending the vif-deleted event which then triggers a call to | |
# detach the interface, so if the interface is not found then | |
# we can just log it as a warning. | |
if not interface: | |
mac = vif.get('address') | |
# The interface is gone so just log it as a warning. | |
LOG.warning('Detaching interface %(mac)s failed because ' | |
'the device is no longer found on the guest.', | |
{'mac': mac}, instance=instance) | |
return | |
state = guest.get_power_state(self._host) | |
live = state in (power_state.RUNNING, power_state.PAUSED) | |
            # Now we are going to loop until the interface is detached or we | |
            # time out. | |
wait_for_detach = guest.detach_device_with_retry( | |
guest.get_interface_by_cfg, cfg, live=live, | |
alternative_device_name=self.vif_driver.get_vif_devname(vif)) | |
wait_for_detach() | |
except exception.DeviceDetachFailed: | |
# We failed to detach the device even with the retry loop, so let's | |
# dump some debug information to the logs before raising back up. | |
with excutils.save_and_reraise_exception(): | |
devname = self.vif_driver.get_vif_devname(vif) | |
interface = guest.get_interface_by_cfg(cfg) | |
if interface: | |
LOG.warning( | |
'Failed to detach interface %(devname)s after ' | |
'repeated attempts. Final interface xml:\n' | |
'%(interface_xml)s\nFinal guest xml:\n%(guest_xml)s', | |
{'devname': devname, | |
'interface_xml': interface.to_xml(), | |
'guest_xml': guest.get_xml_desc()}, | |
instance=instance) | |
except exception.DeviceNotFound: | |
# The interface is gone so just log it as a warning. | |
LOG.warning('Detaching interface %(mac)s failed because ' | |
'the device is no longer found on the guest.', | |
{'mac': vif.get('address')}, instance=instance) | |
except libvirt.libvirtError as ex: | |
error_code = ex.get_error_code() | |
if error_code == libvirt.VIR_ERR_NO_DOMAIN: | |
LOG.warning("During detach_interface, instance disappeared.", | |
instance=instance) | |
else: | |
# NOTE(mriedem): When deleting an instance and using Neutron, | |
# we can be racing against Neutron deleting the port and | |
# sending the vif-deleted event which then triggers a call to | |
# detach the interface, so we might have failed because the | |
# network device no longer exists. Libvirt will fail with | |
# "operation failed: no matching network device was found" | |
# which unfortunately does not have a unique error code so we | |
# need to look up the interface by config and if it's not found | |
# then we can just log it as a warning rather than tracing an | |
# error. | |
mac = vif.get('address') | |
interface = guest.get_interface_by_cfg(cfg) | |
if interface: | |
LOG.error('detaching network adapter failed.', | |
instance=instance, exc_info=True) | |
raise exception.InterfaceDetachFailed( | |
instance_uuid=instance.uuid) | |
# The interface is gone so just log it as a warning. | |
LOG.warning('Detaching interface %(mac)s failed because ' | |
'the device is no longer found on the guest.', | |
{'mac': mac}, instance=instance) | |
def _create_snapshot_metadata(self, image_meta, instance, | |
img_fmt, snp_name): | |
metadata = {'is_public': False, | |
'status': 'active', | |
'name': snp_name, | |
'properties': { | |
'kernel_id': instance.kernel_id, | |
'image_location': 'snapshot', | |
'image_state': 'available', | |
'owner_id': instance.project_id, | |
'ramdisk_id': instance.ramdisk_id, | |
} | |
} | |
if instance.os_type: | |
metadata['properties']['os_type'] = instance.os_type | |
# NOTE(vish): glance forces ami disk format to be ami | |
if image_meta.disk_format == 'ami': | |
metadata['disk_format'] = 'ami' | |
else: | |
metadata['disk_format'] = img_fmt | |
if image_meta.obj_attr_is_set("container_format"): | |
metadata['container_format'] = image_meta.container_format | |
else: | |
metadata['container_format'] = "bare" | |
return metadata | |
def snapshot(self, context, instance, image_id, update_task_state): | |
"""Create snapshot from a running VM instance. | |
This command only works with qemu 0.14+ | |
""" | |
try: | |
guest = self._host.get_guest(instance) | |
# TODO(sahid): We are converting all calls from a | |
# virDomain object to use nova.virt.libvirt.Guest. | |
# We should be able to remove virt_dom at the end. | |
virt_dom = guest._domain | |
except exception.InstanceNotFound: | |
raise exception.InstanceNotRunning(instance_id=instance.uuid) | |
snapshot = self._image_api.get(context, image_id) | |
# source_format is an on-disk format | |
# source_type is a backend type | |
disk_path, source_format = libvirt_utils.find_disk(guest) | |
source_type = libvirt_utils.get_disk_type_from_path(disk_path) | |
# We won't have source_type for raw or qcow2 disks, because we can't | |
# determine that from the path. We should have it from the libvirt | |
# xml, though. | |
if source_type is None: | |
source_type = source_format | |
# For lxc instances we won't have it either from libvirt xml | |
# (because we just gave libvirt the mounted filesystem), or the path, | |
# so source_type is still going to be None. In this case, | |
# root_disk is going to default to CONF.libvirt.images_type | |
# below, which is still safe. | |
image_format = CONF.libvirt.snapshot_image_format or source_type | |
# NOTE(bfilippov): save lvm and rbd as raw | |
if image_format == 'lvm' or image_format == 'rbd': | |
image_format = 'raw' | |
metadata = self._create_snapshot_metadata(instance.image_meta, | |
instance, | |
image_format, | |
snapshot['name']) | |
snapshot_name = uuidutils.generate_uuid(dashed=False) | |
state = guest.get_power_state(self._host) | |
# NOTE(dgenin): Instances with LVM encrypted ephemeral storage require | |
# cold snapshots. Currently, checking for encryption is | |
# redundant because LVM supports only cold snapshots. | |
# It is necessary in case this situation changes in the | |
# future. | |
if (self._host.has_min_version(hv_type=host.HV_DRIVER_QEMU) | |
             and source_type not in ('lvm',) | |
and not CONF.ephemeral_storage_encryption.enabled | |
and not CONF.workarounds.disable_libvirt_livesnapshot | |
# NOTE(rmk): We cannot perform live snapshots when a | |
# managedSave file is present, so we will use the cold/legacy | |
# method for instances which are shutdown or paused. | |
# NOTE(mriedem): Live snapshot doesn't work with paused | |
# instances on older versions of libvirt/qemu. We can likely | |
# remove the restriction on PAUSED once we require | |
# libvirt>=3.6.0 and qemu>=2.10 since that works with the | |
# Pike Ubuntu Cloud Archive testing in Queens. | |
and state not in (power_state.SHUTDOWN, power_state.PAUSED)): | |
live_snapshot = True | |
# Abort is an idempotent operation, so make sure any block | |
# jobs which may have failed are ended. This operation also | |
# confirms the running instance, as opposed to the system as a | |
# whole, has a new enough version of the hypervisor (bug 1193146). | |
try: | |
guest.get_block_device(disk_path).abort_job() | |
except libvirt.libvirtError as ex: | |
error_code = ex.get_error_code() | |
if error_code == libvirt.VIR_ERR_CONFIG_UNSUPPORTED: | |
live_snapshot = False | |
else: | |
pass | |
else: | |
live_snapshot = False | |
self._prepare_domain_for_snapshot(context, live_snapshot, state, | |
instance) | |
root_disk = self.image_backend.by_libvirt_path( | |
instance, disk_path, image_type=source_type) | |
if live_snapshot: | |
LOG.info("Beginning live snapshot process", instance=instance) | |
else: | |
LOG.info("Beginning cold snapshot process", instance=instance) | |
update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD) | |
try: | |
update_task_state(task_state=task_states.IMAGE_UPLOADING, | |
expected_state=task_states.IMAGE_PENDING_UPLOAD) | |
metadata['location'] = root_disk.direct_snapshot( | |
context, snapshot_name, image_format, image_id, | |
instance.image_ref) | |
self._snapshot_domain(context, live_snapshot, virt_dom, state, | |
instance) | |
self._image_api.update(context, image_id, metadata, | |
purge_props=False) | |
except (NotImplementedError, exception.ImageUnacceptable, | |
exception.Forbidden) as e: | |
if type(e) != NotImplementedError: | |
LOG.warning('Performing standard snapshot because direct ' | |
'snapshot failed: %(error)s', | |
{'error': encodeutils.exception_to_unicode(e)}) | |
failed_snap = metadata.pop('location', None) | |
if failed_snap: | |
failed_snap = {'url': str(failed_snap)} | |
root_disk.cleanup_direct_snapshot(failed_snap, | |
also_destroy_volume=True, | |
ignore_errors=True) | |
update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD, | |
expected_state=task_states.IMAGE_UPLOADING) | |
# TODO(nic): possibly abstract this out to the root_disk | |
if source_type == 'rbd' and live_snapshot: | |
# Standard snapshot uses qemu-img convert from RBD which is | |
# not safe to run with live_snapshot. | |
live_snapshot = False | |
# Suspend the guest, so this is no longer a live snapshot | |
self._prepare_domain_for_snapshot(context, live_snapshot, | |
state, instance) | |
snapshot_directory = CONF.libvirt.snapshots_directory | |
fileutils.ensure_tree(snapshot_directory) | |
with utils.tempdir(dir=snapshot_directory) as tmpdir: | |
try: | |
out_path = os.path.join(tmpdir, snapshot_name) | |
if live_snapshot: | |
# NOTE(xqueralt): libvirt needs o+x in the tempdir | |
os.chmod(tmpdir, 0o701) | |
self._live_snapshot(context, instance, guest, | |
disk_path, out_path, source_format, | |
image_format, instance.image_meta) | |
else: | |
root_disk.snapshot_extract(out_path, image_format) | |
LOG.info("Snapshot extracted, beginning image upload", | |
instance=instance) | |
finally: | |
self._snapshot_domain(context, live_snapshot, virt_dom, | |
state, instance) | |
# Upload that image to the image service | |
update_task_state(task_state=task_states.IMAGE_UPLOADING, | |
expected_state=task_states.IMAGE_PENDING_UPLOAD) | |
with libvirt_utils.file_open(out_path, 'rb') as image_file: | |
self._image_api.update(context, | |
image_id, | |
metadata, | |
image_file) | |
except Exception: | |
with excutils.save_and_reraise_exception(): | |
LOG.exception(_("Failed to snapshot image")) | |
failed_snap = metadata.pop('location', None) | |
if failed_snap: | |
failed_snap = {'url': str(failed_snap)} | |
root_disk.cleanup_direct_snapshot( | |
failed_snap, also_destroy_volume=True, | |
ignore_errors=True) | |
LOG.info("Snapshot image upload complete", instance=instance) | |
def _prepare_domain_for_snapshot(self, context, live_snapshot, state, | |
instance): | |
# NOTE(dkang): managedSave does not work for LXC | |
if CONF.libvirt.virt_type != 'lxc' and not live_snapshot: | |
if state == power_state.RUNNING or state == power_state.PAUSED: | |
self.suspend(context, instance) | |
def _snapshot_domain(self, context, live_snapshot, virt_dom, state, | |
instance): | |
guest = None | |
# NOTE(dkang): because previous managedSave is not called | |
# for LXC, _create_domain must not be called. | |
if CONF.libvirt.virt_type != 'lxc' and not live_snapshot: | |
if state == power_state.RUNNING: | |
guest = self._create_domain(domain=virt_dom) | |
elif state == power_state.PAUSED: | |
guest = self._create_domain(domain=virt_dom, pause=True) | |
if guest is not None: | |
self._attach_pci_devices( | |
guest, pci_manager.get_instance_pci_devs(instance)) | |
self._attach_direct_passthrough_ports( | |
context, instance, guest) | |
def _can_set_admin_password(self, image_meta): | |
if CONF.libvirt.virt_type == 'parallels': | |
if not self._host.has_min_version( | |
MIN_LIBVIRT_PARALLELS_SET_ADMIN_PASSWD): | |
raise exception.SetAdminPasswdNotSupported() | |
elif CONF.libvirt.virt_type in ('kvm', 'qemu'): | |
if not self._host.has_min_version( | |
MIN_LIBVIRT_SET_ADMIN_PASSWD): | |
raise exception.SetAdminPasswdNotSupported() | |
if not image_meta.properties.get('hw_qemu_guest_agent', False): | |
raise exception.QemuGuestAgentNotEnabled() | |
else: | |
raise exception.SetAdminPasswdNotSupported() | |
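    # A minimal sketch of how an image would normally advertise the guest | |
    # agent required by the checks above (illustrative; exact CLI may vary): | |
    # | |
    #   openstack image set --property hw_qemu_guest_agent=yes <image> | |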
def set_admin_password(self, instance, new_pass): | |
self._can_set_admin_password(instance.image_meta) | |
guest = self._host.get_guest(instance) | |
user = instance.image_meta.properties.get("os_admin_user") | |
if not user: | |
if instance.os_type == "windows": | |
user = "Administrator" | |
else: | |
user = "root" | |
try: | |
guest.set_user_password(user, new_pass) | |
except libvirt.libvirtError as ex: | |
error_code = ex.get_error_code() | |
if error_code == libvirt.VIR_ERR_AGENT_UNRESPONSIVE: | |
LOG.debug('Failed to set password: QEMU agent unresponsive', | |
instance_uuid=instance.uuid) | |
raise NotImplementedError() | |
err_msg = encodeutils.exception_to_unicode(ex) | |
            msg = (_('Error from libvirt while setting password for username ' | |
'"%(user)s": [Error Code %(error_code)s] %(ex)s') | |
% {'user': user, 'error_code': error_code, 'ex': err_msg}) | |
raise exception.InternalError(msg) | |
def _can_quiesce(self, instance, image_meta): | |
if CONF.libvirt.virt_type not in ('kvm', 'qemu'): | |
raise exception.InstanceQuiesceNotSupported( | |
instance_id=instance.uuid) | |
if not image_meta.properties.get('hw_qemu_guest_agent', False): | |
raise exception.QemuGuestAgentNotEnabled() | |
def _requires_quiesce(self, image_meta): | |
return image_meta.properties.get('os_require_quiesce', False) | |
def _set_quiesced(self, context, instance, image_meta, quiesced): | |
self._can_quiesce(instance, image_meta) | |
try: | |
guest = self._host.get_guest(instance) | |
if quiesced: | |
guest.freeze_filesystems() | |
else: | |
guest.thaw_filesystems() | |
except libvirt.libvirtError as ex: | |
error_code = ex.get_error_code() | |
err_msg = encodeutils.exception_to_unicode(ex) | |
msg = (_('Error from libvirt while quiescing %(instance_name)s: ' | |
'[Error Code %(error_code)s] %(ex)s') | |
% {'instance_name': instance.name, | |
'error_code': error_code, 'ex': err_msg}) | |
raise exception.InternalError(msg) | |
def quiesce(self, context, instance, image_meta): | |
"""Freeze the guest filesystems to prepare for snapshot. | |
The qemu-guest-agent must be setup to execute fsfreeze. | |
""" | |
self._set_quiesced(context, instance, image_meta, True) | |
def unquiesce(self, context, instance, image_meta): | |
"""Thaw the guest filesystems after snapshot.""" | |
self._set_quiesced(context, instance, image_meta, False) | |
def _live_snapshot(self, context, instance, guest, disk_path, out_path, | |
source_format, image_format, image_meta): | |
"""Snapshot an instance without downtime.""" | |
dev = guest.get_block_device(disk_path) | |
# Save a copy of the domain's persistent XML file | |
xml = guest.get_xml_desc(dump_inactive=True, dump_sensitive=True) | |
# Abort is an idempotent operation, so make sure any block | |
# jobs which may have failed are ended. | |
try: | |
dev.abort_job() | |
except Exception: | |
pass | |
# NOTE (rmk): We are using shallow rebases as a workaround to a bug | |
# in QEMU 1.3. In order to do this, we need to create | |
# a destination image with the original backing file | |
# and matching size of the instance root disk. | |
src_disk_size = libvirt_utils.get_disk_size(disk_path, | |
format=source_format) | |
src_back_path = libvirt_utils.get_disk_backing_file(disk_path, | |
format=source_format, | |
basename=False) | |
disk_delta = out_path + '.delta' | |
libvirt_utils.create_cow_image(src_back_path, disk_delta, | |
src_disk_size) | |
quiesced = False | |
try: | |
self._set_quiesced(context, instance, image_meta, True) | |
quiesced = True | |
except exception.NovaException as err: | |
if self._requires_quiesce(image_meta): | |
raise | |
LOG.info('Skipping quiescing instance: %(reason)s.', | |
{'reason': err}, instance=instance) | |
try: | |
# NOTE (rmk): blockRebase cannot be executed on persistent | |
# domains, so we need to temporarily undefine it. | |
# If any part of this block fails, the domain is | |
# re-defined regardless. | |
if guest.has_persistent_configuration(): | |
support_uefi = self._has_uefi_support() | |
guest.delete_configuration(support_uefi) | |
# NOTE (rmk): Establish a temporary mirror of our root disk and | |
# issue an abort once we have a complete copy. | |
dev.rebase(disk_delta, copy=True, reuse_ext=True, shallow=True) | |
while not dev.is_job_complete(): | |
time.sleep(0.5) | |
dev.abort_job() | |
nova.privsep.path.chown(disk_delta, uid=os.getuid()) | |
finally: | |
self._host.write_instance_config(xml) | |
if quiesced: | |
self._set_quiesced(context, instance, image_meta, False) | |
# Convert the delta (CoW) image with a backing file to a flat | |
# image with no backing file. | |
libvirt_utils.extract_snapshot(disk_delta, 'qcow2', | |
out_path, image_format) | |
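        # Conceptually the extract step above behaves like a (hypothetical) | |
        #   qemu-img convert -f qcow2 -O <image_format> <disk_delta> <out_path> | |
        # collapsing the delta and its backing chain into one standalone | |
        # image suitable for upload. | |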
def _volume_snapshot_update_status(self, context, snapshot_id, status): | |
"""Send a snapshot status update to Cinder. | |
This method captures and logs exceptions that occur | |
since callers cannot do anything useful with these exceptions. | |
Operations on the Cinder side waiting for this will time out if | |
a failure occurs sending the update. | |
:param context: security context | |
:param snapshot_id: id of snapshot being updated | |
:param status: new status value | |
""" | |
try: | |
self._volume_api.update_snapshot_status(context, | |
snapshot_id, | |
status) | |
except Exception: | |
LOG.exception(_('Failed to send updated snapshot status ' | |
'to volume service.')) | |
def _volume_snapshot_create(self, context, instance, guest, | |
volume_id, new_file): | |
"""Perform volume snapshot. | |
:param guest: VM that volume is attached to | |
:param volume_id: volume UUID to snapshot | |
:param new_file: relative path to new qcow2 file present on share | |
""" | |
xml = guest.get_xml_desc() | |
xml_doc = etree.fromstring(xml) | |
device_info = vconfig.LibvirtConfigGuest() | |
device_info.parse_dom(xml_doc) | |
disks_to_snap = [] # to be snapshotted by libvirt | |
network_disks_to_snap = [] # network disks (netfs, etc.) | |
disks_to_skip = [] # local disks not snapshotted | |
for guest_disk in device_info.devices: | |
if (guest_disk.root_name != 'disk'): | |
continue | |
if (guest_disk.target_dev is None): | |
continue | |
if (guest_disk.serial is None or guest_disk.serial != volume_id): | |
disks_to_skip.append(guest_disk.target_dev) | |
continue | |
# disk is a Cinder volume with the correct volume_id | |
disk_info = { | |
'dev': guest_disk.target_dev, | |
'serial': guest_disk.serial, | |
'current_file': guest_disk.source_path, | |
'source_protocol': guest_disk.source_protocol, | |
'source_name': guest_disk.source_name, | |
'source_hosts': guest_disk.source_hosts, | |
'source_ports': guest_disk.source_ports | |
} | |
# Determine path for new_file based on current path | |
if disk_info['current_file'] is not None: | |
current_file = disk_info['current_file'] | |
new_file_path = os.path.join(os.path.dirname(current_file), | |
new_file) | |
disks_to_snap.append((current_file, new_file_path)) | |
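                # e.g. a hypothetical current_file of '/mnt/share/volume-1234' | |
                # with new_file 'volume-1234.snap' gives a new_file_path of | |
                # '/mnt/share/volume-1234.snap'. | |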
# NOTE(mriedem): This used to include a check for gluster in | |
# addition to netfs since they were added together. Support for | |
# gluster was removed in the 16.0.0 Pike release. It is unclear, | |
# however, if other volume drivers rely on the netfs disk source | |
# protocol. | |
elif disk_info['source_protocol'] == 'netfs': | |
network_disks_to_snap.append((disk_info, new_file)) | |
if not disks_to_snap and not network_disks_to_snap: | |
msg = _('Found no disk to snapshot.') | |
raise exception.InternalError(msg) | |
snapshot = vconfig.LibvirtConfigGuestSnapshot() | |
for current_name, new_filename in disks_to_snap: | |
snap_disk = vconfig.LibvirtConfigGuestSnapshotDisk() | |
snap_disk.name = current_name | |
snap_disk.source_path = new_filename | |
snap_disk.source_type = 'file' | |
snap_disk.snapshot = 'external' | |
snap_disk.driver_name = 'qcow2' | |
snapshot.add_disk(snap_disk) | |
for disk_info, new_filename in network_disks_to_snap: | |
snap_disk = vconfig.LibvirtConfigGuestSnapshotDisk() | |
snap_disk.name = disk_info['dev'] | |
snap_disk.source_type = 'network' | |
snap_disk.source_protocol = disk_info['source_protocol'] | |
snap_disk.snapshot = 'external' | |
snap_disk.source_path = new_filename | |
old_dir = disk_info['source_name'].split('/')[0] | |
snap_disk.source_name = '%s/%s' % (old_dir, new_filename) | |
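            # e.g. a hypothetical source_name of 'vols/volume-1234' with | |
            # new_filename 'volume-1234.snap' becomes 'vols/volume-1234.snap'. | |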
snap_disk.source_hosts = disk_info['source_hosts'] | |
snap_disk.source_ports = disk_info['source_ports'] | |
snapshot.add_disk(snap_disk) | |
for dev in disks_to_skip: | |
snap_disk = vconfig.LibvirtConfigGuestSnapshotDisk() | |
snap_disk.name = dev | |
snap_disk.snapshot = 'no' | |
snapshot.add_disk(snap_disk) | |
snapshot_xml = snapshot.to_xml() | |
LOG.debug("snap xml: %s", snapshot_xml, instance=instance) | |
image_meta = instance.image_meta | |
try: | |
# Check to see if we can quiesce the guest before taking the | |
# snapshot. | |
self._can_quiesce(instance, image_meta) | |
try: | |
guest.snapshot(snapshot, no_metadata=True, disk_only=True, | |
reuse_ext=True, quiesce=True) | |
return | |
except libvirt.libvirtError: | |
# If the image says that quiesce is required then we fail. | |
if self._requires_quiesce(image_meta): | |
raise | |
LOG.exception(_('Unable to create quiesced VM snapshot, ' | |
'attempting again with quiescing disabled.'), | |
instance=instance) | |
except (exception.InstanceQuiesceNotSupported, | |
exception.QemuGuestAgentNotEnabled) as err: | |
# If the image says that quiesce is required then we need to fail. | |
if self._requires_quiesce(image_meta): | |
raise | |
LOG.info('Skipping quiescing instance: %(reason)s.', | |
{'reason': err}, instance=instance) | |
try: | |
guest.snapshot(snapshot, no_metadata=True, disk_only=True, | |
reuse_ext=True, quiesce=False) | |
except libvirt.libvirtError: | |
LOG.exception(_('Unable to create VM snapshot, ' | |
'failing volume_snapshot operation.'), | |
instance=instance) | |
raise | |
def _volume_refresh_connection_info(self, context, instance, volume_id): | |
bdm = objects.BlockDeviceMapping.get_by_volume_and_instance( | |
context, volume_id, instance.uuid) | |
driver_bdm = driver_block_device.convert_volume(bdm) | |
if driver_bdm: | |
driver_bdm.refresh_connection_info(context, instance, | |
self._volume_api, self) | |
def volume_snapshot_create(self, context, instance, volume_id, | |
create_info): | |
"""Create snapshots of a Cinder volume via libvirt. | |
:param instance: VM instance object reference | |
:param volume_id: id of volume being snapshotted | |
:param create_info: dict of information used to create snapshots | |
- snapshot_id : ID of snapshot | |
- type : qcow2 / <other> | |
- new_file : qcow2 file created by Cinder which | |
becomes the VM's active image after | |
the snapshot is complete | |
""" | |
LOG.debug("volume_snapshot_create: create_info: %(c_info)s", | |
{'c_info': create_info}, instance=instance) | |
try: | |
guest = self._host.get_guest(instance) | |
except exception.InstanceNotFound: | |
raise exception.InstanceNotRunning(instance_id=instance.uuid) | |
if create_info['type'] != 'qcow2': | |
msg = _('Unknown type: %s') % create_info['type'] | |
raise exception.InternalError(msg) | |
snapshot_id = create_info.get('snapshot_id', None) | |
if snapshot_id is None: | |
msg = _('snapshot_id required in create_info') | |
raise exception.InternalError(msg) | |
try: | |
self._volume_snapshot_create(context, instance, guest, | |
volume_id, create_info['new_file']) | |
except Exception: | |
with excutils.save_and_reraise_exception(): | |
LOG.exception(_('Error occurred during ' | |
'volume_snapshot_create, ' | |
'sending error status to Cinder.'), | |
instance=instance) | |
self._volume_snapshot_update_status( | |
context, snapshot_id, 'error') | |
self._volume_snapshot_update_status( | |
context, snapshot_id, 'creating') | |
def _wait_for_snapshot(): | |
snapshot = self._volume_api.get_snapshot(context, snapshot_id) | |
if snapshot.get('status') != 'creating': | |
self._volume_refresh_connection_info(context, instance, | |
volume_id) | |
raise loopingcall.LoopingCallDone() | |
timer = loopingcall.FixedIntervalLoopingCall(_wait_for_snapshot) | |
timer.start(interval=0.5).wait() | |
@staticmethod | |
def _rebase_with_qemu_img(guest, device, active_disk_object, | |
rebase_base): | |
"""Rebase a device tied to a guest using qemu-img. | |
        :param guest: the Guest which owns the device being rebased | |
:type guest: nova.virt.libvirt.guest.Guest | |
:param device: the guest block device to rebase | |
:type device: nova.virt.libvirt.guest.BlockDevice | |
        :param active_disk_object: the disk configuration of the device | |
            being rebased | |
:type active_disk_object: nova.virt.libvirt.config.\ | |
LibvirtConfigGuestDisk | |
:param rebase_base: the new parent in the backing chain | |
:type rebase_base: None or string | |
""" | |
        # It's unclear how well qemu-img handles network disks for | |
        # every protocol, so let's be safe. | |
active_protocol = active_disk_object.source_protocol | |
if active_protocol is not None: | |
msg = _("Something went wrong when deleting a volume snapshot: " | |
"rebasing a %(protocol)s network disk using qemu-img " | |
"has not been fully tested") % {'protocol': | |
active_protocol} | |
LOG.error(msg) | |
raise exception.InternalError(msg) | |
if rebase_base is None: | |
# If backing_file is specified as "" (the empty string), then | |
# the image is rebased onto no backing file (i.e. it will exist | |
# independently of any backing file). | |
backing_file = "" | |
qemu_img_extra_arg = [] | |
else: | |
# If the rebased image is going to have a backing file then | |
# explicitly set the backing file format to avoid any security | |
# concerns related to file format auto detection. | |
backing_file = rebase_base | |
b_file_fmt = images.qemu_img_info(backing_file).file_format | |
qemu_img_extra_arg = ['-F', b_file_fmt] | |
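        # The resulting command is along the lines of (paths illustrative): | |
        #   qemu-img rebase -b /path/new_base.qcow2 -F qcow2 /path/disk.qcow2 | |
        # or, when flattening onto no backing file at all: | |
        #   qemu-img rebase -b "" /path/disk.qcow2 | |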
qemu_img_extra_arg.append(active_disk_object.source_path) | |
utils.execute("qemu-img", "rebase", "-b", backing_file, | |
*qemu_img_extra_arg) | |
def _volume_snapshot_delete(self, context, instance, volume_id, | |
snapshot_id, delete_info=None): | |
"""Note: | |
if file being merged into == active image: | |
do a blockRebase (pull) operation | |
else: | |
do a blockCommit operation | |
Files must be adjacent in snap chain. | |
:param instance: instance object reference | |
:param volume_id: volume UUID | |
:param snapshot_id: snapshot UUID (unused currently) | |
:param delete_info: { | |
'type': 'qcow2', | |
'file_to_merge': 'a.img', | |
'merge_target_file': 'b.img' or None (if merging file_to_merge into | |
active image) | |
} | |
""" | |
LOG.debug('volume_snapshot_delete: delete_info: %s', delete_info, | |
instance=instance) | |
if delete_info['type'] != 'qcow2': | |
msg = _('Unknown delete_info type %s') % delete_info['type'] | |
raise exception.InternalError(msg) | |
try: | |
guest = self._host.get_guest(instance) | |
except exception.InstanceNotFound: | |
raise exception.InstanceNotRunning(instance_id=instance.uuid) | |
# Find dev name | |
my_dev = None | |
active_disk = None | |
xml = guest.get_xml_desc() | |
xml_doc = etree.fromstring(xml) | |
device_info = vconfig.LibvirtConfigGuest() | |
device_info.parse_dom(xml_doc) | |
active_disk_object = None | |
for guest_disk in device_info.devices: | |
if (guest_disk.root_name != 'disk'): | |
continue | |
if (guest_disk.target_dev is None or guest_disk.serial is None): | |
continue | |
if guest_disk.serial == volume_id: | |
my_dev = guest_disk.target_dev | |
active_disk = guest_disk.source_path | |
active_protocol = guest_disk.source_protocol | |
active_disk_object = guest_disk | |
break | |
if my_dev is None or (active_disk is None and active_protocol is None): | |
LOG.debug('Domain XML: %s', xml, instance=instance) | |
msg = (_('Disk with id: %s not found attached to instance.') | |
% volume_id) | |
raise exception.InternalError(msg) | |
LOG.debug("found device at %s", my_dev, instance=instance) | |
def _get_snap_dev(filename, backing_store): | |
if filename is None: | |
msg = _('filename cannot be None') | |
raise exception.InternalError(msg) | |
# libgfapi delete | |
LOG.debug("XML: %s", xml) | |
LOG.debug("active disk object: %s", active_disk_object) | |
# determine reference within backing store for desired image | |
filename_to_merge = filename | |
matched_name = None | |
b = backing_store | |
index = None | |
current_filename = active_disk_object.source_name.split('/')[1] | |
if current_filename == filename_to_merge: | |
return my_dev + '[0]' | |
while b is not None: | |
source_filename = b.source_name.split('/')[1] | |
if source_filename == filename_to_merge: | |
LOG.debug('found match: %s', b.source_name) | |
matched_name = b.source_name | |
index = b.index | |
break | |
b = b.backing_store | |
if matched_name is None: | |
msg = _('no match found for %s') % (filename_to_merge) | |
raise exception.InternalError(msg) | |
LOG.debug('index of match (%s) is %s', b.source_name, index) | |
my_snap_dev = '%s[%s]' % (my_dev, index) | |
return my_snap_dev | |
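        # e.g. with my_dev 'vda' and a backing store index of 1 this returns | |
        # 'vda[1]', the libvirt notation for addressing an element of the | |
        # disk's backing chain. | |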
if delete_info['merge_target_file'] is None: | |
# pull via blockRebase() | |
# Merge the most recent snapshot into the active image | |
rebase_disk = my_dev | |
rebase_base = delete_info['file_to_merge'] # often None | |
if (active_protocol is not None) and (rebase_base is not None): | |
rebase_base = _get_snap_dev(rebase_base, | |
active_disk_object.backing_store) | |
# NOTE(deepakcs): libvirt added support for _RELATIVE in v1.2.7, | |
# and when available this flag _must_ be used to ensure backing | |
# paths are maintained relative by qemu. | |
# | |
            # If the _RELATIVE flag is not found, continue with the old | |
            # behaviour (a relative backing path seems to work for this case). | |
try: | |
libvirt.VIR_DOMAIN_BLOCK_REBASE_RELATIVE | |
relative = rebase_base is not None | |
except AttributeError: | |
LOG.warning( | |
"Relative blockrebase support was not detected. " | |
"Continuing with old behaviour.") | |
relative = False | |
LOG.debug( | |
'disk: %(disk)s, base: %(base)s, ' | |
'bw: %(bw)s, relative: %(relative)s', | |
{'disk': rebase_disk, | |
'base': rebase_base, | |
'bw': libvirt_guest.BlockDevice.REBASE_DEFAULT_BANDWIDTH, | |
'relative': str(relative)}, instance=instance) | |
dev = guest.get_block_device(rebase_disk) | |
if guest.is_active(): | |
result = dev.rebase(rebase_base, relative=relative) | |
if result == 0: | |
LOG.debug('blockRebase started successfully', | |
instance=instance) | |
while not dev.is_job_complete(): | |
LOG.debug('waiting for blockRebase job completion', | |
instance=instance) | |
time.sleep(0.5) | |
# If the guest is not running libvirt won't do a blockRebase. | |
# In that case, let's ask qemu-img to rebase the disk. | |
else: | |
LOG.debug('Guest is not running so doing a block rebase ' | |
'using "qemu-img rebase"', instance=instance) | |
self._rebase_with_qemu_img(guest, dev, active_disk_object, | |
rebase_base) | |
else: | |
# commit with blockCommit() | |
my_snap_base = None | |
my_snap_top = None | |
commit_disk = my_dev | |
if active_protocol is not None: | |
my_snap_base = _get_snap_dev(delete_info['merge_target_file'], | |
active_disk_object.backing_store) | |
my_snap_top = _get_snap_dev(delete_info['file_to_merge'], | |
active_disk_object.backing_store) | |
commit_base = my_snap_base or delete_info['merge_target_file'] | |
commit_top = my_snap_top or delete_info['file_to_merge'] | |
LOG.debug('will call blockCommit with commit_disk=%(commit_disk)s ' | |
'commit_base=%(commit_base)s ' | |
'commit_top=%(commit_top)s ', | |
{'commit_disk': commit_disk, | |
'commit_base': commit_base, | |
'commit_top': commit_top}, instance=instance) | |
dev = guest.get_block_device(commit_disk) | |
result = dev.commit(commit_base, commit_top, relative=True) | |
if result == 0: | |
LOG.debug('blockCommit started successfully', | |
instance=instance) | |
while not dev.is_job_complete(): | |
LOG.debug('waiting for blockCommit job completion', | |
instance=instance) | |
time.sleep(0.5) | |
def volume_snapshot_delete(self, context, instance, volume_id, snapshot_id, | |
delete_info): | |
try: | |
self._volume_snapshot_delete(context, instance, volume_id, | |
snapshot_id, delete_info=delete_info) | |
except Exception: | |
with excutils.save_and_reraise_exception(): | |
LOG.exception(_('Error occurred during ' | |
'volume_snapshot_delete, ' | |
'sending error status to Cinder.'), | |
instance=instance) | |
self._volume_snapshot_update_status( | |
context, snapshot_id, 'error_deleting') | |
self._volume_snapshot_update_status(context, snapshot_id, 'deleting') | |
self._volume_refresh_connection_info(context, instance, volume_id) | |
def reboot(self, context, instance, network_info, reboot_type, | |
block_device_info=None, bad_volumes_callback=None): | |
"""Reboot a virtual machine, given an instance reference.""" | |
if reboot_type == 'SOFT': | |
# NOTE(vish): This will attempt to do a graceful shutdown/restart. | |
try: | |
soft_reboot_success = self._soft_reboot(instance) | |
except libvirt.libvirtError as e: | |
LOG.debug("Instance soft reboot failed: %s", | |
encodeutils.exception_to_unicode(e), | |
instance=instance) | |
soft_reboot_success = False | |
if soft_reboot_success: | |
LOG.info("Instance soft rebooted successfully.", | |
instance=instance) | |
return | |
else: | |
LOG.warning("Failed to soft reboot instance. " | |
"Trying hard reboot.", | |
instance=instance) | |
return self._hard_reboot(context, instance, network_info, | |
block_device_info) | |
def _soft_reboot(self, instance): | |
"""Attempt to shutdown and restart the instance gracefully. | |
We use shutdown and create here so we can return if the guest | |
responded and actually rebooted. Note that this method only | |
        succeeds if the guest responds to ACPI. Therefore we return | |
success or failure so we can fall back to a hard reboot if | |
necessary. | |
:returns: True if the reboot succeeded | |
""" | |
guest = self._host.get_guest(instance) | |
state = guest.get_power_state(self._host) | |
old_domid = guest.id | |
# NOTE(vish): This check allows us to reboot an instance that | |
# is already shutdown. | |
if state == power_state.RUNNING: | |
guest.shutdown() | |
# NOTE(vish): This actually could take slightly longer than the | |
# FLAG defines depending on how long the get_info | |
# call takes to return. | |
self._prepare_pci_devices_for_use( | |
pci_manager.get_instance_pci_devs(instance, 'all')) | |
for x in range(CONF.libvirt.wait_soft_reboot_seconds): | |
guest = self._host.get_guest(instance) | |
state = guest.get_power_state(self._host) | |
new_domid = guest.id | |
# NOTE(ivoks): By checking domain IDs, we make sure we are | |
# not recreating domain that's already running. | |
if old_domid != new_domid: | |
if state in [power_state.SHUTDOWN, | |
power_state.CRASHED]: | |
LOG.info("Instance shutdown successfully.", | |
instance=instance) | |
self._create_domain(domain=guest._domain) | |
timer = loopingcall.FixedIntervalLoopingCall( | |
self._wait_for_running, instance) | |
timer.start(interval=0.5).wait() | |
return True | |
else: | |
LOG.info("Instance may have been rebooted during soft " | |
"reboot, so return now.", instance=instance) | |
return True | |
greenthread.sleep(1) | |
return False | |
def _hard_reboot(self, context, instance, network_info, | |
block_device_info=None): | |
"""Reboot a virtual machine, given an instance reference. | |
Performs a Libvirt reset (if supported) on the domain. | |
If Libvirt reset is unavailable this method actually destroys and | |
re-creates the domain to ensure the reboot happens, as the guest | |
OS cannot ignore this action. | |
""" | |
# NOTE(sbauza): Since we undefine the guest XML when destroying, we | |
# need to remember the existing mdevs for reusing them. | |
mdevs = self._get_all_assigned_mediated_devices(instance) | |
mdevs = list(mdevs.keys()) | |
# NOTE(mdbooth): In addition to performing a hard reboot of the domain, | |
# the hard reboot operation is relied upon by operators to be an | |
# automated attempt to fix as many things as possible about a | |
# non-functioning instance before resorting to manual intervention. | |
# With this goal in mind, we tear down all the aspects of an instance | |
# we can here without losing data. This allows us to re-initialise from | |
# scratch, and hopefully fix, most aspects of a non-functioning guest. | |
self.destroy(context, instance, network_info, destroy_disks=False, | |
block_device_info=block_device_info) | |
# Convert the system metadata to image metadata | |
# NOTE(mdbooth): This is a workaround for stateless Nova compute | |
# https://bugs.launchpad.net/nova/+bug/1349978 | |
instance_dir = libvirt_utils.get_instance_path(instance) | |
fileutils.ensure_tree(instance_dir) | |
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type, | |
instance, | |
instance.image_meta, | |
block_device_info) | |
# NOTE(vish): This could generate the wrong device_format if we are | |
# using the raw backend and the images don't exist yet. | |
# The create_images_and_backing below doesn't properly | |
# regenerate raw backend images, however, so when it | |
# does we need to (re)generate the xml after the images | |
# are in place. | |
xml = self._get_guest_xml(context, instance, network_info, disk_info, | |
instance.image_meta, | |
block_device_info=block_device_info, | |
mdevs=mdevs) | |
# NOTE(mdbooth): context.auth_token will not be set when we call | |
# _hard_reboot from resume_state_on_host_boot() | |
if context.auth_token is not None: | |
# NOTE (rmk): Re-populate any missing backing files. | |
config = vconfig.LibvirtConfigGuest() | |
config.parse_str(xml) | |
backing_disk_info = self._get_instance_disk_info_from_config( | |
config, block_device_info) | |
self._create_images_and_backing(context, instance, instance_dir, | |
backing_disk_info) | |
# Initialize all the necessary networking, block devices and | |
# start the instance. | |
# NOTE(melwitt): Pass vifs_already_plugged=True here even though we've | |
# unplugged vifs earlier. The behavior of neutron plug events depends | |
# on which vif type we're using and we are working with a stale network | |
# info cache here, so won't rely on waiting for neutron plug events. | |
# vifs_already_plugged=True means "do not wait for neutron plug events" | |
self._create_domain_and_network(context, xml, instance, network_info, | |
block_device_info=block_device_info, | |
vifs_already_plugged=True) | |
self._prepare_pci_devices_for_use( | |
pci_manager.get_instance_pci_devs(instance, 'all')) | |
def _wait_for_reboot(): | |
"""Called at an interval until the VM is running again.""" | |
state = self.get_info(instance).state | |
if state == power_state.RUNNING: | |
LOG.info("Instance rebooted successfully.", | |
instance=instance) | |
raise loopingcall.LoopingCallDone() | |
timer = loopingcall.FixedIntervalLoopingCall(_wait_for_reboot) | |
timer.start(interval=0.5).wait() | |
def pause(self, instance): | |
"""Pause VM instance.""" | |
self._host.get_guest(instance).pause() | |
def unpause(self, instance): | |
"""Unpause paused VM instance.""" | |
guest = self._host.get_guest(instance) | |
guest.resume() | |
guest.sync_guest_time() | |
def _clean_shutdown(self, instance, timeout, retry_interval): | |
"""Attempt to shutdown the instance gracefully. | |
:param instance: The instance to be shutdown | |
:param timeout: How long to wait in seconds for the instance to | |
shutdown | |
:param retry_interval: How often in seconds to signal the instance | |
to shutdown while waiting | |
:returns: True if the shutdown succeeded | |
""" | |
# List of states that represent a shutdown instance | |
SHUTDOWN_STATES = [power_state.SHUTDOWN, | |
power_state.CRASHED] | |
try: | |
guest = self._host.get_guest(instance) | |
except exception.InstanceNotFound: | |
# If the instance has gone then we don't need to | |
# wait for it to shutdown | |
return True | |
state = guest.get_power_state(self._host) | |
if state in SHUTDOWN_STATES: | |
LOG.info("Instance already shutdown.", instance=instance) | |
return True | |
LOG.debug("Shutting down instance from state %s", state, | |
instance=instance) | |
guest.shutdown() | |
retry_countdown = retry_interval | |
for sec in range(timeout): | |
guest = self._host.get_guest(instance) | |
state = guest.get_power_state(self._host) | |
if state in SHUTDOWN_STATES: | |
LOG.info("Instance shutdown successfully after %d seconds.", | |
sec, instance=instance) | |
return True | |
            # Note(PhilD): We can't assume that the Guest was able to process | |
            #              any previous shutdown signal (for example it may | |
            #              have still been starting up), so within the overall | |
            #              timeout we re-trigger the shutdown every | |
            #              retry_interval. | |
if retry_countdown == 0: | |
retry_countdown = retry_interval | |
# Instance could shutdown at any time, in which case we | |
# will get an exception when we call shutdown | |
try: | |
LOG.debug("Instance in state %s after %d seconds - " | |
"resending shutdown", state, sec, | |
instance=instance) | |
guest.shutdown() | |
except libvirt.libvirtError: | |
                    # Assume this is because it's now shut down, so loop | |
# one more time to clean up. | |
LOG.debug("Ignoring libvirt exception from shutdown " | |
"request.", instance=instance) | |
continue | |
else: | |
retry_countdown -= 1 | |
time.sleep(1) | |
LOG.info("Instance failed to shutdown in %d seconds.", | |
timeout, instance=instance) | |
return False | |
def power_off(self, instance, timeout=0, retry_interval=0): | |
"""Power off the specified instance.""" | |
if timeout: | |
self._clean_shutdown(instance, timeout, retry_interval) | |
self._destroy(instance) | |
def power_on(self, context, instance, network_info, | |
block_device_info=None): | |
"""Power on the specified instance.""" | |
# We use _hard_reboot here to ensure that all backing files, | |
# network, and block device connections, etc. are established | |
# and available before we attempt to start the instance. | |
self._hard_reboot(context, instance, network_info, block_device_info) | |
def trigger_crash_dump(self, instance): | |
"""Trigger crash dump by injecting an NMI to the specified instance.""" | |
try: | |
self._host.get_guest(instance).inject_nmi() | |
except libvirt.libvirtError as ex: | |
error_code = ex.get_error_code() | |
if error_code == libvirt.VIR_ERR_NO_SUPPORT: | |
raise exception.TriggerCrashDumpNotSupported() | |
elif error_code == libvirt.VIR_ERR_OPERATION_INVALID: | |
raise exception.InstanceNotRunning(instance_id=instance.uuid) | |
LOG.exception(_('Error from libvirt while injecting an NMI to ' | |
'%(instance_uuid)s: ' | |
'[Error Code %(error_code)s] %(ex)s'), | |
{'instance_uuid': instance.uuid, | |
'error_code': error_code, 'ex': ex}) | |
raise | |
def suspend(self, context, instance): | |
"""Suspend the specified instance.""" | |
guest = self._host.get_guest(instance) | |
self._detach_pci_devices(guest, | |
pci_manager.get_instance_pci_devs(instance)) | |
self._detach_direct_passthrough_ports(context, instance, guest) | |
self._detach_mediated_devices(guest) | |
guest.save_memory_state() | |
def resume(self, context, instance, network_info, block_device_info=None): | |
"""resume the specified instance.""" | |
xml = self._get_existing_domain_xml(instance, network_info, | |
block_device_info) | |
guest = self._create_domain_and_network(context, xml, instance, | |
network_info, block_device_info=block_device_info, | |
vifs_already_plugged=True) | |
self._attach_pci_devices(guest, | |
pci_manager.get_instance_pci_devs(instance)) | |
self._attach_direct_passthrough_ports( | |
context, instance, guest, network_info) | |
timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_running, | |
instance) | |
timer.start(interval=0.5).wait() | |
guest.sync_guest_time() | |
def resume_state_on_host_boot(self, context, instance, network_info, | |
block_device_info=None): | |
"""resume guest state when a host is booted.""" | |
# Check if the instance is running already and avoid doing | |
# anything if it is. | |
try: | |
guest = self._host.get_guest(instance) | |
state = guest.get_power_state(self._host) | |
ignored_states = (power_state.RUNNING, | |
power_state.SUSPENDED, | |
power_state.NOSTATE, | |
power_state.PAUSED) | |
if state in ignored_states: | |
return | |
except (exception.InternalError, exception.InstanceNotFound): | |
pass | |
# Instance is not up and could be in an unknown state. | |
# Be as absolute as possible about getting it back into | |
# a known and running state. | |
self._hard_reboot(context, instance, network_info, block_device_info) | |
def rescue(self, context, instance, network_info, image_meta, | |
rescue_password): | |
"""Loads a VM using rescue images. | |
A rescue is normally performed when something goes wrong with the | |
primary images and data needs to be corrected/recovered. Rescuing | |
        should not edit or override the original image, only allow for | |
data recovery. | |
""" | |
instance_dir = libvirt_utils.get_instance_path(instance) | |
unrescue_xml = self._get_existing_domain_xml(instance, network_info) | |
unrescue_xml_path = os.path.join(instance_dir, 'unrescue.xml') | |
libvirt_utils.write_to_file(unrescue_xml_path, unrescue_xml) | |
rescue_image_id = None | |
if image_meta.obj_attr_is_set("id"): | |
rescue_image_id = image_meta.id | |
rescue_images = { | |
'image_id': (rescue_image_id or | |
CONF.libvirt.rescue_image_id or instance.image_ref), | |
'kernel_id': (CONF.libvirt.rescue_kernel_id or | |
instance.kernel_id), | |
'ramdisk_id': (CONF.libvirt.rescue_ramdisk_id or | |
instance.ramdisk_id), | |
} | |
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type, | |
instance, | |
image_meta, | |
rescue=True) | |
injection_info = InjectionInfo(network_info=network_info, | |
admin_pass=rescue_password, | |
files=None) | |
gen_confdrive = functools.partial(self._create_configdrive, | |
context, instance, injection_info, | |
rescue=True) | |
self._create_image(context, instance, disk_info['mapping'], | |
injection_info=injection_info, suffix='.rescue', | |
disk_images=rescue_images) | |
xml = self._get_guest_xml(context, instance, network_info, disk_info, | |
image_meta, rescue=rescue_images) | |
self._destroy(instance) | |
self._create_domain(xml, post_xml_callback=gen_confdrive) | |
def unrescue(self, instance, network_info): | |
"""Reboot the VM which is being rescued back into primary images. | |
""" | |
instance_dir = libvirt_utils.get_instance_path(instance) | |
unrescue_xml_path = os.path.join(instance_dir, 'unrescue.xml') | |
xml = libvirt_utils.load_file(unrescue_xml_path) | |
guest = self._host.get_guest(instance) | |
# TODO(sahid): We are converting all calls from a | |
# virDomain object to use nova.virt.libvirt.Guest. | |
# We should be able to remove virt_dom at the end. | |
virt_dom = guest._domain | |
self._destroy(instance) | |
self._create_domain(xml, virt_dom) | |
os.unlink(unrescue_xml_path) | |
rescue_files = os.path.join(instance_dir, "*.rescue") | |
for rescue_file in glob.iglob(rescue_files): | |
if os.path.isdir(rescue_file): | |
shutil.rmtree(rescue_file) | |
else: | |
os.unlink(rescue_file) | |
# cleanup rescue volume | |
lvm.remove_volumes([lvmdisk for lvmdisk in self._lvm_disks(instance) | |
if lvmdisk.endswith('.rescue')]) | |
if CONF.libvirt.images_type == 'rbd': | |
filter_fn = lambda disk: (disk.startswith(instance.uuid) and | |
disk.endswith('.rescue')) | |
LibvirtDriver._get_rbd_driver().cleanup_volumes(filter_fn) | |
def poll_rebooting_instances(self, timeout, instances): | |
pass | |
    # NOTE(ilyaalekseyev): Implementation is similar to the multinic | |
    # support in the xenapi driver (tr3buchet). | |
def spawn(self, context, instance, image_meta, injected_files, | |
admin_password, allocations, network_info=None, | |
block_device_info=None): | |
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type, | |
instance, | |
image_meta, | |
block_device_info) | |
injection_info = InjectionInfo(network_info=network_info, | |
files=injected_files, | |
admin_pass=admin_password) | |
gen_confdrive = functools.partial(self._create_configdrive, | |
context, instance, | |
injection_info) | |
self._create_image(context, instance, disk_info['mapping'], | |
injection_info=injection_info, | |
block_device_info=block_device_info) | |
# Required by Quobyte CI | |
self._ensure_console_log_for_instance(instance) | |
        # Does the guest need to be assigned some vGPU mediated devices? | |
mdevs = self._allocate_mdevs(allocations) | |
xml = self._get_guest_xml(context, instance, network_info, | |
disk_info, image_meta, | |
block_device_info=block_device_info, | |
mdevs=mdevs) | |
self._create_domain_and_network( | |
context, xml, instance, network_info, | |
block_device_info=block_device_info, | |
post_xml_callback=gen_confdrive, | |
destroy_disks_on_failure=True) | |
LOG.debug("Instance is running", instance=instance) | |
def _wait_for_boot(): | |
"""Called at an interval until the VM is running.""" | |
state = self.get_info(instance).state | |
if state == power_state.RUNNING: | |
LOG.info("Instance spawned successfully.", instance=instance) | |
raise loopingcall.LoopingCallDone() | |
timer = loopingcall.FixedIntervalLoopingCall(_wait_for_boot) | |
timer.start(interval=0.5).wait() | |
def _get_console_output_file(self, instance, console_log): | |
bytes_to_read = MAX_CONSOLE_BYTES | |
log_data = b"" # The last N read bytes | |
i = 0 # in case there is a log rotation (like "virtlogd") | |
path = console_log | |
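        # e.g. for a console_log of '.../console.log' the files are visited | |
        # newest-first as console.log, console.log.0, console.log.1, ... and | |
        # each chunk is prepended, so the returned data stays chronological. | |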
while bytes_to_read > 0 and os.path.exists(path): | |
read_log_data, remaining = nova.privsep.path.last_bytes( | |
path, bytes_to_read) | |
# We need the log file content in chronological order, | |
# that's why we *prepend* the log data. | |
log_data = read_log_data + log_data | |
# Prep to read the next file in the chain | |
bytes_to_read -= len(read_log_data) | |
path = console_log + "." + str(i) | |
i += 1 | |
if remaining > 0: | |
LOG.info('Truncated console log returned, ' | |
'%d bytes ignored', remaining, instance=instance) | |
return log_data | |
def get_console_output(self, context, instance): | |
guest = self._host.get_guest(instance) | |
xml = guest.get_xml_desc() | |
tree = etree.fromstring(xml) | |
# If the guest has a console logging to a file prefer to use that | |
file_consoles = tree.findall("./devices/console[@type='file']") | |
if file_consoles: | |
for file_console in file_consoles: | |
source_node = file_console.find('./source') | |
if source_node is None: | |