Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rvichery/ec123560ed1916a24478b8b0dc7b7094 to your computer and use it in GitHub Desktop.
Save rvichery/ec123560ed1916a24478b8b0dc7b7094 to your computer and use it in GitHub Desktop.
diff -r -u a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
--- a/nova/virt/libvirt/driver.py 2017-05-01 10:01:08.000000000 -0700
+++ b/nova/virt/libvirt/driver.py 2017-04-28 16:55:33.000000000 -0700
@@ -1426,48 +1426,85 @@
instance=instance)
update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD)
- snapshot_directory = CONF.libvirt.snapshots_directory
- fileutils.ensure_tree(snapshot_directory)
- with utils.tempdir(dir=snapshot_directory) as tmpdir:
- try:
- out_path = os.path.join(tmpdir, snapshot_name)
- if live_snapshot:
- # NOTE(xqueralt): libvirt needs o+x in the temp directory
- os.chmod(tmpdir, 0o701)
- self._live_snapshot(context, instance, guest, disk_path,
- out_path, source_format, image_format,
- image_meta)
- else:
- snapshot_backend.snapshot_extract(out_path, image_format)
- finally:
- guest = None
- # NOTE(dkang): because previous managedSave is not called
- # for LXC, _create_domain must not be called.
- if CONF.libvirt.virt_type != 'lxc' and not live_snapshot:
- if state == power_state.RUNNING:
- guest = self._create_domain(domain=virt_dom)
- elif state == power_state.PAUSED:
- guest = self._create_domain(
- domain=virt_dom, pause=True)
-
- if guest is not None:
- self._attach_pci_devices(guest,
- pci_manager.get_instance_pci_devs(instance))
- self._attach_sriov_ports(context, instance, guest)
- LOG.info(_LI("Snapshot extracted, beginning image upload"),
- instance=instance)
+ try:
+ update_task_state(task_state=task_states.IMAGE_UPLOADING,
+ expected_state=task_states.IMAGE_PENDING_UPLOAD)
+ metadata['location'] = snapshot_backend.direct_snapshot(
+ context, snapshot_name, image_format, image_id,
+ instance.image_ref)
+ self._snapshot_domain(context, live_snapshot, virt_dom, state,
+ instance)
+ self._image_api.update(context, image_id, metadata,
+ purge_props=False)
+ except (NotImplementedError, exception.ImageUnacceptable,
+ exception.Forbidden) as e:
+ if type(e) != NotImplementedError:
+ LOG.warning(_LW('Performing standard snapshot because direct '
+ 'snapshot failed: %(error)s'), {'error': e})
+ failed_snap = metadata.pop('location', None)
+ if failed_snap:
+ failed_snap = {'url': str(failed_snap)}
+ snapshot_backend.cleanup_direct_snapshot(failed_snap,
+ also_destroy_volume=True,
+ ignore_errors=True)
+ update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD,
+ expected_state=task_states.IMAGE_UPLOADING)
+
+ snapshot_directory = CONF.libvirt.snapshots_directory
+ fileutils.ensure_tree(snapshot_directory)
+ with utils.tempdir(dir=snapshot_directory) as tmpdir:
+ try:
+ out_path = os.path.join(tmpdir, snapshot_name)
+ if live_snapshot:
+ # NOTE(xqueralt): libvirt needs o+x in the tempdir
+ os.chmod(tmpdir, 0o701)
+ self._live_snapshot(context, instance, guest,
+ disk_path, out_path, source_format,
+ image_format, image_meta)
+ else:
+ snapshot_backend.snapshot_extract(out_path,
+ image_format)
+ finally:
+ self._snapshot_domain(context, live_snapshot, virt_dom,
+ state, instance)
+ LOG.info(_LI("Snapshot extracted, beginning image upload"),
+ instance=instance)
- # Upload that image to the image service
+ # Upload that image to the image service
+ update_task_state(task_state=task_states.IMAGE_UPLOADING,
+ expected_state=task_states.IMAGE_PENDING_UPLOAD)
+ with libvirt_utils.file_open(out_path) as image_file:
+ self._image_api.update(context,
+ image_id,
+ metadata,
+ image_file)
+ except Exception:
+ with excutils.save_and_reraise_exception():
+ LOG.exception(_LE("Failed to snapshot image"))
+ failed_snap = metadata.pop('location', None)
+ if failed_snap:
+ failed_snap = {'url': str(failed_snap)}
+ snapshot_backend.cleanup_direct_snapshot(
+ failed_snap, also_destroy_volume=True,
+ ignore_errors=True)
- update_task_state(task_state=task_states.IMAGE_UPLOADING,
- expected_state=task_states.IMAGE_PENDING_UPLOAD)
- with libvirt_utils.file_open(out_path) as image_file:
- self._image_api.update(context,
- image_id,
- metadata,
- image_file)
- LOG.info(_LI("Snapshot image upload complete"),
- instance=instance)
+ LOG.info(_LI("Snapshot image upload complete"), instance=instance)
+
+ def _snapshot_domain(self, context, live_snapshot, virt_dom, state,
+ instance):
+ guest = None
+ # NOTE(dkang): because previous managedSave is not called
+ # for LXC, _create_domain must not be called.
+ if CONF.libvirt.virt_type != 'lxc' and not live_snapshot:
+ if state == power_state.RUNNING:
+ guest = self._create_domain(domain=virt_dom)
+ elif state == power_state.PAUSED:
+ guest = self._create_domain(domain=virt_dom, pause=True)
+
+ if guest is not None:
+ self._attach_pci_devices(
+ guest, pci_manager.get_instance_pci_devs(instance))
+ self._attach_sriov_ports(context, instance, guest)
def _can_set_admin_password(self, image_meta):
if (CONF.libvirt.virt_type not in ('kvm', 'qemu') or
diff -r -u a/nova/virt/libvirt/imagebackend.py b/nova/virt/libvirt/imagebackend.py
--- a/nova/virt/libvirt/imagebackend.py 2017-05-01 10:00:49.000000000 -0700
+++ b/nova/virt/libvirt/imagebackend.py 2017-04-28 16:57:09.000000000 -0700
@@ -380,6 +380,26 @@
raise exception.ImageUnacceptable(image_id=image_id_or_uri,
reason=reason)
+ def direct_snapshot(self, context, snapshot_name, image_format, image_id,
+ base_image_id):
+ """Prepare a snapshot for direct reference from glance
+
+ :raises: exception.ImageUnacceptable if it cannot be
+ referenced directly in the specified image format
+ :returns: URL to be given to glance
+ """
+ raise NotImplementedError(_('direct_snapshot() is not implemented'))
+
+ def cleanup_direct_snapshot(self, location, also_destroy_volume=False,
+ ignore_errors=False):
+ """Performs any cleanup actions required after calling
+ direct_snapshot(), for graceful exception handling and the like.
+
+ This should be a no-op on any backend where it is not implemented.
+ """
+ pass
+
+
def _get_lock_name(self, base):
"""Get an image's name of a base file."""
return os.path.split(base)[-1]
@@ -433,6 +453,96 @@
"""
pass
+ def _get_parent_pool(self, context, base_image_id, fsid):
+ parent_pool = None
+ try:
+ # The easy way -- the image is an RBD clone, so use the parent
+ # image's storage pool
+ parent_pool, _im, _snap = self.driver.parent_info(self.rbd_name)
+ except exception.ImageUnacceptable:
+ # The hard way -- the image is itself a parent, so ask Glance
+ # where it came from
+ LOG.debug('No parent info for %s; asking the Image API where its '
+ 'store is', base_image_id)
+ try:
+ image_meta = IMAGE_API.get(context, base_image_id,
+ include_locations=True)
+ except Exception as e:
+ LOG.debug('Unable to get image %(image_id)s; error: %(error)s',
+ {'image_id': base_image_id, 'error': e})
+ image_meta = {}
+
+ # Find the first location that is in the same RBD cluster
+ for location in image_meta.get('locations', []):
+ try:
+ parent_fsid, parent_pool, _im, _snap = \
+ self.driver.parse_url(location['url'])
+ if parent_fsid == fsid:
+ break
+ else:
+ parent_pool = None
+ except exception.ImageUnacceptable:
+ continue
+
+ if not parent_pool:
+ raise exception.ImageUnacceptable(
+ _('Cannot determine the parent storage pool for %s; '
+ 'cannot determine where to store images') %
+ base_image_id)
+
+ return parent_pool
+
+ def direct_snapshot(self, context, snapshot_name, image_format,
+ image_id, base_image_id):
+ """Creates an RBD snapshot directly.
+ """
+ fsid = self.driver.get_fsid()
+ # NOTE(nic): Nova has zero comprehension of how Glance's image store
+ # is configured, but we can infer what storage pool Glance is using
+ # by looking at the parent image. If using cephx, write access should
+ # be enabled on that pool for the Nova user
+ parent_pool = self._get_parent_pool(context, base_image_id, fsid)
+
+ # Snapshot the disk and clone it into Glance's storage pool. librbd
+ # requires that snapshots be set to "protected" in order to clone them
+ self.driver.create_snap(self.rbd_name, snapshot_name, protect=True)
+ location = {'url': 'rbd://%(fsid)s/%(pool)s/%(image)s/%(snap)s' %
+ dict(fsid=fsid,
+ pool=self.pool,
+ image=self.rbd_name,
+ snap=snapshot_name)}
+ try:
+ self.driver.clone(location, image_id, dest_pool=parent_pool)
+ # Flatten the image, which detaches it from the source snapshot
+ self.driver.flatten(image_id, pool=parent_pool)
+ finally:
+ # all done with the source snapshot, clean it up
+ self.cleanup_direct_snapshot(location)
+
+ # Glance makes a protected snapshot called 'snap' on uploaded
+ # images and hands it out, so we'll do that too. The name of
+ # the snapshot doesn't really matter, this just uses what the
+ # glance-store rbd backend sets (which is not configurable).
+ self.driver.create_snap(image_id, 'snap', pool=parent_pool,
+ protect=True)
+ return ('rbd://%(fsid)s/%(pool)s/%(image)s/snap' %
+ dict(fsid=fsid, pool=parent_pool, image=image_id))
+
+ def cleanup_direct_snapshot(self, location, also_destroy_volume=False,
+ ignore_errors=False):
+ """Unprotects and destroys the named snapshot.
+
+ With also_destroy_volume=True, it will also cleanup/destroy the parent
+ volume. This is useful for cleaning up when the target volume fails
+ to snapshot properly.
+ """
+ if location:
+ _fsid, _pool, _im, _snap = self.driver.parse_url(location['url'])
+ self.driver.remove_snap(_im, _snap, pool=_pool, force=True,
+ ignore_errors=ignore_errors)
+ if also_destroy_volume:
+ self.driver.destroy_volume(_im, pool=_pool)
+
class Raw(Image):
def __init__(self, instance=None, disk_name=None, path=None):
diff -r -u a/nova/virt/libvirt/storage/rbd_utils.py b/nova/virt/libvirt/storage/rbd_utils.py
--- a/nova/virt/libvirt/storage/rbd_utils.py 2017-05-01 10:01:41.000000000 -0700
+++ b/nova/virt/libvirt/storage/rbd_utils.py 2017-04-28 17:01:53.000000000 -0700
@@ -177,7 +177,7 @@
raise exception.ImageUnacceptable(image_id=url, reason=reason)
return pieces
- def _get_fsid(self):
+ def get_fsid(self):
with RADOSClient(self) as client:
return client.cluster.get_fsid()
@@ -189,7 +189,7 @@
LOG.debug('not cloneable: %s', e)
return False
- if self._get_fsid() != fsid:
+ if self.get_fsid() != fsid:
reason = '%s is in a different ceph cluster' % url
LOG.debug(reason)
return False
@@ -209,19 +209,25 @@
dict(loc=url, err=e))
return False
- def clone(self, image_location, dest_name):
+ def clone(self, image_location, dest_name, dest_pool=None):
_fsid, pool, image, snapshot = self.parse_url(
image_location['url'])
- LOG.debug('cloning %(pool)s/%(img)s@%(snap)s' %
- dict(pool=pool, img=image, snap=snapshot))
+ LOG.debug('cloning %(pool)s/%(img)s@%(snap)s to '
+ '%(dest_pool)s/%(dest_name)s',
+ dict(pool=pool, img=image, snap=snapshot,
+ dest_pool=dest_pool, dest_name=dest_name))
with RADOSClient(self, str(pool)) as src_client:
- with RADOSClient(self) as dest_client:
- rbd.RBD().clone(src_client.ioctx,
- image.encode('utf-8'),
- snapshot.encode('utf-8'),
- dest_client.ioctx,
- dest_name,
- features=src_client.features)
+ with RADOSClient(self, dest_pool) as dest_client:
+ try:
+ rbd.RBD().clone(src_client.ioctx,
+ image.encode('utf-8'),
+ snapshot.encode('utf-8'),
+ dest_client.ioctx,
+ str(dest_name),
+ features=src_client.features)
+ except rbd.PermissionError:
+ raise exception.Forbidden(_('no write permission on '
+ 'storage pool %s') % dest_pool)
def size(self, name):
with RBDVolumeProxy(self, name) as vol:
@@ -237,6 +243,31 @@
with RBDVolumeProxy(self, name) as vol:
vol.resize(size)
+ def parent_info(self, volume, pool=None):
+ """Returns the pool, image and snapshot name for the parent of an
+ RBD volume.
+
+ :volume: Name of RBD object
+ :pool: Name of pool
+ """
+ try:
+ with RBDVolumeProxy(self, str(volume), pool=pool) as vol:
+ return vol.parent_info()
+ except rbd.ImageNotFound:
+ raise exception.ImageUnacceptable(_("no usable parent snapshot "
+ "for volume %s") % volume)
+
+ def flatten(self, volume, pool=None):
+ """'Flattens' a snapshotted image with the parent's data,
+ effectively detaching it from the parent.
+
+ :volume: Name of RBD object
+ :pool: Name of pool
+ """
+ LOG.debug('flattening %(pool)s/%(vol)s', dict(pool=pool, vol=volume))
+ with RBDVolumeProxy(self, str(volume), pool=pool) as vol:
+ tpool.execute(vol.flatten)
+
def exists(self, name, pool=None, snapshot=None):
try:
with RBDVolumeProxy(self, name,
@@ -281,10 +312,12 @@
args += self.ceph_args()
utils.execute('rbd', 'import', *args)
- def cleanup_volumes(self, instance):
+ def _destroy_volume(self, client, volume, pool=None):
+ """Destroy an RBD volume, retrying as needed.
+ """
def _cleanup_vol(ioctx, volume, retryctx):
try:
- rbd.RBD().remove(client.ioctx, volume)
+ rbd.RBD().remove(ioctx, volume)
raise loopingcall.LoopingCallDone(retvalue=False)
except rbd.ImageHasSnapshots:
self.remove_snap(volume, libvirt_utils.RESIZE_SNAPSHOT_NAME,
@@ -297,6 +330,20 @@
if retryctx['retries'] <= 0:
raise loopingcall.LoopingCallDone()
+ # NOTE(danms): We let it go for ten seconds
+ retryctx = {'retries': 10}
+ timer = loopingcall.FixedIntervalLoopingCall(
+ _cleanup_vol, client.ioctx, volume, retryctx)
+ timed_out = timer.start(interval=1).wait()
+ if timed_out:
+ # NOTE(danms): Run this again to propagate the error, but
+ # if it succeeds, don't raise the loopingcall exception
+ try:
+ _cleanup_vol(client.ioctx, volume, retryctx)
+ except loopingcall.LoopingCallDone:
+ pass
+
+ def cleanup_volumes(self, instance):
with RADOSClient(self, self.pool) as client:
def belongs_to_instance(disk):
@@ -312,18 +359,7 @@
volumes = rbd.RBD().list(client.ioctx)
for volume in filter(belongs_to_instance, volumes):
- # NOTE(danms): We let it go for ten seconds
- retryctx = {'retries': 10}
- timer = loopingcall.FixedIntervalLoopingCall(
- _cleanup_vol, client.ioctx, volume, retryctx)
- timed_out = timer.start(interval=1).wait()
- if timed_out:
- # NOTE(danms): Run this again to propagate the error, but
- # if it succeeds, don't raise the loopingcall exception
- try:
- _cleanup_vol(client.ioctx, volume, retryctx)
- except loopingcall.LoopingCallDone:
- pass
+ self._destroy_volume(client, volume)
def get_pool_info(self):
with RADOSClient(self) as client:
@@ -332,34 +368,49 @@
'free': stats['kb_avail'] * units.Ki,
'used': stats['kb_used'] * units.Ki}
- def create_snap(self, volume, name):
- """Create a snapshot on an RBD object.
+ def create_snap(self, volume, name, pool=None, protect=False):
+ """Create a snapshot of an RBD volume.
:volume: Name of RBD object
:name: Name of snapshot
+ :pool: Name of pool
+ :protect: Set the snapshot to "protected"
"""
LOG.debug('creating snapshot(%(snap)s) on rbd image(%(img)s)',
{'snap': name, 'img': volume})
- with RBDVolumeProxy(self, volume) as vol:
+ with RBDVolumeProxy(self, str(volume), pool=pool) as vol:
tpool.execute(vol.create_snap, name)
+ if protect and not vol.is_protected_snap(name):
+ tpool.execute(vol.protect_snap, name)
- def remove_snap(self, volume, name, ignore_errors=False):
- """Remove a snapshot from an RBD volume.
+ def remove_snap(self, volume, name, ignore_errors=False, pool=None,
+ force=False):
+ """Removes a snapshot from an RBD volume.
:volume: Name of RBD object
:name: Name of snapshot
:ignore_errors: whether or not to log warnings on failures
+ :pool: Name of pool
+ :force: Remove snapshot even if it is protected
"""
- with RBDVolumeProxy(self, volume) as vol:
+ with RBDVolumeProxy(self, str(volume), pool=pool) as vol:
if name in [snap.get('name', '') for snap in vol.list_snaps()]:
- LOG.debug('removing snapshot(%(snap)s) on rbd image(%(img)s)',
- {'snap': name, 'img': volume})
+ if vol.is_protected_snap(name):
+ if force:
+ tpool.execute(vol.unprotect_snap, name)
+ elif not ignore_errors:
+ LOG.warning(_LW('snapshot(%(name)s) on rbd '
+ 'image(%(img)s) is protected, '
+ 'skipping'),
+ {'name': name, 'img': volume})
+ return
+ LOG.debug('removing snapshot(%(name)s) on rbd image(%(img)s)',
+ {'name': name, 'img': volume})
tpool.execute(vol.remove_snap, name)
- else:
- if not ignore_errors:
- LOG.warning(_LW('no snapshot(%(snap)s) found on '
- 'image(%(img)s)'), {'snap': name,
- 'img': volume})
+ elif not ignore_errors:
+ LOG.warning(_LW('no snapshot(%(name)s) found on rbd '
+ 'image(%(img)s)'),
+ {'name': name, 'img': volume})
def rollback_to_snap(self, volume, name):
"""Revert an RBD volume to its contents at a snapshot.
@@ -374,3 +425,9 @@
tpool.execute(vol.rollback_to_snap, name)
else:
raise exception.SnapshotNotFound(snapshot_id=name)
+
+ def destroy_volume(self, volume, pool=None):
+ """A one-shot version of cleanup_volumes()
+ """
+ with RADOSClient(self, pool) as client:
+ self._destroy_volume(client, volume)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment