Created
May 1, 2017 18:00
-
-
Save rvichery/ec123560ed1916a24478b8b0dc7b7094 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff -r -u a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py | |
--- a/nova/virt/libvirt/driver.py 2017-05-01 10:01:08.000000000 -0700 | |
+++ b/nova/virt/libvirt/driver.py 2017-04-28 16:55:33.000000000 -0700 | |
@@ -1426,48 +1426,85 @@ | |
instance=instance) | |
update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD) | |
- snapshot_directory = CONF.libvirt.snapshots_directory | |
- fileutils.ensure_tree(snapshot_directory) | |
- with utils.tempdir(dir=snapshot_directory) as tmpdir: | |
- try: | |
- out_path = os.path.join(tmpdir, snapshot_name) | |
- if live_snapshot: | |
- # NOTE(xqueralt): libvirt needs o+x in the temp directory | |
- os.chmod(tmpdir, 0o701) | |
- self._live_snapshot(context, instance, guest, disk_path, | |
- out_path, source_format, image_format, | |
- image_meta) | |
- else: | |
- snapshot_backend.snapshot_extract(out_path, image_format) | |
- finally: | |
- guest = None | |
- # NOTE(dkang): because previous managedSave is not called | |
- # for LXC, _create_domain must not be called. | |
- if CONF.libvirt.virt_type != 'lxc' and not live_snapshot: | |
- if state == power_state.RUNNING: | |
- guest = self._create_domain(domain=virt_dom) | |
- elif state == power_state.PAUSED: | |
- guest = self._create_domain( | |
- domain=virt_dom, pause=True) | |
- | |
- if guest is not None: | |
- self._attach_pci_devices(guest, | |
- pci_manager.get_instance_pci_devs(instance)) | |
- self._attach_sriov_ports(context, instance, guest) | |
- LOG.info(_LI("Snapshot extracted, beginning image upload"), | |
- instance=instance) | |
+ try: | |
+ update_task_state(task_state=task_states.IMAGE_UPLOADING, | |
+ expected_state=task_states.IMAGE_PENDING_UPLOAD) | |
+ metadata['location'] = snapshot_backend.direct_snapshot( | |
+ context, snapshot_name, image_format, image_id, | |
+ instance.image_ref) | |
+ self._snapshot_domain(context, live_snapshot, virt_dom, state, | |
+ instance) | |
+ self._image_api.update(context, image_id, metadata, | |
+ purge_props=False) | |
+ except (NotImplementedError, exception.ImageUnacceptable, | |
+ exception.Forbidden) as e: | |
+ if type(e) != NotImplementedError: | |
+ LOG.warning(_LW('Performing standard snapshot because direct ' | |
+ 'snapshot failed: %(error)s'), {'error': e}) | |
+ failed_snap = metadata.pop('location', None) | |
+ if failed_snap: | |
+ failed_snap = {'url': str(failed_snap)} | |
+ snapshot_backend.cleanup_direct_snapshot(failed_snap, | |
+ also_destroy_volume=True, | |
+ ignore_errors=True) | |
+ update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD, | |
+ expected_state=task_states.IMAGE_UPLOADING) | |
+ | |
+ snapshot_directory = CONF.libvirt.snapshots_directory | |
+ fileutils.ensure_tree(snapshot_directory) | |
+ with utils.tempdir(dir=snapshot_directory) as tmpdir: | |
+ try: | |
+ out_path = os.path.join(tmpdir, snapshot_name) | |
+ if live_snapshot: | |
+ # NOTE(xqueralt): libvirt needs o+x in the tempdir | |
+ os.chmod(tmpdir, 0o701) | |
+ self._live_snapshot(context, instance, guest, | |
+ disk_path, out_path, source_format, | |
+ image_format, image_meta) | |
+ else: | |
+ snapshot_backend.snapshot_extract(out_path, | |
+ image_format) | |
+ finally: | |
+ self._snapshot_domain(context, live_snapshot, virt_dom, | |
+ state, instance) | |
+ LOG.info(_LI("Snapshot extracted, beginning image upload"), | |
+ instance=instance) | |
- # Upload that image to the image service | |
+ # Upload that image to the image service | |
+ update_task_state(task_state=task_states.IMAGE_UPLOADING, | |
+ expected_state=task_states.IMAGE_PENDING_UPLOAD) | |
+ with libvirt_utils.file_open(out_path) as image_file: | |
+ self._image_api.update(context, | |
+ image_id, | |
+ metadata, | |
+ image_file) | |
+ except Exception: | |
+ with excutils.save_and_reraise_exception(): | |
+ LOG.exception(_LE("Failed to snapshot image")) | |
+ failed_snap = metadata.pop('location', None) | |
+ if failed_snap: | |
+ failed_snap = {'url': str(failed_snap)} | |
+ snapshot_backend.cleanup_direct_snapshot( | |
+ failed_snap, also_destroy_volume=True, | |
+ ignore_errors=True) | |
- update_task_state(task_state=task_states.IMAGE_UPLOADING, | |
- expected_state=task_states.IMAGE_PENDING_UPLOAD) | |
- with libvirt_utils.file_open(out_path) as image_file: | |
- self._image_api.update(context, | |
- image_id, | |
- metadata, | |
- image_file) | |
- LOG.info(_LI("Snapshot image upload complete"), | |
- instance=instance) | |
+ LOG.info(_LI("Snapshot image upload complete"), instance=instance) | |
+ | |
+ def _snapshot_domain(self, context, live_snapshot, virt_dom, state, | |
+ instance): | |
+ guest = None | |
+ # NOTE(dkang): because previous managedSave is not called | |
+ # for LXC, _create_domain must not be called. | |
+ if CONF.libvirt.virt_type != 'lxc' and not live_snapshot: | |
+ if state == power_state.RUNNING: | |
+ guest = self._create_domain(domain=virt_dom) | |
+ elif state == power_state.PAUSED: | |
+ guest = self._create_domain(domain=virt_dom, pause=True) | |
+ | |
+ if guest is not None: | |
+ self._attach_pci_devices( | |
+ guest, pci_manager.get_instance_pci_devs(instance)) | |
+ self._attach_sriov_ports(context, instance, guest) | |
def _can_set_admin_password(self, image_meta): | |
if (CONF.libvirt.virt_type not in ('kvm', 'qemu') or | |
diff -r -u a/nova/virt/libvirt/imagebackend.py b/nova/virt/libvirt/imagebackend.py | |
--- a/nova/virt/libvirt/imagebackend.py 2017-05-01 10:00:49.000000000 -0700 | |
+++ b/nova/virt/libvirt/imagebackend.py 2017-04-28 16:57:09.000000000 -0700 | |
@@ -380,6 +380,26 @@ | |
raise exception.ImageUnacceptable(image_id=image_id_or_uri, | |
reason=reason) | |
+ def direct_snapshot(self, context, snapshot_name, image_format, image_id, | |
+ base_image_id): | |
+ """Prepare a snapshot for direct reference from glance | |
+ | |
+ :raises: exception.ImageUnacceptable if it cannot be | |
+ referenced directly in the specified image format | |
+ :returns: URL to be given to glance | |
+ """ | |
+ raise NotImplementedError(_('direct_snapshot() is not implemented')) | |
+ | |
+ def cleanup_direct_snapshot(self, location, also_destroy_volume=False, | |
+ ignore_errors=False): | |
+ """Performs any cleanup actions required after calling | |
+ direct_snapshot(), for graceful exception handling and the like. | |
+ | |
+ This should be a no-op on any backend where it is not implemented. | |
+ """ | |
+ pass | |
+ | |
+ | |
def _get_lock_name(self, base): | |
"""Get an image's name of a base file.""" | |
return os.path.split(base)[-1] | |
@@ -433,6 +453,96 @@ | |
""" | |
pass | |
+ def _get_parent_pool(self, context, base_image_id, fsid): | |
+ parent_pool = None | |
+ try: | |
+ # The easy way -- the image is an RBD clone, so use the parent | |
+ # image's storage pool | |
+ parent_pool, _im, _snap = self.driver.parent_info(self.rbd_name) | |
+ except exception.ImageUnacceptable: | |
+ # The hard way -- the image is itself a parent, so ask Glance | |
+ # where it came from | |
+ LOG.debug('No parent info for %s; asking the Image API where its ' | |
+ 'store is', base_image_id) | |
+ try: | |
+ image_meta = IMAGE_API.get(context, base_image_id, | |
+ include_locations=True) | |
+ except Exception as e: | |
+ LOG.debug('Unable to get image %(image_id)s; error: %(error)s', | |
+ {'image_id': base_image_id, 'error': e}) | |
+ image_meta = {} | |
+ | |
+ # Find the first location that is in the same RBD cluster | |
+ for location in image_meta.get('locations', []): | |
+ try: | |
+ parent_fsid, parent_pool, _im, _snap = \ | |
+ self.driver.parse_url(location['url']) | |
+ if parent_fsid == fsid: | |
+ break | |
+ else: | |
+ parent_pool = None | |
+ except exception.ImageUnacceptable: | |
+ continue | |
+ | |
+ if not parent_pool: | |
+ raise exception.ImageUnacceptable( | |
+ _('Cannot determine the parent storage pool for %s; ' | |
+ 'cannot determine where to store images') % | |
+ base_image_id) | |
+ | |
+ return parent_pool | |
+ | |
+ def direct_snapshot(self, context, snapshot_name, image_format, | |
+ image_id, base_image_id): | |
+ """Creates an RBD snapshot directly. | |
+ """ | |
+ fsid = self.driver.get_fsid() | |
+ # NOTE(nic): Nova has zero comprehension of how Glance's image store | |
+ # is configured, but we can infer what storage pool Glance is using | |
+ # by looking at the parent image. If using cephx, write access should | |
+ # be enabled on that pool for the Nova user | |
+ parent_pool = self._get_parent_pool(context, base_image_id, fsid) | |
+ | |
+ # Snapshot the disk and clone it into Glance's storage pool. librbd | |
+ # requires that snapshots be set to "protected" in order to clone them | |
+ self.driver.create_snap(self.rbd_name, snapshot_name, protect=True) | |
+ location = {'url': 'rbd://%(fsid)s/%(pool)s/%(image)s/%(snap)s' % | |
+ dict(fsid=fsid, | |
+ pool=self.pool, | |
+ image=self.rbd_name, | |
+ snap=snapshot_name)} | |
+ try: | |
+ self.driver.clone(location, image_id, dest_pool=parent_pool) | |
+ # Flatten the image, which detaches it from the source snapshot | |
+ self.driver.flatten(image_id, pool=parent_pool) | |
+ finally: | |
+ # all done with the source snapshot, clean it up | |
+ self.cleanup_direct_snapshot(location) | |
+ | |
+ # Glance makes a protected snapshot called 'snap' on uploaded | |
+ # images and hands it out, so we'll do that too. The name of | |
+ # the snapshot doesn't really matter, this just uses what the | |
+ # glance-store rbd backend sets (which is not configurable). | |
+ self.driver.create_snap(image_id, 'snap', pool=parent_pool, | |
+ protect=True) | |
+ return ('rbd://%(fsid)s/%(pool)s/%(image)s/snap' % | |
+ dict(fsid=fsid, pool=parent_pool, image=image_id)) | |
+ | |
+ def cleanup_direct_snapshot(self, location, also_destroy_volume=False, | |
+ ignore_errors=False): | |
+ """Unprotects and destroys the named snapshot. | |
+ | |
+ With also_destroy_volume=True, it will also cleanup/destroy the parent | |
+ volume. This is useful for cleaning up when the target volume fails | |
+ to snapshot properly. | |
+ """ | |
+ if location: | |
+ _fsid, _pool, _im, _snap = self.driver.parse_url(location['url']) | |
+ self.driver.remove_snap(_im, _snap, pool=_pool, force=True, | |
+ ignore_errors=ignore_errors) | |
+ if also_destroy_volume: | |
+ self.driver.destroy_volume(_im, pool=_pool) | |
+ | |
class Raw(Image): | |
def __init__(self, instance=None, disk_name=None, path=None): | |
diff -r -u a/nova/virt/libvirt/storage/rbd_utils.py b/nova/virt/libvirt/storage/rbd_utils.py | |
--- a/nova/virt/libvirt/storage/rbd_utils.py 2017-05-01 10:01:41.000000000 -0700 | |
+++ b/nova/virt/libvirt/storage/rbd_utils.py 2017-04-28 17:01:53.000000000 -0700 | |
@@ -177,7 +177,7 @@ | |
raise exception.ImageUnacceptable(image_id=url, reason=reason) | |
return pieces | |
- def _get_fsid(self): | |
+ def get_fsid(self): | |
with RADOSClient(self) as client: | |
return client.cluster.get_fsid() | |
@@ -189,7 +189,7 @@ | |
LOG.debug('not cloneable: %s', e) | |
return False | |
- if self._get_fsid() != fsid: | |
+ if self.get_fsid() != fsid: | |
reason = '%s is in a different ceph cluster' % url | |
LOG.debug(reason) | |
return False | |
@@ -209,19 +209,25 @@ | |
dict(loc=url, err=e)) | |
return False | |
- def clone(self, image_location, dest_name): | |
+ def clone(self, image_location, dest_name, dest_pool=None): | |
_fsid, pool, image, snapshot = self.parse_url( | |
image_location['url']) | |
- LOG.debug('cloning %(pool)s/%(img)s@%(snap)s' % | |
- dict(pool=pool, img=image, snap=snapshot)) | |
+ LOG.debug('cloning %(pool)s/%(img)s@%(snap)s to ' | |
+ '%(dest_pool)s/%(dest_name)s', | |
+ dict(pool=pool, img=image, snap=snapshot, | |
+ dest_pool=dest_pool, dest_name=dest_name)) | |
with RADOSClient(self, str(pool)) as src_client: | |
- with RADOSClient(self) as dest_client: | |
- rbd.RBD().clone(src_client.ioctx, | |
- image.encode('utf-8'), | |
- snapshot.encode('utf-8'), | |
- dest_client.ioctx, | |
- dest_name, | |
- features=src_client.features) | |
+ with RADOSClient(self, dest_pool) as dest_client: | |
+ try: | |
+ rbd.RBD().clone(src_client.ioctx, | |
+ image.encode('utf-8'), | |
+ snapshot.encode('utf-8'), | |
+ dest_client.ioctx, | |
+ str(dest_name), | |
+ features=src_client.features) | |
+ except rbd.PermissionError: | |
+ raise exception.Forbidden(_('no write permission on ' | |
+ 'storage pool %s') % dest_pool) | |
def size(self, name): | |
with RBDVolumeProxy(self, name) as vol: | |
@@ -237,6 +243,31 @@ | |
with RBDVolumeProxy(self, name) as vol: | |
vol.resize(size) | |
+ def parent_info(self, volume, pool=None): | |
+ """Returns the pool, image and snapshot name for the parent of an | |
+ RBD volume. | |
+ | |
+ :volume: Name of RBD object | |
+ :pool: Name of pool | |
+ """ | |
+ try: | |
+ with RBDVolumeProxy(self, str(volume), pool=pool) as vol: | |
+ return vol.parent_info() | |
+ except rbd.ImageNotFound: | |
+ raise exception.ImageUnacceptable(_("no usable parent snapshot " | |
+ "for volume %s") % volume) | |
+ | |
+ def flatten(self, volume, pool=None): | |
+ """"Flattens" a snapshotted image with the parents' data, | |
+ effectively detaching it from the parent. | |
+ | |
+ :volume: Name of RBD object | |
+ :pool: Name of pool | |
+ """ | |
+ LOG.debug('flattening %(pool)s/%(vol)s', dict(pool=pool, vol=volume)) | |
+ with RBDVolumeProxy(self, str(volume), pool=pool) as vol: | |
+ tpool.execute(vol.flatten) | |
+ | |
def exists(self, name, pool=None, snapshot=None): | |
try: | |
with RBDVolumeProxy(self, name, | |
@@ -281,10 +312,12 @@ | |
args += self.ceph_args() | |
utils.execute('rbd', 'import', *args) | |
- def cleanup_volumes(self, instance): | |
+ def _destroy_volume(self, client, volume, pool=None): | |
+ """Destroy an RBD volume, retrying as needed. | |
+ """ | |
def _cleanup_vol(ioctx, volume, retryctx): | |
try: | |
- rbd.RBD().remove(client.ioctx, volume) | |
+ rbd.RBD().remove(ioctx, volume) | |
raise loopingcall.LoopingCallDone(retvalue=False) | |
except rbd.ImageHasSnapshots: | |
self.remove_snap(volume, libvirt_utils.RESIZE_SNAPSHOT_NAME, | |
@@ -297,6 +330,20 @@ | |
if retryctx['retries'] <= 0: | |
raise loopingcall.LoopingCallDone() | |
+ # NOTE(danms): We let it go for ten seconds | |
+ retryctx = {'retries': 10} | |
+ timer = loopingcall.FixedIntervalLoopingCall( | |
+ _cleanup_vol, client.ioctx, volume, retryctx) | |
+ timed_out = timer.start(interval=1).wait() | |
+ if timed_out: | |
+ # NOTE(danms): Run this again to propagate the error, but | |
+ # if it succeeds, don't raise the loopingcall exception | |
+ try: | |
+ _cleanup_vol(client.ioctx, volume, retryctx) | |
+ except loopingcall.LoopingCallDone: | |
+ pass | |
+ | |
+ def cleanup_volumes(self, instance): | |
with RADOSClient(self, self.pool) as client: | |
def belongs_to_instance(disk): | |
@@ -312,18 +359,7 @@ | |
volumes = rbd.RBD().list(client.ioctx) | |
for volume in filter(belongs_to_instance, volumes): | |
- # NOTE(danms): We let it go for ten seconds | |
- retryctx = {'retries': 10} | |
- timer = loopingcall.FixedIntervalLoopingCall( | |
- _cleanup_vol, client.ioctx, volume, retryctx) | |
- timed_out = timer.start(interval=1).wait() | |
- if timed_out: | |
- # NOTE(danms): Run this again to propagate the error, but | |
- # if it succeeds, don't raise the loopingcall exception | |
- try: | |
- _cleanup_vol(client.ioctx, volume, retryctx) | |
- except loopingcall.LoopingCallDone: | |
- pass | |
+ self._destroy_volume(client, volume) | |
def get_pool_info(self): | |
with RADOSClient(self) as client: | |
@@ -332,34 +368,49 @@ | |
'free': stats['kb_avail'] * units.Ki, | |
'used': stats['kb_used'] * units.Ki} | |
- def create_snap(self, volume, name): | |
- """Create a snapshot on an RBD object. | |
+ def create_snap(self, volume, name, pool=None, protect=False): | |
+ """Create a snapshot of an RBD volume. | |
:volume: Name of RBD object | |
:name: Name of snapshot | |
+ :pool: Name of pool | |
+ :protect: Set the snapshot to "protected" | |
""" | |
LOG.debug('creating snapshot(%(snap)s) on rbd image(%(img)s)', | |
{'snap': name, 'img': volume}) | |
- with RBDVolumeProxy(self, volume) as vol: | |
+ with RBDVolumeProxy(self, str(volume), pool=pool) as vol: | |
tpool.execute(vol.create_snap, name) | |
+ if protect and not vol.is_protected_snap(name): | |
+ tpool.execute(vol.protect_snap, name) | |
- def remove_snap(self, volume, name, ignore_errors=False): | |
- """Remove a snapshot from an RBD volume. | |
+ def remove_snap(self, volume, name, ignore_errors=False, pool=None, | |
+ force=False): | |
+ """Removes a snapshot from an RBD volume. | |
:volume: Name of RBD object | |
:name: Name of snapshot | |
:ignore_errors: whether or not to log warnings on failures | |
+ :pool: Name of pool | |
+ :force: Remove snapshot even if it is protected | |
""" | |
- with RBDVolumeProxy(self, volume) as vol: | |
+ with RBDVolumeProxy(self, str(volume), pool=pool) as vol: | |
if name in [snap.get('name', '') for snap in vol.list_snaps()]: | |
- LOG.debug('removing snapshot(%(snap)s) on rbd image(%(img)s)', | |
- {'snap': name, 'img': volume}) | |
+ if vol.is_protected_snap(name): | |
+ if force: | |
+ tpool.execute(vol.unprotect_snap, name) | |
+ elif not ignore_errors: | |
+ LOG.warning(_LW('snapshot(%(name)s) on rbd ' | |
+ 'image(%(img)s) is protected, ' | |
+ 'skipping'), | |
+ {'name': name, 'img': volume}) | |
+ return | |
+ LOG.debug('removing snapshot(%(name)s) on rbd image(%(img)s)', | |
+ {'name': name, 'img': volume}) | |
tpool.execute(vol.remove_snap, name) | |
- else: | |
- if not ignore_errors: | |
- LOG.warning(_LW('no snapshot(%(snap)s) found on ' | |
- 'image(%(img)s)'), {'snap': name, | |
- 'img': volume}) | |
+ elif not ignore_errors: | |
+ LOG.warning(_LW('no snapshot(%(name)s) found on rbd ' | |
+ 'image(%(img)s)'), | |
+ {'name': name, 'img': volume}) | |
def rollback_to_snap(self, volume, name): | |
"""Revert an RBD volume to its contents at a snapshot. | |
@@ -374,3 +425,9 @@ | |
tpool.execute(vol.rollback_to_snap, name) | |
else: | |
raise exception.SnapshotNotFound(snapshot_id=name) | |
+ | |
+ def destroy_volume(self, volume, pool=None): | |
+ """A one-shot version of cleanup_volumes() | |
+ """ | |
+ with RADOSClient(self, pool) as client: | |
+ self._destroy_volume(client, volume) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment