Last active
June 27, 2020 13:24
-
-
Save JCallicoat/43505cab0535057ca4fb to your computer and use it in GitHub Desktop.
Overview and code dive showing how disk_available_least is calculated
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
How disk_available_least is calculated | |
====================================== | |
Summary: | |
The nova-scheduler uses disk_available_least on the host to determine whether there is enough space to boot / migrate / resize an instance of a given flavor. | |
This is the calculation scheduler uses: | |
free_gb = total size of filesystem (minus) physical size on filesystem of all instance disks (minus) anything other than instance disks on that filesystem (e.g., /var/lib/nova/instances/_base/*) | |
overcommitted_total_gb = virtual size of all instance disks (i.e., how big their flavor specifies) (minus) physical size on filesystem of all instance disks | |
disk_available_least = free_gb - overcommitted_total_gb | |
Example: | |
5000G host filesystem | |
400G actually used by instances | |
4000G virtual size of instances | |
free_gb = 5000 - 400 = 4600 (the 400 here doesn't account for _base images, snapshots, etc) | |
overcommitted_total_gb = 4000 - 400 = 3600 | |
disk_available_least = 4600 - 3600 = 1000 | |
So scheduler sees only 1000G free, even though 4600G is actually free | |
If _base images take up 1200G on disk in addition to the 400G from instances, | |
then the calculation looks like: | |
free_gb = 5000 - 1600 = 3400 | |
overcommitted_total_gb = 4000 - 400 = 3600 | |
disk_available_least = 3400 - 3600 = -200 | |
-------- | |
Actual code path in nova/virt/libvirt/driver.py: | |
update_status calls locally defined _get_disk_available_least | |
_get_disk_available_least sets disk_free_gb = disk_info_dict['free'] | |
which uses the value returned from disk_info_dict = self.driver._get_local_gb_info() | |
_get_local_gb_info calls libvirt_utils.get_fs_info | |
which uses os.statvfs on the filesystem where /var/lib/nova/instances directory lives | |
and returns the total size of the fs, the free space and the used space | |
_get_disk_available_least then sets disk_over_committed = (self.driver. | |
_get_disk_over_committed_size_total()) | |
_get_disk_over_committed_size_total calls _get_instance_disk_info() which walks all the | |
disks associated with a domain, and for each one it adds to a dictionary that has | |
'over_committed_disk_size' set to the virtual size of the disk, minus the actual space | |
it is using | |
_get_disk_over_committed_size_total then loops over each dictionary and sets | |
disk_over_committed_size += int( | |
info['over_committed_disk_size']) | |
then returns the total in disk_over_committed_size | |
finally _get_disk_available_least calculates the disk_available_least metric | |
available_least = disk_free_gb * units.Gi - disk_over_committed | |
return (available_least / units.Gi) | |
""" | |
# nova/virt/libvirt/utils.py | |
def get_fs_info(path):
    """Report capacity figures for the filesystem that holds ``path``.

    :param path: Any directory entry located on the filesystem of interest
    :returns: A dict with three byte counts:
        :total: Overall size of the filesystem
        :free: Space available to unprivileged users (``f_bavail``)
        :used: Space consumed, computed from all blocks minus all free
               blocks (``f_blocks - f_bfree``)
    """
    stats = os.statvfs(path)
    fragment_size = stats.f_frsize
    # 'free' is based on f_bavail while 'used' is based on f_bfree, so
    # free + used can be smaller than total when blocks are reserved.
    return {
        'total': fragment_size * stats.f_blocks,
        'free': fragment_size * stats.f_bavail,
        'used': fragment_size * (stats.f_blocks - stats.f_bfree),
    }
# nova/virt/libvirt/driver.py | |
def _get_instance_disk_info(self, instance_name, xml,
                            block_device_info=None):
    """Return JSON-encoded size details for an instance's local disks.

    Parses the libvirt domain XML and, for every ``<disk>`` of type
    ``file`` or ``block`` that is not an attached volume, records its
    on-disk size, its virtual size and the difference between the two.

    :param instance_name: Instance name; only used in log messages
    :param xml: libvirt domain XML describing the instance
    :param block_device_info: Optional block device info; the
        ``mount_device`` of each mapping in it identifies a volume
        whose disk is skipped
    :returns: JSON string encoding a list of dicts with the keys
        ``type`` (the disk's driver type), ``path``, ``virt_disk_size``,
        ``backing_file``, ``disk_size`` and ``over_committed_disk_size``
    :raises OSError: from ``os.path.getsize`` when the image of a
        file-backed disk cannot be stat'ed
    """
    block_device_mapping = driver.block_device_info_get_mapping(
        block_device_info)

    # Last path component of each mount device (e.g. "vdb" for
    # "/dev/vdb"); compared against the <target dev="..."> of each disk.
    volume_devices = set(vol['mount_device'].rpartition("/")[2]
                         for vol in block_device_mapping)

    disk_info = []
    doc = etree.fromstring(xml)

    # Read source/target/driver from each <disk> element itself.
    # Independent document-wide findall() lists only line up by index
    # when every disk carries all three children; a disk without a
    # <source> would shift every later entry onto the wrong disk.
    for disk_node in doc.findall('.//devices/disk'):
        disk_type = disk_node.get('type')
        source_node = disk_node.find('source')
        target_node = disk_node.find('target')
        driver_node = disk_node.find('driver')

        # Compare against None explicitly: an element without children
        # can evaluate as false.
        path = None
        if source_node is not None:
            path = source_node.get('file') or source_node.get('dev')
        target = None
        if target_node is not None:
            target = target_node.get('dev')

        if not path:
            LOG.debug('skipping disk for %s as it does not have a path',
                      instance_name)
            continue

        if disk_type not in ['file', 'block']:
            LOG.debug('skipping disk %s because it looks like a volume',
                      path)
            continue

        if target in volume_devices:
            LOG.debug('skipping disk %(path)s (%(target)s) as it is a '
                      'volume', {'path': path, 'target': target})
            continue

        # Get the real disk size; an unavailable image surfaces as an
        # exception from the size lookup.
        if disk_type == 'file':
            dk_size = int(os.path.getsize(path))
        else:
            # Only 'block' can reach here because of the type check above.
            dk_size = lvm.get_volume_size(path)

        driver_type = None
        if driver_node is not None:
            driver_type = driver_node.get('type')

        if driver_type == "qcow2":
            backing_file = libvirt_utils.get_disk_backing_file(path)
            # NOTE(review): presumably the image's virtual size in the
            # same unit as dk_size -- confirm against disk.get_disk_size.
            virt_size = disk.get_disk_size(path)
            over_commit_size = int(virt_size) - dk_size
        else:
            # Any other driver type is reported with no over-commit.
            backing_file = ""
            virt_size = dk_size
            over_commit_size = 0

        disk_info.append({'type': driver_type,
                          'path': path,
                          'virt_disk_size': virt_size,
                          'backing_file': backing_file,
                          'disk_size': dk_size,
                          'over_committed_disk_size': over_commit_size})
    return jsonutils.dumps(disk_info)
def get_instance_disk_info(self, instance_name,
                           block_device_info=None):
    """Look up an instance's domain XML and return its disk info.

    :param instance_name: Name used to look up the libvirt domain
    :param block_device_info: Passed through unchanged to
        ``_get_instance_disk_info``
    :returns: Whatever ``_get_instance_disk_info`` returns for the
        domain's XML description
    :raises exception.InstanceNotFound: when the lookup or the XML
        retrieval fails with a libvirt error
    """
    try:
        domain_xml = self._lookup_by_name(instance_name).XMLDesc(0)
    except libvirt.libvirtError as ex:
        # Log the libvirt details, then report the instance as missing.
        details = {'instance_name': instance_name,
                   'error_code': ex.get_error_code(),
                   'ex': ex}
        LOG.warn(_('Error from libvirt while getting description of '
                   '%(instance_name)s: [Error Code %(error_code)s] '
                   '%(ex)s') % details)
        raise exception.InstanceNotFound(instance_id=instance_name)

    return self._get_instance_disk_info(instance_name, domain_xml,
                                        block_device_info)
def _get_disk_over_committed_size_total(self):
    """Return total over committed disk size for all instances.

    Sums the ``over_committed_disk_size`` entry of every disk reported
    by ``_get_instance_disk_info`` for every domain returned by
    ``_list_instance_domains``. A domain whose disk info cannot be
    read (libvirt error, missing or inaccessible disk file, missing
    volume block device) is logged and skipped, not treated as fatal.

    :returns: The accumulated over-committed size (virtual size minus
        actual size, summed over all disks)
    :raises OSError: for OS errors other than ENOENT and EACCES
    """
    # Disk size that all instance uses : virtual_size - disk_size
    disk_over_committed_size = 0
    for dom in self._list_instance_domains():
        try:
            xml = dom.XMLDesc(0)
            disk_infos = jsonutils.loads(
                self._get_instance_disk_info(dom.name(), xml))
            disk_over_committed_size += sum(
                int(info['over_committed_disk_size'])
                for info in disk_infos)
        except libvirt.libvirtError as ex:
            error_code = ex.get_error_code()
            # Pass the values as logging arguments, like the other
            # handlers below, so interpolation is left to the logger.
            LOG.warn(_LW(
                'Error from libvirt while getting description of '
                '%(instance_name)s: [Error Code %(error_code)s] %(ex)s'
            ), {'instance_name': dom.name(),
                'error_code': error_code,
                'ex': ex})
        except OSError as e:
            if e.errno == errno.ENOENT:
                LOG.warn(_LW('Periodic task is updating the host stat, '
                             'it is trying to get disk %(i_name)s, '
                             'but disk file was removed by concurrent '
                             'operations such as resize.'),
                         {'i_name': dom.name()})
            elif e.errno == errno.EACCES:
                LOG.warn(_LW('Periodic task is updating the host stat, '
                             'it is trying to get disk %(i_name)s, '
                             'but access is denied. It is most likely '
                             'due to a VM that exists on the compute '
                             'node but is not managed by Nova.'),
                         {'i_name': dom.name()})
            else:
                # Any other OS error is unexpected; let it propagate.
                raise
        except exception.VolumeBDMPathNotFound as e:
            LOG.warn(_LW('Periodic task is updating the host stats, '
                         'it is trying to get disk info for %(i_name)s, '
                         'but the backing volume block device was removed '
                         'by concurrent operations such as resize. '
                         'Error: %(error)s'),
                     {'i_name': dom.name(),
                      'error': e})
        # NOTE(gtt116): give other tasks a chance.
        greenthread.sleep(0)
    return disk_over_committed_size
@staticmethod
def _get_local_gb_info():
    """Get local storage info of the compute node in GB.

    The backing store queried depends on ``CONF.libvirt.images_type``:
    the configured LVM volume group for ``lvm``, the RBD pool for
    ``rbd``, and otherwise the filesystem holding
    ``CONF.instances_path``.

    :returns: A dict containing:
        :total: How big the overall usable filesystem is (in gigabytes)
        :free: How much space is free (in gigabytes)
        :used: How much space is used (in gigabytes)
    """
    if CONF.libvirt.images_type == 'lvm':
        info = lvm.get_volume_group_info(
            CONF.libvirt.images_volume_group)
    elif CONF.libvirt.images_type == 'rbd':
        info = LibvirtDriver._get_rbd_driver().get_pool_info()
    else:
        info = libvirt_utils.get_fs_info(CONF.instances_path)

    # items() exists on every Python version (iteritems() is Python 2
    # only), and only existing keys are reassigned, so the dict does
    # not change size while it is being iterated.
    for key, value in info.items():
        # Floor division keeps whole gigabytes no matter whether "/"
        # means integer or true division in the running interpreter.
        info[key] = value // units.Gi
    return info
def update_status(self):
    """Retrieve status info from libvirt.

    Gathers the host's capacity and usage figures from ``self.driver``,
    stores the resulting dict in ``self._stats`` and returns it.

    :returns: Dict of host stats with the keys ``supported_instances``,
        ``vcpus``, ``memory_mb``, ``local_gb``, ``vcpus_used``,
        ``memory_mb_used``, ``local_gb_used``, ``hypervisor_type``,
        ``hypervisor_version``, ``hypervisor_hostname``, ``cpu_info``,
        ``disk_available_least``, ``pci_passthrough_devices`` and
        ``numa_topology``
    """

    def _get_disk_available_least():
        """Return the free disk space left after honouring over-commit.

        Takes the free space reported for local storage and subtracts
        the total amount by which instance disks are over-committed
        (virtual size minus the space they currently occupy). The
        result is negative when the over-commit exceeds the free space.
        """
        disk_free_gb = disk_info_dict['free']
        disk_over_committed = (self.driver.
                               _get_disk_over_committed_size_total())
        # Scale the free space by units.Gi before subtracting, so the
        # subtraction is done in the over-committed total's unit.
        available_least = disk_free_gb * units.Gi - disk_over_committed
        # Floor division keeps the result a whole number of gigabytes
        # regardless of how "/" behaves in the running interpreter.
        return available_least // units.Gi

    LOG.debug("Updating host stats")
    # Read by the nested helper above through its closure.
    disk_info_dict = self.driver._get_local_gb_info()
    data = {}

    # NOTE(dprince): calling capabilities before getVersion works around
    # an initialization issue with some versions of Libvirt (1.0.5.5).
    # See: https://bugzilla.redhat.com/show_bug.cgi?id=1000116
    # See: https://bugs.launchpad.net/nova/+bug/1215593
    data["supported_instances"] = \
        self.driver._get_instance_capabilities()

    data["vcpus"] = self.driver._get_vcpu_total()
    data["memory_mb"] = self.driver._get_memory_mb_total()
    data["local_gb"] = disk_info_dict['total']
    data["vcpus_used"] = self.driver._get_vcpu_used()
    data["memory_mb_used"] = self.driver._get_memory_mb_used()
    data["local_gb_used"] = disk_info_dict['used']
    data["hypervisor_type"] = self.driver._get_hypervisor_type()
    data["hypervisor_version"] = self.driver._get_hypervisor_version()
    data["hypervisor_hostname"] = self.driver._get_hypervisor_hostname()
    data["cpu_info"] = self.driver._get_cpu_info()
    data['disk_available_least'] = _get_disk_available_least()

    data['pci_passthrough_devices'] = \
        self.driver._get_pci_passthrough_devices()

    # Serialise the NUMA topology when the driver reports one.
    numa_topology = self.driver._get_host_numa_topology()
    if numa_topology:
        data['numa_topology'] = numa_topology.to_json()
    else:
        data['numa_topology'] = None

    self._stats = data

    return data
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment