Merge "Make allocation candidates available for scheduler filters"

Zuul 2022-12-17 17:12:31 +00:00 committed by Gerrit Code Review
commit b54beee429
4 changed files with 866 additions and 122 deletions


@@ -28,6 +28,9 @@ class BaseHostFilter(filters.BaseFilter):
# other parameters. We care about running policy filters (i.e.
# ImagePropertiesFilter) but not things that check usage on the
# existing compute node, etc.
# This also means that filters marked with RUN_ON_REBUILD = True cannot
# filter on allocation candidates, or they need to handle the rebuild
# case specially.
RUN_ON_REBUILD = False
def _filter_one(self, obj, spec):
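For illustration only, a filter built on this change might look like the following minimal sketch; the filter name and the pass condition are hypothetical and not part of this commit. Because it relies on allocation candidates, which are not generated on rebuild, it keeps RUN_ON_REBUILD = False:

from nova.scheduler import filters


class RequireCandidateFilter(filters.BaseHostFilter):
    """Hypothetical example filter: skip hosts whose allocation
    candidates were all removed by earlier filters."""

    # This filter depends on allocation candidates, which do not exist
    # during rebuild, so it must not run on rebuild.
    RUN_ON_REBUILD = False

    def host_passes(self, host_state, spec_obj):
        # host_state.allocation_candidates is the deep-copied list of
        # allocation requests attached by the scheduler manager.
        return bool(host_state.allocation_candidates)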


@@ -153,6 +153,8 @@ class HostState(object):
self.updated = None
self.allocation_candidates = []
def update(self, compute=None, service=None, aggregates=None,
inst_dict=None):
"""Update all information about a host."""
@@ -314,13 +316,21 @@ class HostState(object):
self.num_io_ops += 1
def __repr__(self):
return ("(%(host)s, %(node)s) ram: %(free_ram)sMB "
"disk: %(free_disk)sMB io_ops: %(num_io_ops)s "
"instances: %(num_instances)s" %
{'host': self.host, 'node': self.nodename,
'free_ram': self.free_ram_mb, 'free_disk': self.free_disk_mb,
'num_io_ops': self.num_io_ops,
'num_instances': self.num_instances})
return (
"(%(host)s, %(node)s) ram: %(free_ram)sMB "
"disk: %(free_disk)sMB io_ops: %(num_io_ops)s "
"instances: %(num_instances)s, "
"allocation_candidates: %(num_a_c)s"
% {
"host": self.host,
"node": self.nodename,
"free_ram": self.free_ram_mb,
"free_disk": self.free_disk_mb,
"num_io_ops": self.num_io_ops,
"num_instances": self.num_instances,
"num_a_c": len(self.allocation_candidates),
}
)
class HostManager(object):


@@ -20,6 +20,7 @@ Scheduler Service
"""
import collections
import copy
import random
from oslo_log import log as logging
@@ -299,12 +300,29 @@ class SchedulerManager(manager.Manager):
# host, we virtually consume resources on it so subsequent
# selections can adjust accordingly.
def hosts_with_alloc_reqs(hosts_gen):
"""Extend the HostState objects returned by the generator with
the allocation requests of that host
"""
for host in hosts_gen:
host.allocation_candidates = copy.deepcopy(
alloc_reqs_by_rp_uuid[host.uuid])
yield host
# Note: remember, we are using a generator-iterator here. So only
# traverse this list once. This can bite you if the hosts
# are being scanned in a filter or weighing function.
hosts = self._get_all_host_states(
elevated, spec_obj, provider_summaries)
# alloc_reqs_by_rp_uuid is None during rebuild, so this means we cannot
# run filters that use allocation candidates during rebuild
if alloc_reqs_by_rp_uuid is not None:
# Wrap the generator to extend the HostState objects with the
# allocation requests for the given host. This is needed to
# support scheduler filters filtering on allocation candidates.
hosts = hosts_with_alloc_reqs(hosts)
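Two details of this wrapper are easy to miss: each HostState gets its own deep copy of the candidate list, so a filter may prune host.allocation_candidates without mutating the shared alloc_reqs_by_rp_uuid mapping, and the wrapped result is still a generator, so it can only be traversed once. A self-contained toy sketch (FakeHostState and the data below are made up for illustration):

import copy


class FakeHostState:
    """Toy stand-in for HostState, only for this illustration."""
    def __init__(self, uuid):
        self.uuid = uuid
        self.allocation_candidates = []


alloc_reqs_by_rp_uuid = {"rp-1": [{"allocations": {"rp-1": {}}}]}


def hosts_with_alloc_reqs(hosts_gen):
    for host in hosts_gen:
        # Each host gets its own copy, so filters may drop entries from
        # host.allocation_candidates without touching the shared dict.
        host.allocation_candidates = copy.deepcopy(
            alloc_reqs_by_rp_uuid[host.uuid])
        yield host


hosts = hosts_with_alloc_reqs(iter([FakeHostState("rp-1")]))
assert len(list(hosts)) == 1  # the first traversal consumes the generator
assert list(hosts) == []      # a second traversal yields nothing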
# NOTE(sbauza): The RequestSpec.num_instances field contains the number
# of instances created when the RequestSpec was used to first boot some
# instances. This is incorrect when doing a move or resize operation,
@@ -332,6 +350,13 @@ class SchedulerManager(manager.Manager):
# the older dict format representing HostState objects.
# TODO(stephenfin): Remove this when we bump scheduler the RPC API
# version to 5.0
# NOTE(gibi): We cannot remove this branch as it is actively used
# when nova calls the scheduler during rebuild (not evacuate) to
# check if the current host is still good for the new image used
# for the rebuild. In this case placement cannot be used to
# generate candidates as that would require space on the current
# compute for double allocation. So there are no allocation candidates
# for rebuild, and therefore alloc_reqs_by_rp_uuid is None.
return self._legacy_find_hosts(
context, num_instances, spec_obj, hosts, num_alts,
instance_uuids=instance_uuids)
@@ -345,6 +370,9 @@ class SchedulerManager(manager.Manager):
# The list of hosts that have been selected (and claimed).
claimed_hosts = []
# The allocation request that was claimed in placement for each claimed host
claimed_alloc_reqs = []
for num, instance_uuid in enumerate(instance_uuids):
# In a multi-create request, the first request spec from the list
# is passed to the scheduler and that request spec's instance_uuid
@@ -371,21 +399,20 @@ class SchedulerManager(manager.Manager):
# resource provider UUID
claimed_host = None
for host in hosts:
cn_uuid = host.uuid
if cn_uuid not in alloc_reqs_by_rp_uuid:
msg = ("A host state with uuid = '%s' that did not have a "
"matching allocation_request was encountered while "
"scheduling. This host was skipped.")
LOG.debug(msg, cn_uuid)
if not host.allocation_candidates:
LOG.debug(
"The nova scheduler removed every allocation candidate "
"for host %s so this host was skipped.",
host
)
continue
alloc_reqs = alloc_reqs_by_rp_uuid[cn_uuid]
# TODO(jaypipes): Loop through all allocation_requests instead
# of just trying the first one. For now, since we'll likely
# want to order the allocation_requests in the future based on
# information in the provider summaries, we'll just try to
# claim resources using the first allocation_request
alloc_req = alloc_reqs[0]
alloc_req = host.allocation_candidates[0]
if utils.claim_resources(
elevated, self.placement_client, spec_obj, instance_uuid,
alloc_req,
@@ -405,6 +432,15 @@ class SchedulerManager(manager.Manager):
claimed_instance_uuids.append(instance_uuid)
claimed_hosts.append(claimed_host)
claimed_alloc_reqs.append(alloc_req)
# update the provider mapping in the request spec based
# on the allocated candidate, as _consume_selected_host depends
# on this information to temporarily consume PCI devices tracked in
# placement
for request_group in spec_obj.requested_resources:
request_group.provider_uuids = alloc_req[
'mappings'][request_group.requester_id]
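For context, an allocation request returned by placement (when requested with a new enough microversion) carries a "mappings" key from request group suffix to the resource providers satisfying that group. Its shape is roughly as follows; the provider UUID and group suffix below are made-up placeholders, not values from this change:

# Illustrative only; the keys' values are placeholders.
alloc_req = {
    "allocations": {
        "compute-rp-uuid": {"resources": {"VCPU": 2, "MEMORY_MB": 512}},
    },
    "mappings": {
        # request group suffix (the RequestGroup.requester_id)
        # -> list of resource provider UUIDs serving that group
        "pci-request-id-1": ["compute-rp-uuid"],
    },
}

# This is what the loop above records on each request group:
#     request_group.provider_uuids = alloc_req['mappings'][requester_id]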
# Now consume the resources so the filter/weights will change for
# the next instance.
@@ -416,11 +452,19 @@ class SchedulerManager(manager.Manager):
self._ensure_sufficient_hosts(
context, claimed_hosts, num_instances, claimed_instance_uuids)
# We have selected and claimed hosts for each instance. Now we need to
# find alternates for each host.
# We have selected and claimed hosts for each instance along with a
# claimed allocation request. Now we need to find alternates for each
# host.
return self._get_alternate_hosts(
claimed_hosts, spec_obj, hosts, num, num_alts,
alloc_reqs_by_rp_uuid, allocation_request_version)
claimed_hosts,
spec_obj,
hosts,
num,
num_alts,
alloc_reqs_by_rp_uuid,
allocation_request_version,
claimed_alloc_reqs,
)
def _ensure_sufficient_hosts(
self, context, hosts, required_count, claimed_uuids=None,
@@ -532,7 +576,21 @@ class SchedulerManager(manager.Manager):
def _get_alternate_hosts(
self, selected_hosts, spec_obj, hosts, index, num_alts,
alloc_reqs_by_rp_uuid=None, allocation_request_version=None,
selected_alloc_reqs=None,
):
"""Generate the main Selection and possible alternate Selection
objects for each "instance".
:param selected_hosts: This is a list of HostState objects. Each
HostState represents the main selection for a given instance being
scheduled (we can have multiple instances during multi create).
:param selected_alloc_reqs: This is a list of allocation requests that
are already allocated in placement for the main Selection of each
instance. This list matches selected_hosts by index. So for the first
instance the selected host is selected_hosts[0] and the already
allocated placement candidate is selected_alloc_reqs[0].
"""
# We only need to filter/weigh the hosts again if we're dealing with
# more than one instance and are going to be picking alternates.
if index > 0 and num_alts > 0:
@@ -546,11 +604,10 @@ class SchedulerManager(manager.Manager):
# representing the selected host along with alternates from the same
# cell.
selections_to_return = []
for selected_host in selected_hosts:
for i, selected_host in enumerate(selected_hosts):
# This is the list of hosts for one particular instance.
if alloc_reqs_by_rp_uuid:
selected_alloc_req = alloc_reqs_by_rp_uuid.get(
selected_host.uuid)[0]
selected_alloc_req = selected_alloc_reqs[i]
else:
selected_alloc_req = None
@@ -571,15 +628,17 @@ class SchedulerManager(manager.Manager):
if len(selected_plus_alts) >= num_alts + 1:
break
# TODO(gibi): In theory we could generate alternatives on the
# same host if that host has different possible allocation
# candidates for the request. But we don't do that today.
if host.cell_uuid == cell_uuid and host not in selected_hosts:
if alloc_reqs_by_rp_uuid is not None:
alt_uuid = host.uuid
if alt_uuid not in alloc_reqs_by_rp_uuid:
if not host.allocation_candidates:
msg = ("A host state with uuid = '%s' that did "
"not have a matching allocation_request "
"not have any remaining allocation_request "
"was encountered while scheduling. This "
"host was skipped.")
LOG.debug(msg, alt_uuid)
LOG.debug(msg, host.uuid)
continue
# TODO(jaypipes): Loop through all allocation_requests
@@ -588,7 +647,13 @@ class SchedulerManager(manager.Manager):
# the future based on information in the provider
# summaries, we'll just try to claim resources using
# the first allocation_request
alloc_req = alloc_reqs_by_rp_uuid[alt_uuid][0]
# NOTE(gibi): we are using, and re-using, allocation
# candidates for alternatives here. This is OK as
# these candidates are not yet allocated in placement
# and we don't know if an alternate will ever be used.
# To increase our chance of success we could try to use a different
# candidate for each alternative, though (a sketch of that idea
# follows this hunk).
alloc_req = host.allocation_candidates[0]
alt_selection = objects.Selection.from_host_state(
host, alloc_req, allocation_request_version)
else:
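A rough sketch of the idea mentioned in the NOTE above, not part of this change: spread the alternates of one instance across the host's remaining candidates instead of always re-using candidate [0]. The helper name is hypothetical:

def pick_alternate_candidate(candidates, num_already_selected):
    """Hypothetical helper: return a different candidate for each
    alternate built for the same instance, wrapping around when there
    are fewer candidates than alternates."""
    return candidates[num_already_selected % len(candidates)]


# With candidates [c0, c1, c2]: the main selection (0 already selected)
# keeps c0, the first alternate gets c1, the second gets c2, and so on.
assert pick_alternate_candidate(["c0", "c1", "c2"], 1) == "c1"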

File diff suppressed because it is too large.