Merge "Make allocation candidates available for scheduler filters"
commit b54beee429
@@ -28,6 +28,9 @@ class BaseHostFilter(filters.BaseFilter):
     # other parameters. We care about running policy filters (i.e.
     # ImagePropertiesFilter) but not things that check usage on the
     # existing compute node, etc.
+    # This also means that filters marked with RUN_ON_REBUILD = True cannot
+    # filter on allocation candidates or need to handle the rebuild case
+    # specially.
     RUN_ON_REBUILD = False

     def _filter_one(self, obj, spec):
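For illustration, a standalone sketch of a filter that honors this RUN_ON_REBUILD contract and prunes the per-host allocation candidates. This is not nova code: FakeHostState and SmallFlavorFilter are made-up names, and only the allocation_candidates attribute added by this change is assumed.

# Standalone sketch: a filter that inspects and prunes the candidates
# attached to the host state. Names below are illustrative only.
class FakeHostState:
    def __init__(self, uuid, allocation_candidates):
        self.uuid = uuid
        self.allocation_candidates = allocation_candidates


class SmallFlavorFilter:
    # Per the comment above: this filter mutates allocation_candidates, so it
    # must not run on rebuild, where no candidates are generated.
    RUN_ON_REBUILD = False

    def host_passes(self, host_state, spec_obj):
        # Keep only candidates whose total VCPU request is <= 2 (an arbitrary
        # policy for the sketch); the host passes if any candidate remains.
        kept = []
        for candidate in host_state.allocation_candidates:
            vcpus = sum(
                alloc.get("resources", {}).get("VCPU", 0)
                for alloc in candidate.get("allocations", {}).values()
            )
            if vcpus <= 2:
                kept.append(candidate)
        host_state.allocation_candidates = kept
        return bool(kept)


host = FakeHostState(
    "rp-1",
    [{"allocations": {"rp-1": {"resources": {"VCPU": 1, "MEMORY_MB": 512}}}}],
)
print(SmallFlavorFilter().host_passes(host, spec_obj=None))  # True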
@@ -153,6 +153,8 @@ class HostState(object):

         self.updated = None

+        self.allocation_candidates = []
+
     def update(self, compute=None, service=None, aggregates=None,
                inst_dict=None):
         """Update all information about a host."""
@@ -314,13 +316,21 @@ class HostState(object):
         self.num_io_ops += 1

     def __repr__(self):
-        return ("(%(host)s, %(node)s) ram: %(free_ram)sMB "
-                "disk: %(free_disk)sMB io_ops: %(num_io_ops)s "
-                "instances: %(num_instances)s" %
-                {'host': self.host, 'node': self.nodename,
-                 'free_ram': self.free_ram_mb, 'free_disk': self.free_disk_mb,
-                 'num_io_ops': self.num_io_ops,
-                 'num_instances': self.num_instances})
+        return (
+            "(%(host)s, %(node)s) ram: %(free_ram)sMB "
+            "disk: %(free_disk)sMB io_ops: %(num_io_ops)s "
+            "instances: %(num_instances)s, "
+            "allocation_candidates: %(num_a_c)s"
+            % {
+                "host": self.host,
+                "node": self.nodename,
+                "free_ram": self.free_ram_mb,
+                "free_disk": self.free_disk_mb,
+                "num_io_ops": self.num_io_ops,
+                "num_instances": self.num_instances,
+                "num_a_c": len(self.allocation_candidates),
+            }
+        )


 class HostManager(object):
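As a rough illustration of the extended repr, a minimal sketch of the same %-dict formatting with made-up values:

candidates = [{"allocations": {}}, {"allocations": {}}]
print(
    "(%(host)s, %(node)s) instances: %(num_instances)s, "
    "allocation_candidates: %(num_a_c)s"
    % {
        "host": "compute-1",
        "node": "compute-1",
        "num_instances": 3,
        "num_a_c": len(candidates),
    }
)
# prints: (compute-1, compute-1) instances: 3, allocation_candidates: 2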
@@ -20,6 +20,7 @@ Scheduler Service
 """

 import collections
+import copy
 import random

 from oslo_log import log as logging
@@ -299,12 +300,29 @@ class SchedulerManager(manager.Manager):
         # host, we virtually consume resources on it so subsequent
         # selections can adjust accordingly.

+        def hosts_with_alloc_reqs(hosts_gen):
+            """Extend the HostState objects returned by the generator with
+            the allocation requests of that host
+            """
+            for host in hosts_gen:
+                host.allocation_candidates = copy.deepcopy(
+                    alloc_reqs_by_rp_uuid[host.uuid])
+                yield host
+
         # Note: remember, we are using a generator-iterator here. So only
         # traverse this list once. This can bite you if the hosts
         # are being scanned in a filter or weighing function.
         hosts = self._get_all_host_states(
             elevated, spec_obj, provider_summaries)

+        # alloc_reqs_by_rp_uuid is None during rebuild, so this means we cannot
+        # run filters that are using allocation candidates during rebuild
+        if alloc_reqs_by_rp_uuid is not None:
+            # wrap the generator to extend the HostState objects with the
+            # allocation requests for that given host. This is needed to
+            # support scheduler filters filtering on allocation candidates.
+            hosts = hosts_with_alloc_reqs(hosts)
+
         # NOTE(sbauza): The RequestSpec.num_instances field contains the number
         # of instances created when the RequestSpec was used to first boot some
         # instances. This is incorrect when doing a move or resize operation,
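A self-contained sketch of the generator-wrapping pattern used above; plain objects stand in for HostState and for the placement allocation requests, and the names are illustrative:

import copy


class FakeHostState:
    def __init__(self, uuid):
        self.uuid = uuid
        self.allocation_candidates = []


def hosts_with_alloc_reqs(hosts_gen, alloc_reqs_by_rp_uuid):
    """Extend each yielded host with its own deep copy of the candidates.

    deepcopy keeps later per-host mutations (e.g. a filter dropping a
    candidate) from corrupting the shared alloc_reqs_by_rp_uuid mapping.
    """
    for host in hosts_gen:
        host.allocation_candidates = copy.deepcopy(
            alloc_reqs_by_rp_uuid[host.uuid])
        yield host


# alloc_reqs_by_rp_uuid maps a compute node (resource provider) uuid to the
# allocation requests placement returned for it; values here are made up.
alloc_reqs_by_rp_uuid = {
    "rp-1": [{"allocations": {"rp-1": {"resources": {"VCPU": 1}}}}],
    "rp-2": [{"allocations": {"rp-2": {"resources": {"VCPU": 2}}}}],
}
hosts = (FakeHostState(uuid) for uuid in ("rp-1", "rp-2"))
hosts = hosts_with_alloc_reqs(hosts, alloc_reqs_by_rp_uuid)

# Note the generator caveat called out above: this is a one-shot iterator,
# so traverse it exactly once.
for host in hosts:
    print(host.uuid, len(host.allocation_candidates))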
@@ -332,6 +350,13 @@ class SchedulerManager(manager.Manager):
             # the older dict format representing HostState objects.
             # TODO(stephenfin): Remove this when we bump the scheduler RPC API
             # version to 5.0
+            # NOTE(gibi): We cannot remove this branch as it is actively used
+            # when nova calls the scheduler during rebuild (not evacuate) to
+            # check if the current host is still good for the new image used
+            # for the rebuild. In this case placement cannot be used to
+            # generate candidates as that would require space on the current
+            # compute for double allocation. So no allocation candidates for
+            # rebuild and therefore alloc_reqs_by_rp_uuid is None
             return self._legacy_find_hosts(
                 context, num_instances, spec_obj, hosts, num_alts,
                 instance_uuids=instance_uuids)
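The control flow this adds up to, as a condensed sketch; _legacy_find_hosts and _find_and_claim are stand-in names, not the real method signatures:

def _legacy_find_hosts(hosts, instance_uuids):
    # Stand-in for the real legacy path: select without placement claims.
    return list(hosts)


def _find_and_claim(hosts, alloc_reqs_by_rp_uuid, instance_uuids):
    # Stand-in for the candidate-aware path sketched further below.
    return list(hosts)


def schedule(hosts, alloc_reqs_by_rp_uuid, instance_uuids):
    # Rebuild (and older callers) arrive with alloc_reqs_by_rp_uuid=None: no
    # candidates exist, candidate-based filters cannot run, and the legacy,
    # claim-less path is taken instead.
    if instance_uuids is None or alloc_reqs_by_rp_uuid is None:
        return _legacy_find_hosts(hosts, instance_uuids)
    return _find_and_claim(hosts, alloc_reqs_by_rp_uuid, instance_uuids)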
@@ -345,6 +370,9 @@ class SchedulerManager(manager.Manager):
         # The list of hosts that have been selected (and claimed).
         claimed_hosts = []

+        # The allocation request allocated on the given claimed host
+        claimed_alloc_reqs = []
+
         for num, instance_uuid in enumerate(instance_uuids):
             # In a multi-create request, the first request spec from the list
             # is passed to the scheduler and that request spec's instance_uuid
@@ -371,21 +399,20 @@ class SchedulerManager(manager.Manager):
             # resource provider UUID
             claimed_host = None
             for host in hosts:
-                cn_uuid = host.uuid
-                if cn_uuid not in alloc_reqs_by_rp_uuid:
-                    msg = ("A host state with uuid = '%s' that did not have a "
-                           "matching allocation_request was encountered while "
-                           "scheduling. This host was skipped.")
-                    LOG.debug(msg, cn_uuid)
+                if not host.allocation_candidates:
+                    LOG.debug(
+                        "The nova scheduler removed every allocation candidate "
+                        "for host %s so this host was skipped.",
+                        host
+                    )
                     continue

-                alloc_reqs = alloc_reqs_by_rp_uuid[cn_uuid]
                 # TODO(jaypipes): Loop through all allocation_requests instead
                 # of just trying the first one. For now, since we'll likely
                 # want to order the allocation_requests in the future based on
                 # information in the provider summaries, we'll just try to
                 # claim resources using the first allocation_request
-                alloc_req = alloc_reqs[0]
+                alloc_req = host.allocation_candidates[0]
                 if utils.claim_resources(
                     elevated, self.placement_client, spec_obj, instance_uuid,
                     alloc_req,
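A self-contained sketch of the per-instance selection and claim loop above; fake_claim_resources stands in for utils.claim_resources (which posts the allocation to placement), and the host objects are assumed to carry the allocation_candidates attribute from the earlier sketches:

def fake_claim_resources(instance_uuid, alloc_req):
    # Pretend every claim succeeds; the real call can fail on a race.
    return True


def select_and_claim(hosts, instance_uuid):
    for host in hosts:
        if not host.allocation_candidates:
            # Same skip as above: a filter may have pruned every candidate
            # for this host.
            continue
        # As the TODO notes say: just try the first remaining candidate.
        alloc_req = host.allocation_candidates[0]
        if fake_claim_resources(instance_uuid, alloc_req):
            return host, alloc_req
    return None, None

# Usage with the FakeHostState sketch from earlier:
# host, alloc_req = select_and_claim([FakeHostState("rp-1")], "uuid-1")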
@@ -405,6 +432,15 @@ class SchedulerManager(manager.Manager):

             claimed_instance_uuids.append(instance_uuid)
             claimed_hosts.append(claimed_host)
+            claimed_alloc_reqs.append(alloc_req)
+
+            # update the provider mapping in the request spec based
+            # on the allocated candidate as the _consume_selected_host depends
+            # on this information to temporarily consume PCI devices tracked in
+            # placement
+            for request_group in spec_obj.requested_resources:
+                request_group.provider_uuids = alloc_req[
+                    'mappings'][request_group.requester_id]

             # Now consume the resources so the filter/weights will change for
             # the next instance.
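A small sketch of what the mappings lookup above does. A placement allocation request carries a "mappings" dict from request-group suffix to the resource provider(s) that satisfy that group; the uuids, the "1" suffix, and FakeRequestGroup are made up for the example:

alloc_req = {
    "allocations": {
        "compute-rp-uuid": {"resources": {"VCPU": 2, "MEMORY_MB": 2048}},
        "pci-rp-uuid": {"resources": {"CUSTOM_PCI_DEV": 1}},
    },
    "mappings": {
        "1": ["pci-rp-uuid"],
    },
}


class FakeRequestGroup:
    def __init__(self, requester_id):
        self.requester_id = requester_id
        self.provider_uuids = []


requested_resources = [FakeRequestGroup("1")]
for request_group in requested_resources:
    # Mirrors the loop above: record which provider(s) ended up serving each
    # request group so later steps (e.g. PCI consumption) can use it.
    request_group.provider_uuids = alloc_req["mappings"][
        request_group.requester_id]
    print(request_group.requester_id, request_group.provider_uuids)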
@@ -416,11 +452,19 @@ class SchedulerManager(manager.Manager):
         self._ensure_sufficient_hosts(
             context, claimed_hosts, num_instances, claimed_instance_uuids)

-        # We have selected and claimed hosts for each instance. Now we need to
-        # find alternates for each host.
+        # We have selected and claimed hosts for each instance along with a
+        # claimed allocation request. Now we need to find alternates for each
+        # host.
         return self._get_alternate_hosts(
-            claimed_hosts, spec_obj, hosts, num, num_alts,
-            alloc_reqs_by_rp_uuid, allocation_request_version)
+            claimed_hosts,
+            spec_obj,
+            hosts,
+            num,
+            num_alts,
+            alloc_reqs_by_rp_uuid,
+            allocation_request_version,
+            claimed_alloc_reqs,
+        )

     def _ensure_sufficient_hosts(
         self, context, hosts, required_count, claimed_uuids=None,
@@ -532,7 +576,21 @@ class SchedulerManager(manager.Manager):
     def _get_alternate_hosts(
         self, selected_hosts, spec_obj, hosts, index, num_alts,
         alloc_reqs_by_rp_uuid=None, allocation_request_version=None,
+        selected_alloc_reqs=None,
     ):
+        """Generate the main Selection and possible alternate Selection
+        objects for each "instance".
+
+        :param selected_hosts: This is a list of HostState objects. Each
+            HostState represents the main selection for a given instance being
+            scheduled (we can have multiple instances during multi create).
+        :param selected_alloc_reqs: This is a list of allocation requests that
+            are already allocated in placement for the main Selection for each
+            instance. This list matches selected_hosts by index. So
+            for the first instance the selected host is selected_hosts[0] and
+            the already allocated placement candidate is
+            selected_alloc_reqs[0].
+        """
         # We only need to filter/weigh the hosts again if we're dealing with
         # more than one instance and are going to be picking alternates.
         if index > 0 and num_alts > 0:
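A tiny sketch of the index pairing the docstring describes: the claimed hosts and their claimed allocation requests are kept in two parallel lists, so the later loop looks the request up by position instead of by resource provider uuid. Values are made up:

claimed_hosts = ["host-a", "host-b"]        # stand-ins for HostState objects
claimed_alloc_reqs = [
    {"allocations": {}},                    # the candidate claimed on host-a
    {"allocations": {}},                    # the candidate claimed on host-b
]

for i, selected_host in enumerate(claimed_hosts):
    # claimed_alloc_reqs[i] is the request already allocated in placement
    # for claimed_hosts[i]; no second lookup by provider uuid is needed.
    selected_alloc_req = claimed_alloc_reqs[i]
    print(selected_host, selected_alloc_req)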
@@ -546,11 +604,10 @@ class SchedulerManager(manager.Manager):
         # representing the selected host along with alternates from the same
         # cell.
         selections_to_return = []
-        for selected_host in selected_hosts:
+        for i, selected_host in enumerate(selected_hosts):
             # This is the list of hosts for one particular instance.
             if alloc_reqs_by_rp_uuid:
-                selected_alloc_req = alloc_reqs_by_rp_uuid.get(
-                    selected_host.uuid)[0]
+                selected_alloc_req = selected_alloc_reqs[i]
             else:
                 selected_alloc_req = None

@@ -571,15 +628,17 @@ class SchedulerManager(manager.Manager):
                 if len(selected_plus_alts) >= num_alts + 1:
                     break

+                # TODO(gibi): In theory we could generate alternatives on the
+                # same host if that host has different possible allocation
+                # candidates for the request. But we don't do that today
                 if host.cell_uuid == cell_uuid and host not in selected_hosts:
                     if alloc_reqs_by_rp_uuid is not None:
-                        alt_uuid = host.uuid
-                        if alt_uuid not in alloc_reqs_by_rp_uuid:
+                        if not host.allocation_candidates:
                             msg = ("A host state with uuid = '%s' that did "
-                                   "not have a matching allocation_request "
+                                   "not have any remaining allocation_request "
                                    "was encountered while scheduling. This "
                                    "host was skipped.")
-                            LOG.debug(msg, alt_uuid)
+                            LOG.debug(msg, host.uuid)
                             continue

                         # TODO(jaypipes): Loop through all allocation_requests
@@ -588,7 +647,13 @@ class SchedulerManager(manager.Manager):
                         # the future based on information in the provider
                         # summaries, we'll just try to claim resources using
                         # the first allocation_request
-                        alloc_req = alloc_reqs_by_rp_uuid[alt_uuid][0]
+                        # NOTE(gibi): we are using, and re-using, allocation
+                        # candidates for alternatives here. This is OK as
+                        # these candidates are not yet allocated in placement
+                        # and we don't know if an alternate will ever be used.
+                        # To increase our success we could try to use a
+                        # different candidate for each alternative though.
+                        alloc_req = host.allocation_candidates[0]
                         alt_selection = objects.Selection.from_host_state(
                             host, alloc_req, allocation_request_version)
                     else:
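And a condensed sketch of the alternate selection above: alternates reuse the first remaining (not yet claimed) candidate of each other host, and hosts whose candidate list a filter emptied are skipped. pick_alternates is an illustrative name, and cell matching is omitted for brevity:

def pick_alternates(selected_host, selected_alloc_req, hosts, num_alts):
    # The first entry is the claimed selection itself, followed by up to
    # num_alts alternates drawn from the remaining hosts.
    selections = [(selected_host, selected_alloc_req)]
    for host in hosts:
        if len(selections) >= num_alts + 1:
            break
        if host is selected_host or not host.allocation_candidates:
            continue
        # Reusing an unclaimed candidate is fine here, as the note above
        # says: nothing is allocated for an alternate unless it is used.
        selections.append((host, host.allocation_candidates[0]))
    return selections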
File diff suppressed because it is too large