Merge "Fix CPU pinning for odd number of CPUs w hyperthreading"

commit 1def6efe82

@@ -12,6 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.

import collections
import uuid

import mock
@@ -1896,6 +1897,25 @@ class _CPUPinningTestCaseBase(object):
        self.assertEqual(len(instance_cell.cpuset),
                         len(instance_cell.cpu_pinning))

    def assertPinningPreferThreads(self, instance_cell, host_cell):
        """Make sure we are preferring threads.

        We do this by assessing that at least 2 CPUs went to the same core
        if that was even possible to begin with.
        """
        max_free_siblings = max(map(len, host_cell.free_siblings))
        if len(instance_cell) > 1 and max_free_siblings > 1:
            cpu_to_sib = {}
            for sib in host_cell.free_siblings:
                for cpu in sib:
                    cpu_to_sib[cpu] = tuple(sorted(sib))
            pins_per_sib = collections.defaultdict(int)
            for inst_p, host_p in instance_cell.cpu_pinning.items():
                pins_per_sib[cpu_to_sib[host_p]] += 1
            self.assertTrue(max(pins_per_sib.values()) > 1,
                            "Seems threads were not preferred by the pinning "
                            "logic.")
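
For illustration, a minimal standalone sketch of the bookkeeping the helper above performs; the sibling layout and pinning below are made up for the example and are not taken from any test in this change:

    import collections

    free_siblings = [{0, 8}, {1, 9}]        # host cores, two threads each
    cpu_pinning = {0: 0, 1: 8, 2: 1}        # instance vCPU -> host CPU

    cpu_to_sib = {}
    for sib in free_siblings:
        for cpu in sib:
            cpu_to_sib[cpu] = tuple(sorted(sib))

    pins_per_sib = collections.defaultdict(int)
    for inst_p, host_p in cpu_pinning.items():
        pins_per_sib[cpu_to_sib[host_p]] += 1

    # Host CPUs 0 and 8 are siblings, so that core received two pins and
    # max(pins_per_sib.values()) > 1 holds, i.e. the assertion would pass.
    print(dict(pins_per_sib))               # {(0, 8): 2, (1, 9): 1}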


class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
    def test_get_pinning_inst_too_large_cpu(self):
@@ -2031,7 +2051,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
        self.assertInstanceCellPinned(inst_pin)
        got_topo = objects.VirtCPUTopology(sockets=1, cores=1, threads=4)
        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
        got_pinning = {x: x + 4 for x in range(0, 4)}
        got_pinning = {x: x for x in range(0, 4)}
        self.assertEqual(got_pinning, inst_pin.cpu_pinning)

    def test_get_pinning_host_siblings_fit(self):
@@ -2048,6 +2068,76 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
        got_pinning = {x: x for x in range(0, 4)}
        self.assertEqual(got_pinning, inst_pin.cpu_pinning)

    def test_get_pinning_host_siblings_instance_odd_fit(self):
        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
                                    memory=4096, memory_usage=0,
                                    siblings=[set([0, 1]), set([2, 3]),
                                              set([4, 5]), set([6, 7])],
                                    mempages=[], pinned_cpus=set([]))
        inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2, 3, 4]),
                                            memory=2048)
        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
        self.assertInstanceCellPinned(inst_pin)
        got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1)
        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)

    def test_get_pinning_host_siblings_instance_fit_optimize_threads(self):
        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
                                    memory=4096, memory_usage=0,
                                    siblings=[set([0, 1, 2, 3]),
                                              set([4, 5, 6, 7])],
                                    mempages=[], pinned_cpus=set([]))
        inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2, 3, 4, 5]),
                                            memory=2048)
        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
        self.assertInstanceCellPinned(inst_pin)
        got_topo = objects.VirtCPUTopology(sockets=1, cores=3, threads=2)
        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)

    def test_get_pinning_host_siblings_instance_odd_fit_w_usage(self):
        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
                                    memory=4096, memory_usage=0,
                                    siblings=[set([0, 1]), set([2, 3]),
                                              set([4, 5]), set([6, 7])],
                                    mempages=[], pinned_cpus=set([0, 2, 5]))
        inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2]),
                                            memory=2048)
        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
        self.assertInstanceCellPinned(inst_pin)
        got_topo = objects.VirtCPUTopology(sockets=1, cores=3, threads=1)
        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
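
A hedged recomputation of what the w_usage test above expects, assuming free_siblings reports each sibling set minus the CPUs already pinned on the host (that assumption about the property is mine, not something this diff shows):

    siblings = [{0, 1}, {2, 3}, {4, 5}, {6, 7}]
    pinned_cpus = {0, 2, 5}

    free_siblings = [sib - pinned_cpus for sib in siblings]
    print(free_siblings)     # [{1}, {3}, {4}, {6, 7}]

    # Only one full thread pair ({6, 7}) is still free, which cannot hold all
    # three guest vCPUs as siblings, so the expected guest topology is
    # 3 cores x 1 thread, as the test asserts.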

    def test_get_pinning_host_siblings_instance_odd_fit_orphan_only(self):
        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
                                    memory=4096, memory_usage=0,
                                    siblings=[set([0, 1]), set([2, 3]),
                                              set([4, 5]), set([6, 7])],
                                    mempages=[], pinned_cpus=set([0, 2, 5, 6]))
        inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2, 3]),
                                            memory=2048)
        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
        self.assertInstanceCellPinned(inst_pin)
        got_topo = objects.VirtCPUTopology(sockets=1, cores=4, threads=1)
        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)

    def test_get_pinning_host_siblings_large_instance_odd_fit(self):
        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7,
                                                      8, 9, 10, 11, 12, 13, 14,
                                                      15]),
                                    memory=4096, memory_usage=0,
                                    siblings=[set([0, 8]), set([1, 9]),
                                              set([2, 10]), set([3, 11]),
                                              set([4, 12]), set([5, 13]),
                                              set([6, 14]), set([7, 15])],
                                    mempages=[], pinned_cpus=set([]))
        inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2, 3, 4]),
                                            memory=2048)
        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
        self.assertInstanceCellPinned(inst_pin)
        self.assertPinningPreferThreads(inst_pin, host_pin)
        got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1)
        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)


class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
    def test_host_numa_fit_instance_to_host_single_cell(self):

@@ -13,6 +13,7 @@
# under the License.

import collections
import fractions
import itertools

from oslo_config import cfg
@@ -674,6 +675,16 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
    This method will calculate the pinning for the given instance and its
    topology, making sure that hyperthreads of the instance match up with
    those of the host when the pinning takes effect.

    Currently the strategy for packing is to prefer siblings and try to use
    cores evenly, by using emptier cores first. This is achieved by the way we
    order cores in the can_pack structure, and the order in which we iterate
    through it.

    The main packing loop that iterates over the can_pack dictionary will not
    currently try to look for a fit that maximizes the number of siblings, but
    will simply rely on the iteration ordering and picking the first viable
    placement.
    """

    # We build up a data structure 'can_pack' that answers the question:
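
The comment above introduces the can_pack structure; the question it answers continues past the end of this hunk. As a rough illustration of the idea the docstring describes, grouping free sibling sets by how many threads per core they can still offer, here is a standalone sketch; the names and exact shape are assumptions, not the committed structure:

    import collections

    free_siblings = [{0, 1}, {3, 5}, {6, 7, 8}]   # free threads per host core

    can_pack = collections.defaultdict(list)
    for sib in free_siblings:
        # A core with N free threads can serve any granularity from 1 to N.
        for threads_per_core in range(1, len(sib) + 1):
            can_pack[threads_per_core].append(sib)

    # can_pack[2] lists every core that can still provide 2 sibling threads,
    # can_pack[3] only the one core that can provide 3.
    print(dict(can_pack))
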
@@ -689,8 +700,44 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):

        if threads_per_core * len(cores_list) < len(instance_cell):
            return False
        else:
            return len(instance_cell) % threads_per_core == 0
        return True

    def _orphans(instance_cell, threads_per_core):
        """Number of instance CPUs which will not fill up a host core.

        Best explained by an example: consider a set of free host cores such
        as:
            [(0, 1), (3, 5), (6, 7, 8)]
        This would be a case of 2 threads_per_core AKA an entry for 2 in the
        can_pack structure.

        If we attempt to pack a 5 core instance on it - due to the fact that
        we iterate the list in order, we will end up with a single core of
        the instance pinned to a thread "alone" (with id 6), and we would
        have one 'orphan' vcpu.
        """
        return len(instance_cell) % threads_per_core
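
The change above relaxes the packability check: instead of requiring the instance CPU count to be an exact multiple of threads_per_core, any count that fits is now accepted, and the helpers introduced here account for the remainder. A small standalone check of the orphan rule (hypothetical helper, not the committed code):

    def orphans(n_instance_cpus, threads_per_core):
        return n_instance_cpus % threads_per_core

    assert orphans(5, 2) == 1   # the docstring's example: one lone vCPU
    assert orphans(4, 2) == 0   # even counts still fill host cores exactly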

    def _threads(instance_cell, threads_per_core):
        """Threads to expose to the instance via the VirtCPUTopology.

        This is calculated by taking the GCD of the number of threads we are
        considering at the moment, and the number of orphans. An example for
            instance_cell = 6
            threads_per_core = 4

        So we can fit the instance as such:
            [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9, 10, 11)]
              x  x  x  x    x  x

        We can't expose 4 threads, as that will not be a valid topology (all
        cores exposed to the guest have to have an equal number of threads),
        and 1 would be too restrictive, but we want all threads that the guest
        sees to be on the same physical core, so we take the GCD of 4 (max
        number of threads) and 2 (number of 'orphan' CPUs) and get 2 as the
        number of threads.
        """
        return fractions.gcd(threads_per_core, _orphans(instance_cell,
                                                        threads_per_core))
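
Replaying the docstring's arithmetic as a standalone snippet; math.gcd stands in here for the fractions.gcd call used in the module:

    import math

    threads_per_core = 4        # free threads on each host core
    instance_cpus = 6           # vCPUs requested by the instance

    orphan_cpus = instance_cpus % threads_per_core           # 2 vCPUs left over
    guest_threads = math.gcd(threads_per_core, orphan_cpus)  # gcd(4, 2) == 2
    guest_cores = instance_cpus // guest_threads             # 3 guest cores

    # Matches the new test expectation:
    # objects.VirtCPUTopology(sockets=1, cores=3, threads=2)
    assert (guest_cores, guest_threads) == (3, 2)
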
    # We iterate over the can_pack dict in descending order of cores that
    # can be packed - an attempt to get even distribution over time
@@ -702,9 +749,11 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
        pinning = zip(sorted(instance_cell.cpuset),
                      itertools.chain(*sliced_sibs))

        threads = _threads(instance_cell, cores_per_sib)
        cores = len(instance_cell) / threads
        topology = objects.VirtCPUTopology(sockets=1,
                                           cores=len(sliced_sibs),
                                           threads=cores_per_sib)
                                           cores=cores,
                                           threads=threads)
        instance_cell.pin_vcpus(*pinning)
        instance_cell.cpu_topology = topology
        instance_cell.id = host_cell_id
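
A rough standalone replay of the pinning and topology assembly in the hunk above; the sliced_sibs value is invented for the example and dict() is added so the zip result prints, so this is not a mirror of the module's exact intermediate state:

    import itertools

    instance_cpus = [0, 1, 2, 3, 4, 5]        # guest vCPU ids
    sliced_sibs = [(0, 1), (2, 3), (4, 5)]    # host thread pairs chosen
    threads, cores = 2, 3                     # from the gcd rule above

    pinning = dict(zip(sorted(instance_cpus),
                       itertools.chain(*sliced_sibs)))
    # Guest vCPU n lands on host CPU n in this toy layout:
    # {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5}
    print(pinning)
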
@@ -729,17 +778,9 @@ def _numa_fit_instance_cell_with_pinning(host_cell, instance_cell):
        return

    if host_cell.siblings:
        # Try to pack the instance cell in one core
        largest_free_sibling_set = sorted(
            host_cell.free_siblings, key=len)[-1]
        if len(instance_cell.cpuset) <= len(largest_free_sibling_set):
            return _pack_instance_onto_cores(
                [largest_free_sibling_set], instance_cell, host_cell.id)

        # We can't pack it onto one core so try with avail siblings
        else:
            return _pack_instance_onto_cores(
                host_cell.free_siblings, instance_cell, host_cell.id)
        # Try to pack the instance cell onto cores
        return _pack_instance_onto_cores(
            host_cell.free_siblings, instance_cell, host_cell.id)
    else:
        # Straightforward to pin to available cpus when there is no
        # hyperthreading on the host