From d5bed8fad9d51dd0d4ba9cf452c6f5c8c543b264 Mon Sep 17 00:00:00 2001
From: Nikola Dipanov
Date: Wed, 30 Sep 2015 19:30:59 +0100
Subject: [PATCH] Fix CPU pinning for odd number of CPUs with hyperthreading

Previously, the logic did not handle the case where the instance cell
has an odd number of vcpus that is larger than any sibling set on the
host. The logic would (accidentally) either refuse to pin such an
instance even though there were ample free cores, or (in case there
were enough free sibling sets on the host) spread the instance instead
of packing it onto siblings as the default policy suggests.

This patch fixes some incorrect assumptions in the code, while also
simplifying it.

As an added bonus, we still attempt to expose (via the Topology, and
this time correctly) the largest possible number of threads to the
instance.

Finally, we add some more comments to clear up the intent behind the
current packing logic, with pointers on how it could be tweaked to
achieve different results in the future.

Change-Id: I2c0b3b250ffb1a7483299df13b317cdb24f8141d
Co-Authored-By: Stephen Finucane
Closes-bug: 1501358
Closes-bug: 1467927
---
 nova/tests/unit/virt/test_hardware.py | 92 ++++++++++++++++++++++++++-
 nova/virt/hardware.py                 | 71 ++++++++++++++++-----
 2 files changed, 147 insertions(+), 16 deletions(-)

diff --git a/nova/tests/unit/virt/test_hardware.py b/nova/tests/unit/virt/test_hardware.py
index 7c251e1e07cd..3816cb73a0e0 100644
--- a/nova/tests/unit/virt/test_hardware.py
+++ b/nova/tests/unit/virt/test_hardware.py
@@ -12,6 +12,7 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+import collections
 import uuid
 
 import mock
@@ -1896,6 +1897,25 @@ class _CPUPinningTestCaseBase(object):
         self.assertEqual(len(instance_cell.cpuset),
                          len(instance_cell.cpu_pinning))
 
+    def assertPinningPreferThreads(self, instance_cell, host_cell):
+        """Make sure we are preferring threads.
+
+        We do this by asserting that at least 2 CPUs went to the same core,
+        if that was even possible to begin with.
+        """
+        max_free_siblings = max(map(len, host_cell.free_siblings))
+        if len(instance_cell) > 1 and max_free_siblings > 1:
+            cpu_to_sib = {}
+            for sib in host_cell.free_siblings:
+                for cpu in sib:
+                    cpu_to_sib[cpu] = tuple(sorted(sib))
+            pins_per_sib = collections.defaultdict(int)
+            for inst_p, host_p in instance_cell.cpu_pinning.items():
+                pins_per_sib[cpu_to_sib[host_p]] += 1
+            self.assertTrue(max(pins_per_sib.values()) > 1,
+                            "Seems threads were not preferred by the pinning "
+                            "logic.")
+
 
 class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
     def test_get_pinning_inst_too_large_cpu(self):
@@ -2031,7 +2051,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         self.assertInstanceCellPinned(inst_pin)
         got_topo = objects.VirtCPUTopology(sockets=1, cores=1, threads=4)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
-        got_pinning = {x: x + 4 for x in range(0, 4)}
+        got_pinning = {x: x for x in range(0, 4)}
         self.assertEqual(got_pinning, inst_pin.cpu_pinning)
 
     def test_get_pinning_host_siblings_fit(self):
@@ -2048,6 +2068,76 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_pinning = {x: x for x in range(0, 4)}
         self.assertEqual(got_pinning, inst_pin.cpu_pinning)
 
+    def test_get_pinning_host_siblings_instance_odd_fit(self):
+        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
+                                    memory=4096, memory_usage=0,
+                                    siblings=[set([0, 1]), set([2, 3]),
+                                              set([4, 5]), set([6, 7])],
+                                    mempages=[], pinned_cpus=set([]))
+        inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2, 3, 4]),
+                                            memory=2048)
+        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
+        self.assertInstanceCellPinned(inst_pin)
+        got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1)
+        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
+
+    def test_get_pinning_host_siblings_instance_fit_optimize_threads(self):
+        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
+                                    memory=4096, memory_usage=0,
+                                    siblings=[set([0, 1, 2, 3]),
+                                              set([4, 5, 6, 7])],
+                                    mempages=[], pinned_cpus=set([]))
+        inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2, 3, 4, 5]),
+                                            memory=2048)
+        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
+        self.assertInstanceCellPinned(inst_pin)
+        got_topo = objects.VirtCPUTopology(sockets=1, cores=3, threads=2)
+        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
+
+    def test_get_pinning_host_siblings_instance_odd_fit_w_usage(self):
+        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
+                                    memory=4096, memory_usage=0,
+                                    siblings=[set([0, 1]), set([2, 3]),
+                                              set([4, 5]), set([6, 7])],
+                                    mempages=[], pinned_cpus=set([0, 2, 5]))
+        inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2]),
+                                            memory=2048)
+        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
+        self.assertInstanceCellPinned(inst_pin)
+        got_topo = objects.VirtCPUTopology(sockets=1, cores=3, threads=1)
+        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
+
+    def test_get_pinning_host_siblings_instance_odd_fit_orphan_only(self):
+        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
+                                    memory=4096, memory_usage=0,
+                                    siblings=[set([0, 1]), set([2, 3]),
+                                              set([4, 5]), set([6, 7])],
+                                    mempages=[], pinned_cpus=set([0, 2, 5, 6]))
+        inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2, 3]),
+                                            memory=2048)
+        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
+        self.assertInstanceCellPinned(inst_pin)
+        got_topo = objects.VirtCPUTopology(sockets=1, cores=4, threads=1)
+        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
+
+    def test_get_pinning_host_siblings_large_instance_odd_fit(self):
+        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7,
+                                                      8, 9, 10, 11, 12, 13, 14,
+                                                      15]),
+                                    memory=4096, memory_usage=0,
+                                    siblings=[set([0, 8]), set([1, 9]),
+                                              set([2, 10]), set([3, 11]),
+                                              set([4, 12]), set([5, 13]),
+                                              set([6, 14]), set([7, 15])],
+                                    mempages=[], pinned_cpus=set([]))
+        inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2, 3, 4]),
+                                            memory=2048)
+        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
+        self.assertInstanceCellPinned(inst_pin)
+        self.assertPinningPreferThreads(inst_pin, host_pin)
+        got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1)
+        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
+
 
 class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
     def test_host_numa_fit_instance_to_host_single_cell(self):
diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py
index 1479e461f46f..682df93ed185 100644
--- a/nova/virt/hardware.py
+++ b/nova/virt/hardware.py
@@ -13,6 +13,7 @@
 # under the License.
 
 import collections
+import fractions
 import itertools
 
 from oslo_config import cfg
@@ -674,6 +675,16 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
     This method will calculate the pinning for the given instance and it's
     topology, making sure that hyperthreads of the instance match up with
     those of the host when the pinning takes effect.
+
+    Currently the strategy for packing is to prefer siblings and try to use
+    cores evenly, by using emptier cores first. This is achieved by the way we
+    order cores in the can_pack structure, and the order in which we iterate
+    through it.
+
+    The main packing loop that iterates over the can_pack dictionary will not
+    currently try to look for a fit that maximizes the number of siblings,
+    but will simply rely on the iteration ordering and picking the first
+    viable placement.
     """
 
     # We build up a data structure 'can_pack' that answers the question:
@@ -689,8 +700,44 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
 
         if threads_per_core * len(cores_list) < len(instance_cell):
             return False
-        else:
-            return len(instance_cell) % threads_per_core == 0
+        return True
+
+    def _orphans(instance_cell, threads_per_core):
+        """Number of instance CPUs which will not fill up a host core.
+
+        Best explained by an example: consider this set of free host cores:
+            [(0, 1), (3, 5), (6, 7, 8)]
+        This would be a case of 2 threads_per_core, AKA an entry for 2 in the
+        can_pack structure.
+
+        If we attempt to pack a 5 core instance onto it, then because we
+        iterate the list in order, we will end up with a single core of the
+        instance pinned to a thread "alone" (with id 6), and we would have
+        one 'orphan' vcpu.
+        """
+        return len(instance_cell) % threads_per_core
+
+    def _threads(instance_cell, threads_per_core):
+        """Threads to expose to the instance via the VirtCPUTopology.
+
+        This is calculated by taking the GCD of the number of threads we are
+        considering at the moment, and the number of orphans. An example for
+            instance_cell = 6
+            threads_per_core = 4
+
+        So we can fit the instance as such:
+            [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9, 10, 11)]
+              x  x  x  x    x  x
+
+        We can't expose 4 threads, as that will not be a valid topology (all
+        cores exposed to the guest have to have an equal number of threads),
+        and 1 would be too restrictive, but we want all threads that the
+        guest sees to be on the same physical core, so we take the GCD of 4
+        (max number of threads) and 2 (number of 'orphan' CPUs) and get 2 as
+        the number of threads.
+        """
+        return fractions.gcd(threads_per_core, _orphans(instance_cell,
+                                                        threads_per_core))
 
     # We iterate over the can_pack dict in descending order of cores that
     # can be packed - an attempt to get even distribution over time
@@ -702,9 +749,11 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
             pinning = zip(sorted(instance_cell.cpuset),
                           itertools.chain(*sliced_sibs))
 
+            threads = _threads(instance_cell, cores_per_sib)
+            cores = len(instance_cell) / threads
             topology = objects.VirtCPUTopology(sockets=1,
-                                               cores=len(sliced_sibs),
-                                               threads=cores_per_sib)
+                                               cores=cores,
+                                               threads=threads)
             instance_cell.pin_vcpus(*pinning)
             instance_cell.cpu_topology = topology
             instance_cell.id = host_cell_id
@@ -729,17 +778,9 @@ def _numa_fit_instance_cell_with_pinning(host_cell, instance_cell):
         return
 
     if host_cell.siblings:
-        # Try to pack the instance cell in one core
-        largest_free_sibling_set = sorted(
-            host_cell.free_siblings, key=len)[-1]
-        if len(instance_cell.cpuset) <= len(largest_free_sibling_set):
-            return _pack_instance_onto_cores(
-                [largest_free_sibling_set], instance_cell, host_cell.id)
-
-        # We can't to pack it onto one core so try with avail siblings
-        else:
-            return _pack_instance_onto_cores(
-                host_cell.free_siblings, instance_cell, host_cell.id)
+        # Try to pack the instance cell onto cores
+        return _pack_instance_onto_cores(
+            host_cell.free_siblings, instance_cell, host_cell.id)
     else:
         # Straightforward to pin to available cpus when there is no
        # hyperthreading on the host
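
A note on the thread-count arithmetic: the sketch below mirrors the new
_orphans()/_threads() helpers as standalone functions so the numbers can be
checked in isolation. The names and signatures here are simplified for
illustration (plain integers instead of an InstanceNUMACell), and math.gcd
stands in for the fractions.gcd call used by the Python 2 era code; gcd(n, 0)
is n for both, so the no-orphan case needs no special handling.

    import math


    def orphans(num_instance_cpus, threads_per_core):
        # Instance CPUs left over after filling up whole host cores.
        return num_instance_cpus % threads_per_core


    def threads(num_instance_cpus, threads_per_core):
        # Largest thread count that still gives every guest core an equal
        # number of threads: the GCD of the host core width and the orphans.
        return math.gcd(threads_per_core,
                        orphans(num_instance_cpus, threads_per_core))


    # Worked examples matching the docstring and the new tests:
    assert threads(6, 4) == 2  # docstring example: exposed as 3 cores x 2 threads
    assert threads(5, 2) == 1  # odd instance on 2-thread cores: 5 cores x 1 thread
    assert threads(4, 2) == 2  # no orphans: full 2-thread cores exposed as-is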
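
The packing order described in the updated _pack_instance_onto_cores()
docstring (prefer siblings, walk can_pack in descending order of thread
count, take the first viable placement) can also be illustrated with a
deliberately simplified toy packer. This is only a sketch under similar
simplifying assumptions: it works on plain sets of CPU ids, skips the
NUMACell bookkeeping and simplifies the sibling slicing, so it is not the
Nova implementation.

    import collections
    import itertools
    import math


    def toy_pack(free_siblings, num_vcpus):
        # 'Given the number of threads I want to pack, which sibling sets can
        # accommodate it?' - the same question the can_pack structure answers.
        can_pack = collections.defaultdict(list)
        for sib in free_siblings:
            for threads_no in range(1, len(sib) + 1):
                can_pack[threads_no].append(sorted(sib))

        # Descending order: try the widest thread count first.
        for threads_per_core in sorted(can_pack, reverse=True):
            sib_list = can_pack[threads_per_core]
            if threads_per_core * len(sib_list) < num_vcpus:
                continue  # not enough room at this thread width
            # Take threads_per_core CPUs from each sibling set, in order, and
            # pin the instance vCPUs to the first num_vcpus of them.
            host_cpus = itertools.chain.from_iterable(
                sib[:threads_per_core] for sib in sib_list)
            pinning = dict(zip(range(num_vcpus), host_cpus))
            threads = math.gcd(threads_per_core,
                               num_vcpus % threads_per_core)
            return pinning, threads
        return None


    # The scenario from bug 1501358: a 5 vCPU instance on four free 2-thread
    # cores is packed onto siblings (with one orphan vCPU) rather than being
    # rejected or spread, the two failure modes the commit message describes.
    print(toy_pack([{0, 1}, {2, 3}, {4, 5}, {6, 7}], 5))
    # ({0: 0, 1: 1, 2: 2, 3: 3, 4: 4}, 1)

The same descending walk is why the six vCPU instance in the
optimize_threads test above ends up exposed as 3 cores x 2 threads rather
than being spread one thread per core.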