Merge "Add nova-manage placement heal_allocations CLI"
commit 07652b8bab
@@ -276,6 +276,38 @@ Nova Cells v2
    found, 3 if a host with that name is not in a cell with that uuid, 4 if
    a host with that name has instances (host not empty).

Placement
~~~~~~~~~

``nova-manage placement heal_allocations [--max-count <max_count>] [--verbose]``
    Iterates over non-cell0 cells looking for instances which do not have
    allocations in the Placement service and which are not undergoing a task
    state transition. For each instance found, allocations are created against
    the compute node resource provider for that instance based on the flavor
    associated with the instance.

    Specify ``--max-count`` to control the maximum number of instances to
    process. If not specified, all instances in each cell will be processed
    in batches of 50. If you have a large number of instances, consider
    specifying a custom value and running the command until it exits with
    0 or 4.

    Specify ``--verbose`` to get detailed progress output during execution.

    This command requires that the ``[api_database]/connection`` and
    ``[placement]`` configuration options are set.

    Return codes:

    * 0: Command completed successfully and allocations were created.
    * 1: --max-count was reached and there are more instances to process.
    * 2: Unable to find a compute node record for a given instance.
    * 3: Unable to create allocations for an instance against its
      compute node resource provider.
    * 4: Command completed successfully but no allocations were created.
    * 127: Invalid input.
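Illustrative only, not part of this change: the batching advice above can be scripted. This hypothetical wrapper assumes ``nova-manage`` is on the PATH and uses an arbitrary batch size of 100, re-running the command until it stops returning 1:

    # Hypothetical wrapper, not shipped with nova: heal in batches of 100
    # until nothing is left to process or an error occurs.
    import subprocess

    def heal_all_allocations(batch_size=100):
        while True:
            rc = subprocess.call(
                ['nova-manage', 'placement', 'heal_allocations',
                 '--max-count', str(batch_size), '--verbose'])
            if rc != 1:
                # 0 or 4 means done; 2, 3 or 127 needs investigation.
                return rc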
See Also
========
@@ -64,6 +64,8 @@ from nova.objects import quotas as quotas_obj
from nova.objects import request_spec
from nova import quota
from nova import rpc
from nova.scheduler.client import report
from nova.scheduler import utils as scheduler_utils
from nova import utils
from nova import version
from nova.virt import ironic
@@ -1713,6 +1715,246 @@ class CellV2Commands(object):
        return 0


class PlacementCommands(object):
    """Commands for managing placement resources."""

    @staticmethod
    def _get_compute_node_uuid(ctxt, instance, node_cache):
        """Find the ComputeNode.uuid for the given Instance

        :param ctxt: cell-targeted nova.context.RequestContext
        :param instance: the instance to lookup a compute node
        :param node_cache: dict of Instance.node keys to ComputeNode.uuid
            values; this cache is updated if a new node is processed.
        :returns: ComputeNode.uuid for the given instance
        :raises: nova.exception.ComputeHostNotFound
        """
        if instance.node in node_cache:
            return node_cache[instance.node]

        compute_node = objects.ComputeNode.get_by_host_and_nodename(
            ctxt, instance.host, instance.node)
        node_uuid = compute_node.uuid
        node_cache[instance.node] = node_uuid
        return node_uuid

    def _heal_instances_in_cell(self, ctxt, max_count, unlimited, output,
                                placement):
        """Checks for instances to heal in a given cell.

        :param ctxt: cell-targeted nova.context.RequestContext
        :param max_count: batch size (limit per instance query)
        :param unlimited: True if all instances in the cell should be
            processed, else False to just process $max_count instances
        :param output: function that takes a single message for verbose output
        :param placement: nova.scheduler.client.report.SchedulerReportClient
            to communicate with the Placement service API.
        :return: Number of instances that had allocations created.
        :raises: nova.exception.ComputeHostNotFound if a compute node for a
            given instance cannot be found
        :raises: AllocationCreateFailed if unable to create allocations for
            a given instance against a given compute node resource provider
        """
        # Keep a cache of instance.node to compute node resource provider
        # UUID. This will save some queries for non-ironic instances to the
        # compute_nodes table.
        node_cache = {}
        # Track the total number of instances that have allocations created
        # for them in this cell. We return when num_processed equals max_count
        # and unlimited=True or we exhaust the number of instances to process
        # in this cell.
        num_processed = 0
        # Get all instances from this cell which have a host and are not
        # undergoing a task state transition. Go from oldest to newest.
        # NOTE(mriedem): Unfortunately we don't have a marker to use
        # between runs where the user is specifying --max-count.
        # TODO(mriedem): Store a marker in system_metadata so we can
        # automatically pick up where we left off without the user having
        # to pass it in (if unlimited is False).
        instances = objects.InstanceList.get_by_filters(
            ctxt, filters={}, sort_key='created_at', sort_dir='asc',
            limit=max_count, expected_attrs=['flavor'])
        while instances:
            output(_('Found %s candidate instances.') % len(instances))
            # For each instance in this list, we need to see if it has
            # allocations in placement and if so, assume it's correct and
            # continue.
            for instance in instances:
                if instance.task_state is not None:
                    output(_('Instance %(instance)s is undergoing a task '
                             'state transition: %(task_state)s') %
                           {'instance': instance.uuid,
                            'task_state': instance.task_state})
                    continue

                if instance.node is None:
                    output(_('Instance %s is not on a host.') % instance.uuid)
                    continue

                allocations = placement.get_allocations_for_consumer(
                    ctxt, instance.uuid)
                if allocations:
                    output(_('Instance %s already has allocations.') %
                           instance.uuid)
                    # TODO(mriedem): Check to see if the allocation project_id
                    # and user_id matches the instance project and user and
                    # fix the allocation project/user if they don't match; see
                    # blueprint add-consumer-generation for details.
                    continue

                # This instance doesn't have allocations so we need to find
                # its compute node resource provider.
                node_uuid = self._get_compute_node_uuid(
                    ctxt, instance, node_cache)

                # Now get the resource allocations for the instance based
                # on its embedded flavor.
                resources = scheduler_utils.resources_from_flavor(
                    instance, instance.flavor)
                if placement.put_allocations(
                        ctxt, node_uuid, instance.uuid, resources,
                        instance.project_id, instance.user_id):
                    num_processed += 1
                    output(_('Successfully created allocations for '
                             'instance %(instance)s against resource '
                             'provider %(provider)s.') %
                           {'instance': instance.uuid, 'provider': node_uuid})
                else:
                    raise exception.AllocationCreateFailed(
                        instance=instance.uuid, provider=node_uuid)

                # Make sure we don't go over the max count. Note that we
                # don't include instances that already have allocations in the
                # max_count number, only the number of instances that have
                # successfully created allocations.
                if not unlimited and num_processed == max_count:
                    return num_processed

            # Use a marker to get the next page of instances in this cell.
            # Note that InstanceList doesn't support slice notation.
            marker = instances[len(instances) - 1].uuid
            instances = objects.InstanceList.get_by_filters(
                ctxt, filters={}, sort_key='created_at', sort_dir='asc',
                limit=max_count, marker=marker, expected_attrs=['flavor'])

        return num_processed
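The marker-based loop above is a standard paging pattern. As a stripped-down sketch, where ``get_page`` is a hypothetical callable standing in for ``objects.InstanceList.get_by_filters`` rather than a real nova helper:

# Sketch of the paging pattern used by _heal_instances_in_cell; get_page is
# a hypothetical callable(limit, marker) returning an ordered batch.
def iterate_in_batches(get_page, batch_size):
    page = get_page(limit=batch_size, marker=None)
    while page:
        for item in page:
            yield item
        # Versioned object lists do not support slice notation, so index the
        # last element explicitly to build the marker for the next page.
        marker = page[len(page) - 1].uuid
        page = get_page(limit=batch_size, marker=marker)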
    @action_description(
        _("Iterates over non-cell0 cells looking for instances which do "
          "not have allocations in the Placement service and which are not "
          "undergoing a task state transition. For each instance found, "
          "allocations are created against the compute node resource provider "
          "for that instance based on the flavor associated with the "
          "instance. This command requires that the [api_database]/connection "
          "and [placement] configuration options are set."))
    @args('--max-count', metavar='<max_count>', dest='max_count',
          help='Maximum number of instances to process. If not specified, '
               'all instances in each cell will be processed in batches '
               'of 50. If you have a large number of instances, consider '
               'specifying a custom value and running the command until it '
               'exits with 0 or 4.')
    @args('--verbose', action='store_true', dest='verbose', default=False,
          help='Provide verbose output during execution.')
    def heal_allocations(self, max_count=None, verbose=False):
        """Heals instance allocations in the Placement service

        Return codes:

        * 0: Command completed successfully and allocations were created.
        * 1: --max-count was reached and there are more instances to process.
        * 2: Unable to find a compute node record for a given instance.
        * 3: Unable to create allocations for an instance against its
          compute node resource provider.
        * 4: Command completed successfully but no allocations were created.
        * 127: Invalid input.
        """
        # NOTE(mriedem): Thoughts on ways to expand this:
        # - add a --dry-run option to just print which instances would have
        #   allocations created for them
        # - allow passing a specific cell to heal
        # - allow filtering on enabled/disabled cells
        # - allow passing a specific instance to heal
        # - add a force option to force allocations for instances which have
        #   task_state is not None (would get complicated during a migration);
        #   for example, this could cleanup ironic instances that have
        #   allocations on VCPU/MEMORY_MB/DISK_GB but are now using a custom
        #   resource class
        # - add an option to overwrite allocations for instances which already
        #   have allocations (but the operator thinks might be wrong?); this
        #   would probably only be safe with a specific instance.
        # - deal with nested resource providers?

        output = lambda msg: None
        if verbose:
            output = lambda msg: print(msg)

        # TODO(mriedem): Rather than --max-count being both a total and batch
        # count, should we have separate options to be specific, i.e. --total
        # and --batch-size? Then --batch-size defaults to 50 and --total
        # defaults to None to mean unlimited.
        if max_count is not None:
            try:
                max_count = int(max_count)
            except ValueError:
                max_count = -1
            unlimited = False
            if max_count < 1:
                print(_('Must supply a positive integer for --max-count.'))
                return 127
        else:
            max_count = 50
            unlimited = True
            output(_('Running batches of %i until complete') % max_count)

        ctxt = context.get_admin_context()
        cells = objects.CellMappingList.get_all(ctxt)
        if not cells:
            output(_('No cells to process.'))
            return 4

        placement = report.SchedulerReportClient()
        num_processed = 0
        # TODO(mriedem): Use context.scatter_gather_skip_cell0.
        for cell in cells:
            # Skip cell0 since that is where instances go that do not get
            # scheduled and hence would not have allocations against a host.
            if cell.uuid == objects.CellMapping.CELL0_UUID:
                continue
            output(_('Looking for instances in cell: %s') % cell.identity)

            limit_per_cell = max_count
            if not unlimited:
                # Adjust the limit for the next cell. For example, if the user
                # only wants to process a total of 100 instances and we did
                # 75 in cell1, then we only need 25 more from cell2 and so on.
                limit_per_cell = max_count - num_processed

            with context.target_cell(ctxt, cell) as cctxt:
                try:
                    num_processed += self._heal_instances_in_cell(
                        cctxt, limit_per_cell, unlimited, output, placement)
                except exception.ComputeHostNotFound as e:
                    print(e.format_message())
                    return 2
                except exception.AllocationCreateFailed as e:
                    print(e.format_message())
                    return 3

            # Make sure we don't go over the max count. Note that we
            # don't include instances that already have allocations in the
            # max_count number, only the number of instances that have
            # successfully created allocations.
            if num_processed == max_count:
                output(_('Max count reached. Processed %s instances.')
                       % num_processed)
                return 1

        output(_('Processed %s instances.') % num_processed)
        if not num_processed:
            return 4
        return 0


CATEGORIES = {
    'api_db': ApiDbCommands,
    'cell': CellCommands,
@@ -1720,6 +1962,7 @@ CATEGORIES = {
    'db': DbCommands,
    'floating': FloatingIpCommands,
    'network': NetworkCommands,
    'placement': PlacementCommands
}
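As the functional tests further below demonstrate, the new command class can also be driven directly from Python. A minimal sketch, assuming a node where nova is configured with ``[api_database]/connection`` and ``[placement]`` set; the return-code handling is illustrative:

# Minimal programmatic invocation, mirroring what the functional tests do.
from nova.cmd import manage

rc = manage.PlacementCommands().heal_allocations(max_count=50, verbose=True)
if rc == 1:
    print('More instances remain; run again.')
elif rc in (0, 4):
    print('Healing complete.')
else:
    print('heal_allocations failed with return code %d' % rc)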
@@ -2287,3 +2287,8 @@ class DeviceDeletionException(NovaException):
class OptRequiredIfOtherOptValue(NovaException):
    msg_fmt = _("The %(then_opt)s option is required if %(if_opt)s is "
                "specified as '%(if_value)s'.")


class AllocationCreateFailed(NovaException):
    msg_fmt = _('Failed to create allocations for instance %(instance)s '
                'against resource provider %(provider)s.')
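A quick sketch of how the new exception renders: ``format_message()`` interpolates ``msg_fmt`` with the keyword arguments, which is the standard ``NovaException`` behavior. The uuids below are placeholders:

# Sketch only; placeholders stand in for real instance and provider uuids.
from nova import exception

exc = exception.AllocationCreateFailed(
    instance='<instance-uuid>', provider='<rp-uuid>')
print(exc.format_message())
# Failed to create allocations for instance <instance-uuid> against
# resource provider <rp-uuid>.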
@@ -415,7 +415,8 @@ class TestCase(testtools.TestCase):
            # otherwise we'll fail to update the scheduler while running
            # the compute node startup routines below.
            ctxt = context.get_context()
-           cell = self.cell_mappings[kwargs.pop('cell', CELL1_NAME)]
+           cell_name = kwargs.pop('cell', CELL1_NAME) or CELL1_NAME
+           cell = self.cell_mappings[cell_name]
            hm = objects.HostMapping(context=ctxt,
                                     host=host or name,
                                     cell_mapping=cell)
@@ -10,10 +10,14 @@
# License for the specific language governing permissions and limitations
# under the License.

import fixtures
from six.moves import StringIO

from nova.cmd import manage
from nova import context
from nova import objects
from nova import test
from nova.tests.functional import test_servers


class NovaManageDBIronicTest(test.TestCase):
@@ -348,3 +352,222 @@ class NovaManageCellV2Test(test.TestCase):
        cns = objects.ComputeNodeList.get_all(self.context)
        self.assertEqual(1, len(cns))
        self.assertEqual(0, cns[0].mapped)


class TestNovaManagePlacementHealAllocations(
        test_servers.ProviderUsageBaseTestCase):
    """Functional tests for nova-manage placement heal_allocations"""

    # This is required by the parent class.
    compute_driver = 'fake.SmallFakeDriver'
    # We want to test iterating across multiple cells.
    NUMBER_OF_CELLS = 2

    def setUp(self):
        # Since the CachingScheduler does not use Placement, we want to use
        # the CachingScheduler to create instances and then we can heal their
        # allocations via the CLI.
        self.flags(driver='caching_scheduler', group='scheduler')
        super(TestNovaManagePlacementHealAllocations, self).setUp()
        self.cli = manage.PlacementCommands()
        # We need to start a compute in each non-cell0 cell.
        for cell_name, cell_mapping in self.cell_mappings.items():
            if cell_mapping.uuid == objects.CellMapping.CELL0_UUID:
                continue
            self._start_compute(cell_name, cell_name=cell_name)
        # Make sure we have two hypervisors reported in the API.
        hypervisors = self.admin_api.api_get(
            '/os-hypervisors').body['hypervisors']
        self.assertEqual(2, len(hypervisors))
        self.flavor = self.api.get_flavors()[0]
        self.output = StringIO()
        self.useFixture(fixtures.MonkeyPatch('sys.stdout', self.output))

    def _boot_and_assert_no_allocations(self, flavor, hostname):
        """Creates a server on the given host and asserts neither have usage

        :param flavor: the flavor used to create the server
        :param hostname: the host on which to create the server
        :returns: two-item tuple of the server and the compute node resource
            provider uuid
        """
        server_req = self._build_minimal_create_server_request(
            self.api, 'some-server', flavor_id=flavor['id'],
            image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
            networks=[])
        server_req['availability_zone'] = 'nova:%s' % hostname
        created_server = self.api.post_server({'server': server_req})
        server = self._wait_for_state_change(
            self.admin_api, created_server, 'ACTIVE')

        # Verify that our source host is what the server ended up on
        self.assertEqual(hostname, server['OS-EXT-SRV-ATTR:host'])

        # Check that the compute node resource provider has no allocations.
        rp_uuid = self._get_provider_uuid_by_host(hostname)
        provider_usages = self._get_provider_usages(rp_uuid)
        for resource_class, usage in provider_usages.items():
            self.assertEqual(
                0, usage,
                'Compute node resource provider %s should not have %s '
                'usage when using the CachingScheduler.' %
                (hostname, resource_class))

        # Check that the server has no allocations.
        allocations = self._get_allocations_by_server_uuid(server['id'])
        self.assertEqual({}, allocations,
                         'Server should not have allocations when using '
                         'the CachingScheduler.')
        return server, rp_uuid

    def _assert_healed(self, server, rp_uuid):
        allocations = self._get_allocations_by_server_uuid(server['id'])
        self.assertIn(rp_uuid, allocations,
                      'Allocations not found for server %s and compute node '
                      'resource provider. %s\nOutput:%s' %
                      (server['id'], rp_uuid, self.output.getvalue()))
        self.assertFlavorMatchesAllocation(
            self.flavor, allocations[rp_uuid]['resources'])

    def test_heal_allocations_paging(self):
        """This test runs the following scenario:

        * Schedule server1 to cell1 and assert it doesn't have allocations.
        * Schedule server2 to cell2 and assert it doesn't have allocations.
        * Run "nova-manage placement heal_allocations --max-count 1" to make
          sure we stop with just one instance and the return code is 1.
        * Run "nova-manage placement heal_allocations" and assert both
          instances now have allocations against their respective compute
          node resource providers.
        """
        server1, rp_uuid1 = self._boot_and_assert_no_allocations(
            self.flavor, 'cell1')
        server2, rp_uuid2 = self._boot_and_assert_no_allocations(
            self.flavor, 'cell2')

        # heal server1 and server2 in separate calls
        for x in range(2):
            result = self.cli.heal_allocations(max_count=1, verbose=True)
            self.assertEqual(1, result, self.output.getvalue())
            output = self.output.getvalue()
            self.assertIn('Max count reached. Processed 1 instances.', output)
            # If this is the 2nd call, we'll have skipped the first instance.
            if x == 0:
                self.assertNotIn('already has allocations', output)
            else:
                self.assertIn('already has allocations', output)

        self._assert_healed(server1, rp_uuid1)
        self._assert_healed(server2, rp_uuid2)

        # run it again to make sure nothing was processed
        result = self.cli.heal_allocations(verbose=True)
        self.assertEqual(4, result, self.output.getvalue())
        self.assertIn('already has allocations', self.output.getvalue())

    def test_heal_allocations_paging_max_count_more_than_num_instances(self):
        """Sets up 2 instances in cell1 and 1 instance in cell2. Then specify
        --max-count=10, processes 3 instances, rc is 0
        """
        servers = []  # This is really a list of 2-item tuples.
        for x in range(2):
            servers.append(
                self._boot_and_assert_no_allocations(self.flavor, 'cell1'))
        servers.append(
            self._boot_and_assert_no_allocations(self.flavor, 'cell2'))
        result = self.cli.heal_allocations(max_count=10, verbose=True)
        self.assertEqual(0, result, self.output.getvalue())
        self.assertIn('Processed 3 instances.', self.output.getvalue())
        for server, rp_uuid in servers:
            self._assert_healed(server, rp_uuid)

    def test_heal_allocations_paging_more_instances_remain(self):
        """Tests that there is one instance in cell1 and two instances in
        cell2, with a --max-count=2. This tests that we stop in cell2 once
        max_count is reached.
        """
        servers = []  # This is really a list of 2-item tuples.
        servers.append(
            self._boot_and_assert_no_allocations(self.flavor, 'cell1'))
        for x in range(2):
            servers.append(
                self._boot_and_assert_no_allocations(self.flavor, 'cell2'))
        result = self.cli.heal_allocations(max_count=2, verbose=True)
        self.assertEqual(1, result, self.output.getvalue())
        self.assertIn('Max count reached. Processed 2 instances.',
                      self.output.getvalue())
        # Assert that allocations were healed on the instances we expect.
        # Order works here because cell mappings are retrieved by id in
        # ascending order so oldest to newest, and instances are also
        # retrieved from each cell by created_at in ascending order, which
        # matches the order we put created servers in our list.
        for x in range(2):
            self._assert_healed(*servers[x])
        # And assert the remaining instance does not have allocations.
        allocations = self._get_allocations_by_server_uuid(
            servers[2][0]['id'])
        self.assertEqual({}, allocations)

    def test_heal_allocations_unlimited(self):
        """Sets up 2 instances in cell1 and 1 instance in cell2. Then
        don't specify --max-count, processes 3 instances, rc is 0.
        """
        servers = []  # This is really a list of 2-item tuples.
        for x in range(2):
            servers.append(
                self._boot_and_assert_no_allocations(self.flavor, 'cell1'))
        servers.append(
            self._boot_and_assert_no_allocations(self.flavor, 'cell2'))
        result = self.cli.heal_allocations(verbose=True)
        self.assertEqual(0, result, self.output.getvalue())
        self.assertIn('Processed 3 instances.', self.output.getvalue())
        for server, rp_uuid in servers:
            self._assert_healed(server, rp_uuid)

    def test_heal_allocations_shelved(self):
        """Tests the scenario that an instance with no allocations is shelved
        so heal_allocations skips it (since the instance is not on a host).
        """
        server, rp_uuid = self._boot_and_assert_no_allocations(
            self.flavor, 'cell1')
        self.api.post_server_action(server['id'], {'shelve': None})
        # The server status goes to SHELVED_OFFLOADED before the host/node
        # is nulled out in the compute service, so we also have to wait for
        # that so we don't race when we run heal_allocations.
        server = self._wait_for_server_parameter(
            self.admin_api, server,
            {'OS-EXT-SRV-ATTR:host': None, 'status': 'SHELVED_OFFLOADED'})
        result = self.cli.heal_allocations(verbose=True)
        self.assertEqual(4, result, self.output.getvalue())
        self.assertIn('Instance %s is not on a host.' % server['id'],
                      self.output.getvalue())
        # Check that the server has no allocations.
        allocations = self._get_allocations_by_server_uuid(server['id'])
        self.assertEqual({}, allocations,
                         'Shelved-offloaded server should not have '
                         'allocations.')

    def test_heal_allocations_task_in_progress(self):
        """Tests the case that heal_allocations skips over an instance which
        is undergoing a task state transition (in this case pausing).
        """
        server, rp_uuid = self._boot_and_assert_no_allocations(
            self.flavor, 'cell1')

        def fake_pause_instance(_self, ctxt, instance, *a, **kw):
            self.assertEqual('pausing', instance.task_state)
        # We have to stub out pause_instance so that the instance is stuck
        # with task_state != None.
        self.stub_out('nova.compute.manager.ComputeManager.pause_instance',
                      fake_pause_instance)
        self.api.post_server_action(server['id'], {'pause': None})
        result = self.cli.heal_allocations(verbose=True)
        self.assertEqual(4, result, self.output.getvalue())
        # Check that the server has no allocations.
        allocations = self._get_allocations_by_server_uuid(server['id'])
        self.assertEqual({}, allocations,
                         'Server undergoing task state transition should '
                         'not have allocations.')
        # Assert something was logged for this instance when it was skipped.
        self.assertIn('Instance %s is undergoing a task state transition: '
                      'pausing' % server['id'], self.output.getvalue())
@@ -1456,16 +1456,18 @@ class ProviderUsageBaseTestCase(test.TestCase,

        self.computes = {}

-   def _start_compute(self, host):
+   def _start_compute(self, host, cell_name=None):
        """Start a nova compute service on the given host

        :param host: the name of the host that will be associated to the
            compute service.
+       :param cell_name: optional name of the cell in which to start the
+           compute service (defaults to cell1)
        :return: the nova compute service object
        """
        fake.set_nodes([host])
        self.addCleanup(fake.restore_nodes)
-       compute = self.start_service('compute', host=host)
+       compute = self.start_service('compute', host=host, cell=cell_name)
        self.computes[host] = compute
        return compute
@@ -2395,6 +2395,97 @@ class CellV2CommandsTestCase(test.NoDBTestCase):
        node.save.assert_called_once_with()


@ddt.ddt
class TestNovaManagePlacement(test.NoDBTestCase):
    """Unit tests for the nova-manage placement commands.

    Tests in this class should be simple and can rely on mock, so they
    are usually restricted to negative or side-effect type tests.

    For more involved functional scenarios, use
    nova.tests.functional.test_nova_manage.
    """
    def setUp(self):
        super(TestNovaManagePlacement, self).setUp()
        self.output = StringIO()
        self.useFixture(fixtures.MonkeyPatch('sys.stdout', self.output))
        self.cli = manage.PlacementCommands()

    @ddt.data(-1, 0, "one")
    def test_heal_allocations_invalid_max_count(self, max_count):
        self.assertEqual(127, self.cli.heal_allocations(max_count=max_count))

    @mock.patch('nova.objects.CellMappingList.get_all',
                return_value=objects.CellMappingList())
    def test_heal_allocations_no_cells(self, mock_get_all_cells):
        self.assertEqual(4, self.cli.heal_allocations(verbose=True))
        self.assertIn('No cells to process', self.output.getvalue())

    @mock.patch('nova.objects.CellMappingList.get_all',
                return_value=objects.CellMappingList(objects=[
                    objects.CellMapping(name='cell1',
                                        uuid=uuidsentinel.cell1)]))
    @mock.patch('nova.objects.InstanceList.get_by_filters',
                return_value=objects.InstanceList())
    def test_heal_allocations_no_instances(
            self, mock_get_instances, mock_get_all_cells):
        self.assertEqual(4, self.cli.heal_allocations(verbose=True))
        self.assertIn('Processed 0 instances.', self.output.getvalue())

    @mock.patch('nova.objects.CellMappingList.get_all',
                return_value=objects.CellMappingList(objects=[
                    objects.CellMapping(name='cell1',
                                        uuid=uuidsentinel.cell1)]))
    @mock.patch('nova.objects.InstanceList.get_by_filters',
                return_value=objects.InstanceList(objects=[
                    objects.Instance(
                        uuid=uuidsentinel.instance, host='fake', node='fake',
                        task_state=None)]))
    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                'get_allocations_for_consumer', return_value={})
    @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename',
                side_effect=exception.ComputeHostNotFound(host='fake'))
    def test_heal_allocations_compute_host_not_found(
            self, mock_get_compute_node, mock_get_allocs, mock_get_instances,
            mock_get_all_cells):
        self.assertEqual(2, self.cli.heal_allocations())
        self.assertIn('Compute host fake could not be found.',
                      self.output.getvalue())

    @mock.patch('nova.objects.CellMappingList.get_all',
                return_value=objects.CellMappingList(objects=[
                    objects.CellMapping(name='cell1',
                                        uuid=uuidsentinel.cell1)]))
    @mock.patch('nova.objects.InstanceList.get_by_filters',
                return_value=objects.InstanceList(objects=[
                    objects.Instance(
                        uuid=uuidsentinel.instance, host='fake', node='fake',
                        task_state=None, flavor=objects.Flavor(),
                        project_id='fake-project', user_id='fake-user')]))
    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                'get_allocations_for_consumer', return_value={})
    @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename',
                return_value=objects.ComputeNode(uuid=uuidsentinel.node))
    @mock.patch('nova.scheduler.utils.resources_from_flavor',
                return_value=mock.sentinel.resources)
    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                'put_allocations', return_value=False)
    def test_heal_allocations_put_allocations_fails(
            self, mock_put_allocations, mock_res_from_flavor,
            mock_get_compute_node, mock_get_allocs, mock_get_instances,
            mock_get_all_cells):
        self.assertEqual(3, self.cli.heal_allocations())
        self.assertIn('Failed to create allocations for instance',
                      self.output.getvalue())
        instance = mock_get_instances.return_value[0]
        mock_res_from_flavor.assert_called_once_with(
            instance, instance.flavor)
        mock_put_allocations.assert_called_once_with(
            test.MatchType(context.RequestContext), uuidsentinel.node,
            uuidsentinel.instance, mock.sentinel.resources, 'fake-project',
            'fake-user')


class TestNovaManageMain(test.NoDBTestCase):
    """Tests the nova-manage:main() setup code."""
@@ -0,0 +1,18 @@
---
other:
  - |
    A new ``nova-manage placement heal_allocations`` CLI has been added to
    help migrate users from the deprecated CachingScheduler. Starting in
    16.0.0 (Pike), the nova-compute service no longer reports instance
    allocations to the Placement service because the FilterScheduler does
    that as part of scheduling. However, the CachingScheduler does not create
    the allocations in the Placement service, so any instances created using
    the CachingScheduler after Ocata will not have allocations in Placement.
    The new CLI allows operators using the CachingScheduler to find all
    instances in all cells which do not have allocations in Placement and
    create those allocations. The CLI will skip any instances that are
    undergoing a task state transition, so ideally this would be run when
    the API is down, but it can be run, if necessary, while the API is up.
    For more details on CLI usage, see the man page entry:

    https://docs.openstack.org/nova/latest/cli/nova-manage.html#placement