Merge "XenAPI: get vGPU stats from hypervisor"
This commit is contained in:
commit
2c4a1a390a
@ -67,7 +67,15 @@ class XenAPIDriverTestCase(stubs.XenAPITestBaseNoDB):
|
||||
},
|
||||
'vcpus_used': 10,
|
||||
'pci_passthrough_devices': '',
|
||||
'host_other-config': {'iscsi_iqn': 'someiqn'}}
|
||||
'host_other-config': {'iscsi_iqn': 'someiqn'},
|
||||
'vgpu_stats': {
|
||||
'c8328467-badf-43d8-8e28-0e096b0f88b1':
|
||||
{'uuid': '6444c6ee-3a49-42f5-bebb-606b52175e67',
|
||||
'total': 7,
|
||||
'max_heads': 1,
|
||||
'type_name': 'Intel GVT-g',
|
||||
},
|
||||
}}
|
||||
|
||||
def test_available_resource(self):
|
||||
driver = self._get_driver()
|
||||
|
202
nova/tests/unit/virt/xenapi/test_vgpu.py
Normal file
202
nova/tests/unit/virt/xenapi/test_vgpu.py
Normal file
@ -0,0 +1,202 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import mock
|
||||
|
||||
from nova import test
|
||||
from nova.virt.xenapi import host
|
||||
|
||||
|
||||
class VGPUTestCase(test.NoDBTestCase):
    """Unit tests for the vGPU stats helpers of ``host.HostState``.

    Every test patches ``HostState.update_status`` and hands ``HostState`` a
    ``mock.Mock`` session, so the only XenAPI traffic is whatever the test
    scripts on ``session.call_xenapi``.
    """

    @staticmethod
    def _new_host_state(**call_xenapi_attrs):
        """Return ``(session, host_state)`` built around a mocked session.

        Keyword arguments (e.g. ``side_effect`` or ``return_value``) are
        applied to ``session.call_xenapi`` before ``HostState`` is created.
        """
        fake_session = mock.Mock()
        fake_session.call_xenapi.configure_mock(**call_xenapi_attrs)
        return fake_session, host.HostState(fake_session)

    @mock.patch.object(host.HostState, 'update_status',
                       return_value='fake_stats_1')
    @mock.patch.object(host.HostState, '_get_vgpu_stats_in_group')
    def test_get_vgpu_stats_empty_cfg(self, mock_get, mock_update):
        """With no vGPU type configured, XenAPI is never queried."""
        self.flags(enabled_vgpu_types=[], group='devices')
        fake_session, host_state = self._new_host_state()

        result = host_state._get_vgpu_stats()

        fake_session.call_xenapi.assert_not_called()
        self.assertEqual({}, result)

    @mock.patch.object(host.HostState, 'update_status',
                       return_value='fake_stats_1')
    @mock.patch.object(host.HostState, '_get_vgpu_stats_in_group')
    def test_get_vgpu_stats_single_type(self, mock_get, mock_update):
        """A single configured type is looked up in every GPU group."""
        self.flags(enabled_vgpu_types=['type_name_1'], group='devices')
        # XenAPI reports two GPU groups.
        fake_session, host_state = self._new_host_state(side_effect=[
            ['grp_ref1', 'grp_ref2'],  # GPU_group.get_all
            'uuid_1',  # GPU_group.get_uuid
            'uuid_2',  # GPU_group.get_uuid
        ])
        # The second GPU group yields None, simulating a group that does
        # not have the configured vGPU type enabled.
        mock_get.side_effect = ['fake_stats_1', None]

        result = host_state._get_vgpu_stats()

        self.assertEqual(3, fake_session.call_xenapi.call_count)
        self.assertEqual(1, mock_update.call_count)
        self.assertEqual(2, mock_get.call_count)
        self.assertEqual({'uuid_1': 'fake_stats_1'}, result)

    @mock.patch.object(host.HostState, 'update_status',
                       return_value='fake_stats_1')
    @mock.patch.object(host.HostState, '_get_vgpu_stats_in_group')
    def test_get_vgpu_stats_multi_types(self, mock_get, mock_update):
        """When several vGPU types are configured, only the first is used."""
        self.flags(enabled_vgpu_types=['type_name_1', 'type_name_2'],
                   group='devices')
        fake_session, host_state = self._new_host_state(side_effect=[
            ['grp_ref1'],  # GPU_group.get_all
            'uuid_1',  # GPU_group.get_uuid
        ])
        mock_get.side_effect = ['fake_stats_1']

        result = host_state._get_vgpu_stats()

        self.assertEqual(2, fake_session.call_xenapi.call_count)
        self.assertEqual(1, mock_update.call_count)
        self.assertEqual({'uuid_1': 'fake_stats_1'}, result)
        # Only the first configured vGPU type ('type_name_1') is passed on.
        mock_get.assert_called_with('grp_ref1', ['type_name_1'])

    @mock.patch.object(host.HostState, 'update_status',
                       return_value='fake_stats_1')
    @mock.patch.object(host.HostState, '_get_total_vgpu_in_grp',
                       return_value=7)
    def test_get_vgpu_stats_in_group(self, mock_total, mock_update):
        """The returned stats describe the requested, enabled vGPU type."""
        fake_session, host_state = self._new_host_state(side_effect=[
            ['type_ref_1', 'type_ref_2'],  # GPU_group.get_enabled_VGPU_types
            'type_name_1',  # VGPU_type.get_model_name
            'type_name_2',  # VGPU_type.get_model_name
            'type_uuid_2',  # VGPU_type.get_uuid
            '4',  # VGPU_type.get_max_heads
        ])

        result = host_state._get_vgpu_stats_in_group('grp_ref',
                                                     ['type_name_2'])

        self.assertEqual(5, fake_session.call_xenapi.call_count)
        # get_uuid must be asked about the requested vGPU type, i.e. with
        # 'type_ref_2' as its argument.
        fake_session.call_xenapi.assert_has_calls(
            [mock.call('VGPU_type.get_uuid', 'type_ref_2')])
        mock_total.assert_called_once()
        self.assertEqual(
            {'uuid': 'type_uuid_2',
             'type_name': 'type_name_2',
             'max_heads': 4,
             'total': 7},
            result)

    @mock.patch.object(host.HostState, 'update_status')
    @mock.patch.object(host.HostState, '_get_total_vgpu_in_grp',
                       return_value=7)
    def test_get_vgpu_stats_in_group_multiple(self, mock_total, mock_update):
        """With several matching types in one group, the first one wins."""
        fake_session, host_state = self._new_host_state(side_effect=[
            ['type_ref_1', 'type_ref_2'],  # GPU_group.get_enabled_VGPU_types
            'type_name_1',  # VGPU_type.get_model_name
            'type_name_2',  # VGPU_type.get_model_name
            'type_uuid_1',  # VGPU_type.get_uuid
            '4',  # VGPU_type.get_max_heads
        ])

        result = host_state._get_vgpu_stats_in_group(
            'grp_ref', ['type_name_1', 'type_name_2'])

        self.assertEqual(5, fake_session.call_xenapi.call_count)
        # get_uuid must be asked about the first vGPU type, i.e. with
        # 'type_ref_1' as its argument.
        fake_session.call_xenapi.assert_has_calls(
            [mock.call('VGPU_type.get_uuid', 'type_ref_1')])
        mock_total.assert_called_once()
        self.assertEqual(
            {'uuid': 'type_uuid_1',
             'type_name': 'type_name_1',
             'max_heads': 4,
             'total': 7},
            result)

    @mock.patch.object(host.HostState, 'update_status')
    @mock.patch.object(host.HostState, '_get_total_vgpu_in_grp',
                       return_value=7)
    def test_get_vgpu_stats_in_group_cfg_not_in_grp(self, mock_total,
                                                    mock_update):
        """A configured type that the GPU group lacks produces None."""
        fake_session, host_state = self._new_host_state(side_effect=[
            ['type_ref_1', 'type_ref_2'],  # GPU_group.get_enabled_VGPU_types
            'type_name_1',  # VGPU_type.get_model_name
            'type_name_2',  # VGPU_type.get_model_name
        ])

        result = host_state._get_vgpu_stats_in_group('grp_ref',
                                                     ['bad_type_name'])

        self.assertEqual(3, fake_session.call_xenapi.call_count)
        mock_total.assert_not_called()
        self.assertIsNone(result)

    @mock.patch.object(host.HostState, 'update_status')
    def test_get_total_vgpu_in_grp(self, mock_update):
        """The total is the summed capacity of one type over all PGPUs."""
        # Fake PGPU records as returned by the XenAPI call
        # "PGPU.get_all_records_where"; both PGPUs look the same.
        pgpu_records = {
            pgpu_ref: {
                'enabled_VGPU_types': ['type_ref1', 'type_ref2'],
                'supported_VGPU_max_capacities': {
                    'type_ref1': '1',
                    'type_ref2': '3',
                },
            }
            for pgpu_ref in ('pgpu_ref1', 'pgpu_ref2')
        }
        fake_session, host_state = self._new_host_state(
            return_value=pgpu_records)

        total = host_state._get_total_vgpu_in_grp('grp_ref', 'type_ref1')

        fake_session.call_xenapi.assert_called_with(
            'PGPU.get_all_records_where', 'field "GPU_group" = "grp_ref"')
        # The total number of vGPUs equals the sum of the available vGPUs
        # of 'type_ref1' over all PGPUs.
        self.assertEqual(2, total)
|
@ -2230,12 +2230,14 @@ class XenAPIHostTestCase(stubs.XenAPITestBase):
|
||||
|
||||
@mock.patch.object(host.HostState, 'get_disk_used')
|
||||
@mock.patch.object(host.HostState, '_get_passthrough_devices')
|
||||
@mock.patch.object(host.HostState, '_get_vgpu_stats')
|
||||
@mock.patch.object(jsonutils, 'loads')
|
||||
@mock.patch.object(vm_utils, 'list_vms')
|
||||
@mock.patch.object(vm_utils, 'scan_default_sr')
|
||||
@mock.patch.object(host_management, 'get_host_data')
|
||||
def test_update_stats_caches_hostname(self, mock_host_data, mock_scan_sr,
|
||||
mock_list_vms, mock_loads,
|
||||
mock_vgpus_stats,
|
||||
mock_devices, mock_dis_used):
|
||||
data = {'disk_total': 0,
|
||||
'disk_used': 0,
|
||||
@ -2266,10 +2268,12 @@ class XenAPIHostTestCase(stubs.XenAPITestBase):
|
||||
self.assertEqual(2, mock_host_data.call_count)
|
||||
self.assertEqual(2, mock_scan_sr.call_count)
|
||||
self.assertEqual(2, mock_devices.call_count)
|
||||
self.assertEqual(2, mock_vgpus_stats.call_count)
|
||||
mock_loads.assert_called_with(data)
|
||||
mock_host_data.assert_called_with(self.conn._session)
|
||||
mock_scan_sr.assert_called_with(self.conn._session)
|
||||
mock_devices.assert_called_with()
|
||||
mock_vgpus_stats.assert_called_with()
|
||||
|
||||
|
||||
@mock.patch.object(host.HostState, 'update_status')
|
||||
|
@ -68,7 +68,8 @@ from nova.i18n import _
|
||||
|
||||
|
||||
_CLASSES = ['host', 'network', 'session', 'pool', 'SR', 'VBD',
|
||||
'PBD', 'VDI', 'VIF', 'PIF', 'VM', 'VLAN', 'task']
|
||||
'PBD', 'VDI', 'VIF', 'PIF', 'VM', 'VLAN', 'task',
|
||||
'GPU_group', 'PGPU', 'VGPU_type']
|
||||
_after_create_functions = {}
|
||||
_destroy_functions = {}
|
||||
|
||||
|
@ -220,6 +220,110 @@ class HostState(object):
|
||||
|
||||
return passthrough_devices
|
||||
|
||||
def _get_vgpu_stats(self):
    """Query XenAPI for the vGPU stats of every GPU group.

    The result is a dict keyed by GPU group uuid:

        {grp_uuid_1: vgpu_stats_of_grp_1,
         grp_uuid_2: vgpu_stats_of_grp_2,
         ...,
         grp_uuid_n: vgpu_stats_of_grp_n}

    Each value is the stats dict returned by _get_vgpu_stats_in_group()
    for that group; groups for which it returns nothing are left out.
    An empty dict is returned, without calling XenAPI, when
    CONF.devices.enabled_vgpu_types is empty.
    """
    configured_types = CONF.devices.enabled_vgpu_types
    if not configured_types:
        return {}

    # NOTE(jianghuaw): Only one vGPU type per compute node is supported
    # at the moment, so when several are configured just the first one
    # is honoured. Once nested resource providers are in use, this
    # restriction can be dropped to allow multiple vGPU types spread
    # over multiple GPU groups (one enabled vGPU type per group).
    if len(configured_types) > 1:
        LOG.warning('XenAPI only supports one GPU type per compute node,'
                    ' only first type will be used.')
    first_type_only = configured_types[:1]

    call_xenapi = self._session.call_xenapi
    stats_by_group = {}
    for grp_ref in call_xenapi('GPU_group.get_all'):
        # The uuid is fetched before the stats, for every group.
        grp_uuid = call_xenapi('GPU_group.get_uuid', grp_ref)
        grp_stat = self._get_vgpu_stats_in_group(grp_ref, first_type_only)
        if grp_stat:
            stats_by_group[grp_uuid] = grp_stat

    LOG.debug("Returning vGPU stats: %s", stats_by_group)

    return stats_by_group
|
||||
|
||||
def _get_vgpu_stats_in_group(self, grp_ref, vgpu_types):
    """Get stats for the specified vGPU types in a GPU group.

    NOTE(Jianghuaw): In XenAPI, a GPU group is the minimal unit
    from where to create a vGPU for an instance. So here, we
    report vGPU resources for a particular GPU group. When we use
    nested resource providers to represent the vGPU resources,
    each GPU group will be a child resource provider under the
    compute node.

    :param grp_ref: XenAPI reference of the GPU group to inspect.
    :param vgpu_types: iterable of vGPU type (model) names to look for.
    :returns: None when none of ``vgpu_types`` is enabled in the group;
        otherwise a dict describing one vGPU type. ``max_heads`` is
        converted to an int. For example:

            {'uuid': '6444c6ee-3a49-42f5-bebb-606b52175e67',
             'total': 7,
             'max_heads': 1,
             'type_name': 'Intel GVT-g',
             }
    """
    call_xenapi = self._session.call_xenapi
    type_refs_in_grp = call_xenapi(
        'GPU_group.get_enabled_VGPU_types', grp_ref)

    # Map each enabled vGPU type's model name to its XenAPI reference.
    type_names_in_grp = {
        call_xenapi('VGPU_type.get_model_name', type_ref): type_ref
        for type_ref in type_refs_in_grp}

    # Keep the vGPU types enabled both in this GPU group and in the
    # nova conf. Sorting makes the choice below deterministic when
    # several vGPU types match.
    sorted_types = sorted(set(vgpu_types) & set(type_names_in_grp))
    if not sorted_types:
        return None

    chosen_type = sorted_types[0]
    if len(sorted_types) > 1:
        LOG.warning('XenAPI only supports one vGPU type per GPU group,'
                    ' but enabled multiple vGPU types: %(available)s.'
                    ' Choosing the first one: %(chosen)s.',
                    dict(available=sorted_types,
                         chosen=chosen_type))

    type_ref = type_names_in_grp[chosen_type]
    # Dict values are evaluated top to bottom, so XenAPI is queried for
    # the uuid first, then max_heads, and the total is computed last.
    return {
        'uuid': call_xenapi('VGPU_type.get_uuid', type_ref),
        'type_name': chosen_type,
        'max_heads': int(call_xenapi('VGPU_type.get_max_heads', type_ref)),
        'total': self._get_total_vgpu_in_grp(grp_ref, type_ref),
    }
|
||||
|
||||
def _get_total_vgpu_in_grp(self, grp_ref, type_ref):
    """Get the total capacity of vGPUs in the group.

    :param grp_ref: XenAPI reference of the GPU group whose PGPUs are
        queried.
    :param type_ref: XenAPI reference of the vGPU type to count.
    :returns: the sum, as an int, of the
        ``supported_VGPU_max_capacities`` entry for ``type_ref`` over
        every PGPU in the group that lists ``type_ref`` among its
        ``enabled_VGPU_types``; 0 when no PGPU qualifies.
    """
    pgpu_recs = self._session.call_xenapi(
        'PGPU.get_all_records_where', 'field "GPU_group" = "%s"' % grp_ref)

    # Only the records matter here, not the PGPU refs they are keyed by.
    # Capacities are converted with int() before being summed.
    return sum(
        int(pgpu_rec['supported_VGPU_max_capacities'][type_ref])
        for pgpu_rec in pgpu_recs.values()
        if type_ref in pgpu_rec['enabled_VGPU_types'])
|
||||
|
||||
def get_host_stats(self, refresh=False):
|
||||
"""Return the current state of the host. If 'refresh' is
|
||||
True, run the update first.
|
||||
@ -309,6 +413,7 @@ class HostState(object):
|
||||
vcpus_used = vcpus_used + int(vm_rec['VCPUs_max'])
|
||||
data['vcpus_used'] = vcpus_used
|
||||
data['pci_passthrough_devices'] = self._get_passthrough_devices()
|
||||
data['vgpu_stats'] = self._get_vgpu_stats()
|
||||
self._stats = data
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user