diff --git a/kernel-rt/debian/patches/0059-intel_idle-add-SPR-support.patch b/kernel-rt/debian/patches/0059-intel_idle-add-SPR-support.patch new file mode 100644 index 00000000..f2224b92 --- /dev/null +++ b/kernel-rt/debian/patches/0059-intel_idle-add-SPR-support.patch @@ -0,0 +1,122 @@ +From 3fc0e4cf93e9074f99f3b30ac65062f72f94adf1 Mon Sep 17 00:00:00 2001 +From: Artem Bityutskiy +Date: Wed, 2 Mar 2022 10:15:58 +0200 +Subject: [PATCH] intel_idle: add SPR support + +Add Sapphire Rapids Xeon support. + +Up until very recently, the C1 and C1E C-states were independent, but this +has changed in some new chips, including Sapphire Rapids Xeon (SPR). In these +chips the C1 and C1E states cannot be enabled at the same time. The "C1E +promotion" bit in 'MSR_IA32_POWER_CTL' also has its semantics changed a bit. + +Here are the C1, C1E, and "C1E promotion" bit rules on Xeons before SPR. + +1. If C1E promotion bit is disabled. + a. C1 requests end up with C1 C-state. + b. C1E requests end up with C1E C-state. +2. If C1E promotion bit is enabled. + a. C1 requests end up with C1E C-state. + b. C1E requests end up with C1E C-state. + +Here are the C1, C1E, and "C1E promotion" bit rules on Sapphire Rapids Xeon. +1. If C1E promotion bit is disabled. + a. C1 requests end up with C1 C-state. + b. C1E requests end up with C1 C-state. +2. If C1E promotion bit is enabled. + a. C1 requests end up with C1E C-state. + b. C1E requests end up with C1E C-state. + +Before SPR Xeon, the 'intel_idle' driver was disabling C1E promotion and was +exposing C1 and C1E as independent C-states. But on SPR, C1 and C1E cannot be +enabled at the same time. + +This patch adds both C1 and C1E states. However, C1E is marked with the +"CPUIDLE_FLAG_UNUSABLE" flag, which means that it won't be registered by +default. The C1E promotion bit will be cleared, which means that by default +only C1 and C6 will be registered on SPR. + +The next patch will add an option for enabling C1E and disabling C1 on SPR. 
+ +Signed-off-by: Artem Bityutskiy +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit 9edf3c0ffef0ec1bed8300315852b5c6a0997130) +[mvb: Fix merge conflict caused by a lack of IceLake-D support in + StarlingX.] +Signed-off-by: M. Vefa Bicakci +--- + drivers/idle/intel_idle.c | 47 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 47 insertions(+) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index b92b032fb6d1..7694d852b49d 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -778,6 +778,46 @@ static struct cpuidle_state icx_cstates[] __initdata = { + .enter = NULL } + }; + ++/* ++ * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice ++ * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in ++ * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 ++ * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then ++ * both C1 and C1E requests end up with C1, so there is effectively no C1E. ++ * ++ * By default we enable C1 and disable C1E by marking it with ++ * 'CPUIDLE_FLAG_UNUSABLE'. 
++ */ ++static struct cpuidle_state spr_cstates[] __initdata = { ++ { ++ .name = "C1", ++ .desc = "MWAIT 0x00", ++ .flags = MWAIT2flg(0x00), ++ .exit_latency = 1, ++ .target_residency = 1, ++ .enter = &intel_idle, ++ .enter_s2idle = intel_idle_s2idle, }, ++ { ++ .name = "C1E", ++ .desc = "MWAIT 0x01", ++ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | \ ++ CPUIDLE_FLAG_UNUSABLE, ++ .exit_latency = 2, ++ .target_residency = 4, ++ .enter = &intel_idle, ++ .enter_s2idle = intel_idle_s2idle, }, ++ { ++ .name = "C6", ++ .desc = "MWAIT 0x20", ++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .exit_latency = 290, ++ .target_residency = 800, ++ .enter = &intel_idle, ++ .enter_s2idle = intel_idle_s2idle, }, ++ { ++ .enter = NULL } ++}; ++ + static struct cpuidle_state atom_cstates[] __initdata = { + { + .name = "C1E", +@@ -1088,6 +1128,12 @@ static const struct idle_cpu idle_cpu_icx __initconst = { + .use_acpi = true, + }; + ++static const struct idle_cpu idle_cpu_spr __initconst = { ++ .state_table = spr_cstates, ++ .disable_promotion_to_c1e = true, ++ .use_acpi = true, ++}; ++ + static const struct idle_cpu idle_cpu_avn __initconst = { + .state_table = avn_cstates, + .disable_promotion_to_c1e = true, +@@ -1143,6 +1189,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), ++ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), diff --git a/kernel-rt/debian/patches/0060-intel_idle-add-preferred_cstates-module-argument.patch b/kernel-rt/debian/patches/0060-intel_idle-add-preferred_cstates-module-argument.patch new file mode 100644 index 00000000..2ce5deb1 --- /dev/null +++ 
b/kernel-rt/debian/patches/0060-intel_idle-add-preferred_cstates-module-argument.patch @@ -0,0 +1,130 @@ +From 0e408069e2c1e49b70e3db7e86201072615681e1 Mon Sep 17 00:00:00 2001 +From: Artem Bityutskiy +Date: Wed, 2 Mar 2022 10:15:59 +0200 +Subject: [PATCH] intel_idle: add 'preferred_cstates' module argument + +On Sapphire Rapids Xeon (SPR) the C1 and C1E states are basically mutually +exclusive - only one of them can be enabled. By default, 'intel_idle' driver +enables C1 and disables C1E. However, some users prefer to use C1E instead of +C1, because it saves more energy. + +This patch adds a new module parameter ('preferred_cstates') for enabling C1E +and disabling C1. Here is the idea behind it. + +1. This option has effect only for "mutually exclusive" C-states like C1 and + C1E on SPR. +2. It does not have any effect on independent C-states, which do not require + other C-states to be disabled (most states on most platforms as of today). +3. For mutually exclusive C-states, the 'intel_idle' driver always has a + reasonable default, such as enabling C1 on SPR by default. On other + platforms, the default may be different. +4. Users can override the default using the 'preferred_cstates' parameter. +5. The parameter accepts the preferred C-states bit-mask, similarly to the + existing 'states_off' parameter. +6. This parameter is not limited to C1/C1E, and leaves room for supporting + other mutually exclusive C-states, if they come in the future. + +Today 'intel_idle' can only be compiled-in, which means that on SPR, in order +to disable C1 and enable C1E, users should boot with the following kernel +argument: intel_idle.preferred_cstates=4 + +Signed-off-by: Artem Bityutskiy +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit da0e58c038e60e7e65d30813ebdfe91687aa8a24) +[mvb: Fix merge conflicts caused by the lack of commit 642333384991 + ("intel_idle: Adjust the SKX C6 parameters if PC6 is disabled") + in StarlingX's v5.10 kernel.] +Signed-off-by: M. 
Vefa Bicakci +--- + drivers/idle/intel_idle.c | 46 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 46 insertions(+) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index 7694d852b49d..6837a5fa0214 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -66,6 +66,7 @@ static struct cpuidle_driver intel_idle_driver = { + /* intel_idle.max_cstate=0 disables driver */ + static int max_cstate = CPUIDLE_STATE_MAX - 1; + static unsigned int disabled_states_mask; ++static unsigned int preferred_states_mask; + + static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; + +@@ -1377,6 +1378,8 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } + static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } + #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ + ++static void c1e_promotion_enable(void); ++ + /** + * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. + * +@@ -1517,6 +1520,26 @@ static void __init sklh_idle_state_table_update(void) + skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ + } + ++/** ++ * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. ++ */ ++static void __init spr_idle_state_table_update(void) ++{ ++ /* Check if user prefers C1E over C1. */ ++ if (preferred_states_mask & BIT(2)) { ++ if (preferred_states_mask & BIT(1)) ++ /* Both can't be enabled, stick to the defaults. */ ++ return; ++ ++ spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; ++ spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; ++ ++ /* Enable C1E using the "C1E promotion" bit. 
*/ ++ c1e_promotion_enable(); ++ disable_promotion_to_c1e = false; ++ } ++} ++ + static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) + { + unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; +@@ -1548,6 +1571,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) + case INTEL_FAM6_SKYLAKE: + sklh_idle_state_table_update(); + break; ++ case INTEL_FAM6_SAPPHIRERAPIDS_X: ++ spr_idle_state_table_update(); ++ break; + } + + for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { +@@ -1625,6 +1651,15 @@ static void auto_demotion_disable(void) + wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); + } + ++static void c1e_promotion_enable(void) ++{ ++ unsigned long long msr_bits; ++ ++ rdmsrl(MSR_IA32_POWER_CTL, msr_bits); ++ msr_bits |= 0x2; ++ wrmsrl(MSR_IA32_POWER_CTL, msr_bits); ++} ++ + static void c1e_promotion_disable(void) + { + unsigned long long msr_bits; +@@ -1794,3 +1829,14 @@ module_param(max_cstate, int, 0444); + */ + module_param_named(states_off, disabled_states_mask, uint, 0444); + MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); ++/* ++ * Some platforms come with mutually exclusive C-states, so that if one is ++ * enabled, the other C-states must not be used. Example: C1 and C1E on ++ * Sapphire Rapids platform. This parameter allows for selecting the ++ * preferred C-states among the groups of mutually exclusive C-states - the ++ * selected C-states will be registered, the other C-states from the mutually ++ * exclusive group won't be registered. If the platform has no mutually ++ * exclusive C-states, this parameter has no effect. 
++ */ ++module_param_named(preferred_cstates, preferred_states_mask, uint, 0444); ++MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states"); diff --git a/kernel-rt/debian/patches/0061-intel_idle-add-core-C6-optimization-for-SPR.patch b/kernel-rt/debian/patches/0061-intel_idle-add-core-C6-optimization-for-SPR.patch new file mode 100644 index 00000000..bcb57e8c --- /dev/null +++ b/kernel-rt/debian/patches/0061-intel_idle-add-core-C6-optimization-for-SPR.patch @@ -0,0 +1,50 @@ +From 0c58f0b289baec96167af2c61df75912d115c2d8 Mon Sep 17 00:00:00 2001 +From: Artem Bityutskiy +Date: Wed, 2 Mar 2022 10:16:00 +0200 +Subject: [PATCH] intel_idle: add core C6 optimization for SPR + +Add a Sapphire Rapids Xeon C6 optimization, similar to what we have for Sky Lake +Xeon: if package C6 is disabled, adjust C6 exit latency and target residency to +match core C6 values, instead of using the default package C6 values. + +Signed-off-by: Artem Bityutskiy +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit 3a9cf77b60dc9839b6674943bb7c9dcd524b6294) +Signed-off-by: M. Vefa Bicakci +--- + drivers/idle/intel_idle.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index 6837a5fa0214..8e4c41dab7ce 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -1525,6 +1525,8 @@ static void __init sklh_idle_state_table_update(void) + */ + static void __init spr_idle_state_table_update(void) + { ++ unsigned long long msr; ++ + /* Check if user prefers C1E over C1. */ + if (preferred_states_mask & BIT(2)) { + if (preferred_states_mask & BIT(1)) +@@ -1538,6 +1540,19 @@ static void __init spr_idle_state_table_update(void) + c1e_promotion_enable(); + disable_promotion_to_c1e = false; + } ++ ++ /* ++ * By default, the C6 state assumes the worst-case scenario of package ++ * C6. However, if PC6 is disabled, we update the numbers to match ++ * core C6. 
++ */ ++ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); ++ ++ /* Limit value 2 and above allow for PC6. */ ++ if ((msr & 0x7) < 2) { ++ spr_cstates[2].exit_latency = 190; ++ spr_cstates[2].target_residency = 600; ++ } + } + + static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) diff --git a/kernel-rt/debian/patches/0062-intel_idle-Fix-the-preferred_cstates-module-paramete.patch b/kernel-rt/debian/patches/0062-intel_idle-Fix-the-preferred_cstates-module-paramete.patch new file mode 100644 index 00000000..6d567aa5 --- /dev/null +++ b/kernel-rt/debian/patches/0062-intel_idle-Fix-the-preferred_cstates-module-paramete.patch @@ -0,0 +1,101 @@ +From 2e2f50c5e9b45c4c47e592c811439ba413ee2043 Mon Sep 17 00:00:00 2001 +From: Artem Bityutskiy +Date: Wed, 27 Apr 2022 09:08:52 +0300 +Subject: [PATCH] intel_idle: Fix the 'preferred_cstates' module parameter + +Problem description. + +When user boots kernel up with the 'intel_idle.preferred_cstates=4' option, +we enable C1E and disable C1 states on Sapphire Rapids Xeon (SPR). In order +for C1E to work on SPR, we have to enable the C1E promotion bit on all +CPUs. However, we enable it only on one CPU. + +Fix description. + +The 'intel_idle' driver already has the infrastructure for disabling C1E +promotion on every CPU. This patch uses the same infrastructure for +enabling C1E promotion on every CPU. It changes the boolean +'disable_promotion_to_c1e' variable to a tri-state 'c1e_promotion' +variable. + +Tested on a 2-socket SPR system. I verified the following combinations: + + * C1E promotion enabled and disabled in BIOS. + * Booted with and without the 'intel_idle.preferred_cstates=4' kernel + argument. + +In all 4 cases C1E promotion was correctly set on all CPUs. + +Also tested on an old Broadwell system, just to make sure it does not cause +a regression. C1E promotion was correctly disabled on that system, both C1 +and C1E were exposed (as expected). 
+ +Fixes: da0e58c038e6 ("intel_idle: add 'preferred_cstates' module argument") +Reported-by: Jan Beulich +Signed-off-by: Artem Bityutskiy +[ rjw: Minor changelog edits ] +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit 39c184a6a9a7a99950b321d55fe713175cf1d404) +Signed-off-by: M. Vefa Bicakci +--- + drivers/idle/intel_idle.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index 8e4c41dab7ce..f7da2031c994 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -71,7 +71,12 @@ static unsigned int preferred_states_mask; + static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; + + static unsigned long auto_demotion_disable_flags; +-static bool disable_promotion_to_c1e; ++ ++static enum { ++ C1E_PROMOTION_PRESERVE, ++ C1E_PROMOTION_ENABLE, ++ C1E_PROMOTION_DISABLE ++} c1e_promotion = C1E_PROMOTION_PRESERVE; + + struct idle_cpu { + struct cpuidle_state *state_table; +@@ -1378,8 +1383,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } + static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } + #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ + +-static void c1e_promotion_enable(void); +- + /** + * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. + * +@@ -1537,8 +1540,7 @@ static void __init spr_idle_state_table_update(void) + spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; + + /* Enable C1E using the "C1E promotion" bit. 
*/ +- c1e_promotion_enable(); +- disable_promotion_to_c1e = false; ++ c1e_promotion = C1E_PROMOTION_ENABLE; + } + + /* +@@ -1706,7 +1708,9 @@ static int intel_idle_cpu_init(unsigned int cpu) + if (auto_demotion_disable_flags) + auto_demotion_disable(); + +- if (disable_promotion_to_c1e) ++ if (c1e_promotion == C1E_PROMOTION_ENABLE) ++ c1e_promotion_enable(); ++ else if (c1e_promotion == C1E_PROMOTION_DISABLE) + c1e_promotion_disable(); + + return 0; +@@ -1785,7 +1789,8 @@ static int __init intel_idle_init(void) + if (icpu) { + cpuidle_state_table = icpu->state_table; + auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; +- disable_promotion_to_c1e = icpu->disable_promotion_to_c1e; ++ if (icpu->disable_promotion_to_c1e) ++ c1e_promotion = C1E_PROMOTION_DISABLE; + if (icpu->use_acpi || force_use_acpi) + intel_idle_acpi_cst_extract(); + } else if (!intel_idle_acpi_cst_extract()) { diff --git a/kernel-rt/debian/patches/0063-intel_idle-Fix-SPR-C6-optimization.patch b/kernel-rt/debian/patches/0063-intel_idle-Fix-SPR-C6-optimization.patch new file mode 100644 index 00000000..416cf8a8 --- /dev/null +++ b/kernel-rt/debian/patches/0063-intel_idle-Fix-SPR-C6-optimization.patch @@ -0,0 +1,47 @@ +From 5ce5fbe31839dc984ea8ada2daabb57438f2332d Mon Sep 17 00:00:00 2001 +From: Artem Bityutskiy +Date: Wed, 27 Apr 2022 09:08:53 +0300 +Subject: [PATCH] intel_idle: Fix SPR C6 optimization + +The Sapphire Rapids (SPR) C6 optimization was added to the end of the +'spr_idle_state_table_update()' function. However, the function has a +'return' which may happen before the optimization has a chance to run. +And this may prevent the optimization from happening. + +This is an unlikely scenario, but possible if user boots with, say, +the 'intel_idle.preferred_cstates=6' kernel boot option. + +This patch fixes the issue by eliminating the problematic 'return' +statement. 
+ +Fixes: 3a9cf77b60dc ("intel_idle: add core C6 optimization for SPR") +Suggested-by: Jan Beulich +Reported-by: Jan Beulich +Signed-off-by: Artem Bityutskiy +[ rjw: Minor changelog edits ] +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit 7eac3bd38d18cd3317756649921b8264ddfee692) +Signed-off-by: M. Vefa Bicakci +--- + drivers/idle/intel_idle.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index f7da2031c994..fcd086916cfa 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -1531,11 +1531,9 @@ static void __init spr_idle_state_table_update(void) + unsigned long long msr; + + /* Check if user prefers C1E over C1. */ +- if (preferred_states_mask & BIT(2)) { +- if (preferred_states_mask & BIT(1)) +- /* Both can't be enabled, stick to the defaults. */ +- return; +- ++ if ((preferred_states_mask & BIT(2)) && ++ !(preferred_states_mask & BIT(1))) { ++ /* Disable C1 and enable C1E. */ + spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; + spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; + diff --git a/kernel-rt/debian/patches/0064-intel_idle-make-SPR-C1-and-C1E-be-independent.patch b/kernel-rt/debian/patches/0064-intel_idle-make-SPR-C1-and-C1E-be-independent.patch new file mode 100644 index 00000000..c824c535 --- /dev/null +++ b/kernel-rt/debian/patches/0064-intel_idle-make-SPR-C1-and-C1E-be-independent.patch @@ -0,0 +1,85 @@ +From 244af11009534272b87ca70a12d91489eab6607c Mon Sep 17 00:00:00 2001 +From: Artem Bityutskiy +Date: Sat, 16 Jul 2022 09:26:55 +0300 +Subject: [PATCH] intel_idle: make SPR C1 and C1E be independent + +This patch partially reverts the changes made by the following commit: + +da0e58c038e6 intel_idle: add 'preferred_cstates' module argument + +As that commit describes, on early Sapphire Rapids Xeon platforms the C1 and +C1E states were mutually exclusive, so that users could only have either C1 and +C6, or C1E and C6. 
+ +However, Intel firmware engineers managed to remove this limitation and make C1 +and C1E to be completely independent, just like on previous Xeon platforms. + +Therefore, this patch: + * Removes commentary describing the old, and now non-existing SPR C1E + limitation. + * Marks SPR C1E as available by default. + * Removes the 'preferred_cstates' parameter handling for SPR. Both C1 and + C1E will be available regardless of 'preferred_cstates' value. + +We expect that all SPR systems are shipping with new firmware, which includes +the C1/C1E improvement. + +Cc: v5.18+ # v5.18+ +Signed-off-by: Artem Bityutskiy +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit 1548fac47a114b42063def551eb152a536ed9697) +[mvb: Adapt to context in drivers/idle/intel_idle.c] +Signed-off-by: M. Vefa Bicakci +--- + drivers/idle/intel_idle.c | 24 +----------------------- + 1 file changed, 1 insertion(+), 23 deletions(-) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index fcd086916cfa..8db9d2f7ee74 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -784,16 +784,6 @@ static struct cpuidle_state icx_cstates[] __initdata = { + .enter = NULL } + }; + +-/* +- * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice +- * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in +- * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 +- * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then +- * both C1 and C1E requests end up with C1, so there is effectively no C1E. +- * +- * By default we enable C1 and disable C1E by marking it with +- * 'CPUIDLE_FLAG_UNUSABLE'. 
+- */ + static struct cpuidle_state spr_cstates[] __initdata = { + { + .name = "C1", +@@ -806,8 +796,7 @@ static struct cpuidle_state spr_cstates[] __initdata = { + { + .name = "C1E", + .desc = "MWAIT 0x01", +- .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | \ +- CPUIDLE_FLAG_UNUSABLE, ++ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 2, + .target_residency = 4, + .enter = &intel_idle, +@@ -1530,17 +1519,6 @@ static void __init spr_idle_state_table_update(void) + { + unsigned long long msr; + +- /* Check if user prefers C1E over C1. */ +- if ((preferred_states_mask & BIT(2)) && +- !(preferred_states_mask & BIT(1))) { +- /* Disable C1 and enable C1E. */ +- spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; +- spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; +- +- /* Enable C1E using the "C1E promotion" bit. */ +- c1e_promotion = C1E_PROMOTION_ENABLE; +- } +- + /* + * By default, the C6 state assumes the worst-case scenario of package + * C6. However, if PC6 is disabled, we update the numbers to match diff --git a/kernel-rt/debian/patches/0065-perf-x86-cstate-Add-ICELAKE_X-and-ICELAKE_D-support.patch b/kernel-rt/debian/patches/0065-perf-x86-cstate-Add-ICELAKE_X-and-ICELAKE_D-support.patch new file mode 100644 index 00000000..10f16857 --- /dev/null +++ b/kernel-rt/debian/patches/0065-perf-x86-cstate-Add-ICELAKE_X-and-ICELAKE_D-support.patch @@ -0,0 +1,96 @@ +From 024c741f7c7c015989ee10734df060328de9c50f Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Fri, 25 Jun 2021 21:32:47 +0800 +Subject: [PATCH] perf/x86/cstate: Add ICELAKE_X and ICELAKE_D support + +Introduce icx_cstates for ICELAKE_X and ICELAKE_D, and also update the +comments. + +On ICELAKE_X and ICELAKE_D, Core C1, Core C6, Package C2 and Package C6 +Residency MSRs are supported. + +This patch has been tested on real hardware. 
+ +Signed-off-by: Zhang Rui +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Kan Liang +Acked-by: Artem Bityutskiy +Link: https://lkml.kernel.org/r/20210625133247.2813-1-rui.zhang@intel.com +(cherry picked from commit 87bf399f86ecf36cc84fbeb7027a2995af649d6e) +[mvb: Adapt to context, caused by missing AlderLake and RocketLake + support in the v5.10 kernel. The contextual differences include + comments as well as the code.] +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/events/intel/cstate.c | 23 +++++++++++++++++------ + 1 file changed, 17 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c +index 4eb7ee5fed72..16f2d91201b7 100644 +--- a/arch/x86/events/intel/cstate.c ++++ b/arch/x86/events/intel/cstate.c +@@ -40,7 +40,7 @@ + * Model specific counters: + * MSR_CORE_C1_RES: CORE C1 Residency Counter + * perf code: 0x00 +- * Available model: SLM,AMT,GLM,CNL,TNT ++ * Available model: SLM,AMT,GLM,CNL,ICX,TNT + * Scope: Core (each processor core has a MSR) + * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter + * perf code: 0x01 +@@ -50,8 +50,8 @@ + * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter + * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, +- * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL, +- * TNT ++ * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, ++ * TGL,TNT + * Scope: Core + * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter + * perf code: 0x03 +@@ -61,7 +61,7 @@ + * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. + * perf code: 0x00 + * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, +- * KBL,CML,ICL,TGL,TNT ++ * KBL,CML,ICL,ICX,TGL,TNT + * Scope: Package (physical package) + * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. + * perf code: 0x01 +@@ -71,8 +71,8 @@ + * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. 
+ * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, +- * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL, +- * TNT ++ * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, ++ * TGL,TNT + * Scope: Package (physical package) + * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. + * perf code: 0x03 +@@ -563,6 +563,14 @@ static const struct cstate_model icl_cstates __initconst = { + BIT(PERF_CSTATE_PKG_C10_RES), + }; + ++static const struct cstate_model icx_cstates __initconst = { ++ .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | ++ BIT(PERF_CSTATE_CORE_C6_RES), ++ ++ .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | ++ BIT(PERF_CSTATE_PKG_C6_RES), ++}; ++ + static const struct cstate_model slm_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C6_RES), +@@ -647,6 +655,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { + + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates), ++ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_cstates), ++ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates), ++ + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates), + { }, diff --git a/kernel-rt/debian/patches/0066-perf-x86-cstate-Add-SAPPHIRERAPIDS_X-CPU-support.patch b/kernel-rt/debian/patches/0066-perf-x86-cstate-Add-SAPPHIRERAPIDS_X-CPU-support.patch new file mode 100644 index 00000000..b7b0e122 --- /dev/null +++ b/kernel-rt/debian/patches/0066-perf-x86-cstate-Add-SAPPHIRERAPIDS_X-CPU-support.patch @@ -0,0 +1,62 @@ +From 9f37372d9a7d79331a114a4aa4a9f2c39870d530 Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Fri, 15 Apr 2022 18:45:20 +0800 +Subject: [PATCH] perf/x86/cstate: Add SAPPHIRERAPIDS_X CPU support + +From the perspective of Intel cstate residency counters, +SAPPHIRERAPIDS_X is the same as ICELAKE_X. + +Share the code with it. And update the comments for SAPPHIRERAPIDS_X. 
+ +Signed-off-by: Zhang Rui +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Kan Liang +Link: https://lkml.kernel.org/r/20220415104520.2737004-1-rui.zhang@intel.com +(cherry picked from commit 528c9f1daf20da14d3e7348dc4b1d7c55743ee64) +[mvb: Adapt to contextual differences in the v5.10 kernel, caused by + missing AlderLake, RocketLake and RaptorLake support. The contextual + differences are in the comments only.] +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/events/intel/cstate.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c +index 16f2d91201b7..ffe6c493baf6 100644 +--- a/arch/x86/events/intel/cstate.c ++++ b/arch/x86/events/intel/cstate.c +@@ -51,7 +51,7 @@ + * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, + * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, +- * TGL,TNT ++ * TGL,TNT,SPR + * Scope: Core + * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter + * perf code: 0x03 +@@ -61,7 +61,7 @@ + * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. + * perf code: 0x00 + * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, +- * KBL,CML,ICL,ICX,TGL,TNT ++ * KBL,CML,ICL,ICX,TGL,TNT,SPR + * Scope: Package (physical package) + * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. + * perf code: 0x01 +@@ -72,7 +72,7 @@ + * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, + * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, +- * TGL,TNT ++ * TGL,TNT,SPR + * Scope: Package (physical package) + * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. 
+ * perf code: 0x03 +@@ -657,6 +657,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates), ++ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &icx_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates), diff --git a/kernel-rt/debian/patches/0067-powercap-intel_rapl-support-new-layout-of-Psys-Power.patch b/kernel-rt/debian/patches/0067-powercap-intel_rapl-support-new-layout-of-Psys-Power.patch new file mode 100644 index 00000000..95183683 --- /dev/null +++ b/kernel-rt/debian/patches/0067-powercap-intel_rapl-support-new-layout-of-Psys-Power.patch @@ -0,0 +1,152 @@ +From 442408cb24dd389c50f6af222d1fb4b403458814 Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Tue, 7 Dec 2021 21:17:34 +0800 +Subject: [PATCH] powercap: intel_rapl: support new layout of Psys PowerLimit + Register on SPR + +On Sapphire Rapids, the layout of the Psys domain Power Limit Register +is different from what it was before. + +Enhance the code to support the new Psys PL register layout. + +Signed-off-by: Zhang Rui +Reported-and-tested-by: Alkattan Dana +[ rjw: Subject and changelog edits ] +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit 931da6a0de5d620425af4425344259e6ff46b654) +Signed-off-by: M. 
Vefa Bicakci +--- + drivers/powercap/intel_rapl_common.c | 61 +++++++++++++++++++++++++++- + include/linux/intel_rapl.h | 6 +++ + 2 files changed, 65 insertions(+), 2 deletions(-) + +diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c +index 285420c1eb7c..da90c7e52122 100644 +--- a/drivers/powercap/intel_rapl_common.c ++++ b/drivers/powercap/intel_rapl_common.c +@@ -61,6 +61,20 @@ + #define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff + #define PP_POLICY_MASK 0x1F + ++/* ++ * SPR has different layout for Psys Domain PowerLimit registers. ++ * There are 17 bits of PL1 and PL2 instead of 15 bits. ++ * The Enable bits and TimeWindow bits are also shifted as a result. ++ */ ++#define PSYS_POWER_LIMIT1_MASK 0x1FFFF ++#define PSYS_POWER_LIMIT1_ENABLE BIT(17) ++ ++#define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32) ++#define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49) ++ ++#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19) ++#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51) ++ + /* Non HW constants */ + #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ + #define RAPL_PRIMITIVE_DUMMY BIT(2) +@@ -97,6 +111,7 @@ struct rapl_defaults { + bool to_raw); + unsigned int dram_domain_energy_unit; + unsigned int psys_domain_energy_unit; ++ bool spr_psys_bits; + }; + static struct rapl_defaults *rapl_defaults; + +@@ -669,12 +684,51 @@ static struct rapl_primitive_info rpi[] = { + RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), + PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, + RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0), ++ PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0, ++ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), ++ PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32, ++ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), ++ PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17, ++ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), ++ PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49, ++ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 
0), ++ PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19, ++ RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), ++ PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51, ++ RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), + /* non-hardware */ + PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, + RAPL_PRIMITIVE_DERIVED), + {NULL, 0, 0, 0}, + }; + ++static enum rapl_primitives ++prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim) ++{ ++ if (!rapl_defaults->spr_psys_bits) ++ return prim; ++ ++ if (rd->id != RAPL_DOMAIN_PLATFORM) ++ return prim; ++ ++ switch (prim) { ++ case POWER_LIMIT1: ++ return PSYS_POWER_LIMIT1; ++ case POWER_LIMIT2: ++ return PSYS_POWER_LIMIT2; ++ case PL1_ENABLE: ++ return PSYS_PL1_ENABLE; ++ case PL2_ENABLE: ++ return PSYS_PL2_ENABLE; ++ case TIME_WINDOW1: ++ return PSYS_TIME_WINDOW1; ++ case TIME_WINDOW2: ++ return PSYS_TIME_WINDOW2; ++ default: ++ return prim; ++ } ++} ++ + /* Read primitive data based on its related struct rapl_primitive_info. + * if xlate flag is set, return translated data based on data units, i.e. + * time, energy, and power. 
+@@ -692,7 +746,8 @@ static int rapl_read_data_raw(struct rapl_domain *rd, + enum rapl_primitives prim, bool xlate, u64 *data) + { + u64 value; +- struct rapl_primitive_info *rp = &rpi[prim]; ++ enum rapl_primitives prim_fixed = prim_fixups(rd, prim); ++ struct rapl_primitive_info *rp = &rpi[prim_fixed]; + struct reg_action ra; + int cpu; + +@@ -738,7 +793,8 @@ static int rapl_write_data_raw(struct rapl_domain *rd, + enum rapl_primitives prim, + unsigned long long value) + { +- struct rapl_primitive_info *rp = &rpi[prim]; ++ enum rapl_primitives prim_fixed = prim_fixups(rd, prim); ++ struct rapl_primitive_info *rp = &rpi[prim_fixed]; + int cpu; + u64 bits; + struct reg_action ra; +@@ -983,6 +1039,7 @@ static const struct rapl_defaults rapl_defaults_spr_server = { + .set_floor_freq = set_floor_freq_default, + .compute_time_window = rapl_compute_time_window_core, + .psys_domain_energy_unit = 1000000000, ++ .spr_psys_bits = true, + }; + + static const struct rapl_defaults rapl_defaults_byt = { +diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h +index 50b8398ffd21..acf72c018142 100644 +--- a/include/linux/intel_rapl.h ++++ b/include/linux/intel_rapl.h +@@ -58,6 +58,12 @@ enum rapl_primitives { + THROTTLED_TIME, + PRIORITY_LEVEL, + ++ PSYS_POWER_LIMIT1, ++ PSYS_POWER_LIMIT2, ++ PSYS_PL1_ENABLE, ++ PSYS_PL2_ENABLE, ++ PSYS_TIME_WINDOW1, ++ PSYS_TIME_WINDOW2, + /* below are not raw primitive data */ + AVERAGE_POWER, + NR_RAPL_PRIMITIVES, diff --git a/kernel-rt/debian/patches/0068-perf-x86-rapl-Add-msr-mask-support.patch b/kernel-rt/debian/patches/0068-perf-x86-rapl-Add-msr-mask-support.patch new file mode 100644 index 00000000..8b0c2265 --- /dev/null +++ b/kernel-rt/debian/patches/0068-perf-x86-rapl-Add-msr-mask-support.patch @@ -0,0 +1,72 @@ +From af6f5c9695da99cd0d01ce63c5c60e70cc644fe8 Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Fri, 5 Feb 2021 00:18:14 +0800 +Subject: [PATCH] perf/x86/rapl: Add msr mask support + +In some cases, when probing a 
perf MSR, we're probing certain bits of the +MSR instead of the whole register, thus only these bits should be checked. + +For example, for RAPL ENERGY_STATUS MSR, only the lower 32 bits represents +the energy counter, and the higher 32bits are reserved. + +Introduce a new mask field in struct perf_msr to allow probing certain +bits of a MSR. + +This change is transparent to the current perf_msr_probe() users. + +Signed-off-by: Zhang Rui +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Andi Kleen +Link: https://lkml.kernel.org/r/20210204161816.12649-1-rui.zhang@intel.com +(cherry picked from commit ffb20c2e52e8709b5fc9951e8863e31efb1f2cba) +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/events/probe.c | 7 ++++++- + arch/x86/events/probe.h | 7 ++++--- + 2 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c +index 136a1e847254..600bf8d15c0c 100644 +--- a/arch/x86/events/probe.c ++++ b/arch/x86/events/probe.c +@@ -28,6 +28,7 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) + for (bit = 0; bit < cnt; bit++) { + if (!msr[bit].no_check) { + struct attribute_group *grp = msr[bit].grp; ++ u64 mask; + + /* skip entry with no group */ + if (!grp) +@@ -44,8 +45,12 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) + /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ + if (rdmsrl_safe(msr[bit].msr, &val)) + continue; ++ ++ mask = msr[bit].mask; ++ if (!mask) ++ mask = ~0ULL; + /* Disable zero counters if requested. 
*/ +- if (!zero && !val) ++ if (!zero && !(val & mask)) + continue; + + grp->is_visible = NULL; +diff --git a/arch/x86/events/probe.h b/arch/x86/events/probe.h +index 4c8e0afc5fb5..261b9bda24e3 100644 +--- a/arch/x86/events/probe.h ++++ b/arch/x86/events/probe.h +@@ -4,10 +4,11 @@ + #include + + struct perf_msr { +- u64 msr; +- struct attribute_group *grp; ++ u64 msr; ++ struct attribute_group *grp; + bool (*test)(int idx, void *data); +- bool no_check; ++ bool no_check; ++ u64 mask; + }; + + unsigned long diff --git a/kernel-rt/debian/patches/0069-perf-x86-rapl-Only-check-lower-32bits-for-RAPL-energ.patch b/kernel-rt/debian/patches/0069-perf-x86-rapl-Only-check-lower-32bits-for-RAPL-energ.patch new file mode 100644 index 00000000..39375b85 --- /dev/null +++ b/kernel-rt/debian/patches/0069-perf-x86-rapl-Only-check-lower-32bits-for-RAPL-energ.patch @@ -0,0 +1,52 @@ +From c28f6b6f72cfc2ee0be47de94be21ba3ca876b32 Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Fri, 5 Feb 2021 00:18:15 +0800 +Subject: [PATCH] perf/x86/rapl: Only check lower 32bits for RAPL energy + counters + +In the RAPL ENERGY_COUNTER MSR, only the lower 32bits represent the energy +counter. + +On previous platforms, the higher 32bits are reserved and always return +Zero. But on Intel SapphireRapids platform, the higher 32bits are reused +for other purpose and return non-zero value. + +Thus check the lower 32bits only for these ENERGY_COUNTER MSRs, to make +sure the RAPL PMU events are not added erroneously when higher 32bits +contain non-zero value. + +Signed-off-by: Zhang Rui +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Andi Kleen +Link: https://lkml.kernel.org/r/20210204161816.12649-2-rui.zhang@intel.com +(cherry picked from commit b6f78d3fba7f605f673185d7292d84af7576fdc1) +Signed-off-by: M. 
Vefa Bicakci +--- + arch/x86/events/rapl.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c +index 7dbbeaacd995..7ed25b2ba05f 100644 +--- a/arch/x86/events/rapl.c ++++ b/arch/x86/events/rapl.c +@@ -523,12 +523,15 @@ static bool test_msr(int idx, void *data) + return test_bit(idx, (unsigned long *) data); + } + ++/* Only lower 32bits of the MSR represents the energy counter */ ++#define RAPL_MSR_MASK 0xFFFFFFFF ++ + static struct perf_msr intel_rapl_msrs[] = { +- [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr }, +- [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr }, +- [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr }, +- [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr }, +- [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr }, ++ [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr, false, RAPL_MSR_MASK }, + }; + + /* diff --git a/kernel-rt/debian/patches/0070-perf-x86-rapl-Fix-psys-energy-event-on-Intel-SPR-pla.patch b/kernel-rt/debian/patches/0070-perf-x86-rapl-Fix-psys-energy-event-on-Intel-SPR-pla.patch new file mode 100644 index 00000000..f263d91d --- /dev/null +++ b/kernel-rt/debian/patches/0070-perf-x86-rapl-Fix-psys-energy-event-on-Intel-SPR-pla.patch @@ -0,0 +1,126 @@ +From f7d7c1c60866dc2d4e7c79f10a520bbbccfd7ceb Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Fri, 5 Feb 2021 00:18:16 
+0800 +Subject: [PATCH] perf/x86/rapl: Fix psys-energy event on Intel SPR platform + +There are several things special for the RAPL Psys energy counter, on +Intel Sapphire Rapids platform. +1. it contains one Psys master package, and only CPUs on the master + package can read valid value of the Psys energy counter, reading the + MSR on CPUs in the slave package returns 0. +2. The master package does not have to be Physical package 0. And when + all the CPUs on the Psys master package are offlined, we lose the Psys + energy counter, at runtime. +3. The Psys energy counter can be disabled by BIOS, while all the other + energy counters are not affected. + +It is not easy to handle all of these in the current RAPL PMU design +because +a) perf_msr_probe() validates the MSR on some random CPU, which may either + be in the Psys master package or in the Psys slave package. +b) all the RAPL events share the same PMU, and there is no API to remove + the psys-energy event cleanly, without affecting the other events in + the same PMU. + +This patch addresses the problems in a simple way. + +First, by setting .no_check bit for RAPL Psys MSR, the psys-energy event +is always added, so we don't have to check the Psys ENERGY_STATUS MSR on +master package. + +Then, by removing rapl_not_visible(), the psys-energy event is always +available in sysfs. This does not affect the previous code because, for +the RAPL MSRs with .no_check cleared, the .is_visible() callback is always +overridden in the perf_msr_probe() function. + +Note, although RAPL PMU is die-based, and the Psys energy counter MSR on +Intel SPR is package scope, this is not a problem because there is only +one die in each package on SPR. + +Signed-off-by: Zhang Rui +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Andi Kleen +Link: https://lkml.kernel.org/r/20210204161816.12649-3-rui.zhang@intel.com +(cherry picked from commit 838342a6d6b7ecc475dc052d4a405c4ffb3ad1b5) +Signed-off-by: M. 
Vefa Bicakci +--- + arch/x86/events/rapl.c | 21 +++++++++------------ + 1 file changed, 9 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c +index 7ed25b2ba05f..f42a70496a24 100644 +--- a/arch/x86/events/rapl.c ++++ b/arch/x86/events/rapl.c +@@ -454,16 +454,9 @@ static struct attribute *rapl_events_cores[] = { + NULL, + }; + +-static umode_t +-rapl_not_visible(struct kobject *kobj, struct attribute *attr, int i) +-{ +- return 0; +-} +- + static struct attribute_group rapl_events_cores_group = { + .name = "events", + .attrs = rapl_events_cores, +- .is_visible = rapl_not_visible, + }; + + static struct attribute *rapl_events_pkg[] = { +@@ -476,7 +469,6 @@ static struct attribute *rapl_events_pkg[] = { + static struct attribute_group rapl_events_pkg_group = { + .name = "events", + .attrs = rapl_events_pkg, +- .is_visible = rapl_not_visible, + }; + + static struct attribute *rapl_events_ram[] = { +@@ -489,7 +481,6 @@ static struct attribute *rapl_events_ram[] = { + static struct attribute_group rapl_events_ram_group = { + .name = "events", + .attrs = rapl_events_ram, +- .is_visible = rapl_not_visible, + }; + + static struct attribute *rapl_events_gpu[] = { +@@ -502,7 +493,6 @@ static struct attribute *rapl_events_gpu[] = { + static struct attribute_group rapl_events_gpu_group = { + .name = "events", + .attrs = rapl_events_gpu, +- .is_visible = rapl_not_visible, + }; + + static struct attribute *rapl_events_psys[] = { +@@ -515,7 +505,6 @@ static struct attribute *rapl_events_psys[] = { + static struct attribute_group rapl_events_psys_group = { + .name = "events", + .attrs = rapl_events_psys, +- .is_visible = rapl_not_visible, + }; + + static bool test_msr(int idx, void *data) +@@ -534,6 +523,14 @@ static struct perf_msr intel_rapl_msrs[] = { + [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr, false, RAPL_MSR_MASK }, + }; + ++static struct perf_msr intel_rapl_spr_msrs[] = { ++ [PERF_RAPL_PP0] 
= { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr, true, RAPL_MSR_MASK }, ++}; ++ + /* + * Force to PERF_RAPL_MAX size due to: + * - perf_msr_probe(PERF_RAPL_MAX) +@@ -764,7 +761,7 @@ static struct rapl_model model_spr = { + BIT(PERF_RAPL_PSYS), + .unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR, + .msr_power_unit = MSR_RAPL_POWER_UNIT, +- .rapl_msrs = intel_rapl_msrs, ++ .rapl_msrs = intel_rapl_spr_msrs, + }; + + static struct rapl_model model_amd_fam17h = { diff --git a/kernel-rt/debian/patches/0071-perf-x86-rapl-Use-standard-Energy-Unit-for-SPR-Dram-.patch b/kernel-rt/debian/patches/0071-perf-x86-rapl-Use-standard-Energy-Unit-for-SPR-Dram-.patch new file mode 100644 index 00000000..434edcf9 --- /dev/null +++ b/kernel-rt/debian/patches/0071-perf-x86-rapl-Use-standard-Energy-Unit-for-SPR-Dram-.patch @@ -0,0 +1,42 @@ +From 7320c7b7fe7d3b3c649dabf0643f646b6b553ade Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Sat, 24 Sep 2022 13:47:37 +0800 +Subject: [PATCH] perf/x86/rapl: Use standard Energy Unit for SPR Dram RAPL + domain + +Intel Xeon servers used to use a fixed energy resolution (15.3uj) for +Dram RAPL domain. But on SPR, Dram RAPL domain follows the standard +energy resolution as described in MSR_RAPL_POWER_UNIT. + +Remove the SPR Dram energy unit quirk. 
+ +Fixes: bcfd218b6679 ("perf/x86/rapl: Add support for Intel SPR platform") +Signed-off-by: Zhang Rui +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Kan Liang +Tested-by: Wang Wendy +Link: https://lkml.kernel.org/r/20220924054738.12076-3-rui.zhang@intel.com +(cherry picked from commit 80275ca9e525c198c7efe045c4a6cdb68a2ea763) +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/events/rapl.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c +index f42a70496a24..fd4572885378 100644 +--- a/arch/x86/events/rapl.c ++++ b/arch/x86/events/rapl.c +@@ -616,12 +616,8 @@ static int rapl_check_hw_unit(struct rapl_model *rm) + case RAPL_UNIT_QUIRK_INTEL_HSW: + rapl_hw_unit[PERF_RAPL_RAM] = 16; + break; +- /* +- * SPR shares the same DRAM domain energy unit as HSW, plus it +- * also has a fixed energy unit for Psys domain. +- */ ++ /* SPR uses a fixed energy unit for Psys domain. */ + case RAPL_UNIT_QUIRK_INTEL_SPR: +- rapl_hw_unit[PERF_RAPL_RAM] = 16; + rapl_hw_unit[PERF_RAPL_PSYS] = 0; + break; + default: diff --git a/kernel-rt/debian/patches/series b/kernel-rt/debian/patches/series index 87b5d55f..aba26832 100644 --- a/kernel-rt/debian/patches/series +++ b/kernel-rt/debian/patches/series @@ -53,3 +53,16 @@ 0056-cpufreq-intel_pstate-Add-Sapphire-Rapids-support-in-.patch 0057-Port-negative-dentries-limit-feature-from-3.10.patch 0058-platform-x86-intel-uncore-freq-Add-Sapphire-Rapids-s.patch +0059-intel_idle-add-SPR-support.patch +0060-intel_idle-add-preferred_cstates-module-argument.patch +0061-intel_idle-add-core-C6-optimization-for-SPR.patch +0062-intel_idle-Fix-the-preferred_cstates-module-paramete.patch +0063-intel_idle-Fix-SPR-C6-optimization.patch +0064-intel_idle-make-SPR-C1-and-C1E-be-independent.patch +0065-perf-x86-cstate-Add-ICELAKE_X-and-ICELAKE_D-support.patch +0066-perf-x86-cstate-Add-SAPPHIRERAPIDS_X-CPU-support.patch +0067-powercap-intel_rapl-support-new-layout-of-Psys-Power.patch 
+0068-perf-x86-rapl-Add-msr-mask-support.patch +0069-perf-x86-rapl-Only-check-lower-32bits-for-RAPL-energ.patch +0070-perf-x86-rapl-Fix-psys-energy-event-on-Intel-SPR-pla.patch +0071-perf-x86-rapl-Use-standard-Energy-Unit-for-SPR-Dram-.patch diff --git a/kernel-std/debian/patches/0058-intel_idle-add-SPR-support.patch b/kernel-std/debian/patches/0058-intel_idle-add-SPR-support.patch new file mode 100644 index 00000000..76c3d859 --- /dev/null +++ b/kernel-std/debian/patches/0058-intel_idle-add-SPR-support.patch @@ -0,0 +1,122 @@ +From 865f32679b6bbf92ebf4e0a74c36170c21c4250e Mon Sep 17 00:00:00 2001 +From: Artem Bityutskiy +Date: Wed, 2 Mar 2022 10:15:58 +0200 +Subject: [PATCH] intel_idle: add SPR support + +Add Sapphire Rapids Xeon support. + +Up until very recently, the C1 and C1E C-states were independent, but this +has changed in some new chips, including Sapphire Rapids Xeon (SPR). In these +chips the C1 and C1E states cannot be enabled at the same time. The "C1E +promotion" bit in 'MSR_IA32_POWER_CTL' also has its semantics changed a bit. + +Here are the C1, C1E, and "C1E promotion" bit rules on Xeons before SPR. + +1. If C1E promotion bit is disabled. + a. C1 requests end up with C1 C-state. + b. C1E requests end up with C1E C-state. +2. If C1E promotion bit is enabled. + a. C1 requests end up with C1E C-state. + b. C1E requests end up with C1E C-state. + +Here are the C1, C1E, and "C1E promotion" bit rules on Sapphire Rapids Xeon. +1. If C1E promotion bit is disabled. + a. C1 requests end up with C1 C-state. + b. C1E requests end up with C1 C-state. +2. If C1E promotion bit is enabled. + a. C1 requests end up with C1E C-state. + b. C1E requests end up with C1E C-state. + +Before SPR Xeon, the 'intel_idle' driver was disabling C1E promotion and was +exposing C1 and C1E as independent C-states. But on SPR, C1 and C1E cannot be +enabled at the same time. + +This patch adds both C1 and C1E states. 
However, C1E is marked as with the +"CPUIDLE_FLAG_UNUSABLE" flag, which means that in won't be registered by +default. The C1E promotion bit will be cleared, which means that by default +only C1 and C6 will be registered on SPR. + +The next patch will add an option for enabling C1E and disabling C1 on SPR. + +Signed-off-by: Artem Bityutskiy +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit 9edf3c0ffef0ec1bed8300315852b5c6a0997130) +[mvb: Fix merge conflict caused by a lack of IceLake-D support in + StarlingX.] +Signed-off-by: M. Vefa Bicakci +--- + drivers/idle/intel_idle.c | 47 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 47 insertions(+) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index b92b032fb6d1..7694d852b49d 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -778,6 +778,46 @@ static struct cpuidle_state icx_cstates[] __initdata = { + .enter = NULL } + }; + ++/* ++ * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice ++ * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in ++ * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 ++ * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then ++ * both C1 and C1E requests end up with C1, so there is effectively no C1E. ++ * ++ * By default we enable C1 and disable C1E by marking it with ++ * 'CPUIDLE_FLAG_UNUSABLE'. 
++ */ ++static struct cpuidle_state spr_cstates[] __initdata = { ++ { ++ .name = "C1", ++ .desc = "MWAIT 0x00", ++ .flags = MWAIT2flg(0x00), ++ .exit_latency = 1, ++ .target_residency = 1, ++ .enter = &intel_idle, ++ .enter_s2idle = intel_idle_s2idle, }, ++ { ++ .name = "C1E", ++ .desc = "MWAIT 0x01", ++ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | \ ++ CPUIDLE_FLAG_UNUSABLE, ++ .exit_latency = 2, ++ .target_residency = 4, ++ .enter = &intel_idle, ++ .enter_s2idle = intel_idle_s2idle, }, ++ { ++ .name = "C6", ++ .desc = "MWAIT 0x20", ++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .exit_latency = 290, ++ .target_residency = 800, ++ .enter = &intel_idle, ++ .enter_s2idle = intel_idle_s2idle, }, ++ { ++ .enter = NULL } ++}; ++ + static struct cpuidle_state atom_cstates[] __initdata = { + { + .name = "C1E", +@@ -1088,6 +1128,12 @@ static const struct idle_cpu idle_cpu_icx __initconst = { + .use_acpi = true, + }; + ++static const struct idle_cpu idle_cpu_spr __initconst = { ++ .state_table = spr_cstates, ++ .disable_promotion_to_c1e = true, ++ .use_acpi = true, ++}; ++ + static const struct idle_cpu idle_cpu_avn __initconst = { + .state_table = avn_cstates, + .disable_promotion_to_c1e = true, +@@ -1143,6 +1189,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), ++ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), diff --git a/kernel-std/debian/patches/0059-intel_idle-add-preferred_cstates-module-argument.patch b/kernel-std/debian/patches/0059-intel_idle-add-preferred_cstates-module-argument.patch new file mode 100644 index 00000000..92a70797 --- /dev/null +++ 
b/kernel-std/debian/patches/0059-intel_idle-add-preferred_cstates-module-argument.patch @@ -0,0 +1,130 @@ +From 740b87d322e28e14754cb74076799cf8ae44b8f4 Mon Sep 17 00:00:00 2001 +From: Artem Bityutskiy +Date: Wed, 2 Mar 2022 10:15:59 +0200 +Subject: [PATCH] intel_idle: add 'preferred_cstates' module argument + +On Sapphire Rapids Xeon (SPR) the C1 and C1E states are basically mutually +exclusive - only one of them can be enabled. By default, 'intel_idle' driver +enables C1 and disables C1E. However, some users prefer to use C1E instead of +C1, because it saves more energy. + +This patch adds a new module parameter ('preferred_cstates') for enabling C1E +and disabling C1. Here is the idea behind it. + +1. This option has effect only for "mutually exclusive" C-states like C1 and + C1E on SPR. +2. It does not have any effect on independent C-states, which do not require + other C-states to be disabled (most states on most platforms as of today). +3. For mutually exclusive C-states, the 'intel_idle' driver always has a + reasonable default, such as enabling C1 on SPR by default. On other + platforms, the default may be different. +4. Users can override the default using the 'preferred_cstates' parameter. +5. The parameter accepts the preferred C-states bit-mask, similarly to the + existing 'states_off' parameter. +6. This parameter is not limited to C1/C1E, and leaves room for supporting + other mutually exclusive C-states, if they come in the future. + +Today 'intel_idle' can only be compiled-in, which means that on SPR, in order +to disable C1 and enable C1E, users should boot with the following kernel +argument: intel_idle.preferred_cstates=4 + +Signed-off-by: Artem Bityutskiy +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit da0e58c038e60e7e65d30813ebdfe91687aa8a24) +[mvb: Fix merge conflicts caused by the lack of commit 642333384991 + ("intel_idle: Adjust the SKX C6 parameters if PC6 is disabled") + in StarlingX's v5.10 kernel.] +Signed-off-by: M. 
Vefa Bicakci +--- + drivers/idle/intel_idle.c | 46 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 46 insertions(+) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index 7694d852b49d..6837a5fa0214 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -66,6 +66,7 @@ static struct cpuidle_driver intel_idle_driver = { + /* intel_idle.max_cstate=0 disables driver */ + static int max_cstate = CPUIDLE_STATE_MAX - 1; + static unsigned int disabled_states_mask; ++static unsigned int preferred_states_mask; + + static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; + +@@ -1377,6 +1378,8 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } + static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } + #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ + ++static void c1e_promotion_enable(void); ++ + /** + * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. + * +@@ -1517,6 +1520,26 @@ static void __init sklh_idle_state_table_update(void) + skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ + } + ++/** ++ * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. ++ */ ++static void __init spr_idle_state_table_update(void) ++{ ++ /* Check if user prefers C1E over C1. */ ++ if (preferred_states_mask & BIT(2)) { ++ if (preferred_states_mask & BIT(1)) ++ /* Both can't be enabled, stick to the defaults. */ ++ return; ++ ++ spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; ++ spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; ++ ++ /* Enable C1E using the "C1E promotion" bit. 
*/ ++ c1e_promotion_enable(); ++ disable_promotion_to_c1e = false; ++ } ++} ++ + static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) + { + unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; +@@ -1548,6 +1571,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) + case INTEL_FAM6_SKYLAKE: + sklh_idle_state_table_update(); + break; ++ case INTEL_FAM6_SAPPHIRERAPIDS_X: ++ spr_idle_state_table_update(); ++ break; + } + + for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { +@@ -1625,6 +1651,15 @@ static void auto_demotion_disable(void) + wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); + } + ++static void c1e_promotion_enable(void) ++{ ++ unsigned long long msr_bits; ++ ++ rdmsrl(MSR_IA32_POWER_CTL, msr_bits); ++ msr_bits |= 0x2; ++ wrmsrl(MSR_IA32_POWER_CTL, msr_bits); ++} ++ + static void c1e_promotion_disable(void) + { + unsigned long long msr_bits; +@@ -1794,3 +1829,14 @@ module_param(max_cstate, int, 0444); + */ + module_param_named(states_off, disabled_states_mask, uint, 0444); + MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); ++/* ++ * Some platforms come with mutually exclusive C-states, so that if one is ++ * enabled, the other C-states must not be used. Example: C1 and C1E on ++ * Sapphire Rapids platform. This parameter allows for selecting the ++ * preferred C-states among the groups of mutually exclusive C-states - the ++ * selected C-states will be registered, the other C-states from the mutually ++ * exclusive group won't be registered. If the platform has no mutually ++ * exclusive C-states, this parameter has no effect. 
++ */ ++module_param_named(preferred_cstates, preferred_states_mask, uint, 0444); ++MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states"); diff --git a/kernel-std/debian/patches/0060-intel_idle-add-core-C6-optimization-for-SPR.patch b/kernel-std/debian/patches/0060-intel_idle-add-core-C6-optimization-for-SPR.patch new file mode 100644 index 00000000..1435350f --- /dev/null +++ b/kernel-std/debian/patches/0060-intel_idle-add-core-C6-optimization-for-SPR.patch @@ -0,0 +1,50 @@ +From 313c832d5e62ca6da89e74c393e908ad667820c8 Mon Sep 17 00:00:00 2001 +From: Artem Bityutskiy +Date: Wed, 2 Mar 2022 10:16:00 +0200 +Subject: [PATCH] intel_idle: add core C6 optimization for SPR + +Add a Sapphire Rapids Xeon C6 optimization, similar to what we have for Sky Lake +Xeon: if package C6 is disabled, adjust C6 exit latency and target residency to +match core C6 values, instead of using the default package C6 values. + +Signed-off-by: Artem Bityutskiy +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit 3a9cf77b60dc9839b6674943bb7c9dcd524b6294) +Signed-off-by: M. Vefa Bicakci +--- + drivers/idle/intel_idle.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index 6837a5fa0214..8e4c41dab7ce 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -1525,6 +1525,8 @@ static void __init sklh_idle_state_table_update(void) + */ + static void __init spr_idle_state_table_update(void) + { ++ unsigned long long msr; ++ + /* Check if user prefers C1E over C1. */ + if (preferred_states_mask & BIT(2)) { + if (preferred_states_mask & BIT(1)) +@@ -1538,6 +1540,19 @@ static void __init spr_idle_state_table_update(void) + c1e_promotion_enable(); + disable_promotion_to_c1e = false; + } ++ ++ /* ++ * By default, the C6 state assumes the worst-case scenario of package ++ * C6. However, if PC6 is disabled, we update the numbers to match ++ * core C6. 
++ */ ++ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); ++ ++ /* Limit value 2 and above allow for PC6. */ ++ if ((msr & 0x7) < 2) { ++ spr_cstates[2].exit_latency = 190; ++ spr_cstates[2].target_residency = 600; ++ } + } + + static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) diff --git a/kernel-std/debian/patches/0061-intel_idle-Fix-the-preferred_cstates-module-paramete.patch b/kernel-std/debian/patches/0061-intel_idle-Fix-the-preferred_cstates-module-paramete.patch new file mode 100644 index 00000000..a1f25f12 --- /dev/null +++ b/kernel-std/debian/patches/0061-intel_idle-Fix-the-preferred_cstates-module-paramete.patch @@ -0,0 +1,101 @@ +From 56c2804828b5f2479c0287a1afc3c0b23e44f19a Mon Sep 17 00:00:00 2001 +From: Artem Bityutskiy +Date: Wed, 27 Apr 2022 09:08:52 +0300 +Subject: [PATCH] intel_idle: Fix the 'preferred_cstates' module parameter + +Problem description. + +When user boots kernel up with the 'intel_idle.preferred_cstates=4' option, +we enable C1E and disable C1 states on Sapphire Rapids Xeon (SPR). In order +for C1E to work on SPR, we have to enable the C1E promotion bit on all +CPUs. However, we enable it only on one CPU. + +Fix description. + +The 'intel_idle' driver already has the infrastructure for disabling C1E +promotion on every CPU. This patch uses the same infrastructure for +enabling C1E promotion on every CPU. It changes the boolean +'disable_promotion_to_c1e' variable to a tri-state 'c1e_promotion' +variable. + +Tested on a 2-socket SPR system. I verified the following combinations: + + * C1E promotion enabled and disabled in BIOS. + * Booted with and without the 'intel_idle.preferred_cstates=4' kernel + argument. + +In all 4 cases C1E promotion was correctly set on all CPUs. + +Also tested on an old Broadwell system, just to make sure it does not cause +a regression. C1E promotion was correctly disabled on that system, both C1 +and C1E were exposed (as expected). 
+ +Fixes: da0e58c038e6 ("intel_idle: add 'preferred_cstates' module argument") +Reported-by: Jan Beulich +Signed-off-by: Artem Bityutskiy +[ rjw: Minor changelog edits ] +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit 39c184a6a9a7a99950b321d55fe713175cf1d404) +Signed-off-by: M. Vefa Bicakci +--- + drivers/idle/intel_idle.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index 8e4c41dab7ce..f7da2031c994 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -71,7 +71,12 @@ static unsigned int preferred_states_mask; + static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; + + static unsigned long auto_demotion_disable_flags; +-static bool disable_promotion_to_c1e; ++ ++static enum { ++ C1E_PROMOTION_PRESERVE, ++ C1E_PROMOTION_ENABLE, ++ C1E_PROMOTION_DISABLE ++} c1e_promotion = C1E_PROMOTION_PRESERVE; + + struct idle_cpu { + struct cpuidle_state *state_table; +@@ -1378,8 +1383,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } + static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } + #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ + +-static void c1e_promotion_enable(void); +- + /** + * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. + * +@@ -1537,8 +1540,7 @@ static void __init spr_idle_state_table_update(void) + spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; + + /* Enable C1E using the "C1E promotion" bit. 
*/ +- c1e_promotion_enable(); +- disable_promotion_to_c1e = false; ++ c1e_promotion = C1E_PROMOTION_ENABLE; + } + + /* +@@ -1706,7 +1708,9 @@ static int intel_idle_cpu_init(unsigned int cpu) + if (auto_demotion_disable_flags) + auto_demotion_disable(); + +- if (disable_promotion_to_c1e) ++ if (c1e_promotion == C1E_PROMOTION_ENABLE) ++ c1e_promotion_enable(); ++ else if (c1e_promotion == C1E_PROMOTION_DISABLE) + c1e_promotion_disable(); + + return 0; +@@ -1785,7 +1789,8 @@ static int __init intel_idle_init(void) + if (icpu) { + cpuidle_state_table = icpu->state_table; + auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; +- disable_promotion_to_c1e = icpu->disable_promotion_to_c1e; ++ if (icpu->disable_promotion_to_c1e) ++ c1e_promotion = C1E_PROMOTION_DISABLE; + if (icpu->use_acpi || force_use_acpi) + intel_idle_acpi_cst_extract(); + } else if (!intel_idle_acpi_cst_extract()) { diff --git a/kernel-std/debian/patches/0062-intel_idle-Fix-SPR-C6-optimization.patch b/kernel-std/debian/patches/0062-intel_idle-Fix-SPR-C6-optimization.patch new file mode 100644 index 00000000..1abe8f70 --- /dev/null +++ b/kernel-std/debian/patches/0062-intel_idle-Fix-SPR-C6-optimization.patch @@ -0,0 +1,47 @@ +From c57b1ab599fc1c02d50e908def519c9ee290822c Mon Sep 17 00:00:00 2001 +From: Artem Bityutskiy +Date: Wed, 27 Apr 2022 09:08:53 +0300 +Subject: [PATCH] intel_idle: Fix SPR C6 optimization + +The Sapphire Rapids (SPR) C6 optimization was added to the end of the +'spr_idle_state_table_update()' function. However, the function has a +'return' which may happen before the optimization has a chance to run. +And this may prevent the optimization from happening. + +This is an unlikely scenario, but possible if user boots with, say, +the 'intel_idle.preferred_cstates=6' kernel boot option. + +This patch fixes the issue by eliminating the problematic 'return' +statement. 
+ +Fixes: 3a9cf77b60dc ("intel_idle: add core C6 optimization for SPR") +Suggested-by: Jan Beulich +Reported-by: Jan Beulich +Signed-off-by: Artem Bityutskiy +[ rjw: Minor changelog edits ] +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit 7eac3bd38d18cd3317756649921b8264ddfee692) +Signed-off-by: M. Vefa Bicakci +--- + drivers/idle/intel_idle.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index f7da2031c994..fcd086916cfa 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -1531,11 +1531,9 @@ static void __init spr_idle_state_table_update(void) + unsigned long long msr; + + /* Check if user prefers C1E over C1. */ +- if (preferred_states_mask & BIT(2)) { +- if (preferred_states_mask & BIT(1)) +- /* Both can't be enabled, stick to the defaults. */ +- return; +- ++ if ((preferred_states_mask & BIT(2)) && ++ !(preferred_states_mask & BIT(1))) { ++ /* Disable C1 and enable C1E. */ + spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; + spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; + diff --git a/kernel-std/debian/patches/0063-intel_idle-make-SPR-C1-and-C1E-be-independent.patch b/kernel-std/debian/patches/0063-intel_idle-make-SPR-C1-and-C1E-be-independent.patch new file mode 100644 index 00000000..6051ea10 --- /dev/null +++ b/kernel-std/debian/patches/0063-intel_idle-make-SPR-C1-and-C1E-be-independent.patch @@ -0,0 +1,85 @@ +From 575c30eae2756105a2c7c99e66d0074416fe7c02 Mon Sep 17 00:00:00 2001 +From: Artem Bityutskiy +Date: Sat, 16 Jul 2022 09:26:55 +0300 +Subject: [PATCH] intel_idle: make SPR C1 and C1E be independent + +This patch partially reverts the changes made by the following commit: + +da0e58c038e6 intel_idle: add 'preferred_cstates' module argument + +As that commit describes, on early Sapphire Rapids Xeon platforms the C1 and +C1E states were mutually exclusive, so that users could only have either C1 and +C6, or C1E and C6. 
+ +However, Intel firmware engineers managed to remove this limitation and make C1 +and C1E to be completely independent, just like on previous Xeon platforms. + +Therefore, this patch: + * Removes commentary describing the old, and now non-existing SPR C1E + limitation. + * Marks SPR C1E as available by default. + * Removes the 'preferred_cstates' parameter handling for SPR. Both C1 and + C1E will be available regardless of 'preferred_cstates' value. + +We expect that all SPR systems are shipping with new firmware, which includes +the C1/C1E improvement. + +Cc: v5.18+ # v5.18+ +Signed-off-by: Artem Bityutskiy +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit 1548fac47a114b42063def551eb152a536ed9697) +[mvb: Adapt to context in drivers/idle/intel_idle.c] +Signed-off-by: M. Vefa Bicakci +--- + drivers/idle/intel_idle.c | 24 +----------------------- + 1 file changed, 1 insertion(+), 23 deletions(-) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index fcd086916cfa..8db9d2f7ee74 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -784,16 +784,6 @@ static struct cpuidle_state icx_cstates[] __initdata = { + .enter = NULL } + }; + +-/* +- * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice +- * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in +- * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 +- * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then +- * both C1 and C1E requests end up with C1, so there is effectively no C1E. +- * +- * By default we enable C1 and disable C1E by marking it with +- * 'CPUIDLE_FLAG_UNUSABLE'. 
+- */ + static struct cpuidle_state spr_cstates[] __initdata = { + { + .name = "C1", +@@ -806,8 +796,7 @@ static struct cpuidle_state spr_cstates[] __initdata = { + { + .name = "C1E", + .desc = "MWAIT 0x01", +- .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | \ +- CPUIDLE_FLAG_UNUSABLE, ++ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 2, + .target_residency = 4, + .enter = &intel_idle, +@@ -1530,17 +1519,6 @@ static void __init spr_idle_state_table_update(void) + { + unsigned long long msr; + +- /* Check if user prefers C1E over C1. */ +- if ((preferred_states_mask & BIT(2)) && +- !(preferred_states_mask & BIT(1))) { +- /* Disable C1 and enable C1E. */ +- spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; +- spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; +- +- /* Enable C1E using the "C1E promotion" bit. */ +- c1e_promotion = C1E_PROMOTION_ENABLE; +- } +- + /* + * By default, the C6 state assumes the worst-case scenario of package + * C6. However, if PC6 is disabled, we update the numbers to match diff --git a/kernel-std/debian/patches/0064-perf-x86-cstate-Add-ICELAKE_X-and-ICELAKE_D-support.patch b/kernel-std/debian/patches/0064-perf-x86-cstate-Add-ICELAKE_X-and-ICELAKE_D-support.patch new file mode 100644 index 00000000..e056e3a5 --- /dev/null +++ b/kernel-std/debian/patches/0064-perf-x86-cstate-Add-ICELAKE_X-and-ICELAKE_D-support.patch @@ -0,0 +1,96 @@ +From ce85ab13e4eca9cf31d9f0572736ec35edf8da94 Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Fri, 25 Jun 2021 21:32:47 +0800 +Subject: [PATCH] perf/x86/cstate: Add ICELAKE_X and ICELAKE_D support + +Introduce icx_cstates for ICELAKE_X and ICELAKE_D, and also update the +comments. + +On ICELAKE_X and ICELAKE_D, Core C1, Core C6, Package C2 and Package C6 +Residency MSRs are supported. + +This patch has been tested on real hardware. 
+ +Signed-off-by: Zhang Rui +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Kan Liang +Acked-by: Artem Bityutskiy +Link: https://lkml.kernel.org/r/20210625133247.2813-1-rui.zhang@intel.com +(cherry picked from commit 87bf399f86ecf36cc84fbeb7027a2995af649d6e) +[mvb: Adapt to context, caused by missing AlderLake and RocketLake + support in the v5.10 kernel. The contextual differences include + comments as well as the code.] +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/events/intel/cstate.c | 23 +++++++++++++++++------ + 1 file changed, 17 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c +index 4eb7ee5fed72..16f2d91201b7 100644 +--- a/arch/x86/events/intel/cstate.c ++++ b/arch/x86/events/intel/cstate.c +@@ -40,7 +40,7 @@ + * Model specific counters: + * MSR_CORE_C1_RES: CORE C1 Residency Counter + * perf code: 0x00 +- * Available model: SLM,AMT,GLM,CNL,TNT ++ * Available model: SLM,AMT,GLM,CNL,ICX,TNT + * Scope: Core (each processor core has a MSR) + * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter + * perf code: 0x01 +@@ -50,8 +50,8 @@ + * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter + * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, +- * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL, +- * TNT ++ * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, ++ * TGL,TNT + * Scope: Core + * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter + * perf code: 0x03 +@@ -61,7 +61,7 @@ + * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. + * perf code: 0x00 + * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, +- * KBL,CML,ICL,TGL,TNT ++ * KBL,CML,ICL,ICX,TGL,TNT + * Scope: Package (physical package) + * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. + * perf code: 0x01 +@@ -71,8 +71,8 @@ + * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. 
+ * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, +- * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL, +- * TNT ++ * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, ++ * TGL,TNT + * Scope: Package (physical package) + * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. + * perf code: 0x03 +@@ -563,6 +563,14 @@ static const struct cstate_model icl_cstates __initconst = { + BIT(PERF_CSTATE_PKG_C10_RES), + }; + ++static const struct cstate_model icx_cstates __initconst = { ++ .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | ++ BIT(PERF_CSTATE_CORE_C6_RES), ++ ++ .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | ++ BIT(PERF_CSTATE_PKG_C6_RES), ++}; ++ + static const struct cstate_model slm_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C6_RES), +@@ -647,6 +655,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { + + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates), ++ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_cstates), ++ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates), ++ + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates), + { }, diff --git a/kernel-std/debian/patches/0065-perf-x86-cstate-Add-SAPPHIRERAPIDS_X-CPU-support.patch b/kernel-std/debian/patches/0065-perf-x86-cstate-Add-SAPPHIRERAPIDS_X-CPU-support.patch new file mode 100644 index 00000000..12818942 --- /dev/null +++ b/kernel-std/debian/patches/0065-perf-x86-cstate-Add-SAPPHIRERAPIDS_X-CPU-support.patch @@ -0,0 +1,62 @@ +From e0c5f4f3fcf7499313beac43d8677a1098483acb Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Fri, 15 Apr 2022 18:45:20 +0800 +Subject: [PATCH] perf/x86/cstate: Add SAPPHIRERAPIDS_X CPU support + +From the perspective of Intel cstate residency counters, +SAPPHIRERAPIDS_X is the same as ICELAKE_X. + +Share the code with it. And update the comments for SAPPHIRERAPIDS_X. 
+ +Signed-off-by: Zhang Rui +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Kan Liang +Link: https://lkml.kernel.org/r/20220415104520.2737004-1-rui.zhang@intel.com +(cherry picked from commit 528c9f1daf20da14d3e7348dc4b1d7c55743ee64) +[mvb: Adapt to contextual differences in the v5.10 kernel, caused by + missing AlderLake, RocketLake and RaptorLake support. The contextual + differences are in the comments only.] +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/events/intel/cstate.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c +index 16f2d91201b7..ffe6c493baf6 100644 +--- a/arch/x86/events/intel/cstate.c ++++ b/arch/x86/events/intel/cstate.c +@@ -51,7 +51,7 @@ + * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, + * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, +- * TGL,TNT ++ * TGL,TNT,SPR + * Scope: Core + * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter + * perf code: 0x03 +@@ -61,7 +61,7 @@ + * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. + * perf code: 0x00 + * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, +- * KBL,CML,ICL,ICX,TGL,TNT ++ * KBL,CML,ICL,ICX,TGL,TNT,SPR + * Scope: Package (physical package) + * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. + * perf code: 0x01 +@@ -72,7 +72,7 @@ + * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, + * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, +- * TGL,TNT ++ * TGL,TNT,SPR + * Scope: Package (physical package) + * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. 
+ * perf code: 0x03 +@@ -657,6 +657,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates), ++ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &icx_cstates), + + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates), diff --git a/kernel-std/debian/patches/0066-powercap-intel_rapl-support-new-layout-of-Psys-Power.patch b/kernel-std/debian/patches/0066-powercap-intel_rapl-support-new-layout-of-Psys-Power.patch new file mode 100644 index 00000000..4944b549 --- /dev/null +++ b/kernel-std/debian/patches/0066-powercap-intel_rapl-support-new-layout-of-Psys-Power.patch @@ -0,0 +1,152 @@ +From ae6725d25cfe2fa7a45be90a06a953f9ebbad8e9 Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Tue, 7 Dec 2021 21:17:34 +0800 +Subject: [PATCH] powercap: intel_rapl: support new layout of Psys PowerLimit + Register on SPR + +On Sapphire Rapids, the layout of the Psys domain Power Limit Register +is different from what it was before. + +Enhance the code to support the new Psys PL register layout. + +Signed-off-by: Zhang Rui +Reported-and-tested-by: Alkattan Dana +[ rjw: Subject and changelog edits ] +Signed-off-by: Rafael J. Wysocki +(cherry picked from commit 931da6a0de5d620425af4425344259e6ff46b654) +Signed-off-by: M. 
Vefa Bicakci +--- + drivers/powercap/intel_rapl_common.c | 61 +++++++++++++++++++++++++++- + include/linux/intel_rapl.h | 6 +++ + 2 files changed, 65 insertions(+), 2 deletions(-) + +diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c +index 285420c1eb7c..da90c7e52122 100644 +--- a/drivers/powercap/intel_rapl_common.c ++++ b/drivers/powercap/intel_rapl_common.c +@@ -61,6 +61,20 @@ + #define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff + #define PP_POLICY_MASK 0x1F + ++/* ++ * SPR has different layout for Psys Domain PowerLimit registers. ++ * There are 17 bits of PL1 and PL2 instead of 15 bits. ++ * The Enable bits and TimeWindow bits are also shifted as a result. ++ */ ++#define PSYS_POWER_LIMIT1_MASK 0x1FFFF ++#define PSYS_POWER_LIMIT1_ENABLE BIT(17) ++ ++#define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32) ++#define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49) ++ ++#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19) ++#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51) ++ + /* Non HW constants */ + #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ + #define RAPL_PRIMITIVE_DUMMY BIT(2) +@@ -97,6 +111,7 @@ struct rapl_defaults { + bool to_raw); + unsigned int dram_domain_energy_unit; + unsigned int psys_domain_energy_unit; ++ bool spr_psys_bits; + }; + static struct rapl_defaults *rapl_defaults; + +@@ -669,12 +684,51 @@ static struct rapl_primitive_info rpi[] = { + RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), + PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, + RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0), ++ PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0, ++ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), ++ PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32, ++ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), ++ PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17, ++ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), ++ PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49, ++ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 
0), ++ PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19, ++ RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), ++ PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51, ++ RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), + /* non-hardware */ + PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, + RAPL_PRIMITIVE_DERIVED), + {NULL, 0, 0, 0}, + }; + ++static enum rapl_primitives ++prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim) ++{ ++ if (!rapl_defaults->spr_psys_bits) ++ return prim; ++ ++ if (rd->id != RAPL_DOMAIN_PLATFORM) ++ return prim; ++ ++ switch (prim) { ++ case POWER_LIMIT1: ++ return PSYS_POWER_LIMIT1; ++ case POWER_LIMIT2: ++ return PSYS_POWER_LIMIT2; ++ case PL1_ENABLE: ++ return PSYS_PL1_ENABLE; ++ case PL2_ENABLE: ++ return PSYS_PL2_ENABLE; ++ case TIME_WINDOW1: ++ return PSYS_TIME_WINDOW1; ++ case TIME_WINDOW2: ++ return PSYS_TIME_WINDOW2; ++ default: ++ return prim; ++ } ++} ++ + /* Read primitive data based on its related struct rapl_primitive_info. + * if xlate flag is set, return translated data based on data units, i.e. + * time, energy, and power. 
+@@ -692,7 +746,8 @@ static int rapl_read_data_raw(struct rapl_domain *rd, + enum rapl_primitives prim, bool xlate, u64 *data) + { + u64 value; +- struct rapl_primitive_info *rp = &rpi[prim]; ++ enum rapl_primitives prim_fixed = prim_fixups(rd, prim); ++ struct rapl_primitive_info *rp = &rpi[prim_fixed]; + struct reg_action ra; + int cpu; + +@@ -738,7 +793,8 @@ static int rapl_write_data_raw(struct rapl_domain *rd, + enum rapl_primitives prim, + unsigned long long value) + { +- struct rapl_primitive_info *rp = &rpi[prim]; ++ enum rapl_primitives prim_fixed = prim_fixups(rd, prim); ++ struct rapl_primitive_info *rp = &rpi[prim_fixed]; + int cpu; + u64 bits; + struct reg_action ra; +@@ -983,6 +1039,7 @@ static const struct rapl_defaults rapl_defaults_spr_server = { + .set_floor_freq = set_floor_freq_default, + .compute_time_window = rapl_compute_time_window_core, + .psys_domain_energy_unit = 1000000000, ++ .spr_psys_bits = true, + }; + + static const struct rapl_defaults rapl_defaults_byt = { +diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h +index 50b8398ffd21..acf72c018142 100644 +--- a/include/linux/intel_rapl.h ++++ b/include/linux/intel_rapl.h +@@ -58,6 +58,12 @@ enum rapl_primitives { + THROTTLED_TIME, + PRIORITY_LEVEL, + ++ PSYS_POWER_LIMIT1, ++ PSYS_POWER_LIMIT2, ++ PSYS_PL1_ENABLE, ++ PSYS_PL2_ENABLE, ++ PSYS_TIME_WINDOW1, ++ PSYS_TIME_WINDOW2, + /* below are not raw primitive data */ + AVERAGE_POWER, + NR_RAPL_PRIMITIVES, diff --git a/kernel-std/debian/patches/0067-perf-x86-rapl-Add-msr-mask-support.patch b/kernel-std/debian/patches/0067-perf-x86-rapl-Add-msr-mask-support.patch new file mode 100644 index 00000000..89cc835e --- /dev/null +++ b/kernel-std/debian/patches/0067-perf-x86-rapl-Add-msr-mask-support.patch @@ -0,0 +1,72 @@ +From 242f3941a50966c27b575041770180f11ef13de0 Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Fri, 5 Feb 2021 00:18:14 +0800 +Subject: [PATCH] perf/x86/rapl: Add msr mask support + +In some cases, when probing 
a perf MSR, we're probing certain bits of the +MSR instead of the whole register, thus only these bits should be checked. + +For example, for RAPL ENERGY_STATUS MSR, only the lower 32 bits represents +the energy counter, and the higher 32bits are reserved. + +Introduce a new mask field in struct perf_msr to allow probing certain +bits of a MSR. + +This change is transparent to the current perf_msr_probe() users. + +Signed-off-by: Zhang Rui +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Andi Kleen +Link: https://lkml.kernel.org/r/20210204161816.12649-1-rui.zhang@intel.com +(cherry picked from commit ffb20c2e52e8709b5fc9951e8863e31efb1f2cba) +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/events/probe.c | 7 ++++++- + arch/x86/events/probe.h | 7 ++++--- + 2 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c +index 136a1e847254..600bf8d15c0c 100644 +--- a/arch/x86/events/probe.c ++++ b/arch/x86/events/probe.c +@@ -28,6 +28,7 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) + for (bit = 0; bit < cnt; bit++) { + if (!msr[bit].no_check) { + struct attribute_group *grp = msr[bit].grp; ++ u64 mask; + + /* skip entry with no group */ + if (!grp) +@@ -44,8 +45,12 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) + /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ + if (rdmsrl_safe(msr[bit].msr, &val)) + continue; ++ ++ mask = msr[bit].mask; ++ if (!mask) ++ mask = ~0ULL; + /* Disable zero counters if requested. 
*/ +- if (!zero && !val) ++ if (!zero && !(val & mask)) + continue; + + grp->is_visible = NULL; +diff --git a/arch/x86/events/probe.h b/arch/x86/events/probe.h +index 4c8e0afc5fb5..261b9bda24e3 100644 +--- a/arch/x86/events/probe.h ++++ b/arch/x86/events/probe.h +@@ -4,10 +4,11 @@ + #include + + struct perf_msr { +- u64 msr; +- struct attribute_group *grp; ++ u64 msr; ++ struct attribute_group *grp; + bool (*test)(int idx, void *data); +- bool no_check; ++ bool no_check; ++ u64 mask; + }; + + unsigned long diff --git a/kernel-std/debian/patches/0068-perf-x86-rapl-Only-check-lower-32bits-for-RAPL-energ.patch b/kernel-std/debian/patches/0068-perf-x86-rapl-Only-check-lower-32bits-for-RAPL-energ.patch new file mode 100644 index 00000000..9765645a --- /dev/null +++ b/kernel-std/debian/patches/0068-perf-x86-rapl-Only-check-lower-32bits-for-RAPL-energ.patch @@ -0,0 +1,52 @@ +From 9be604696472a5f2d72d98593ae95974319156b3 Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Fri, 5 Feb 2021 00:18:15 +0800 +Subject: [PATCH] perf/x86/rapl: Only check lower 32bits for RAPL energy + counters + +In the RAPL ENERGY_COUNTER MSR, only the lower 32bits represent the energy +counter. + +On previous platforms, the higher 32bits are reserved and always return +Zero. But on Intel SapphireRapids platform, the higher 32bits are reused +for other purpose and return non-zero value. + +Thus check the lower 32bits only for these ENERGY_COUNTER MSRs, to make +sure the RAPL PMU events are not added erroneously when higher 32bits +contain non-zero value. + +Signed-off-by: Zhang Rui +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Andi Kleen +Link: https://lkml.kernel.org/r/20210204161816.12649-2-rui.zhang@intel.com +(cherry picked from commit b6f78d3fba7f605f673185d7292d84af7576fdc1) +Signed-off-by: M. 
Vefa Bicakci +--- + arch/x86/events/rapl.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c +index 7dbbeaacd995..7ed25b2ba05f 100644 +--- a/arch/x86/events/rapl.c ++++ b/arch/x86/events/rapl.c +@@ -523,12 +523,15 @@ static bool test_msr(int idx, void *data) + return test_bit(idx, (unsigned long *) data); + } + ++/* Only lower 32bits of the MSR represents the energy counter */ ++#define RAPL_MSR_MASK 0xFFFFFFFF ++ + static struct perf_msr intel_rapl_msrs[] = { +- [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr }, +- [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr }, +- [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr }, +- [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr }, +- [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr }, ++ [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr, false, RAPL_MSR_MASK }, + }; + + /* diff --git a/kernel-std/debian/patches/0069-perf-x86-rapl-Fix-psys-energy-event-on-Intel-SPR-pla.patch b/kernel-std/debian/patches/0069-perf-x86-rapl-Fix-psys-energy-event-on-Intel-SPR-pla.patch new file mode 100644 index 00000000..243ceffe --- /dev/null +++ b/kernel-std/debian/patches/0069-perf-x86-rapl-Fix-psys-energy-event-on-Intel-SPR-pla.patch @@ -0,0 +1,126 @@ +From 39e3f6539a9ba8c78c4116dfef027c002eafdd60 Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Fri, 5 Feb 2021 00:18:16 
+0800 +Subject: [PATCH] perf/x86/rapl: Fix psys-energy event on Intel SPR platform + +There are several things special for the RAPL Psys energy counter, on +Intel Sapphire Rapids platform. +1. it contains one Psys master package, and only CPUs on the master + package can read valid value of the Psys energy counter, reading the + MSR on CPUs in the slave package returns 0. +2. The master package does not have to be Physical package 0. And when + all the CPUs on the Psys master package are offlined, we lose the Psys + energy counter, at runtime. +3. The Psys energy counter can be disabled by BIOS, while all the other + energy counters are not affected. + +It is not easy to handle all of these in the current RAPL PMU design +because +a) perf_msr_probe() validates the MSR on some random CPU, which may either + be in the Psys master package or in the Psys slave package. +b) all the RAPL events share the same PMU, and there is no API to remove + the psys-energy event cleanly, without affecting the other events in + the same PMU. + +This patch addresses the problems in a simple way. + +First, by setting .no_check bit for RAPL Psys MSR, the psys-energy event +is always added, so we don't have to check the Psys ENERGY_STATUS MSR on +master package. + +Then, by removing rapl_not_visible(), the psys-energy event is always +available in sysfs. This does not affect the previous code because, for +the RAPL MSRs with .no_check cleared, the .is_visible() callback is always +overridden in the perf_msr_probe() function. + +Note, although RAPL PMU is die-based, and the Psys energy counter MSR on +Intel SPR is package scope, this is not a problem because there is only +one die in each package on SPR. + +Signed-off-by: Zhang Rui +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Andi Kleen +Link: https://lkml.kernel.org/r/20210204161816.12649-3-rui.zhang@intel.com +(cherry picked from commit 838342a6d6b7ecc475dc052d4a405c4ffb3ad1b5) +Signed-off-by: M. 
Vefa Bicakci +--- + arch/x86/events/rapl.c | 21 +++++++++------------ + 1 file changed, 9 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c +index 7ed25b2ba05f..f42a70496a24 100644 +--- a/arch/x86/events/rapl.c ++++ b/arch/x86/events/rapl.c +@@ -454,16 +454,9 @@ static struct attribute *rapl_events_cores[] = { + NULL, + }; + +-static umode_t +-rapl_not_visible(struct kobject *kobj, struct attribute *attr, int i) +-{ +- return 0; +-} +- + static struct attribute_group rapl_events_cores_group = { + .name = "events", + .attrs = rapl_events_cores, +- .is_visible = rapl_not_visible, + }; + + static struct attribute *rapl_events_pkg[] = { +@@ -476,7 +469,6 @@ static struct attribute *rapl_events_pkg[] = { + static struct attribute_group rapl_events_pkg_group = { + .name = "events", + .attrs = rapl_events_pkg, +- .is_visible = rapl_not_visible, + }; + + static struct attribute *rapl_events_ram[] = { +@@ -489,7 +481,6 @@ static struct attribute *rapl_events_ram[] = { + static struct attribute_group rapl_events_ram_group = { + .name = "events", + .attrs = rapl_events_ram, +- .is_visible = rapl_not_visible, + }; + + static struct attribute *rapl_events_gpu[] = { +@@ -502,7 +493,6 @@ static struct attribute *rapl_events_gpu[] = { + static struct attribute_group rapl_events_gpu_group = { + .name = "events", + .attrs = rapl_events_gpu, +- .is_visible = rapl_not_visible, + }; + + static struct attribute *rapl_events_psys[] = { +@@ -515,7 +505,6 @@ static struct attribute *rapl_events_psys[] = { + static struct attribute_group rapl_events_psys_group = { + .name = "events", + .attrs = rapl_events_psys, +- .is_visible = rapl_not_visible, + }; + + static bool test_msr(int idx, void *data) +@@ -534,6 +523,14 @@ static struct perf_msr intel_rapl_msrs[] = { + [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr, false, RAPL_MSR_MASK }, + }; + ++static struct perf_msr intel_rapl_spr_msrs[] = { ++ [PERF_RAPL_PP0] 
= { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr, false, RAPL_MSR_MASK }, ++ [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr, true, RAPL_MSR_MASK }, ++}; ++ + /* + * Force to PERF_RAPL_MAX size due to: + * - perf_msr_probe(PERF_RAPL_MAX) +@@ -764,7 +761,7 @@ static struct rapl_model model_spr = { + BIT(PERF_RAPL_PSYS), + .unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR, + .msr_power_unit = MSR_RAPL_POWER_UNIT, +- .rapl_msrs = intel_rapl_msrs, ++ .rapl_msrs = intel_rapl_spr_msrs, + }; + + static struct rapl_model model_amd_fam17h = { diff --git a/kernel-std/debian/patches/0070-perf-x86-rapl-Use-standard-Energy-Unit-for-SPR-Dram-.patch b/kernel-std/debian/patches/0070-perf-x86-rapl-Use-standard-Energy-Unit-for-SPR-Dram-.patch new file mode 100644 index 00000000..d22929e7 --- /dev/null +++ b/kernel-std/debian/patches/0070-perf-x86-rapl-Use-standard-Energy-Unit-for-SPR-Dram-.patch @@ -0,0 +1,42 @@ +From c37093070323090f694d7369f16942d49a04e194 Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Sat, 24 Sep 2022 13:47:37 +0800 +Subject: [PATCH] perf/x86/rapl: Use standard Energy Unit for SPR Dram RAPL + domain + +Intel Xeon servers used to use a fixed energy resolution (15.3uj) for +Dram RAPL domain. But on SPR, Dram RAPL domain follows the standard +energy resolution as described in MSR_RAPL_POWER_UNIT. + +Remove the SPR Dram energy unit quirk. 
+ +Fixes: bcfd218b6679 ("perf/x86/rapl: Add support for Intel SPR platform") +Signed-off-by: Zhang Rui +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Kan Liang +Tested-by: Wang Wendy +Link: https://lkml.kernel.org/r/20220924054738.12076-3-rui.zhang@intel.com +(cherry picked from commit 80275ca9e525c198c7efe045c4a6cdb68a2ea763) +Signed-off-by: M. Vefa Bicakci +--- + arch/x86/events/rapl.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c +index f42a70496a24..fd4572885378 100644 +--- a/arch/x86/events/rapl.c ++++ b/arch/x86/events/rapl.c +@@ -616,12 +616,8 @@ static int rapl_check_hw_unit(struct rapl_model *rm) + case RAPL_UNIT_QUIRK_INTEL_HSW: + rapl_hw_unit[PERF_RAPL_RAM] = 16; + break; +- /* +- * SPR shares the same DRAM domain energy unit as HSW, plus it +- * also has a fixed energy unit for Psys domain. +- */ ++ /* SPR uses a fixed energy unit for Psys domain. */ + case RAPL_UNIT_QUIRK_INTEL_SPR: +- rapl_hw_unit[PERF_RAPL_RAM] = 16; + rapl_hw_unit[PERF_RAPL_PSYS] = 0; + break; + default: diff --git a/kernel-std/debian/patches/series b/kernel-std/debian/patches/series index d1c58691..79d4032e 100644 --- a/kernel-std/debian/patches/series +++ b/kernel-std/debian/patches/series @@ -52,3 +52,16 @@ 0055-cpufreq-intel_pstate-Add-Sapphire-Rapids-support-in-.patch 0056-Port-negative-dentries-limit-feature-from-3.10.patch 0057-platform-x86-intel-uncore-freq-Add-Sapphire-Rapids-s.patch +0058-intel_idle-add-SPR-support.patch +0059-intel_idle-add-preferred_cstates-module-argument.patch +0060-intel_idle-add-core-C6-optimization-for-SPR.patch +0061-intel_idle-Fix-the-preferred_cstates-module-paramete.patch +0062-intel_idle-Fix-SPR-C6-optimization.patch +0063-intel_idle-make-SPR-C1-and-C1E-be-independent.patch +0064-perf-x86-cstate-Add-ICELAKE_X-and-ICELAKE_D-support.patch +0065-perf-x86-cstate-Add-SAPPHIRERAPIDS_X-CPU-support.patch +0066-powercap-intel_rapl-support-new-layout-of-Psys-Power.patch 
+0067-perf-x86-rapl-Add-msr-mask-support.patch +0068-perf-x86-rapl-Only-check-lower-32bits-for-RAPL-energ.patch +0069-perf-x86-rapl-Fix-psys-energy-event-on-Intel-SPR-pla.patch +0070-perf-x86-rapl-Use-standard-Energy-Unit-for-SPR-Dram-.patch