From e4dcdfc1fe32b2e972ea378b4a520389755758e8 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 7 Sep 2023 11:02:07 -0700 Subject: [PATCH 01/22] cpufreq: intel_pstate: Revise global turbo disable check commit 37b6ddba967c601479bea418a7ac6ff16b6232b7 upstream. Setting global turbo flag based on CPU 0 P-state limits is problematic as it limits max P-state request on every CPU on the system just based on its P-state limits. There are two cases in which global.turbo_disabled flag is set: - When the MSR_IA32_MISC_ENABLE_TURBO_DISABLE bit is set to 1 in the MSR MSR_IA32_MISC_ENABLE. This bit can be only changed by the system BIOS before power up. - When the max non turbo P-state is same as max turbo P-state for CPU 0. The second check is not a valid to decide global turbo state based on the CPU 0. CPU 0 max turbo P-state can be same as max non turbo P-state, but for other CPUs this may not be true. There is no guarantee that max P-state limits are same for every CPU. This is possible that during fusing max P-state for a CPU is constrained. Also with the Intel Speed Select performance profile, CPU 0 may not be present in all profiles. In this case the max non turbo and turbo P-state can be set to the lowest possible P-state by the hardware when switched to such profile. Since max non turbo and turbo P-state is same, global.turbo_disabled flag will be set. Once global.turbo_disabled is set, any scaling max and min frequency update for any CPU will result in its max P-state constrained to the max non turbo P-state. Hence remove the check of max non turbo P-state equal to max turbo P-state of CPU 0 to set global turbo disabled flag. Intel-SIG: commit 37b6ddba967c cpufreq: intel_pstate: Revise global turbo disable check. Backport intel_pstate driver update for 6.6 Signed-off-by: Srinivas Pandruvada [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ba7cecf7128b..f369611bd37e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -599,13 +599,9 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) static inline void update_turbo_state(void) { u64 misc_en; - struct cpudata *cpu; - cpu = all_cpu_data[0]; rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); - global.turbo_disabled = - (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || - cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); + global.turbo_disabled = misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE; } static int min_perf_pct_min(void) From 749727732294899b97b7337ed73ec0ff9daed15e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 20 Nov 2023 10:59:42 -0800 Subject: [PATCH 02/22] cpufreq: intel_pstate: Prioritize firmware-provided balance performance EPP commit 2719675fa8111a8d7a060133e1dd4797d20c9754 upstream. The platform firmware can provide a balance performance EPP value by enabling HWP and programming the EPP to the desired value. However, currently this only takes effect for processors listed in intel_epp_balance_perf[], so in order to enable a new processor model to utilize this mechanism, that table needs to be updated. It arguably should not be necessary to modify the kernel to work properly with every new generation of processors, though, and distributions that don't always ship the most recent kernels should be able to run reasonably well on new hardware without code changes. For this reason, move the check to avoid updating the EPP when the balance performance EPP is unmodified from the power-up default of 0x80 after the check that allows the firmware-provided balance performance EPP value to be retrieved. This will cause the code to always look for the firmware- provided value before consulting intel_epp_balance_perf[] and the handling of new hardware will not depend on whether or not that thable has been updated yet. Intel-SIG: commit 2719675fa811 cpufreq: intel_pstate: Prioritize firmware-provided balance performance EPP. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Srinivas Pandruvada [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f369611bd37e..458528064b77 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1719,13 +1719,6 @@ static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) { cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); - /* - * If this CPU gen doesn't call for change in balance_perf - * EPP return. - */ - if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) - return; - /* * If the EPP is set by firmware, which means that firmware enabled HWP * - Is equal or less than 0x80 (default balance_perf EPP) @@ -1738,6 +1731,13 @@ static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) return; } + /* + * If this CPU gen doesn't call for change in balance_perf + * EPP return. + */ + if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) + return; + /* * Use hard coded value per gen to update the balance_perf * and default EPP. From a82506e6dedb2bcb9ae53f17512ff392f589e530 Mon Sep 17 00:00:00 2001 From: Zhenguo Yao Date: Wed, 13 Dec 2023 18:28:08 +0800 Subject: [PATCH 03/22] cpufreq: intel_pstate: Add Emerald Rapids support in no-HWP mode commit e95013156ad88e6a1e1db6545881f49183e2ee0a upstream. Users may disable HWP in firmware, in which case intel_pstate will give up unless the CPU model is explicitly supported. See also the following past commits: - commit df51f287b5de ("cpufreq: intel_pstate: Add Sapphire Rapids support in no-HWP mode") - commit d8de7a44e11f ("cpufreq: intel_pstate: Add Skylake servers support") - commit 706c5328851d ("cpufreq: intel_pstate: Add Cometlake support in no-HWP mode") - commit fbdc21e9b038 ("cpufreq: intel_pstate: Add Icelake servers support in no-HWP mode") - commit 71bb5c82aaae ("cpufreq: intel_pstate: Add Tigerlake support in no-HWP mode") Intel-SIG: commit e95013156ad8 cpufreq: intel_pstate: Add Emerald Rapids support in no-HWP mode. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Zhenguo Yao Acked-by: Srinivas Pandruvada [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 458528064b77..99a8b1b617f6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2434,6 +2434,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { X86_MATCH(ICELAKE_X, core_funcs), X86_MATCH(TIGERLAKE, core_funcs), X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), + X86_MATCH(EMERALDRAPIDS_X, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); From afabdefbaa08d63ec056db94d5732f880deacd53 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 13 Feb 2024 12:16:00 +0100 Subject: [PATCH 04/22] cpufreq: intel_pstate: remove cpudata::prev_cummulative_iowait commit 4615ac9010be84d676f9e893e5a7ea4b5febd1e8 upstream. Commit 09c448d3c61f ("cpufreq: intel_pstate: Use IOWAIT flag in Atom algorithm") removed the last user of cpudata::prev_cummulative_iowait. Remove the member too. Found by https://github.com/jirislaby/clang-struct. Intel-SIG: commit 4615ac9010be cpufreq: intel_pstate: remove cpudata. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 99a8b1b617f6..94c7fc893406 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -202,8 +202,6 @@ struct global_params { * @prev_aperf: Last APERF value read from APERF MSR * @prev_mperf: Last MPERF value read from MPERF MSR * @prev_tsc: Last timestamp counter (TSC) value - * @prev_cummulative_iowait: IO Wait time difference from last and - * current sample * @sample: Storage for storing last Sample data * @min_perf_ratio: Minimum capacity in terms of PERF or HWP ratios * @max_perf_ratio: Maximum capacity in terms of PERF or HWP ratios @@ -242,7 +240,6 @@ struct cpudata { u64 prev_aperf; u64 prev_mperf; u64 prev_tsc; - u64 prev_cummulative_iowait; struct sample sample; int32_t min_perf_ratio; int32_t max_perf_ratio; From 65b4db9c872fe3d7f4b72d1f7f1779d635a4f2a2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Mar 2024 20:29:43 +0100 Subject: [PATCH 05/22] cpufreq: intel_pstate: Drop redundant locking from intel_pstate_driver_cleanup() commit f186b2dace86f36cc08872b693185eaf71128898 upstream. Remove the spinlock locking from intel_pstate_driver_cleanup() as it is not necessary because no other code accessing all_cpu_data[] can run in parallel with that function. Had the locking been necessary, though, it would have been incorrect because the lock in question is acquired from a hardirq handler and it cannot be acquired from thread context without disabling interrupts. Intel-SIG: commit f186b2dace86 cpufreq: intel_pstate: Drop redundant locking from. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 94c7fc893406..ccc583eb7031 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3134,10 +3134,8 @@ static void intel_pstate_driver_cleanup(void) if (intel_pstate_driver == &intel_pstate) intel_pstate_clear_update_util_hook(cpu); - raw_spin_lock(&hwp_notify_lock); kfree(all_cpu_data[cpu]); WRITE_ONCE(all_cpu_data[cpu], NULL); - raw_spin_unlock(&hwp_notify_lock); } } cpus_read_unlock(); From 4369b8551bb274a902f0f9831066c1f883f974ee Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Mar 2024 20:32:02 +0100 Subject: [PATCH 06/22] cpufreq: intel_pstate: Wait for canceled delayed work to complete commit 432acb219af4edecdd11d360f30b7cc643524db8 upstream. Make intel_pstate_disable_hwp_interrupt() wait for canceled delayed work to complete to avoid leftover work items running when it returns which may be during driver unregistration and may confuse things going forward. Intel-SIG: commit 432acb219af4 cpufreq: intel_pstate: Wait for canceled delayed work to complete. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ccc583eb7031..f9558baae06b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1682,6 +1682,7 @@ void notify_hwp_interrupt(void) static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) { unsigned long flags; + bool cancel_work; if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) return; @@ -1690,9 +1691,11 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); raw_spin_lock_irqsave(&hwp_notify_lock, flags); - if (cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask)) - cancel_delayed_work(&cpudata->hwp_notify_work); + cancel_work = cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask); raw_spin_unlock_irqrestore(&hwp_notify_lock, flags); + + if (cancel_work) + cancel_delayed_work_sync(&cpudata->hwp_notify_work); } static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata) From 6a5952479fc1ca08f5abaa5d9af6edc1c86e1f6c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 28 Mar 2024 19:52:06 +0100 Subject: [PATCH 07/22] cpufreq: intel_pstate: Get rid of unnecessary READ_ONCE() annotations commit 0f2828e17b6f41b8b345f0031e3fe58529991748 upstream. Drop two redundant checks involving READ_ONCE() from notify_hwp_interrupt() and make it check hwp_active without READ_ONCE() which is not necessary, because that variable is only set once during the early initialization of the driver. In order to make that clear, annotate hwp_active with __ro_after_init. Intel-SIG: commit 0f2828e17b6f cpufreq: intel_pstate: Get rid of unnecessary READ_ONCE() annotations. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f9558baae06b..789309002528 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -292,7 +292,7 @@ struct pstate_funcs { static struct pstate_funcs pstate_funcs __read_mostly; -static int hwp_active __read_mostly; +static bool hwp_active __ro_after_init; static int hwp_mode_bdw __read_mostly; static bool per_cpu_limits __read_mostly; static bool hwp_boost __read_mostly; @@ -1635,11 +1635,10 @@ static cpumask_t hwp_intr_enable_mask; void notify_hwp_interrupt(void) { unsigned int this_cpu = smp_processor_id(); - struct cpudata *cpudata; unsigned long flags; u64 value; - if (!READ_ONCE(hwp_active) || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) + if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) return; rdmsrl_safe(MSR_HWP_STATUS, &value); @@ -1651,24 +1650,8 @@ void notify_hwp_interrupt(void) if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask)) goto ack_intr; - /* - * Currently we never free all_cpu_data. And we can't reach here - * without this allocated. But for safety for future changes, added - * check. - */ - if (unlikely(!READ_ONCE(all_cpu_data))) - goto ack_intr; - - /* - * The free is done during cleanup, when cpufreq registry is failed. - * We wouldn't be here if it fails on init or switch status. But for - * future changes, added check. - */ - cpudata = READ_ONCE(all_cpu_data[this_cpu]); - if (unlikely(!cpudata)) - goto ack_intr; - - schedule_delayed_work(&cpudata->hwp_notify_work, msecs_to_jiffies(10)); + schedule_delayed_work(&all_cpu_data[this_cpu]->hwp_notify_work, + msecs_to_jiffies(10)); raw_spin_unlock_irqrestore(&hwp_notify_lock, flags); @@ -3466,7 +3449,7 @@ static int __init intel_pstate_init(void) * deal with it. */ if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || hwp_forced) { - WRITE_ONCE(hwp_active, 1); + hwp_active = true; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; intel_cpufreq.attr = hwp_cpufreq_attrs; From c0a0faf0548581d29de04c85279de4b7f8624483 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Mar 2024 20:34:06 +0100 Subject: [PATCH 08/22] cpufreq: intel_pstate: Use __ro_after_init for three variables commit e97a98238da68aea4a0be0b2cc40e39527c880b1 upstream. There are at least 3 variables in intel_pstate that do not get updated after they have been initialized, so annotate them with __ro_after_init. Intel-SIG: commit e97a98238da6 cpufreq: intel_pstate: Use __ro_after_init for three variables. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 789309002528..bc50f803f1e5 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -293,10 +293,10 @@ struct pstate_funcs { static struct pstate_funcs pstate_funcs __read_mostly; static bool hwp_active __ro_after_init; -static int hwp_mode_bdw __read_mostly; -static bool per_cpu_limits __read_mostly; +static int hwp_mode_bdw __ro_after_init; +static bool per_cpu_limits __ro_after_init; +static bool hwp_forced __ro_after_init; static bool hwp_boost __read_mostly; -static bool hwp_forced __read_mostly; static struct cpufreq_driver *intel_pstate_driver __read_mostly; From bbe78665b28d042934bc48ff964d9c9659e8ae02 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Mar 2024 18:01:58 +0100 Subject: [PATCH 09/22] cpufreq: intel_pstate: Fold intel_pstate_max_within_limits() into caller commit 032c5565eb80edb6f2faeb31939540c897987119 upstream. Fold intel_pstate_max_within_limits() into its only caller. No functional impact. Intel-SIG: commit 032c5565eb80 cpufreq: intel_pstate: Fold intel_pstate_max_within_limits() into caller. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index bc50f803f1e5..559df7042858 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2014,14 +2014,6 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu) intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); } -static void intel_pstate_max_within_limits(struct cpudata *cpu) -{ - int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio); - - update_turbo_state(); - intel_pstate_set_pstate(cpu, pstate); -} - static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { int perf_ctl_max_phys = pstate_funcs.get_max_physical(cpu->cpu); @@ -2596,12 +2588,15 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) intel_pstate_update_perf_limits(cpu, policy->min, policy->max); if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { + int pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio); + /* * NOHZ_FULL CPUs need this as the governor callback may not * be invoked on them. */ intel_pstate_clear_update_util_hook(policy->cpu); - intel_pstate_max_within_limits(cpu); + update_turbo_state(); + intel_pstate_set_pstate(cpu, pstate); } else { intel_pstate_set_update_util_hook(policy->cpu); } From 855eac63fd8d01e0663cfa857265a7db65cd55c1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Mar 2024 18:02:42 +0100 Subject: [PATCH 10/22] cpufreq: intel_pstate: Do not update global.turbo_disabled after initialization commit 0940f1a8011fd69be5082015068e0dc31c800c20 upstream. The global.turbo_disabled is updated quite often, especially in the passive mode in which case it is updated every time the scheduler calls into the driver. However, this is generally not necessary and it adds MSR read overhead to scheduler code paths (and that particular MSR is slow to read). For this reason, make the driver read MSR_IA32_MISC_ENABLE_TURBO_DISABLE just once at the cpufreq driver registration time and remove all of the in-flight updates of global.turbo_disabled. Intel-SIG: commit 0940f1a8011f cpufreq: intel_pstate: Do not update global.turbo_disabled. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 51 ++++++---------------------------- 1 file changed, 8 insertions(+), 43 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 559df7042858..352e4d7e7e9f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -173,7 +173,6 @@ struct vid_data { * based on the MSR_IA32_MISC_ENABLE value and whether or * not the maximum reported turbo P-state is different from * the maximum reported non-turbo one. - * @turbo_disabled_mf: The @turbo_disabled value reflected by cpuinfo.max_freq. * @min_perf_pct: Minimum capacity limit in percent of the maximum turbo * P-state capacity. * @max_perf_pct: Maximum capacity limit in percent of the maximum turbo @@ -182,7 +181,6 @@ struct vid_data { struct global_params { bool no_turbo; bool turbo_disabled; - bool turbo_disabled_mf; int max_perf_pct; int min_perf_pct; }; @@ -593,12 +591,13 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq); } -static inline void update_turbo_state(void) +static bool turbo_is_disabled(void) { u64 misc_en; rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); - global.turbo_disabled = misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE; + + return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); } static int min_perf_pct_min(void) @@ -1153,40 +1152,16 @@ static void intel_pstate_update_policies(void) static void __intel_pstate_update_max_freq(struct cpudata *cpudata, struct cpufreq_policy *policy) { - policy->cpuinfo.max_freq = global.turbo_disabled_mf ? + policy->cpuinfo.max_freq = global.turbo_disabled ? cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; refresh_frequency_limits(policy); } -static void intel_pstate_update_max_freq(unsigned int cpu) -{ - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - - if (!policy) - return; - - __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); - - cpufreq_cpu_release(policy); -} - static void intel_pstate_update_limits(unsigned int cpu) { mutex_lock(&intel_pstate_driver_lock); - update_turbo_state(); - /* - * If turbo has been turned on or off globally, policy limits for - * all CPUs need to be updated to reflect that. - */ - if (global.turbo_disabled_mf != global.turbo_disabled) { - global.turbo_disabled_mf = global.turbo_disabled; - arch_set_max_freq_ratio(global.turbo_disabled); - for_each_possible_cpu(cpu) - intel_pstate_update_max_freq(cpu); - } else { - cpufreq_update_policy(cpu); - } + cpufreq_update_policy(cpu); mutex_unlock(&intel_pstate_driver_lock); } @@ -1286,7 +1261,6 @@ static ssize_t show_no_turbo(struct kobject *kobj, return -EAGAIN; } - update_turbo_state(); if (global.turbo_disabled) ret = sprintf(buf, "%u\n", global.turbo_disabled); else @@ -1316,7 +1290,6 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, mutex_lock(&intel_pstate_limits_lock); - update_turbo_state(); if (global.turbo_disabled) { pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n"); mutex_unlock(&intel_pstate_limits_lock); @@ -2283,8 +2256,6 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu) struct sample *sample; int target_pstate; - update_turbo_state(); - target_pstate = get_target_pstate(cpu); target_pstate = intel_pstate_prepare_request(cpu, target_pstate); trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu); @@ -2595,7 +2566,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) * be invoked on them. */ intel_pstate_clear_update_util_hook(policy->cpu); - update_turbo_state(); intel_pstate_set_pstate(cpu, pstate); } else { intel_pstate_set_update_util_hook(policy->cpu); @@ -2639,7 +2609,6 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu, { int max_freq; - update_turbo_state(); if (hwp_active) { intel_pstate_get_hwp_cap(cpu); max_freq = global.no_turbo || global.turbo_disabled ? @@ -2736,8 +2705,6 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_freq; - update_turbo_state(); - global.turbo_disabled_mf = global.turbo_disabled; policy->cpuinfo.max_freq = global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; @@ -2903,8 +2870,6 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; int target_pstate; - update_turbo_state(); - freqs.old = policy->cur; freqs.new = target_freq; @@ -2926,8 +2891,6 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, struct cpudata *cpu = all_cpu_data[policy->cpu]; int target_pstate; - update_turbo_state(); - target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true); @@ -2945,7 +2908,6 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, int old_pstate = cpu->pstate.current_pstate; int cap_pstate, min_pstate, max_pstate, target_pstate; - update_turbo_state(); cap_pstate = global.turbo_disabled ? HWP_GUARANTEED_PERF(hwp_cap) : HWP_HIGHEST_PERF(hwp_cap); @@ -3133,6 +3095,9 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) memset(&global, 0, sizeof(global)); global.max_perf_pct = 100; + global.turbo_disabled = turbo_is_disabled(); + + arch_set_max_freq_ratio(global.turbo_disabled); intel_pstate_driver = driver; ret = cpufreq_register_driver(intel_pstate_driver); From c18eb71d9ac0d5ca57d3d425a83cde4d9714b003 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Mar 2024 18:03:25 +0100 Subject: [PATCH 11/22] cpufreq: intel_pstate: Rearrange show_no_turbo() and store_no_turbo() commit c626a438452079824139f97137f17af47b1a8989 upstream. Now that global.turbo_disabled can only change at the cpufreq driver registration time, initialize global.no_turbo at that time too so they are in sync to start with (if the former is set, the latter cannot be updated later anyway). That allows show_no_turbo() to be simlified because it does not need to check global.turbo_disabled and store_no_turbo() can be rearranged to avoid doing anything if the new value of global.no_turbo is equal to the current one and only return an error on attempts to clear global.no_turbo when global.turbo_disabled. While at it, eliminate the redundant ret variable from store_no_turbo(). No intentional functional impact. Intel-SIG: commit c626a4384520 cpufreq: intel_pstate: Rearrange show_no_turbo() and store_no_turbo(). Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 352e4d7e7e9f..91bd198e6452 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1261,10 +1261,7 @@ static ssize_t show_no_turbo(struct kobject *kobj, return -EAGAIN; } - if (global.turbo_disabled) - ret = sprintf(buf, "%u\n", global.turbo_disabled); - else - ret = sprintf(buf, "%u\n", global.no_turbo); + ret = sprintf(buf, "%u\n", global.no_turbo); mutex_unlock(&intel_pstate_driver_lock); @@ -1275,31 +1272,34 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; - int ret; + bool no_turbo; - ret = sscanf(buf, "%u", &input); - if (ret != 1) + if (sscanf(buf, "%u", &input) != 1) return -EINVAL; mutex_lock(&intel_pstate_driver_lock); if (!intel_pstate_driver) { - mutex_unlock(&intel_pstate_driver_lock); - return -EAGAIN; + count = -EAGAIN; + goto unlock_driver; } - mutex_lock(&intel_pstate_limits_lock); + no_turbo = !!clamp_t(int, input, 0, 1); + + if (no_turbo == global.no_turbo) + goto unlock_driver; if (global.turbo_disabled) { pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n"); - mutex_unlock(&intel_pstate_limits_lock); - mutex_unlock(&intel_pstate_driver_lock); - return -EPERM; + count = -EPERM; + goto unlock_driver; } - global.no_turbo = clamp_t(int, input, 0, 1); + global.no_turbo = no_turbo; + + mutex_lock(&intel_pstate_limits_lock); - if (global.no_turbo) { + if (no_turbo) { struct cpudata *cpu = all_cpu_data[0]; int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate; @@ -1311,8 +1311,9 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, mutex_unlock(&intel_pstate_limits_lock); intel_pstate_update_policies(); - arch_set_max_freq_ratio(global.no_turbo); + arch_set_max_freq_ratio(no_turbo); +unlock_driver: mutex_unlock(&intel_pstate_driver_lock); return count; @@ -3096,6 +3097,7 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) memset(&global, 0, sizeof(global)); global.max_perf_pct = 100; global.turbo_disabled = turbo_is_disabled(); + global.no_turbo = global.turbo_disabled; arch_set_max_freq_ratio(global.turbo_disabled); From b33662ed6bb294e9a95883ae2360db0953760eb8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Mar 2024 18:04:24 +0100 Subject: [PATCH 12/22] cpufreq: intel_pstate: Read global.no_turbo under READ_ONCE() commit 9558fae8ce97b3b320b387dd7c88309df2c36d4d upstream. Because global.no_turbo is generally not read under intel_pstate_driver_lock make store_no_turbo() use WRITE_ONCE() for updating it (this is the only place at which it is updated except for the initialization) and make the majority of places reading it use READ_ONCE(). Also remove redundant global.turbo_disabled checks from places that depend on the 'true' value of global.no_turbo because it can only be 'true' if global.turbo_disabled is also 'true'. Intel-SIG: commit 9558fae8ce97 cpufreq: intel_pstate: Read global.no_turbo under READ_ONCE(). Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 91bd198e6452..8a4c0fd3d5a5 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1295,7 +1295,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, goto unlock_driver; } - global.no_turbo = no_turbo; + WRITE_ONCE(global.no_turbo, no_turbo); mutex_lock(&intel_pstate_limits_lock); @@ -1750,7 +1750,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate) u32 vid; val = (u64)pstate << 8; - if (global.no_turbo && !global.turbo_disabled) + if (READ_ONCE(global.no_turbo) && !global.turbo_disabled) val |= (u64)1 << 32; vid_fp = cpudata->vid.min + mul_fp( @@ -1915,7 +1915,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate) u64 val; val = (u64)pstate << 8; - if (global.no_turbo && !global.turbo_disabled) + if (READ_ONCE(global.no_turbo) && !global.turbo_disabled) val |= (u64)1 << 32; return val; @@ -2213,7 +2213,7 @@ static inline int32_t get_target_pstate(struct cpudata *cpu) sample->busy_scaled = busy_frac * 100; - target = global.no_turbo || global.turbo_disabled ? + target = READ_ONCE(global.no_turbo) ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; target += target >> 2; target = mul_fp(target, busy_frac); @@ -2475,7 +2475,7 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) static int intel_pstate_get_max_freq(struct cpudata *cpu) { - return global.turbo_disabled || global.no_turbo ? + return READ_ONCE(global.no_turbo) ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; } @@ -2612,7 +2612,7 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu, if (hwp_active) { intel_pstate_get_hwp_cap(cpu); - max_freq = global.no_turbo || global.turbo_disabled ? + max_freq = READ_ONCE(global.no_turbo) ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; } else { max_freq = intel_pstate_get_max_freq(cpu); From 4cbadced0bf69fff4d9a3521711a13e0312949e3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Mar 2024 18:05:06 +0100 Subject: [PATCH 13/22] cpufreq: intel_pstate: Replace three global.turbo_disabled checks commit f32587dcbe5f40e160d8de262add6abab79356a7 upstream. Replace the global.turbo_disabled in __intel_pstate_update_max_freq() with a global.no_turbo one to make store_no_turbo() actually update the maximum CPU frequency on the trubo preference changes, which needs to be consistent with arch_set_max_freq_ratio() called from there. For more consistency, replace the global.turbo_disabled checks in __intel_pstate_cpu_init() and intel_cpufreq_adjust_perf() with global.no_turbo checks either. Intel-SIG: commit f32587dcbe5f cpufreq: intel_pstate: Replace three global.turbo_disabled checks. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 8a4c0fd3d5a5..e6d5a81582dd 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1152,7 +1152,7 @@ static void intel_pstate_update_policies(void) static void __intel_pstate_update_max_freq(struct cpudata *cpudata, struct cpufreq_policy *policy) { - policy->cpuinfo.max_freq = global.turbo_disabled ? + policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ? cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; refresh_frequency_limits(policy); } @@ -2706,7 +2706,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_freq; - policy->cpuinfo.max_freq = global.turbo_disabled ? + policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; policy->min = policy->cpuinfo.min_freq; @@ -2909,8 +2909,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, int old_pstate = cpu->pstate.current_pstate; int cap_pstate, min_pstate, max_pstate, target_pstate; - cap_pstate = global.turbo_disabled ? HWP_GUARANTEED_PERF(hwp_cap) : - HWP_HIGHEST_PERF(hwp_cap); + cap_pstate = READ_ONCE(global.no_turbo) ? + HWP_GUARANTEED_PERF(hwp_cap) : + HWP_HIGHEST_PERF(hwp_cap); /* Optimization: Avoid unnecessary divisions. */ From 77e85a782d01bba6f82c6c3aa54fa5bd3fa1147e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 28 Mar 2024 19:52:45 +0100 Subject: [PATCH 14/22] cpufreq: intel_pstate: Update the maximum CPU frequency consistently commit e8217b4bece379e66d43ab5070431712f07bf625 upstream. There are 3 places at which the maximum CPU frequency may change, store_no_turbo(), intel_pstate_update_limits() (when called by the cpufreq core) and intel_pstate_notify_work() (when handling a HWP change notification). Currently, cpuinfo.max_freq is only updated by store_no_turbo() and intel_pstate_notify_work(), although it principle it may be necessary to update it in intel_pstate_update_limits() either. Make all of them mutually consistent. Intel-SIG: commit e8217b4bece3 cpufreq: intel_pstate: Update the maximum CPU frequency consistently. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e6d5a81582dd..da16e661ecfa 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1152,18 +1152,32 @@ static void intel_pstate_update_policies(void) static void __intel_pstate_update_max_freq(struct cpudata *cpudata, struct cpufreq_policy *policy) { + intel_pstate_get_hwp_cap(cpudata); + policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ? cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; + refresh_frequency_limits(policy); } static void intel_pstate_update_limits(unsigned int cpu) { - mutex_lock(&intel_pstate_driver_lock); + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - cpufreq_update_policy(cpu); + if (!policy) + return; - mutex_unlock(&intel_pstate_driver_lock); + __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); + + cpufreq_cpu_release(policy); +} + +static void intel_pstate_update_limits_for_all(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + intel_pstate_update_limits(cpu); } /************************** sysfs begin ************************/ @@ -1310,7 +1324,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + intel_pstate_update_limits_for_all(); arch_set_max_freq_ratio(no_turbo); unlock_driver: @@ -1594,7 +1608,6 @@ static void intel_pstate_notify_work(struct work_struct *work) struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu); if (policy) { - intel_pstate_get_hwp_cap(cpudata); __intel_pstate_update_max_freq(cpudata, policy); cpufreq_cpu_release(policy); From e2442cb80947418804c875a0dac08438b36c1f6f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Apr 2024 10:06:45 +0200 Subject: [PATCH 15/22] cpufreq: intel_pstate: hide unused intel_pstate_cpu_oob_ids[] commit 8c556541a53848d6611ff8b5f9bf52e96c56f48e upstream. The reference to this variable is hidden in an #ifdef: drivers/cpufreq/intel_pstate.c:2440:32: error: 'intel_pstate_cpu_oob_ids' defined but not used [-Werror=unused-const-variable=] Use the same check around the definition. Intel-SIG: commit 8c556541a538 cpufreq: intel_pstate: hide unused intel_pstate_cpu_oob_ids[]. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index da16e661ecfa..7b349d8809a4 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2399,6 +2399,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); +#ifdef CONFIG_ACPI static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { X86_MATCH(BROADWELL_D, core_funcs), X86_MATCH(BROADWELL_X, core_funcs), @@ -2407,6 +2408,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), {} }; +#endif static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { X86_MATCH(KABYLAKE, core_funcs), From 7146aaa2f77ba37e94e1886c32ae476ae34ed250 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sun, 5 May 2024 12:07:12 -0700 Subject: [PATCH 16/22] cpufreq: intel_pstate: fix struct cpudata::epp_cached kernel-doc commit 0a206fe35d360a9ec1c8b1609ca394c2759a8962 upstream. make C=1 currently gives the following warning: drivers/cpufreq/intel_pstate.c:262: warning: Function parameter or struct member 'epp_cached' not described in 'cpudata' Add the missing ":" to fix the trivial kernel-doc syntax error. Intel-SIG: commit 0a206fe35d36 cpufreq: intel_pstate: fix struct cpudata::epp_cached kernel-doc. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Jeff Johnson Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7b349d8809a4..17cbcdca7db8 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -211,7 +211,7 @@ struct global_params { * @epp_policy: Last saved policy used to set EPP/EPB * @epp_default: Power on default HWP energy performance * preference/bias - * @epp_cached Cached HWP energy-performance preference value + * @epp_cached: Cached HWP energy-performance preference value * @hwp_req_cached: Cached value of the last HWP Request MSR * @hwp_cap_cached: Cached value of the last HWP Capabilities MSR * @last_io_update: Last time when IO wake flag was set From 1c754db19f79fc15da2fc3842b225ea322b0b8db Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 31 May 2024 16:00:04 -0700 Subject: [PATCH 17/22] cpufreq: intel_pstate: Fix unchecked HWP MSR access commit 1e24c31351787e24b7eebe84866bd55fd62a0aef upstream. Fix unchecked MSR access error for processors with no HWP support. On such processors, maximum frequency can be changed by the system firmware using ACPI event ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED. This results in accessing HWP MSR 0x771. Call Trace: generic_exec_single+0x58/0x120 smp_call_function_single+0xbf/0x110 rdmsrl_on_cpu+0x46/0x60 intel_pstate_get_hwp_cap+0x1b/0x70 intel_pstate_update_limits+0x2a/0x60 acpi_processor_notify+0xb7/0x140 acpi_ev_notify_dispatch+0x3b/0x60 HWP MSR 0x771 can be only read on a CPU which supports HWP and enabled. Hence intel_pstate_get_hwp_cap() can only be called when hwp_active is true. Intel-SIG: commit 1e24c3135178 cpufreq: intel_pstate: Fix unchecked HWP MSR access. Backport intel_pstate driver update for 6.6 from 6.11 Reported-by: Sebastian Andrzej Siewior Closes: https://lore.kernel.org/linux-pm/20240529155740.Hq2Hw7be@linutronix.de/ Fixes: e8217b4bece3 ("cpufreq: intel_pstate: Update the maximum CPU frequency consistently") Tested-by: Sebastian Andrzej Siewior Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 17cbcdca7db8..3155cd2be873 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1152,7 +1152,8 @@ static void intel_pstate_update_policies(void) static void __intel_pstate_update_max_freq(struct cpudata *cpudata, struct cpufreq_policy *policy) { - intel_pstate_get_hwp_cap(cpudata); + if (hwp_active) + intel_pstate_get_hwp_cap(cpudata); policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ? cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; From 0d1b6768135fbc5adb96c381152001338de0dc5e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Jun 2024 16:53:06 +0200 Subject: [PATCH 18/22] cpufreq: intel_pstate: Check turbo_is_disabled() in store_no_turbo() commit 350cbb5d2f676bff22c49e5e81764c3b8da342a9 upstream. After recent changes in intel_pstate, global.turbo_disabled is only set at the initialization time and never changed. However, it turns out that on some systems the "turbo disabled" bit in MSR_IA32_MISC_ENABLE, the initial state of which is reflected by global.turbo_disabled, can be flipped later and there should be a way to take that into account (other than checking that MSR every time the driver runs which is costly and useless overhead on the vast majority of systems). For this purpose, notice that before the changes in question, store_no_turbo() contained a turbo_is_disabled() check that was used for updating global.turbo_disabled if the "turbo disabled" bit in MSR_IA32_MISC_ENABLE had been flipped and that functionality can be restored. Then, users will be able to reset global.turbo_disabled by writing 0 to no_turbo which used to work before on systems with flipping "turbo disabled" bit. This guarantees the driver state to remain in sync, but READ_ONCE() annotations need to be added in two places where global.turbo_disabled is accessed locklessly, so modify the driver to make that happen. Intel-SIG: commit 350cbb5d2f67 cpufreq: intel_pstate: Check turbo_is_disabled() in store_no_turbo(). Backport intel_pstate driver update for 6.6 from 6.11 Fixes: 0940f1a8011f ("cpufreq: intel_pstate: Do not update global.turbo_disabled after initialization") Closes: https://lore.kernel.org/linux-pm/bf3ebf1571a4788e97daf861eb493c12d42639a3.camel@xry111.site Suggested-by: Srinivas Pandruvada Reported-by: Xi Ruoyao Tested-by: Xi Ruoyao Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3155cd2be873..f5fe300a328a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1301,12 +1301,17 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, no_turbo = !!clamp_t(int, input, 0, 1); - if (no_turbo == global.no_turbo) - goto unlock_driver; - - if (global.turbo_disabled) { - pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n"); + WRITE_ONCE(global.turbo_disabled, turbo_is_disabled()); + if (global.turbo_disabled && !no_turbo) { + pr_notice("Turbo disabled by BIOS or unavailable on processor\n"); count = -EPERM; + if (global.no_turbo) + goto unlock_driver; + else + no_turbo = 1; + } + + if (no_turbo == global.no_turbo) { goto unlock_driver; } @@ -1764,7 +1769,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate) u32 vid; val = (u64)pstate << 8; - if (READ_ONCE(global.no_turbo) && !global.turbo_disabled) + if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled)) val |= (u64)1 << 32; vid_fp = cpudata->vid.min + mul_fp( @@ -1929,7 +1934,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate) u64 val; val = (u64)pstate << 8; - if (READ_ONCE(global.no_turbo) && !global.turbo_disabled) + if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled)) val |= (u64)1 << 32; return val; From 48c12476436cf5a27502a09fb26bf2ed507cc349 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 24 Jun 2024 09:11:08 -0700 Subject: [PATCH 19/22] x86/cpufeatures: Add HWP highest perf change feature flag commit 7ea81936b85317aee8a73cd35d7f9cd6ce654dee upstream. When CPUID[6].EAX[15] is set to 1, this CPU supports notification for HWP (Hardware P-states) highest performance change. Add a feature flag to check if the CPU supports HWP highest performance change. Intel-SIG: commit 7ea81936b853 x86/cpufeatures: Add HWP highest perf change feature flag. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240624161109.1427640-2-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 49f084fbde52..26f6ad9d6cc0 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -380,6 +380,7 @@ #define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ +#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* "" HWP Highest perf change */ #define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ From e20aa05ab075821ebaf2422df335c03184c8c795 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 24 Jun 2024 09:27:14 -0700 Subject: [PATCH 20/22] cpufreq: intel_pstate: Replace boot_cpu_has() commit acfc429e42f09524653af52998548cd9317892a6 upstream. Replace boot_cpu_has() with cpu_feature_enabled(). Intel-SIG: commit acfc429e42f0 cpufreq: intel_pstate: Replace boot_cpu_has(). Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240624162714.1431182-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f5fe300a328a..fcdee6208fdd 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1631,7 +1631,7 @@ void notify_hwp_interrupt(void) unsigned long flags; u64 value; - if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) + if (!hwp_active || !cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY)) return; rdmsrl_safe(MSR_HWP_STATUS, &value); @@ -1660,7 +1660,7 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) unsigned long flags; bool cancel_work; - if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) + if (!cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY)) return; /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */ From e33b9062b2d954f720d20a4761fd6efd54c45bf5 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 24 Jun 2024 09:11:09 -0700 Subject: [PATCH 21/22] cpufreq: intel_pstate: Support highest performance change interrupt commit d845cd901b28f1b6c02a208b864fc3fc46d14536 upstream. On some systems, the HWP (Hardware P-states) highest performance level can change from the value set at boot-up. This behavior can lead to two issues: - The 'cpuinfo_max_freq' within the 'cpufreq' sysfs will not reflect the CPU's highest achievable performance. - Even if the CPU's highest performance level is increased after booting, the CPU may not reach the full expected performance. The availability of this feature is indicated by the CPUID instruction: if CPUID[6].EAX[15] is set to 1, the feature is supported. When supported, setting bit 2 of the MSR_HWP_INTERRUPT register enables notifications of the highest performance level changes. Therefore, as part of enabling the HWP interrupt, bit 2 of the MSR_HWP_INTERRUPT should also be set when this feature is supported. Upon a change in the highest performance level, a new HWP interrupt is generated, with bit 3 of the MSR_HWP_STATUS register set, and the MSR_HWP_CAPABILITIES register is updated with the new highest performance limit. The processing of the interrupt is the same as the guaranteed performance change. Notify change to cpufreq core and update MSR_HWP_REQUEST with new performance limits. The current driver implementation already takes care of the highest performance change as part of: commit dfeeedc1bf57 ("cpufreq: intel_pstate: Update cpuinfo.max_freq on HWP_CAP changes") For example: Before highest performance change interrupt: cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq 3700000 cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq 3700000 After highest performance changes interrupt: cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq 3900000 cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq 3900000 Intel-SIG: commit d845cd901b28 cpufreq: intel_pstate: Support highest performance change interrupt. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240624161109.1427640-3-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index fcdee6208fdd..880aa552384e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1625,17 +1625,24 @@ static void intel_pstate_notify_work(struct work_struct *work) static DEFINE_RAW_SPINLOCK(hwp_notify_lock); static cpumask_t hwp_intr_enable_mask; +#define HWP_GUARANTEED_PERF_CHANGE_STATUS BIT(0) +#define HWP_HIGHEST_PERF_CHANGE_STATUS BIT(3) + void notify_hwp_interrupt(void) { unsigned int this_cpu = smp_processor_id(); + u64 value, status_mask; unsigned long flags; - u64 value; if (!hwp_active || !cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY)) return; + status_mask = HWP_GUARANTEED_PERF_CHANGE_STATUS; + if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE)) + status_mask |= HWP_HIGHEST_PERF_CHANGE_STATUS; + rdmsrl_safe(MSR_HWP_STATUS, &value); - if (!(value & 0x01)) + if (!(value & status_mask)) return; raw_spin_lock_irqsave(&hwp_notify_lock, flags); @@ -1674,10 +1681,14 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) cancel_delayed_work_sync(&cpudata->hwp_notify_work); } +#define HWP_GUARANTEED_PERF_CHANGE_REQ BIT(0) +#define HWP_HIGHEST_PERF_CHANGE_REQ BIT(2) + static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata) { - /* Enable HWP notification interrupt for guaranteed performance change */ + /* Enable HWP notification interrupt for performance change */ if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) { + u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ; unsigned long flags; raw_spin_lock_irqsave(&hwp_notify_lock, flags); @@ -1685,8 +1696,11 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata) cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask); raw_spin_unlock_irqrestore(&hwp_notify_lock, flags); + if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE)) + interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ; + /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */ - wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01); + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, interrupt_mask); wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); } } From f696d8ccb046d170792500c1d9bc1b72e69da744 Mon Sep 17 00:00:00 2001 From: Pedro Henrique Kopper Date: Thu, 1 Aug 2024 13:41:50 -0300 Subject: [PATCH 22/22] cpufreq: intel_pstate: Update Balance performance EPP for Emerald Rapids commit 64a66f4a3c89b4602ee1e6cd23b28729fc4562b3 upstream. On Intel Emerald Rapids machines, we ship the Energy Performance Preference (EPP) default for balance_performance as 128. However, during an internal investigation together with Intel, we have determined that 32 is a more suitable value. This leads to significant improvements in both performance and energy: POV-Ray: 32% faster | 12% less energy OpenSSL: 12% faster | energy within 1% Build Linux Kernel: 29% faster | 18% less energy Therefore, we should move the default EPP for balance_performance to 32. This is in line with what has already been done for Sapphire Rapids. Intel-SIG: commit 64a66f4a3c89 cpufreq: intel_pstate: Update Balance performance EPP for Emerald Rapids. Backport intel_pstate driver update for 6.6 from 6.11 Signed-off-by: Pedro Henrique Kopper Acked-by: Srinivas Pandruvada Link: https://patch.msgid.link/Zqu6zjVMoiXwROBI@capivara Signed-off-by: Rafael J. Wysocki [ Yingbao Jia: amend commit log ] Signed-off-by: Yingbao Jia --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 880aa552384e..b3ee1e5b07de 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3409,6 +3409,7 @@ static const struct x86_cpu_id intel_epp_default[] = { */ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, HWP_EPP_BALANCE_POWERSAVE, 115, 16)), {}