| // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
| /* |
| * |
| * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. |
| * |
| * This program is free software and is provided to you under the terms of the |
| * GNU General Public License version 2 as published by the Free Software |
| * Foundation, and any use by you of this program is subject to the terms |
| * of such GNU license. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, you can access it online at |
| * http://www.gnu.org/licenses/gpl-2.0.html. |
| * |
| */ |
| |
| /* |
| * Metrics for power management |
| */ |
| |
| #include <mali_kbase.h> |
| #include <mali_kbase_pm.h> |
| #include <backend/gpu/mali_kbase_pm_internal.h> |
| |
| #if MALI_USE_CSF |
| #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" |
| #include <csf/ipa_control/mali_kbase_csf_ipa_control.h> |
| #else |
| #include <backend/gpu/mali_kbase_jm_rb.h> |
| #endif /* !MALI_USE_CSF */ |
| |
| #include <backend/gpu/mali_kbase_pm_defs.h> |
| #include <mali_linux_trace.h> |
| |
| /* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns |
| * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly |
| * under 11s. Exceeding this will cause overflow |
| */ |
| #define KBASE_PM_TIME_SHIFT 8 |
| |
| #if MALI_USE_CSF |
| /* To get the GPU_ACTIVE value in nano seconds unit */ |
| #define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9) |
| #endif |
| |
| #ifdef CONFIG_MALI_MIDGARD_DVFS |
| static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) |
| { |
| unsigned long flags; |
| struct kbasep_pm_metrics_state *metrics; |
| |
| KBASE_DEBUG_ASSERT(timer != NULL); |
| |
| metrics = container_of(timer, struct kbasep_pm_metrics_state, timer); |
| kbase_pm_get_dvfs_action(metrics->kbdev); |
| |
| spin_lock_irqsave(&metrics->lock, flags); |
| |
| if (metrics->timer_active) |
| hrtimer_start(timer, |
| HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period), |
| HRTIMER_MODE_REL); |
| |
| spin_unlock_irqrestore(&metrics->lock, flags); |
| |
| return HRTIMER_NORESTART; |
| } |
| #endif /* CONFIG_MALI_MIDGARD_DVFS */ |
| |
| int kbasep_pm_metrics_init(struct kbase_device *kbdev) |
| { |
| #if MALI_USE_CSF |
| struct kbase_ipa_control_perf_counter perf_counter; |
| int err; |
| |
| /* One counter group */ |
| const size_t NUM_PERF_COUNTERS = 1; |
| |
| KBASE_DEBUG_ASSERT(kbdev != NULL); |
| kbdev->pm.backend.metrics.kbdev = kbdev; |
| kbdev->pm.backend.metrics.time_period_start = ktime_get(); |
| kbdev->pm.backend.metrics.values.time_busy = 0; |
| kbdev->pm.backend.metrics.values.time_idle = 0; |
| kbdev->pm.backend.metrics.values.time_in_protm = 0; |
| |
| perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR; |
| |
| /* Normalize values by GPU frequency */ |
| perf_counter.gpu_norm = true; |
| |
| /* We need the GPU_ACTIVE counter, which is in the CSHW group */ |
| perf_counter.type = KBASE_IPA_CORE_TYPE_CSHW; |
| |
| /* We need the GPU_ACTIVE counter */ |
| perf_counter.idx = GPU_ACTIVE_CNT_IDX; |
| |
| err = kbase_ipa_control_register( |
| kbdev, &perf_counter, NUM_PERF_COUNTERS, |
| &kbdev->pm.backend.metrics.ipa_control_client); |
| if (err) { |
| dev_err(kbdev->dev, |
| "Failed to register IPA with kbase_ipa_control: err=%d", |
| err); |
| return -1; |
| } |
| #else |
| KBASE_DEBUG_ASSERT(kbdev != NULL); |
| kbdev->pm.backend.metrics.kbdev = kbdev; |
| kbdev->pm.backend.metrics.time_period_start = ktime_get(); |
| |
| kbdev->pm.backend.metrics.gpu_active = false; |
| kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; |
| kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; |
| kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; |
| kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; |
| kbdev->pm.backend.metrics.active_gl_ctx[2] = 0; |
| |
| kbdev->pm.backend.metrics.values.time_busy = 0; |
| kbdev->pm.backend.metrics.values.time_idle = 0; |
| kbdev->pm.backend.metrics.values.busy_cl[0] = 0; |
| kbdev->pm.backend.metrics.values.busy_cl[1] = 0; |
| kbdev->pm.backend.metrics.values.busy_gl = 0; |
| |
| #endif |
| spin_lock_init(&kbdev->pm.backend.metrics.lock); |
| |
| #ifdef CONFIG_MALI_MIDGARD_DVFS |
| hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, |
| HRTIMER_MODE_REL); |
| kbdev->pm.backend.metrics.timer.function = dvfs_callback; |
| kbdev->pm.backend.metrics.initialized = true; |
| kbase_pm_metrics_start(kbdev); |
| #endif /* CONFIG_MALI_MIDGARD_DVFS */ |
| |
| #if MALI_USE_CSF |
| /* The sanity check on the GPU_ACTIVE performance counter |
| * is skipped for Juno platforms that have timing problems. |
| */ |
| kbdev->pm.backend.metrics.skip_gpu_active_sanity_check = |
| of_machine_is_compatible("arm,juno"); |
| #endif |
| |
| return 0; |
| } |
| KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); |
| |
| void kbasep_pm_metrics_term(struct kbase_device *kbdev) |
| { |
| #ifdef CONFIG_MALI_MIDGARD_DVFS |
| unsigned long flags; |
| |
| KBASE_DEBUG_ASSERT(kbdev != NULL); |
| |
| spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); |
| kbdev->pm.backend.metrics.timer_active = false; |
| spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); |
| |
| hrtimer_cancel(&kbdev->pm.backend.metrics.timer); |
| kbdev->pm.backend.metrics.initialized = false; |
| #endif /* CONFIG_MALI_MIDGARD_DVFS */ |
| |
| #if MALI_USE_CSF |
| kbase_ipa_control_unregister( |
| kbdev, kbdev->pm.backend.metrics.ipa_control_client); |
| #endif |
| } |
| |
| KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); |
| |
| /* caller needs to hold kbdev->pm.backend.metrics.lock before calling this |
| * function |
| */ |
| #if MALI_USE_CSF |
| #if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) |
| static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) |
| { |
| int err; |
| u64 gpu_active_counter; |
| u64 protected_time; |
| ktime_t now; |
| |
| lockdep_assert_held(&kbdev->pm.backend.metrics.lock); |
| |
| /* Query IPA_CONTROL for the latest GPU-active and protected-time |
| * info. |
| */ |
| err = kbase_ipa_control_query( |
| kbdev, kbdev->pm.backend.metrics.ipa_control_client, |
| &gpu_active_counter, 1, &protected_time); |
| |
| /* Read the timestamp after reading the GPU_ACTIVE counter value. |
| * This ensures the time gap between the 2 reads is consistent for |
| * a meaningful comparison between the increment of GPU_ACTIVE and |
| * elapsed time. The lock taken inside kbase_ipa_control_query() |
| * function can cause lot of variation. |
| */ |
| now = ktime_get(); |
| |
| if (err) { |
| dev_err(kbdev->dev, |
| "Failed to query the increment of GPU_ACTIVE counter: err=%d", |
| err); |
| } else { |
| u64 diff_ns; |
| s64 diff_ns_signed; |
| u32 ns_time; |
| ktime_t diff = ktime_sub( |
| now, kbdev->pm.backend.metrics.time_period_start); |
| |
| diff_ns_signed = ktime_to_ns(diff); |
| |
| if (diff_ns_signed < 0) |
| return; |
| |
| diff_ns = (u64)diff_ns_signed; |
| |
| #if !IS_ENABLED(CONFIG_MALI_NO_MALI) |
| /* The GPU_ACTIVE counter shouldn't clock-up more time than has |
| * actually elapsed - but still some margin needs to be given |
| * when doing the comparison. There could be some drift between |
| * the CPU and GPU clock. |
| * |
| * Can do the check only in a real driver build, as an arbitrary |
| * value for GPU_ACTIVE can be fed into dummy model in no_mali |
| * configuration which may not correspond to the real elapsed |
| * time. |
| */ |
| if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) { |
| /* Use a margin value that is approximately 1% of the time |
| * difference. |
| */ |
| u64 margin_ns = diff_ns >> 6; |
| if (gpu_active_counter > (diff_ns + margin_ns)) { |
| dev_info( |
| kbdev->dev, |
| "GPU activity takes longer than time interval: %llu ns > %llu ns", |
| (unsigned long long)gpu_active_counter, |
| (unsigned long long)diff_ns); |
| } |
| } |
| #endif |
| /* Calculate time difference in units of 256ns */ |
| ns_time = (u32)(diff_ns >> KBASE_PM_TIME_SHIFT); |
| |
| /* Add protected_time to gpu_active_counter so that time in |
| * protected mode is included in the apparent GPU active time, |
| * then convert it from units of 1ns to units of 256ns, to |
| * match what JM GPUs use. The assumption is made here that the |
| * GPU is 100% busy while in protected mode, so we should add |
| * this since the GPU can't (and thus won't) update these |
| * counters while it's actually in protected mode. |
| * |
| * Perform the add after dividing each value down, to reduce |
| * the chances of overflows. |
| */ |
| protected_time >>= KBASE_PM_TIME_SHIFT; |
| gpu_active_counter >>= KBASE_PM_TIME_SHIFT; |
| gpu_active_counter += protected_time; |
| |
| /* Ensure the following equations don't go wrong if ns_time is |
| * slightly larger than gpu_active_counter somehow |
| */ |
| gpu_active_counter = MIN(gpu_active_counter, ns_time); |
| |
| kbdev->pm.backend.metrics.values.time_busy += |
| gpu_active_counter; |
| |
| kbdev->pm.backend.metrics.values.time_idle += |
| ns_time - gpu_active_counter; |
| |
| /* Also make time in protected mode available explicitly, |
| * so users of this data have this info, too. |
| */ |
| kbdev->pm.backend.metrics.values.time_in_protm += |
| protected_time; |
| } |
| |
| kbdev->pm.backend.metrics.time_period_start = now; |
| } |
| #endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */ |
| #else |
| static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, |
| ktime_t now) |
| { |
| ktime_t diff; |
| |
| lockdep_assert_held(&kbdev->pm.backend.metrics.lock); |
| |
| diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); |
| if (ktime_to_ns(diff) < 0) |
| return; |
| |
| if (kbdev->pm.backend.metrics.gpu_active) { |
| u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); |
| |
| kbdev->pm.backend.metrics.values.time_busy += ns_time; |
| if (kbdev->pm.backend.metrics.active_cl_ctx[0]) |
| kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time; |
| if (kbdev->pm.backend.metrics.active_cl_ctx[1]) |
| kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time; |
| if (kbdev->pm.backend.metrics.active_gl_ctx[0]) |
| kbdev->pm.backend.metrics.values.busy_gl += ns_time; |
| if (kbdev->pm.backend.metrics.active_gl_ctx[1]) |
| kbdev->pm.backend.metrics.values.busy_gl += ns_time; |
| if (kbdev->pm.backend.metrics.active_gl_ctx[2]) |
| kbdev->pm.backend.metrics.values.busy_gl += ns_time; |
| } else { |
| kbdev->pm.backend.metrics.values.time_idle += |
| (u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); |
| } |
| |
| kbdev->pm.backend.metrics.time_period_start = now; |
| } |
| #endif /* MALI_USE_CSF */ |
| |
| #if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) |
| void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, |
| struct kbasep_pm_metrics *last, |
| struct kbasep_pm_metrics *diff) |
| { |
| struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values; |
| unsigned long flags; |
| |
| spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); |
| #if MALI_USE_CSF |
| kbase_pm_get_dvfs_utilisation_calc(kbdev); |
| #else |
| kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get()); |
| #endif |
| |
| memset(diff, 0, sizeof(*diff)); |
| diff->time_busy = cur->time_busy - last->time_busy; |
| diff->time_idle = cur->time_idle - last->time_idle; |
| |
| #if MALI_USE_CSF |
| diff->time_in_protm = cur->time_in_protm - last->time_in_protm; |
| #else |
| diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0]; |
| diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1]; |
| diff->busy_gl = cur->busy_gl - last->busy_gl; |
| #endif |
| |
| *last = *cur; |
| |
| spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); |
| } |
| KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics); |
| #endif |
| |
| #ifdef CONFIG_MALI_MIDGARD_DVFS |
| void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) |
| { |
| int utilisation; |
| struct kbasep_pm_metrics *diff; |
| #if !MALI_USE_CSF |
| int busy; |
| int util_gl_share; |
| int util_cl_share[2]; |
| #endif |
| |
| KBASE_DEBUG_ASSERT(kbdev != NULL); |
| |
| diff = &kbdev->pm.backend.metrics.dvfs_diff; |
| |
| kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, |
| diff); |
| |
| utilisation = (100 * diff->time_busy) / |
| max(diff->time_busy + diff->time_idle, 1u); |
| |
| #if !MALI_USE_CSF |
| busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u); |
| |
| util_gl_share = (100 * diff->busy_gl) / busy; |
| util_cl_share[0] = (100 * diff->busy_cl[0]) / busy; |
| util_cl_share[1] = (100 * diff->busy_cl[1]) / busy; |
| |
| kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, |
| util_cl_share); |
| #else |
| /* Note that, at present, we don't pass protected-mode time to the |
| * platform here. It's unlikely to be useful, however, as the platform |
| * probably just cares whether the GPU is busy or not; time in |
| * protected mode is already added to busy-time at this point, though, |
| * so we should be good. |
| */ |
| kbase_platform_dvfs_event(kbdev, utilisation); |
| #endif |
| } |
| |
| bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) |
| { |
| bool isactive; |
| unsigned long flags; |
| |
| KBASE_DEBUG_ASSERT(kbdev != NULL); |
| |
| spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); |
| isactive = kbdev->pm.backend.metrics.timer_active; |
| spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); |
| |
| return isactive; |
| } |
| KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); |
| |
| void kbase_pm_metrics_start(struct kbase_device *kbdev) |
| { |
| unsigned long flags; |
| bool update = true; |
| |
| if (unlikely(!kbdev->pm.backend.metrics.initialized)) |
| return; |
| |
| spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); |
| if (!kbdev->pm.backend.metrics.timer_active) |
| kbdev->pm.backend.metrics.timer_active = true; |
| else |
| update = false; |
| spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); |
| |
| if (update) |
| hrtimer_start(&kbdev->pm.backend.metrics.timer, |
| HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), |
| HRTIMER_MODE_REL); |
| } |
| |
| void kbase_pm_metrics_stop(struct kbase_device *kbdev) |
| { |
| unsigned long flags; |
| bool update = true; |
| |
| if (unlikely(!kbdev->pm.backend.metrics.initialized)) |
| return; |
| |
| spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); |
| if (kbdev->pm.backend.metrics.timer_active) |
| kbdev->pm.backend.metrics.timer_active = false; |
| else |
| update = false; |
| spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); |
| |
| if (update) |
| hrtimer_cancel(&kbdev->pm.backend.metrics.timer); |
| } |
| |
| |
| #endif /* CONFIG_MALI_MIDGARD_DVFS */ |
| |
| #if !MALI_USE_CSF |
| /** |
| * kbase_pm_metrics_active_calc - Update PM active counts based on currently |
| * running atoms |
| * @kbdev: Device pointer |
| * |
| * The caller must hold kbdev->pm.backend.metrics.lock |
| */ |
| static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) |
| { |
| int js; |
| |
| lockdep_assert_held(&kbdev->pm.backend.metrics.lock); |
| |
| kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; |
| kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; |
| kbdev->pm.backend.metrics.active_gl_ctx[2] = 0; |
| kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; |
| kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; |
| kbdev->pm.backend.metrics.gpu_active = false; |
| |
| for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { |
| struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); |
| |
| /* Head atom may have just completed, so if it isn't running |
| * then try the next atom |
| */ |
| if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) |
| katom = kbase_gpu_inspect(kbdev, js, 1); |
| |
| if (katom && katom->gpu_rb_state == |
| KBASE_ATOM_GPU_RB_SUBMITTED) { |
| if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { |
| int device_nr = (katom->core_req & |
| BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) |
| ? katom->device_nr : 0; |
| if (!WARN_ON(device_nr >= 2)) |
| kbdev->pm.backend.metrics. |
| active_cl_ctx[device_nr] = 1; |
| } else { |
| kbdev->pm.backend.metrics.active_gl_ctx[js] = 1; |
| trace_sysgraph(SGR_ACTIVE, 0, js); |
| } |
| kbdev->pm.backend.metrics.gpu_active = true; |
| } else { |
| trace_sysgraph(SGR_INACTIVE, 0, js); |
| } |
| } |
| } |
| |
| /* called when job is submitted to or removed from a GPU slot */ |
| void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) |
| { |
| unsigned long flags; |
| ktime_t now; |
| |
| lockdep_assert_held(&kbdev->hwaccess_lock); |
| |
| spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); |
| |
| if (!timestamp) { |
| now = ktime_get(); |
| timestamp = &now; |
| } |
| |
| /* Track how much of time has been spent busy or idle. For JM GPUs, |
| * this also evaluates how long CL and/or GL jobs have been busy for. |
| */ |
| kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp); |
| |
| kbase_pm_metrics_active_calc(kbdev); |
| spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); |
| } |
| #endif /* !MALI_USE_CSF */ |