| // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
| /* |
| * |
| * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. |
| * |
| * This program is free software and is provided to you under the terms of the |
| * GNU General Public License version 2 as published by the Free Software |
| * Foundation, and any use by you of this program is subject to the terms |
| * of such GNU license. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, you can access it online at |
| * http://www.gnu.org/licenses/gpl-2.0.html. |
| * |
| */ |
| |
| #include <mali_kbase.h> |
| #include "mali_kbase_config_defaults.h" |
| #include <mali_kbase_ctx_sched.h> |
| #include <mali_kbase_reset_gpu.h> |
| #include <mali_kbase_as_fault_debugfs.h> |
| #include "mali_kbase_csf.h" |
| #include <tl/mali_kbase_tracepoints.h> |
| #include <backend/gpu/mali_kbase_pm_internal.h> |
| #include <linux/export.h> |
| #include <csf/mali_kbase_csf_registers.h> |
| #include <uapi/gpu/arm/midgard/mali_base_kernel.h> |
| #include <mali_kbase_hwaccess_time.h> |
| #include "mali_kbase_csf_tiler_heap.h" |
| |
/* Value to indicate that a queue group is not on the groups_to_schedule list */
| #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) |
| |
| /* This decides the upper limit on the waiting time for the Scheduler |
| * to exit the sleep state. Usually the value of autosuspend_delay is |
 * expected to be around 100 milliseconds.
| */ |
| #define MAX_AUTO_SUSPEND_DELAY_MS (5000) |
| |
| /* Maximum number of endpoints which may run tiler jobs. */ |
| #define CSG_TILER_MAX ((u8)1) |
| |
| /* Maximum dynamic CSG slot priority value */ |
| #define MAX_CSG_SLOT_PRIORITY ((u8)15) |
| |
| /* CSF scheduler time slice value */ |
| #define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */ |
| |
| /* |
| * CSF scheduler time threshold for converting "tock" requests into "tick" if |
| * they come too close to the end of a tick interval. This avoids scheduling |
| * twice in a row. |
| */ |
| #define CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS \ |
| CSF_SCHEDULER_TIME_TICK_MS |
| |
| #define CSF_SCHEDULER_TIME_TICK_THRESHOLD_JIFFIES \ |
| msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS) |
| |
| /* Nanoseconds per millisecond */ |
| #define NS_PER_MS ((u64)1000 * 1000) |
| |
| /* |
| * CSF minimum time to reschedule for a new "tock" request. Bursts of "tock" |
| * requests are not serviced immediately, but shall wait for a minimum time in |
| * order to reduce load on the CSF scheduler thread. |
| */ |
#define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffy */
| |
| /* CS suspended and is idle (empty ring buffer) */ |
| #define CS_IDLE_FLAG (1 << 0) |
| |
/* CS suspended and is waiting for a CQS condition */
| #define CS_WAIT_SYNC_FLAG (1 << 1) |
| |
| /* A GPU address space slot is reserved for MCU. */ |
| #define NUM_RESERVED_AS_SLOTS (1) |
| |
| /* Time to wait for completion of PING req before considering MCU as hung */ |
| #define FW_PING_AFTER_ERROR_TIMEOUT_MS (10) |
| |
/* Heap deferral time in ms from a CSG suspend for it to be included in the reclaim
 * scan list. The value corresponds to realtime priority CSGs; other priorities use a
 * time value derived from this, with the realtime case having the highest delay.
| */ |
| #define HEAP_RECLAIM_PRIO_DEFERRAL_MS (1000) |
| |
/* Additional heap deferral time in ms if a suspended CSG is in the WAIT_SYNC state */
| #define HEAP_RECLAIM_WAIT_SYNC_DEFERRAL_MS (200) |
| |
| /* Tiler heap reclaim count size for limiting a count run length */ |
| #define HEAP_RECLAIM_COUNT_BATCH_SIZE (HEAP_SHRINKER_BATCH << 6) |
| |
| /* Tiler heap reclaim scan (free) method size for limiting a scan run length */ |
| #define HEAP_RECLAIM_SCAN_BATCH_SIZE (HEAP_SHRINKER_BATCH << 7) |
| |
| static int scheduler_group_schedule(struct kbase_queue_group *group); |
| static void remove_group_from_idle_wait(struct kbase_queue_group *const group); |
static void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
				     struct kbase_queue_group *const group,
				     enum kbase_csf_group_state run_state);
| static struct kbase_queue_group *scheduler_get_protm_enter_async_group( |
| struct kbase_device *const kbdev, |
| struct kbase_queue_group *const group); |
| static struct kbase_queue_group *get_tock_top_group( |
| struct kbase_csf_scheduler *const scheduler); |
| static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev); |
| static int suspend_active_queue_groups(struct kbase_device *kbdev, |
| unsigned long *slot_mask); |
| static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, |
| bool system_suspend); |
| static void schedule_in_cycle(struct kbase_queue_group *group, bool force); |
| static bool queue_group_scheduled_locked(struct kbase_queue_group *group); |
| |
| #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) |
| |
| /** |
| * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and |
| * scheduling tick/tock to complete before the group deschedule. |
| * |
| * @group: Pointer to the group that is being descheduled. |
| * |
 * This function blocks the descheduling of the group until the dump on fault is
 * completed and the scheduling tick/tock has completed.
 * To deschedule an on-slot group a CSG termination request would be sent; that
 * request might time out if a fault has occurred and could also affect the state
 * being dumped. Moreover the scheduler lock would be held, so access to the debugfs
 * files would get blocked.
 * The Scheduler lock and 'kctx->csf.lock' are released before this function starts
 * to wait. When a request sent by the Scheduler to the FW times out, the Scheduler
 * would also wait for the dumping to complete, releasing the Scheduler lock
 * before the wait. Meanwhile Userspace can try to delete the group; this function
 * ensures that the group doesn't exit the Scheduler until the scheduling
 * tick/tock has completed. Though very unlikely, a group deschedule can be triggered
 * from multiple threads around the same time, and after the wait the Userspace thread
 * can win the race, get the group descheduled and free the memory for the group
 * pointer before the other threads wake up and notice that the group has already been
 * descheduled. To avoid the freeing in such a case, a sort of refcount is used
 * for the group, which is incremented and decremented across the wait.
| */ |
| static void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group) |
| { |
| #if IS_ENABLED(CONFIG_DEBUG_FS) |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&kctx->csf.lock); |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) |
| return; |
| |
| while ((!kbase_debug_csf_fault_dump_complete(kbdev) || (scheduler->state == SCHED_BUSY)) && |
| queue_group_scheduled_locked(group)) { |
| group->deschedule_deferred_cnt++; |
| mutex_unlock(&scheduler->lock); |
| mutex_unlock(&kctx->csf.lock); |
| kbase_debug_csf_fault_wait_completion(kbdev); |
| mutex_lock(&kctx->csf.lock); |
| mutex_lock(&scheduler->lock); |
| group->deschedule_deferred_cnt--; |
| } |
| #endif |
| } |
| |
| /** |
| * schedule_actions_trigger_df() - Notify the client about the fault and |
| * wait for the dumping to complete. |
| * |
| * @kbdev: Pointer to the device |
| * @kctx: Pointer to the context associated with the CSG slot for which |
| * the timeout was seen. |
| * @error: Error code indicating the type of timeout that occurred. |
| * |
 * This function notifies the Userspace client waiting for the faults and waits
 * for the Client to complete the dumping.
 * The function is called only from the scheduling tick/tock, when a request sent
 * by the Scheduler to the FW times out, or from the protm event work item of the
 * group when the protected mode entry request times out.
 * In the latter case no wait is done, as the scheduler lock would be released
 * immediately. In the former case the function does wait, releasing the scheduler
 * lock before the wait. It has been ensured that the Scheduler view of the groups
| * won't change meanwhile, so no group can enter/exit the Scheduler, become |
| * runnable or go off slot. |
| */ |
| static void schedule_actions_trigger_df(struct kbase_device *kbdev, struct kbase_context *kctx, |
| enum dumpfault_error_type error) |
| { |
| #if IS_ENABLED(CONFIG_DEBUG_FS) |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (!kbase_debug_csf_fault_notify(kbdev, kctx, error)) |
| return; |
| |
| if (unlikely(scheduler->state != SCHED_BUSY)) { |
| WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE); |
| return; |
| } |
| |
| mutex_unlock(&scheduler->lock); |
| kbase_debug_csf_fault_wait_completion(kbdev); |
| mutex_lock(&scheduler->lock); |
| WARN_ON(scheduler->state != SCHED_BUSY); |
| #endif |
| } |
| |
| #ifdef KBASE_PM_RUNTIME |
| /** |
| * wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the |
| * sleeping state. |
| * |
| * @kbdev: Pointer to the device |
| * |
 * This function waits until the Scheduler has exited the sleep state. It is
 * called when an on-slot group is terminated or when the suspend
| * buffer of an on-slot group needs to be captured. |
| * |
| * Return: 0 when the wait is successful, otherwise an error code. |
| */ |
| static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| int autosuspend_delay = kbdev->dev->power.autosuspend_delay; |
| unsigned int sleep_exit_wait_time; |
| long remaining; |
| int ret = 0; |
| |
| lockdep_assert_held(&scheduler->lock); |
| WARN_ON(scheduler->state != SCHED_SLEEPING); |
| |
	/* No point in waiting if the autosuspend_delay value is negative.
	 * For a negative autosuspend_delay the Driver would directly go for
	 * the suspend of the Scheduler, but the autosuspend_delay value
	 * could have been changed after the sleep was initiated.
| */ |
| if (autosuspend_delay < 0) |
| return -EINVAL; |
| |
| if (autosuspend_delay > MAX_AUTO_SUSPEND_DELAY_MS) |
| autosuspend_delay = MAX_AUTO_SUSPEND_DELAY_MS; |
| |
| /* Usually Scheduler would remain in sleeping state until the |
| * auto-suspend timer expires and all active CSGs are suspended. |
| */ |
| sleep_exit_wait_time = autosuspend_delay + kbdev->reset_timeout_ms; |
| |
| remaining = kbase_csf_timeout_in_jiffies(sleep_exit_wait_time); |
| |
| while ((scheduler->state == SCHED_SLEEPING) && !ret) { |
| mutex_unlock(&scheduler->lock); |
| remaining = wait_event_timeout( |
| kbdev->csf.event_wait, |
| (scheduler->state != SCHED_SLEEPING), |
| remaining); |
| mutex_lock(&scheduler->lock); |
| if (!remaining && (scheduler->state == SCHED_SLEEPING)) |
| ret = -ETIMEDOUT; |
| } |
| |
| return ret; |
| } |
| |
| /** |
| * force_scheduler_to_exit_sleep() - Force scheduler to exit sleep state |
| * |
| * @kbdev: Pointer to the device |
| * |
 * This function will force the Scheduler to exit the sleep state by waking up
 * the MCU and suspending the on-slot groups. It is called at the time of
| * system suspend. |
| * |
| * Return: 0 on success. |
| */ |
| static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| int ret = 0; |
| |
| lockdep_assert_held(&scheduler->lock); |
| WARN_ON(scheduler->state != SCHED_SLEEPING); |
| WARN_ON(!kbdev->pm.backend.gpu_sleep_mode_active); |
| |
| kbase_pm_lock(kbdev); |
| ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev); |
| kbase_pm_unlock(kbdev); |
| if (ret) { |
| dev_warn(kbdev->dev, |
| "[%llu] Wait for MCU wake up failed on forced scheduler suspend", |
| kbase_backend_get_cycle_cnt(kbdev)); |
| goto out; |
| } |
| |
| ret = suspend_active_groups_on_powerdown(kbdev, true); |
| if (ret) |
| goto out; |
| |
| kbase_pm_lock(kbdev); |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| kbdev->pm.backend.gpu_sleep_mode_active = false; |
| kbdev->pm.backend.gpu_wakeup_override = false; |
| kbase_pm_update_state(kbdev); |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| ret = kbase_pm_wait_for_desired_state(kbdev); |
| kbase_pm_unlock(kbdev); |
| if (ret) { |
| dev_warn(kbdev->dev, |
| "[%llu] Wait for pm state change failed on forced scheduler suspend", |
| kbase_backend_get_cycle_cnt(kbdev)); |
| goto out; |
| } |
| |
| scheduler->state = SCHED_SUSPENDED; |
| |
| return 0; |
| |
| out: |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| kbdev->pm.backend.exit_gpu_sleep_mode = true; |
| kbdev->pm.backend.gpu_wakeup_override = false; |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| kbase_csf_scheduler_invoke_tick(kbdev); |
| |
| return ret; |
| } |
| #endif |
| |
| /** |
| * tick_timer_callback() - Callback function for the scheduling tick hrtimer |
| * |
| * @timer: Pointer to the scheduling tick hrtimer |
| * |
| * This function will enqueue the scheduling tick work item for immediate |
| * execution, if it has not been queued already. |
| * |
| * Return: enum value to indicate that timer should not be restarted. |
| */ |
| static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer) |
| { |
| struct kbase_device *kbdev = container_of(timer, struct kbase_device, |
| csf.scheduler.tick_timer); |
| |
| kbase_csf_scheduler_tick_advance(kbdev); |
| return HRTIMER_NORESTART; |
| } |
| |
| /** |
| * start_tick_timer() - Start the scheduling tick hrtimer. |
| * |
| * @kbdev: Pointer to the device |
| * |
| * This function will start the scheduling tick hrtimer and is supposed to |
| * be called only from the tick work item function. The tick hrtimer should |
| * not be active already. |
| */ |
| static void start_tick_timer(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| WARN_ON(scheduler->tick_timer_active); |
| if (likely(!work_pending(&scheduler->tick_work))) { |
| scheduler->tick_timer_active = true; |
| |
| hrtimer_start(&scheduler->tick_timer, |
| HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), |
| HRTIMER_MODE_REL); |
| } |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| } |
| |
| /** |
| * cancel_tick_timer() - Cancel the scheduling tick hrtimer |
| * |
| * @kbdev: Pointer to the device |
| */ |
| static void cancel_tick_timer(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| scheduler->tick_timer_active = false; |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| hrtimer_cancel(&scheduler->tick_timer); |
| } |
| |
| /** |
| * enqueue_tick_work() - Enqueue the scheduling tick work item |
| * |
| * @kbdev: Pointer to the device |
| * |
| * This function will queue the scheduling tick work item for immediate |
| * execution. This shall only be called when both the tick hrtimer and tick |
| * work item are not active/pending. |
| */ |
| static void enqueue_tick_work(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| kbase_csf_scheduler_invoke_tick(kbdev); |
| } |
| |
| static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr) |
| { |
| WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| clear_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); |
| } |
| |
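/**
 * acquire_doorbell() - Acquire a free user doorbell page for use.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * The first free doorbell is looked up in the doorbell_inuse_bitmap and
 * marked as in use. The caller must hold the scheduler lock.
 *
 * Return: the acquired doorbell number, or KBASEP_USER_DB_NR_INVALID if all
 *         doorbells are currently in use.
 */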
| static int acquire_doorbell(struct kbase_device *kbdev) |
| { |
| int doorbell_nr; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| doorbell_nr = find_first_zero_bit( |
| kbdev->csf.scheduler.doorbell_inuse_bitmap, |
| CSF_NUM_DOORBELL); |
| |
| if (doorbell_nr >= CSF_NUM_DOORBELL) |
| return KBASEP_USER_DB_NR_INVALID; |
| |
| set_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); |
| |
| return doorbell_nr; |
| } |
| |
| static void unassign_user_doorbell_from_group(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (group->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { |
| release_doorbell(kbdev, group->doorbell_nr); |
| group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; |
| } |
| } |
| |
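/**
 * unassign_user_doorbell_from_queue() - Detach the user doorbell page from a
 *                                       GPU queue.
 *
 * @kbdev: Pointer to the GPU device.
 * @queue: Pointer to the GPU command queue.
 *
 * The queue's doorbell number is invalidated and the existing CPU mapping of
 * the doorbell page is zapped, so that the next access from Userspace maps in
 * the dummy page instead of the real Hw doorbell page.
 */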
| static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev, |
| struct kbase_queue *queue) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| mutex_lock(&kbdev->csf.reg_lock); |
| |
| if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { |
| queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; |
| /* After this the dummy page would be mapped in */ |
| unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping, |
| queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1); |
| } |
| |
| mutex_unlock(&kbdev->csf.reg_lock); |
| } |
| |
| static void assign_user_doorbell_to_group(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (group->doorbell_nr == KBASEP_USER_DB_NR_INVALID) |
| group->doorbell_nr = acquire_doorbell(kbdev); |
| } |
| |
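/**
 * assign_user_doorbell_to_queue() - Assign the group's user doorbell page to
 *                                   a GPU queue.
 *
 * @kbdev: Pointer to the GPU device.
 * @queue: Pointer to the GPU command queue.
 *
 * The queue inherits the doorbell of its bound group, provided the bind
 * operation has completed. The existing CPU mapping is zapped so that the
 * real Hw doorbell page gets mapped in on the next access from Userspace.
 */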
| static void assign_user_doorbell_to_queue(struct kbase_device *kbdev, |
| struct kbase_queue *const queue) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| mutex_lock(&kbdev->csf.reg_lock); |
| |
| /* If bind operation for the queue hasn't completed yet, then the |
| * CSI can't be programmed for the queue |
| * (even in stopped state) and so the doorbell also can't be assigned |
| * to it. |
| */ |
| if ((queue->bind_state == KBASE_CSF_QUEUE_BOUND) && |
| (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)) { |
| WARN_ON(queue->group->doorbell_nr == KBASEP_USER_DB_NR_INVALID); |
| queue->doorbell_nr = queue->group->doorbell_nr; |
| |
| /* After this the real Hw doorbell page would be mapped in */ |
| unmap_mapping_range( |
| kbdev->csf.db_filp->f_inode->i_mapping, |
| queue->db_file_offset << PAGE_SHIFT, |
| PAGE_SIZE, 1); |
| } |
| |
| mutex_unlock(&kbdev->csf.reg_lock); |
| } |
| |
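/**
 * scheduler_doorbell_init() - Initialize the doorbell allocation bitmap.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * All doorbells are marked as free, then doorbell 0 is acquired as it is
 * reserved for use by the kernel driver (CSF_KERNEL_DOORBELL_NR).
 */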
| static void scheduler_doorbell_init(struct kbase_device *kbdev) |
| { |
| int doorbell_nr; |
| |
| bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap, |
| CSF_NUM_DOORBELL); |
| |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| /* Reserve doorbell 0 for use by kernel driver */ |
| doorbell_nr = acquire_doorbell(kbdev); |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| |
| WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR); |
| } |
| |
| /** |
 * update_on_slot_queues_offsets - Update active queues' EXTRACT offsets
| * |
| * @kbdev: Instance of a GPU platform device that implements a CSF interface. |
| * |
 * This function updates the EXTRACT offset for all queues whose groups have
| * been assigned a physical slot. These values could be used to detect a |
| * queue's true idleness status. This is intended to be an additional check |
| * on top of the GPU idle notification to account for race conditions. |
| * This function is supposed to be called only when GPU idle notification |
| * interrupt is received. |
| */ |
| static void update_on_slot_queues_offsets(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| /* All CSGs have the same number of CSs */ |
| size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num; |
| size_t i; |
| |
| lockdep_assert_held(&scheduler->interrupt_lock); |
| |
| /* csg_slots_idle_mask is not used here for the looping, as it could get |
| * updated concurrently when Scheduler re-evaluates the idle status of |
| * the CSGs for which idle notification was received previously. |
| */ |
| for_each_set_bit(i, scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num) { |
| struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group; |
| size_t j; |
| |
| if (WARN_ON(!group)) |
| continue; |
| |
| for (j = 0; j < max_streams; ++j) { |
| struct kbase_queue *const queue = group->bound_queues[j]; |
| |
| if (queue && queue->user_io_addr) { |
| u64 const *const output_addr = |
| (u64 const *)(queue->user_io_addr + PAGE_SIZE); |
| |
| queue->extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)]; |
| } |
| } |
| } |
| } |
| |
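/**
 * enqueue_gpu_idle_work() - Enqueue the GPU idle work item.
 *
 * @scheduler: Pointer to the CSF scheduler.
 *
 * The gpu_no_longer_idle flag is cleared before the gpu_idle_work item is
 * queued on the scheduler's idle workqueue.
 */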
| static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler) |
| { |
| atomic_set(&scheduler->gpu_no_longer_idle, false); |
| queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work); |
| } |
| |
| void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| int non_idle_offslot_grps; |
| bool can_suspend_on_idle; |
| |
| lockdep_assert_held(&scheduler->interrupt_lock); |
| |
| non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps); |
| can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev); |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL, |
| ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32)); |
| |
| if (!non_idle_offslot_grps) { |
| if (can_suspend_on_idle) { |
| /* fast_gpu_idle_handling is protected by the |
| * interrupt_lock, which would prevent this from being |
| * updated whilst gpu_idle_worker() is executing. |
| */ |
| scheduler->fast_gpu_idle_handling = |
| (kbdev->csf.gpu_idle_hysteresis_ms == 0) || |
| !kbase_csf_scheduler_all_csgs_idle(kbdev); |
| |
| /* The GPU idle worker relies on update_on_slot_queues_offsets() to have |
			 * finished. It is queued beforehand to reduce the time until it executes,
			 * but it will eventually be blocked by the scheduler->interrupt_lock.
| */ |
| enqueue_gpu_idle_work(scheduler); |
| |
| /* The extract offsets are unused in fast GPU idle handling */ |
| if (!scheduler->fast_gpu_idle_handling) |
| update_on_slot_queues_offsets(kbdev); |
| } |
| } else { |
| /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */ |
| kbase_csf_scheduler_tick_advance_nolock(kbdev); |
| } |
| } |
| |
| u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev) |
| { |
| u32 nr_active_csgs; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); |
| |
| nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap, |
| kbdev->csf.global_iface.group_num); |
| |
| return nr_active_csgs; |
| } |
| |
| u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev) |
| { |
| u32 nr_active_csgs; |
| unsigned long flags; |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| nr_active_csgs = kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| return nr_active_csgs; |
| } |
| |
| /** |
| * csg_slot_in_use - returns true if a queue group has been programmed on a |
| * given CSG slot. |
| * |
| * @kbdev: Instance of a GPU platform device that implements a CSF interface. |
| * @slot: Index/number of the CSG slot in question. |
| * |
 * Return: true if a queue group is resident on the given CSG slot, otherwise false.
| * |
| * Note: Caller must hold the scheduler lock. |
| */ |
| static inline bool csg_slot_in_use(struct kbase_device *kbdev, int slot) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| return (kbdev->csf.scheduler.csg_slots[slot].resident_group != NULL); |
| } |
| |
| static bool queue_group_suspended_locked(struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); |
| |
| return (group->run_state == KBASE_CSF_GROUP_SUSPENDED || |
| group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE || |
| group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); |
| } |
| |
| static bool queue_group_idle_locked(struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); |
| |
| return (group->run_state == KBASE_CSF_GROUP_IDLE || |
| group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE); |
| } |
| |
| static bool on_slot_group_idle_locked(struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); |
| |
| return (group->run_state == KBASE_CSF_GROUP_IDLE); |
| } |
| |
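/**
 * can_schedule_idle_group() - Check if an idle group can still be considered
 *                             for scheduling.
 *
 * @group: Pointer to the queue group.
 *
 * Return: true if the group is idle on a slot or has realtime priority.
 */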
| static bool can_schedule_idle_group(struct kbase_queue_group *group) |
| { |
| return (on_slot_group_idle_locked(group) || |
| (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)); |
| } |
| |
| static bool queue_group_scheduled(struct kbase_queue_group *group) |
| { |
| return (group->run_state != KBASE_CSF_GROUP_INACTIVE && |
| group->run_state != KBASE_CSF_GROUP_TERMINATED && |
| group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED); |
| } |
| |
| static bool queue_group_scheduled_locked(struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); |
| |
| return queue_group_scheduled(group); |
| } |
| |
| /** |
| * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode. |
| * |
| * @kbdev: Pointer to the GPU device |
| * |
| * This function waits for the GPU to exit protected mode which is confirmed |
| * when active_protm_grp is set to NULL. |
| * |
| * Return: true on success, false otherwise. |
| */ |
| static bool scheduler_protm_wait_quit(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| long remaining; |
| bool success = true; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_START, NULL, jiffies_to_msecs(wt)); |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); |
| |
| if (unlikely(!remaining)) { |
| struct kbase_queue_group *group = kbdev->csf.scheduler.active_protm_grp; |
| struct kbase_context *kctx = group ? group->kctx : NULL; |
| |
| dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped", |
| kbase_backend_get_cycle_cnt(kbdev), |
| kbdev->csf.fw_timeout_ms); |
| schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT); |
| success = false; |
| } |
| |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_END, NULL, jiffies_to_msecs(remaining)); |
| |
| return success; |
| } |
| |
| /** |
| * scheduler_force_protm_exit() - Force GPU to exit protected mode. |
| * |
| * @kbdev: Pointer to the GPU device |
| * |
| * This function sends a ping request to the firmware and waits for the GPU |
| * to exit protected mode. |
| * |
| * If the GPU does not exit protected mode, it is considered as hang. |
| * A GPU reset would then be triggered. |
| */ |
| static void scheduler_force_protm_exit(struct kbase_device *kbdev) |
| { |
| unsigned long flags; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| kbase_csf_firmware_ping(kbdev); |
| |
| if (scheduler_protm_wait_quit(kbdev)) |
| return; |
| |
| dev_err(kbdev->dev, "Possible GPU hang in Protected mode"); |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| if (kbdev->csf.scheduler.active_protm_grp) { |
| dev_err(kbdev->dev, |
| "Group-%d of context %d_%d ran in protected mode for too long on slot %d", |
| kbdev->csf.scheduler.active_protm_grp->handle, |
| kbdev->csf.scheduler.active_protm_grp->kctx->tgid, |
| kbdev->csf.scheduler.active_protm_grp->kctx->id, |
| kbdev->csf.scheduler.active_protm_grp->csg_nr); |
| } |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| /* The GPU could be stuck in Protected mode. To prevent a hang, |
| * a GPU reset is performed. |
| */ |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| } |
| |
| /** |
| * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up |
| * automatically for periodic tasks. |
| * |
| * @kbdev: Pointer to the device |
| * |
| * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the |
 * CSF scheduler lock is already held.
| * |
| * Return: true if the scheduler is configured to wake up periodically |
| */ |
| static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| return kbdev->csf.scheduler.timer_enabled; |
| } |
| |
| /** |
| * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for |
| * Scheduler |
| * |
| * @kbdev: Pointer to the device |
| * @suspend_handler: Handler code for how to handle a suspend that might occur. |
| * |
| * This function is usually called when Scheduler needs to be activated. |
| * The PM reference count is acquired for the Scheduler and the power on |
| * of GPU is initiated. |
| * |
| * Return: 0 if successful or a negative error code on failure. |
| */ |
| static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, |
| enum kbase_pm_suspend_handler suspend_handler) |
| { |
| unsigned long flags; |
| u32 prev_count; |
| int ret = 0; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| prev_count = kbdev->csf.scheduler.pm_active_count; |
| if (!WARN_ON(prev_count == U32_MAX)) |
| kbdev->csf.scheduler.pm_active_count++; |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| |
| /* On 0 => 1, make a pm_ctx_active request */ |
| if (!prev_count) { |
| ret = kbase_pm_context_active_handle_suspend(kbdev, |
| suspend_handler); |
| /* Invoke the PM state machines again as the change in MCU |
| * desired status, due to the update of scheduler.pm_active_count, |
| * may be missed by the thread that called pm_wait_for_desired_state() |
| */ |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| if (ret) |
| kbdev->csf.scheduler.pm_active_count--; |
| kbase_pm_update_state(kbdev); |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| } |
| |
| return ret; |
| } |
| |
| #ifdef KBASE_PM_RUNTIME |
| /** |
| * scheduler_pm_active_after_sleep() - Acquire the PM reference count for |
| * Scheduler |
| * |
| * @kbdev: Pointer to the device |
| * @flags: flags containing previous interrupt state |
| * |
| * This function is called when Scheduler needs to be activated from the |
| * sleeping state. |
| * The PM reference count is acquired for the Scheduler and the wake up of |
| * MCU is initiated. It resets the flag that indicates to the MCU state |
| * machine that MCU needs to be put in sleep state. |
| * |
| * Note: This function shall be called with hwaccess lock held and it will |
| * release that lock. |
| * |
| * Return: zero when the PM reference was taken and non-zero when the |
 * system is being suspended or is already suspended.
| */ |
| static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev, |
| unsigned long flags) |
| { |
| u32 prev_count; |
| int ret = 0; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| lockdep_assert_held(&kbdev->hwaccess_lock); |
| |
| prev_count = kbdev->csf.scheduler.pm_active_count; |
| if (!WARN_ON(prev_count == U32_MAX)) |
| kbdev->csf.scheduler.pm_active_count++; |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| |
| /* On 0 => 1, make a pm_ctx_active request */ |
| if (!prev_count) { |
| ret = kbase_pm_context_active_handle_suspend(kbdev, |
| KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); |
| |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| if (ret) |
| kbdev->csf.scheduler.pm_active_count--; |
| else |
| kbdev->pm.backend.gpu_sleep_mode_active = false; |
| kbase_pm_update_state(kbdev); |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| } |
| |
| return ret; |
| } |
| #endif |
| |
| /** |
| * scheduler_pm_idle() - Release the PM reference count held by Scheduler |
| * |
| * @kbdev: Pointer to the device |
| * |
| * This function is usually called after Scheduler is suspended. |
| * The PM reference count held by the Scheduler is released to trigger the |
| * power down of GPU. |
| */ |
| static void scheduler_pm_idle(struct kbase_device *kbdev) |
| { |
| unsigned long flags; |
| u32 prev_count; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| prev_count = kbdev->csf.scheduler.pm_active_count; |
| if (!WARN_ON(prev_count == 0)) |
| kbdev->csf.scheduler.pm_active_count--; |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| |
| if (prev_count == 1) { |
| kbase_pm_context_idle(kbdev); |
| /* Invoke the PM state machines again as the change in MCU |
| * desired status, due to the update of scheduler.pm_active_count, |
| * may be missed by the thread that called pm_wait_for_desired_state() |
| */ |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| kbase_pm_update_state(kbdev); |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| } |
| } |
| |
| #ifdef KBASE_PM_RUNTIME |
| /** |
| * scheduler_pm_idle_before_sleep() - Release the PM reference count and |
 * trigger the transition to the sleep state.
| * |
| * @kbdev: Pointer to the device |
| * |
| * This function is called on the GPU idle notification. It releases the |
| * Scheduler's PM reference count and sets the flag to indicate to the |
| * MCU state machine that MCU needs to be put in sleep state. |
| */ |
| static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev) |
| { |
| unsigned long flags; |
| u32 prev_count; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| prev_count = kbdev->csf.scheduler.pm_active_count; |
| if (!WARN_ON(prev_count == 0)) |
| kbdev->csf.scheduler.pm_active_count--; |
| kbdev->pm.backend.gpu_sleep_mode_active = true; |
| kbdev->pm.backend.exit_gpu_sleep_mode = false; |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| |
| if (prev_count == 1) { |
| kbase_pm_context_idle(kbdev); |
| /* Invoke the PM state machines again as the change in MCU |
| * desired status, due to the update of scheduler.pm_active_count, |
| * may be missed by the thread that called pm_wait_for_desired_state() |
| */ |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| kbase_pm_update_state(kbdev); |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| } |
| } |
| #endif |
| |
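/**
 * scheduler_wakeup() - Re-activate the Scheduler from the suspended or
 *                      sleeping state.
 *
 * @kbdev: Pointer to the GPU device.
 * @kick:  Whether the scheduling tick timer should be enabled after wakeup.
 *
 * A PM reference is taken for the Scheduler (handling the suspended and
 * sleeping cases differently) and, on success, the Scheduler state is moved
 * to SCHED_INACTIVE.
 */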
| static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| int ret; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if ((scheduler->state != SCHED_SUSPENDED) && |
| (scheduler->state != SCHED_SLEEPING)) |
| return; |
| |
| if (scheduler->state == SCHED_SUSPENDED) { |
| dev_dbg(kbdev->dev, |
| "Re-activating the Scheduler after suspend"); |
| ret = scheduler_pm_active_handle_suspend(kbdev, |
| KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); |
| } else { |
| #ifdef KBASE_PM_RUNTIME |
| unsigned long flags; |
| |
| dev_dbg(kbdev->dev, |
| "Re-activating the Scheduler out of sleep"); |
| |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| ret = scheduler_pm_active_after_sleep(kbdev, flags); |
| /* hwaccess_lock is released in the previous function call. */ |
| #endif |
| } |
| |
| if (ret) { |
| /* GPUCORE-29850 would add the handling for the case where |
| * Scheduler could not be activated due to system suspend. |
| */ |
| dev_info(kbdev->dev, |
| "Couldn't wakeup Scheduler due to system suspend"); |
| return; |
| } |
| |
| scheduler->state = SCHED_INACTIVE; |
| |
| if (kick) |
| scheduler_enable_tick_timer_nolock(kbdev); |
| } |
| |
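/**
 * scheduler_suspend() - Put the Scheduler into the suspended state.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * The PM reference held by the Scheduler is released and the Scheduler state
 * is set to SCHED_SUSPENDED.
 */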
| static void scheduler_suspend(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) { |
| dev_dbg(kbdev->dev, "Suspending the Scheduler"); |
| scheduler_pm_idle(kbdev); |
| scheduler->state = SCHED_SUSPENDED; |
| } |
| } |
| |
| /** |
| * update_idle_suspended_group_state() - Move the queue group to a non-idle |
| * suspended state. |
| * @group: Pointer to the queue group. |
| * |
| * This function is called to change the state of queue group to non-idle |
| * suspended state, if the group was suspended when all the queues bound to it |
| * became empty or when some queues got blocked on a sync wait & others became |
| * empty. The group is also moved to the runnable list from idle wait list in |
| * the latter case. |
 * So the function gets called when a queue is kicked or a sync wait condition
| * gets satisfied. |
| */ |
| static void update_idle_suspended_group_state(struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| int new_val; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) { |
| remove_group_from_idle_wait(group); |
| insert_group_to_runnable(scheduler, group, |
| KBASE_CSF_GROUP_SUSPENDED); |
| } else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) { |
| group->run_state = KBASE_CSF_GROUP_SUSPENDED; |
| |
| /* If scheduler is not suspended and the given group's |
| * static priority (reflected by the scan_seq_num) is inside |
| * the current tick slot-range, or there are some on_slot |
| * idle groups, schedule an async tock. |
| */ |
| if (scheduler->state != SCHED_SUSPENDED) { |
| unsigned long flags; |
| int n_idle; |
| int n_used; |
| int n_slots = |
| group->kctx->kbdev->csf.global_iface.group_num; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| n_idle = bitmap_weight(scheduler->csg_slots_idle_mask, |
| n_slots); |
| n_used = bitmap_weight(scheduler->csg_inuse_bitmap, |
| n_slots); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, |
| flags); |
| |
| if (n_idle || |
| n_used < scheduler->num_csg_slots_for_tick || |
| group->scan_seq_num < |
| scheduler->num_csg_slots_for_tick) |
| schedule_in_cycle(group, true); |
| } |
| } else |
| return; |
| |
| new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, |
| new_val); |
| } |
| |
| int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| int slot_num = group->csg_nr; |
| |
| lockdep_assert_held(&scheduler->interrupt_lock); |
| |
| if (slot_num >= 0) { |
| if (WARN_ON(scheduler->csg_slots[slot_num].resident_group != |
| group)) |
| return -1; |
| } |
| |
| return slot_num; |
| } |
| |
| int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| unsigned long flags; |
| int slot_num; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| slot_num = kbase_csf_scheduler_group_get_slot_locked(group); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| |
| return slot_num; |
| } |
| |
/**
 * kbasep_csf_scheduler_group_is_on_slot_locked() - Check if CSG is on slot.
| * |
| * @group: GPU queue group to be checked |
| * |
| * This function needs to be called with scheduler's lock held |
| * |
| * Return: true if @group is on slot. |
| */ |
| static bool kbasep_csf_scheduler_group_is_on_slot_locked( |
| struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| int slot_num = group->csg_nr; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (slot_num >= 0) { |
| if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group != |
| group)) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| int slot_num = group->csg_nr; |
| |
| lockdep_assert_held(&scheduler->interrupt_lock); |
| |
| if (WARN_ON(slot_num < 0)) |
| return false; |
| |
| return test_bit(slot_num, scheduler->csgs_events_enable_mask); |
| } |
| |
| struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( |
| struct kbase_device *kbdev, int slot) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); |
| |
| return kbdev->csf.scheduler.csg_slots[slot].resident_group; |
| } |
| |
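/**
 * halt_stream_sync() - Stop a GPU queue whose group is resident on a CSG slot.
 *
 * @queue: Pointer to the GPU queue to stop.
 *
 * If a START request is still pending for the CSI, this function first waits
 * for its acknowledgment from the firmware. It then requests the STOP state,
 * rings the kernel doorbell and waits for the STOP acknowledgment. A GPU
 * reset is triggered if either wait times out.
 *
 * Return: 0 on success, or a negative error code on failure.
 */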
| static int halt_stream_sync(struct kbase_queue *queue) |
| { |
| struct kbase_queue_group *group = queue->group; |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| struct kbase_csf_cmd_stream_info *stream; |
| int csi_index = queue->csi_index; |
| long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| unsigned long flags; |
| |
| if (WARN_ON(!group) || |
| WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return -EINVAL; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| ginfo = &global_iface->groups[group->csg_nr]; |
| stream = &ginfo->streams[csi_index]; |
| |
| if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) == |
| CS_REQ_STATE_START) { |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) |
| == CS_ACK_STATE_START), remaining); |
| |
| if (!remaining) { |
| dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d", |
| kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, |
| csi_index, group->handle, group->csg_nr); |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| |
| |
| return -ETIMEDOUT; |
| } |
| |
| remaining = |
| kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| } |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| /* Set state to STOP */ |
| kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP, |
| CS_REQ_STATE_MASK); |
| |
| kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u); |
| |
| /* Timed wait */ |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) |
| == CS_ACK_STATE_STOP), remaining); |
| |
| if (!remaining) { |
| dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d", |
| kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, |
| queue->csi_index, group->handle, group->csg_nr); |
| |
| /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU |
| * will be reset as a work-around. |
| */ |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| |
| |
| } |
| return (remaining) ? 0 : -ETIMEDOUT; |
| } |
| |
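/**
 * can_halt_stream() - Check whether the halting of a queue can proceed.
 *
 * @kbdev: Pointer to the GPU device.
 * @group: Pointer to the queue group of the queue being stopped.
 *
 * Used as the wait condition in sched_halt_stream(). The halt can proceed
 * either when the group is no longer scheduled or when it is resident on a
 * CSG slot that has reached the running state.
 *
 * Return: true if the wait for halting the queue can be terminated.
 */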
| static bool can_halt_stream(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| struct kbase_csf_csg_slot *const csg_slot = |
| kbdev->csf.scheduler.csg_slots; |
| unsigned long flags; |
| bool can_halt; |
| int slot; |
| |
| if (!queue_group_scheduled(group)) |
| return true; |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| slot = kbase_csf_scheduler_group_get_slot_locked(group); |
| can_halt = (slot >= 0) && |
| (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, |
| flags); |
| |
| return can_halt; |
| } |
| |
| /** |
| * sched_halt_stream() - Stop a GPU queue when its queue group is not running |
| * on a CSG slot. |
| * @queue: Pointer to the GPU queue to stop. |
| * |
 * This function handles stopping GPU queues for groups that are either not on
 * a CSG slot or are on the slot but undergoing a transition to the
 * resume or suspend state.
| * It waits until the queue group is scheduled on a slot and starts running, |
| * which is needed as groups that were suspended may need to resume all queues |
| * that were enabled and running at the time of suspension. |
| * |
| * Return: 0 on success, or negative on failure. |
| */ |
| static int sched_halt_stream(struct kbase_queue *queue) |
| { |
| struct kbase_queue_group *group = queue->group; |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = |
| &kbdev->csf.scheduler; |
| struct kbase_csf_csg_slot *const csg_slot = |
| kbdev->csf.scheduler.csg_slots; |
| bool retry_needed = false; |
| bool retried = false; |
| long remaining; |
| int slot; |
| int err = 0; |
| const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT); |
| |
| if (WARN_ON(!group)) |
| return -EINVAL; |
| |
| lockdep_assert_held(&queue->kctx->csf.lock); |
| lockdep_assert_held(&scheduler->lock); |
| |
| slot = kbase_csf_scheduler_group_get_slot(group); |
| |
| if (slot >= 0) { |
| WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); |
| |
| if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { |
			dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is under transition to running state",
| queue->csi_index, group->handle); |
| retry_needed = true; |
| } |
| } |
| retry: |
| /* Update the group state so that it can get scheduled soon */ |
| update_idle_suspended_group_state(group); |
| |
| mutex_unlock(&scheduler->lock); |
| |
| /* This function is called when the queue group is either not on a CSG |
| * slot or is on the slot but undergoing transition. |
| * |
| * To stop the queue, the function needs to wait either for the queue |
| * group to be assigned a CSG slot (and that slot has to reach the |
| * running state) or for the eviction of the queue group from the |
| * scheduler's list. |
| * |
| * In order to evaluate the latter condition, the function doesn't |
| * really need to lock the scheduler, as any update to the run_state |
| * of the queue group by sched_evict_group() would be visible due |
| * to implicit barriers provided by the kernel waitqueue macros. |
| * |
| * The group pointer cannot disappear meanwhile, as the high level |
| * CSF context is locked. Therefore, the scheduler would be |
| * the only one to update the run_state of the group. |
| */ |
| remaining = wait_event_timeout( |
| kbdev->csf.event_wait, can_halt_stream(kbdev, group), |
| kbase_csf_timeout_in_jiffies(group_schedule_timeout)); |
| |
| mutex_lock(&scheduler->lock); |
| |
| if (remaining && queue_group_scheduled_locked(group)) { |
| slot = kbase_csf_scheduler_group_get_slot(group); |
| |
| /* If the group is still on slot and slot is in running state |
| * then explicitly stop the CSI of the |
| * queue. Otherwise there are different cases to consider |
| * |
| * - If the queue group was already undergoing transition to |
| * resume/start state when this function was entered then it |
| * would not have disabled the CSI of the |
| * queue being stopped and the previous wait would have ended |
| * once the slot was in a running state with CS |
| * interface still enabled. |
| * Now the group is going through another transition either |
| * to a suspend state or to a resume state (it could have |
| * been suspended before the scheduler lock was grabbed). |
		 * In both scenarios it is necessary to wait again for the group to
		 * come on a slot and for that slot to reach the running state,
| * as that would guarantee that firmware will observe the |
| * CSI as disabled. |
| * |
| * - If the queue group was either off the slot or was |
| * undergoing transition to suspend state on entering this |
| * function, then the group would have been resumed with the |
| * queue's CSI in disabled state. |
		 * So now, if the group is undergoing another transition
		 * (after the resume), it is only necessary to wait for the state
		 * bits in the ACK register of the CSI to be
		 * set to the STOP value. It is expected that firmware will
| * process the stop/disable request of the CS |
| * interface after resuming the group before it processes |
| * another state change request of the group. |
| */ |
| if ((slot >= 0) && |
| (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) { |
| err = halt_stream_sync(queue); |
| } else if (retry_needed && !retried) { |
| retried = true; |
| goto retry; |
| } else if (slot >= 0) { |
| struct kbase_csf_global_iface *global_iface = |
| &kbdev->csf.global_iface; |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &global_iface->groups[slot]; |
| struct kbase_csf_cmd_stream_info *stream = |
| &ginfo->streams[queue->csi_index]; |
| u32 cs_req = |
| kbase_csf_firmware_cs_input_read(stream, CS_REQ); |
| |
| if (!WARN_ON(CS_REQ_STATE_GET(cs_req) != |
| CS_REQ_STATE_STOP)) { |
| /* Timed wait */ |
| remaining = wait_event_timeout( |
| kbdev->csf.event_wait, |
| (CS_ACK_STATE_GET( |
| kbase_csf_firmware_cs_output( |
| stream, CS_ACK)) == |
| CS_ACK_STATE_STOP), |
| kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms)); |
| |
| if (!remaining) { |
| dev_warn(kbdev->dev, |
| "[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d", |
| kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, |
| queue->csi_index, |
| group->handle, group->csg_nr); |
| |
| |
| err = -ETIMEDOUT; |
| } |
| } |
| } |
| } else if (!remaining) { |
| dev_warn(kbdev->dev, "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)", |
| kbase_backend_get_cycle_cnt(kbdev), |
| group->handle, queue->csi_index, |
| group_schedule_timeout); |
| |
| |
| err = -ETIMEDOUT; |
| } |
| |
| return err; |
| } |
| |
| /** |
| * scheduler_activate_on_queue_stop() - Activate the Scheduler when the GPU |
| * queue needs to be stopped. |
| * |
| * @queue: Pointer the GPU command queue |
| * |
 * This function is called when the CSI to which the GPU queue is bound needs to
| * be stopped. For that the corresponding queue group needs to be resident on |
| * the CSG slot and MCU firmware should be running. So this function makes the |
| * Scheduler exit the sleeping or suspended state. |
| */ |
| static void scheduler_activate_on_queue_stop(struct kbase_queue *queue) |
| { |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| |
| scheduler_wakeup(kbdev, true); |
| |
| /* Wait for MCU firmware to start running */ |
| if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { |
| dev_warn( |
| kbdev->dev, |
| "[%llu] Wait for MCU active failed for stopping queue on csi %d bound to group %d of context %d_%d on slot %d", |
| kbase_backend_get_cycle_cnt(kbdev), |
| queue->csi_index, queue->group->handle, |
| queue->kctx->tgid, queue->kctx->id, |
| queue->group->csg_nr); |
| } |
| } |
| |
| int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) |
| { |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| struct kbase_queue_group *group = queue->group; |
| bool const cs_enabled = queue->enabled; |
| int err = 0; |
| |
| if (WARN_ON(!group)) |
| return -EINVAL; |
| |
| kbase_reset_gpu_assert_failed_or_prevented(kbdev); |
| lockdep_assert_held(&queue->kctx->csf.lock); |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| |
| queue->enabled = false; |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled); |
| |
| if (cs_enabled && queue_group_scheduled_locked(group)) { |
| struct kbase_csf_csg_slot *const csg_slot = |
| kbdev->csf.scheduler.csg_slots; |
| int slot = kbase_csf_scheduler_group_get_slot(group); |
| |
| /* Since the group needs to be resumed in order to stop the queue, |
| * check if GPU needs to be powered up. |
| */ |
| scheduler_activate_on_queue_stop(queue); |
| |
| if ((slot >= 0) && |
| (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) |
| err = halt_stream_sync(queue); |
| else |
| err = sched_halt_stream(queue); |
| |
| unassign_user_doorbell_from_queue(kbdev, queue); |
| } |
| |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| return err; |
| } |
| |
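/**
 * update_hw_active() - Update the CS_ACTIVE flag in the queue's output page.
 *
 * @queue:  Pointer to the GPU queue.
 * @active: Value to set for the CS_ACTIVE field.
 *
 * Only has an effect on CONFIG_MALI_NO_MALI builds, where the driver updates
 * the CS_ACTIVE field of an enabled queue's output page; otherwise it is a
 * no-op.
 */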
| static void update_hw_active(struct kbase_queue *queue, bool active) |
| { |
| #if IS_ENABLED(CONFIG_MALI_NO_MALI) |
| if (queue && queue->enabled) { |
| u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); |
| |
| output_addr[CS_ACTIVE / sizeof(u32)] = active; |
| } |
| #else |
| CSTD_UNUSED(queue); |
| CSTD_UNUSED(active); |
| #endif |
| } |
| |
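/**
 * program_cs_extract_init() - Seed CS_EXTRACT_INIT from the current extract
 *                             pointer.
 *
 * @queue: Pointer to the GPU queue.
 *
 * The current CS_EXTRACT value from the queue's output page is copied into
 * the CS_EXTRACT_INIT field of the input page, so that processing resumes
 * from where the queue previously stopped.
 */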
| static void program_cs_extract_init(struct kbase_queue *queue) |
| { |
| u64 *input_addr = (u64 *)queue->user_io_addr; |
| u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); |
| |
| input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] = |
| output_addr[CS_EXTRACT_LO / sizeof(u64)]; |
| } |
| |
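/**
 * program_cs_trace_cfg() - Program the cs_trace configuration for a queue.
 *
 * @stream: Pointer to the CSI firmware interface of the queue.
 * @queue:  Pointer to the GPU queue.
 *
 * Nothing is programmed if the firmware does not support cs_trace (global
 * interface version below 1.1.0). Otherwise the instrumentation buffer
 * registers are programmed from the queue's trace configuration if tracing
 * is enabled for the queue, or placed in the disabled condition.
 */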
| static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, |
| struct kbase_queue *queue) |
| { |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| u32 const glb_version = kbdev->csf.global_iface.version; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| /* If cs_trace_command not supported, nothing to program */ |
| if (glb_version < kbase_csf_interface_version(1, 1, 0)) |
| return; |
| |
| /* Program for cs_trace if enabled. In the current arrangement, it is |
| * possible for the context to enable the cs_trace after some queues |
 * have been registered in cs_trace in a disabled state. This is tracked by
| * the queue's trace buffer base address, which had been validated at the |
| * queue's register_ex call. |
| */ |
| if (kbase_csf_scheduler_queue_has_trace(queue)) { |
| u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET( |
| queue->trace_cfg, queue->kctx->as_nr); |
| |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg); |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, |
| queue->trace_buffer_size); |
| |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO, |
| queue->trace_buffer_base & U32_MAX); |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI, |
| queue->trace_buffer_base >> 32); |
| |
| kbase_csf_firmware_cs_input( |
| stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO, |
| queue->trace_offset_ptr & U32_MAX); |
| kbase_csf_firmware_cs_input( |
| stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI, |
| queue->trace_offset_ptr >> 32); |
| } else { |
| /* Place the configuration to the disabled condition */ |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0); |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, 0); |
| } |
| } |
| |
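/**
 * program_cs() - Program a GPU queue on the CSI of its resident group.
 *
 * @kbdev:             Pointer to the GPU device.
 * @queue:             Pointer to the GPU queue.
 * @ring_csg_doorbell: Flag forwarded to kbase_csf_ring_cs_kernel_doorbell()
 *                     to control the ringing of the CSG doorbell.
 *
 * The ring buffer base/size, user input/output pages, priority, doorbell and
 * cs_trace configuration are programmed on the CSI, after which the START or
 * STOP state is requested depending on whether the queue is enabled. The
 * group must be resident on a CSG slot.
 */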
| static void program_cs(struct kbase_device *kbdev, |
| struct kbase_queue *queue, bool ring_csg_doorbell) |
| { |
| struct kbase_queue_group *group = queue->group; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| struct kbase_csf_cmd_stream_info *stream; |
| int csi_index = queue->csi_index; |
| unsigned long flags; |
| u64 user_input; |
| u64 user_output; |
| |
| if (WARN_ON(!group)) |
| return; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return; |
| |
| ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; |
| |
| if (WARN_ON(csi_index < 0) || |
| WARN_ON(csi_index >= ginfo->stream_num)) |
| return; |
| |
| assign_user_doorbell_to_queue(kbdev, queue); |
| if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) |
| return; |
| |
| WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr); |
| |
| if (queue->enabled && queue_group_suspended_locked(group)) |
| program_cs_extract_init(queue); |
| |
| stream = &ginfo->streams[csi_index]; |
| |
| kbase_csf_firmware_cs_input(stream, CS_BASE_LO, |
| queue->base_addr & 0xFFFFFFFF); |
| kbase_csf_firmware_cs_input(stream, CS_BASE_HI, |
| queue->base_addr >> 32); |
| kbase_csf_firmware_cs_input(stream, CS_SIZE, |
| queue->size); |
| |
| user_input = (queue->reg->start_pfn << PAGE_SHIFT); |
| kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, |
| user_input & 0xFFFFFFFF); |
| kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, |
| user_input >> 32); |
| |
| user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT); |
| kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, |
| user_output & 0xFFFFFFFF); |
| kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, |
| user_output >> 32); |
| |
| kbase_csf_firmware_cs_input(stream, CS_CONFIG, |
| (queue->doorbell_nr << 8) | (queue->priority & 0xF)); |
| |
| /* Program the queue's cs_trace configuration */ |
| program_cs_trace_cfg(stream, queue); |
| |
| /* Enable all interrupts for now */ |
| kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0)); |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| /* The fault bit could be misaligned between CS_REQ and CS_ACK if the |
| * acknowledgment was deferred due to dump on fault and the group was |
| * removed from the CSG slot before the fault could be acknowledged. |
| */ |
| if (queue->enabled) { |
| u32 const cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK); |
| |
| kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK); |
| } |
| |
| /* |
| * Enable the CSG idle notification once the CS's ringbuffer |
| * becomes empty or the CS becomes sync_idle, waiting sync update |
| * or protected mode switch. |
| */ |
| kbase_csf_firmware_cs_input_mask(stream, CS_REQ, |
| CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK, |
| CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK); |
| |
| /* Set state to START/STOP */ |
| kbase_csf_firmware_cs_input_mask(stream, CS_REQ, |
| queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP, |
| CS_REQ_STATE_MASK); |
| kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, |
| ring_csg_doorbell); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled); |
| |
| update_hw_active(queue, true); |
| } |
| |
| int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) |
| { |
| struct kbase_queue_group *group = queue->group; |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| bool const cs_enabled = queue->enabled; |
| int err = 0; |
| bool evicted = false; |
| |
| kbase_reset_gpu_assert_prevented(kbdev); |
| lockdep_assert_held(&queue->kctx->csf.lock); |
| |
| if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) |
| return -EINVAL; |
| |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| |
| #if IS_ENABLED(CONFIG_DEBUG_FS) |
| if (unlikely(kbdev->csf.scheduler.state == SCHED_BUSY)) { |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| return -EBUSY; |
| } |
| #endif |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue, |
| group->run_state); |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue, |
| queue->status_wait); |
| |
| if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) { |
| err = -EIO; |
| evicted = true; |
| } else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) |
| && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { |
| dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked", |
| queue->csi_index, group->handle); |
| } else { |
| err = scheduler_group_schedule(group); |
| |
| if (!err) { |
| queue->enabled = true; |
| if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) { |
| if (cs_enabled) { |
					/* In the normal situation, when a
					 * queue is already running, a queue
					 * update would be a doorbell kick on
					 * the user side. However, if such a
					 * kick closely follows a start or
					 * resume, the queue may actually be
					 * in transition, hence the kick would
					 * enter the kernel as the hw_active
					 * flag is yet to be set. The
					 * scheduler needs to give a kick to
					 * the corresponding user doorbell in
					 * such a case.
					 */
| kbase_csf_ring_cs_user_doorbell(kbdev, queue); |
| } else |
| program_cs(kbdev, queue, true); |
| } |
| queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work, |
| msecs_to_jiffies(kbase_get_timeout_ms( |
| kbdev, CSF_FIRMWARE_PING_TIMEOUT))); |
| } |
| } |
| |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| |
| if (evicted) |
| kbase_csf_term_descheduled_queue_group(group); |
| |
| return err; |
| } |
| |
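/**
 * update_csg_slot_status() - Update the tracked state of a CSG slot from the
 *                            latest CSG_ACK state reported by firmware.
 *
 * @kbdev: Instance of a GPU platform device.
 * @slot:  The CSG slot number whose state is to be updated.
 *
 * Return: the updated state of the CSG slot.
 */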
| static enum kbase_csf_csg_slot_state update_csg_slot_status( |
| struct kbase_device *kbdev, s8 slot) |
| { |
| struct kbase_csf_csg_slot *csg_slot = |
| &kbdev->csf.scheduler.csg_slots[slot]; |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &kbdev->csf.global_iface.groups[slot]; |
| u32 state; |
| enum kbase_csf_csg_slot_state slot_state; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, |
| CSG_ACK)); |
| slot_state = atomic_read(&csg_slot->state); |
| |
| switch (slot_state) { |
| case CSG_SLOT_READY2RUN: |
| if ((state == CSG_ACK_STATE_START) || |
| (state == CSG_ACK_STATE_RESUME)) { |
| slot_state = CSG_SLOT_RUNNING; |
| atomic_set(&csg_slot->state, slot_state); |
| csg_slot->trigger_jiffies = jiffies; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_RUNNING, csg_slot->resident_group, |
| state); |
| dev_dbg(kbdev->dev, "Group %u running on slot %d\n", |
| csg_slot->resident_group->handle, slot); |
| } |
| break; |
| case CSG_SLOT_DOWN2STOP: |
| if ((state == CSG_ACK_STATE_SUSPEND) || |
| (state == CSG_ACK_STATE_TERMINATE)) { |
| slot_state = CSG_SLOT_STOPPED; |
| atomic_set(&csg_slot->state, slot_state); |
| csg_slot->trigger_jiffies = jiffies; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state); |
| dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n", |
| csg_slot->resident_group->handle, slot); |
| } |
| break; |
| case CSG_SLOT_DOWN2STOP_TIMEDOUT: |
| case CSG_SLOT_READY2RUN_TIMEDOUT: |
| case CSG_SLOT_READY: |
| case CSG_SLOT_RUNNING: |
| case CSG_SLOT_STOPPED: |
| break; |
| default: |
| dev_warn(kbdev->dev, "Unknown CSG slot state %d", slot_state); |
| break; |
| } |
| |
| return slot_state; |
| } |
| |
| static bool csg_slot_running(struct kbase_device *kbdev, s8 slot) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| return (update_csg_slot_status(kbdev, slot) == CSG_SLOT_RUNNING); |
| } |
| |
| static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot) |
| { |
| enum kbase_csf_csg_slot_state slot_state; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| slot_state = update_csg_slot_status(kbdev, slot); |
| |
| return (slot_state == CSG_SLOT_STOPPED || |
| slot_state == CSG_SLOT_READY); |
| } |
| |
| static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot) |
| { |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &kbdev->csf.global_iface.groups[slot]; |
| u32 state; |
| |
| state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, |
| CSG_ACK)); |
| |
| if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) { |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state); |
| dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot); |
| return true; |
| } |
| |
| return false; |
| } |
| |
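/**
 * halt_csg_slot() - Request firmware to halt the CSG slot a group is
 *                   resident on.
 *
 * @group:   Pointer to the queue group occupying the CSG slot.
 * @suspend: true to request a suspend of the CSG slot, false to request its
 *           termination.
 *
 * If the slot is still transitioning to the running state, this function
 * first waits (up to the firmware timeout) for that transition to complete.
 */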
| static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; |
| struct kbase_csf_csg_slot *csg_slot = |
| kbdev->csf.scheduler.csg_slots; |
| s8 slot; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return; |
| |
| slot = group->csg_nr; |
| |
| /* When in transition, wait for it to complete */ |
| if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { |
| long remaining = |
| kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| |
| dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot); |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| csg_slot_running(kbdev, slot), remaining); |
| if (!remaining) |
| dev_warn(kbdev->dev, |
| "[%llu] slot %d timeout (%d ms) on up-running\n", |
| kbase_backend_get_cycle_cnt(kbdev), |
| slot, kbdev->csf.fw_timeout_ms); |
| } |
| |
| if (csg_slot_running(kbdev, slot)) { |
| unsigned long flags; |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &global_iface->groups[slot]; |
| u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND : |
| CSG_REQ_STATE_TERMINATE; |
| |
| dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d", |
| suspend, group->handle, group->kctx->tgid, group->kctx->id, slot); |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| /* Set state to SUSPEND/TERMINATE */ |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd, |
| CSG_REQ_STATE_MASK); |
| kbase_csf_ring_csg_doorbell(kbdev, slot); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, |
| flags); |
| atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP); |
| csg_slot[slot].trigger_jiffies = jiffies; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd); |
| |
| KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( |
| kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot); |
| } |
| } |
| |
| static void term_csg_slot(struct kbase_queue_group *group) |
| { |
| halt_csg_slot(group, false); |
| } |
| |
| static void suspend_csg_slot(struct kbase_queue_group *group) |
| { |
| halt_csg_slot(group, true); |
| } |
| |
| /** |
| * evaluate_sync_update() - Evaluate the sync wait condition the GPU command |
| * queue has been blocked on. |
| * |
| * @queue: Pointer to the GPU command queue |
| * |
| * Return: true if sync wait condition is satisfied. |
| */ |
| static bool evaluate_sync_update(struct kbase_queue *queue) |
| { |
| struct kbase_vmap_struct *mapping; |
| bool updated = false; |
| u32 *sync_ptr; |
| u32 sync_wait_cond; |
| u32 sync_current_val; |
| struct kbase_device *kbdev; |
| |
| if (WARN_ON(!queue)) |
| return false; |
| |
| kbdev = queue->kctx->kbdev; |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, |
| &mapping); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_START, queue->group, queue, |
| queue->sync_ptr); |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_BLOCKED_REASON, queue->group, queue, |
| queue->blocked_reason); |
| |
| if (!sync_ptr) { |
| dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed", |
| queue->sync_ptr); |
| goto out; |
| } |
| |
| sync_wait_cond = |
| CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait); |
| |
| WARN_ON((sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && |
| (sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE)); |
| |
| sync_current_val = READ_ONCE(*sync_ptr); |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue, |
| sync_current_val); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_TEST_VAL, queue->group, queue, |
| queue->sync_value); |
| |
| if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && |
| (sync_current_val > queue->sync_value)) || |
| ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) && |
| (sync_current_val <= queue->sync_value))) { |
		/* The sync wait condition is satisfied so the group to which
		 * the queue is bound can be re-scheduled.
		 */
| updated = true; |
| } else { |
| dev_dbg(queue->kctx->kbdev->dev, |
| "sync memory not updated yet(%u)", sync_current_val); |
| } |
| |
| kbase_phy_alloc_mapping_put(queue->kctx, mapping); |
| out: |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_END, queue->group, queue, updated); |
| return updated; |
| } |
| |
| /** |
| * save_slot_cs() - Save the state for blocked GPU command queue. |
| * |
| * @ginfo: Pointer to the CSG interface used by the group |
| * the queue is bound to. |
| * @queue: Pointer to the GPU command queue. |
| * |
 * This function will check if the GPU command queue is blocked on a sync wait
 * and evaluate the wait condition. If the wait condition isn't satisfied, it
 * will save the state needed to reevaluate the condition in the future.
 * The group to which the queue is bound shall be in the idle state.
| * |
| * Return: true if the queue is blocked on a sync wait operation. |
| */ |
| static |
| bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, |
| struct kbase_queue *queue) |
| { |
| struct kbase_csf_cmd_stream_info *const stream = |
| &ginfo->streams[queue->csi_index]; |
| u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT); |
| bool is_waiting = false; |
| |
| #if IS_ENABLED(CONFIG_DEBUG_FS) |
| u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO); |
| |
| cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32; |
| queue->saved_cmd_ptr = cmd_ptr; |
| #endif |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, |
| queue, status); |
| |
| if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) { |
| queue->status_wait = status; |
| queue->sync_ptr = kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_WAIT_SYNC_POINTER_LO); |
| queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; |
| queue->sync_value = kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_WAIT_SYNC_VALUE); |
| |
| queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( |
| kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_SCOREBOARDS)); |
| queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET( |
| kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_BLOCKED_REASON)); |
| |
| if (!evaluate_sync_update(queue)) { |
| is_waiting = true; |
| } else { |
			/* The sync object has already been updated and the
			 * condition is met, thus it doesn't need to be
			 * reevaluated and so 'status_wait' can be cleared
			 * here.
			 */
| queue->status_wait = 0; |
| } |
| } else { |
		/* Invalidate the wait status info that would have been
		 * recorded if this queue was blocked when the group (in idle
		 * state) was suspended previously. After that the group could
		 * have been unblocked due to the kicking of another queue
		 * bound to it, and so the stale wait status info would have
		 * stuck with this queue.
		 */
| queue->status_wait = 0; |
| } |
| |
| return is_waiting; |
| } |
| |
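/**
 * schedule_in_cycle() - Kick an asynchronous scheduling (tock) within the
 *                       current scheduling cycle.
 *
 * @group: Pointer to the queue group that triggered the request.
 * @force: If true, invoke the tock even when the scheduler tick timer is
 *         disabled.
 */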
| static void schedule_in_cycle(struct kbase_queue_group *group, bool force) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
	/* Only try to schedule a tock for this event if the scheduler is
	 * configured to wake up periodically (i.e. its tick timer is enabled),
	 * or if the scheduling of work needs to be enforced, in situations
	 * such as entering protected mode.
	 */
| if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) { |
| dev_dbg(kbdev->dev, "Kicking async for group %d\n", |
| group->handle); |
| kbase_csf_scheduler_invoke_tock(kbdev); |
| } |
| } |
| |
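/**
 * insert_group_to_runnable() - Add a group to the runnable groups list of its
 *                              kbase context.
 *
 * @scheduler: Pointer to the scheduler instance.
 * @group:     Pointer to the queue group to insert.
 * @run_state: The scheduling run state the group enters with.
 *
 * The group's context is added to the list of runnable contexts if this is
 * its first runnable group, a scheduling tick/tock is kicked as needed and
 * the Scheduler is woken up in case the GPU needs to be powered up.
 */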
| static |
| void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, |
| struct kbase_queue_group *const group, |
| enum kbase_csf_group_state run_state) |
| { |
| struct kbase_context *const kctx = group->kctx; |
| struct kbase_device *const kbdev = kctx->kbdev; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); |
| |
| if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) |
| return; |
| |
| group->run_state = run_state; |
| |
| if (run_state == KBASE_CSF_GROUP_RUNNABLE) |
| group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; |
| |
| list_add_tail(&group->link, |
| &kctx->csf.sched.runnable_groups[group->priority]); |
| kctx->csf.sched.num_runnable_grps++; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_INSERT, group, |
| kctx->csf.sched.num_runnable_grps); |
| |
| /* Add the kctx if not yet in runnable kctxs */ |
| if (kctx->csf.sched.num_runnable_grps == 1) { |
		/* First runnable CSG, add the kctx to the runnable_kctxs list */
| INIT_LIST_HEAD(&kctx->csf.link); |
| list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs); |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_INSERT, kctx, 0u); |
| } |
| |
| scheduler->total_runnable_grps++; |
| |
| if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && |
| (scheduler->total_runnable_grps == 1 || |
| scheduler->state == SCHED_SUSPENDED || |
| scheduler->state == SCHED_SLEEPING)) { |
| dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n"); |
| /* Fire a scheduling to start the time-slice */ |
| enqueue_tick_work(kbdev); |
| } else |
| schedule_in_cycle(group, false); |
| |
| /* Since a new group has become runnable, check if GPU needs to be |
| * powered up. |
| */ |
| scheduler_wakeup(kbdev, false); |
| } |
| |
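/**
 * remove_group_from_runnable() - Remove a group from the runnable groups list
 *                                of its kbase context.
 *
 * @scheduler: Pointer to the scheduler instance.
 * @group:     Pointer to the queue group to remove.
 * @run_state: The scheduling run state the group is left in.
 *
 * The group's context is dropped from the list of runnable contexts when its
 * last runnable group is removed, and the GPU idle work is enqueued if no
 * runnable groups remain at all.
 */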
| static |
| void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, |
| struct kbase_queue_group *group, |
| enum kbase_csf_group_state run_state) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_queue_group *new_head_grp; |
| struct list_head *list = |
| &kctx->csf.sched.runnable_groups[group->priority]; |
| unsigned long flags; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| WARN_ON(!queue_group_scheduled_locked(group)); |
| |
| group->run_state = run_state; |
| list_del_init(&group->link); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| /* The below condition will be true when the group running in protected |
| * mode is being terminated but the protected mode exit interrupt wasn't |
| * received. This can happen if the FW got stuck during protected mode |
| * for some reason (like GPU page fault or some internal error). |
| * In normal cases FW is expected to send the protected mode exit |
| * interrupt before it handles the CSG termination request. |
| */ |
| if (unlikely(scheduler->active_protm_grp == group)) { |
| /* CSG slot cleanup should have happened for the pmode group */ |
| WARN_ON(kbasep_csf_scheduler_group_is_on_slot_locked(group)); |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); |
| /* Initiate a GPU reset, in case it wasn't initiated yet, |
| * in order to rectify the anomaly. |
| */ |
| if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kctx->kbdev); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_PROTM_EXIT, |
| scheduler->active_protm_grp, 0u); |
| scheduler->active_protm_grp = NULL; |
| } |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| |
| if (scheduler->top_grp == group) { |
| /* |
| * Note: this disables explicit rotation in the next scheduling |
| * cycle. However, removing the top_grp is the same as an |
| * implicit rotation (e.g. if we instead rotated the top_ctx |
| * and then remove top_grp) |
| * |
| * This implicit rotation is assumed by the scheduler rotate |
| * functions. |
| */ |
| scheduler->top_grp = NULL; |
| |
| /* |
| * Trigger a scheduling tock for a CSG containing protected |
| * content in case there has been any in order to minimize |
| * latency. |
| */ |
| group = scheduler_get_protm_enter_async_group(kctx->kbdev, |
| NULL); |
| if (group) |
| schedule_in_cycle(group, true); |
| } |
| |
| kctx->csf.sched.num_runnable_grps--; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_REMOVE, group, |
| kctx->csf.sched.num_runnable_grps); |
| new_head_grp = (!list_empty(list)) ? |
| list_first_entry(list, struct kbase_queue_group, link) : |
| NULL; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u); |
| |
| if (kctx->csf.sched.num_runnable_grps == 0) { |
| struct kbase_context *new_head_kctx; |
| struct list_head *kctx_list = &scheduler->runnable_kctxs; |
| /* drop the kctx */ |
| list_del_init(&kctx->csf.link); |
| if (scheduler->top_ctx == kctx) |
| scheduler->top_ctx = NULL; |
| KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_REMOVE, kctx, 0u); |
| new_head_kctx = (!list_empty(kctx_list)) ? |
| list_first_entry(kctx_list, struct kbase_context, csf.link) : |
| NULL; |
| KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u); |
| } |
| |
| WARN_ON(scheduler->total_runnable_grps == 0); |
| scheduler->total_runnable_grps--; |
| if (!scheduler->total_runnable_grps) { |
| dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups"); |
| cancel_tick_timer(kctx->kbdev); |
| WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps)); |
| if (scheduler->state != SCHED_SUSPENDED) |
| enqueue_gpu_idle_work(scheduler); |
| } |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, |
| scheduler->num_active_address_spaces | |
| (((u64)scheduler->total_runnable_grps) << 32)); |
| } |
| |
| static void insert_group_to_idle_wait(struct kbase_queue_group *const group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| |
| lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); |
| |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_IDLE); |
| |
| list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups); |
| kctx->csf.sched.num_idle_wait_grps++; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group, |
| kctx->csf.sched.num_idle_wait_grps); |
| group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC; |
| dev_dbg(kctx->kbdev->dev, |
| "Group-%d suspended on sync_wait, total wait_groups: %u\n", |
| group->handle, kctx->csf.sched.num_idle_wait_grps); |
| } |
| |
| static void remove_group_from_idle_wait(struct kbase_queue_group *const group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct list_head *list = &kctx->csf.sched.idle_wait_groups; |
| struct kbase_queue_group *new_head_grp; |
| |
| lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); |
| |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); |
| |
| list_del_init(&group->link); |
| WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0); |
| kctx->csf.sched.num_idle_wait_grps--; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_REMOVE, group, |
| kctx->csf.sched.num_idle_wait_grps); |
| new_head_grp = (!list_empty(list)) ? |
| list_first_entry(list, struct kbase_queue_group, link) : |
| NULL; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u); |
| group->run_state = KBASE_CSF_GROUP_INACTIVE; |
| } |
| |
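/**
 * deschedule_idle_wait_group() - Move a group blocked on a sync wait from the
 *                                runnable groups list to the idle wait list.
 *
 * @scheduler: Pointer to the scheduler instance.
 * @group:     Pointer to the queue group to deschedule.
 */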
| static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, |
| struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (WARN_ON(!group)) |
| return; |
| |
| remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_IDLE); |
| insert_group_to_idle_wait(group); |
| } |
| |
| static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { |
| int new_val = |
| atomic_dec_return(&scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val); |
| } |
| } |
| |
| static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| WARN_ON(group->csg_nr < 0); |
| |
| if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { |
| int new_val = |
| atomic_dec_return(&scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val); |
| } |
| } |
| |
| static void update_offslot_non_idle_cnt_on_grp_suspend( |
| struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (scheduler->state == SCHED_BUSY) { |
		/* active phase, or async entering of protected mode */
| if (group->prepared_seq_num >= |
| scheduler->non_idle_scanout_grps) { |
| /* At scanout, it was tagged as on-slot idle */ |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { |
| int new_val = atomic_inc_return( |
| &scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, |
| group, new_val); |
| } |
| } else { |
| if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) { |
| int new_val = atomic_dec_return( |
| &scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, |
| group, new_val); |
| } |
| } |
| } else { |
| /* async phases */ |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { |
| int new_val = atomic_inc_return( |
| &scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, |
| new_val); |
| } |
| } |
| } |
| |
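/**
 * confirm_cmd_buf_empty() - Confirm that a GPU command queue is truly idle.
 *
 * @queue: Pointer to the GPU command queue to check.
 *
 * Return: true if the queue's ring buffer is empty (CS_INSERT equals
 *         CS_EXTRACT) and, where reported by the firmware, no CS scoreboard
 *         entries are in use.
 */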
| static bool confirm_cmd_buf_empty(struct kbase_queue const *queue) |
| { |
| bool cs_empty; |
| bool cs_idle; |
| u32 sb_status = 0; |
| |
| struct kbase_device const *const kbdev = queue->group->kctx->kbdev; |
| struct kbase_csf_global_iface const *const iface = |
| &kbdev->csf.global_iface; |
| |
| u32 glb_version = iface->version; |
| |
| u64 const *input_addr = (u64 const *)queue->user_io_addr; |
| u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE); |
| |
| if (glb_version >= kbase_csf_interface_version(1, 0, 0)) { |
| /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */ |
| struct kbase_csf_cmd_stream_group_info const *const ginfo = |
| &kbdev->csf.global_iface.groups[queue->group->csg_nr]; |
| struct kbase_csf_cmd_stream_info const *const stream = |
| &ginfo->streams[queue->csi_index]; |
| |
| sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( |
| kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_SCOREBOARDS)); |
| } |
| |
| cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] == |
| output_addr[CS_EXTRACT_LO / sizeof(u64)]); |
| cs_idle = cs_empty && (!sb_status); |
| |
| return cs_idle; |
| } |
| |
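/**
 * detach_from_sched_reclaim_mgr() - Remove a context from the scheduler's
 *                                   tiler heap reclaim manager.
 *
 * @kctx: Pointer to the kbase context to detach.
 *
 * The estimated and scan page counts contributed by the context are
 * subtracted from the reclaim manager and the context's heap info is reset.
 */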
| static void detach_from_sched_reclaim_mgr(struct kbase_context *kctx) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; |
| struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (!list_empty(&heap_info->mgr_link)) { |
| WARN_ON(!heap_info->flags); |
| list_del_init(&heap_info->mgr_link); |
| |
| if (heap_info->flags & CSF_CTX_RECLAIM_CANDI_FLAG) |
| WARN_ON(atomic_sub_return(heap_info->nr_est_pages, |
| &scheduler->reclaim_mgr.est_cand_pages) < 0); |
| if (heap_info->flags & CSF_CTX_RECLAIM_SCAN_FLAG) |
| WARN_ON(atomic_sub_return(heap_info->nr_scan_pages, |
| &scheduler->reclaim_mgr.mgr_scan_pages) < 0); |
| |
| dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_detach: ctx_%d_%d, flags = 0x%x\n", |
| kctx->tgid, kctx->id, heap_info->flags); |
| /* Clear on detaching */ |
| heap_info->nr_est_pages = 0; |
| heap_info->nr_scan_pages = 0; |
| heap_info->flags = 0; |
| } |
| } |
| |
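/**
 * attach_to_sched_reclaim_mgr() - Add a context to the candidate list of the
 *                                 scheduler's tiler heap reclaim manager.
 *
 * @kctx: Pointer to the kbase context to attach.
 */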
| static void attach_to_sched_reclaim_mgr(struct kbase_context *kctx) |
| { |
| struct kbase_kctx_heap_info *const heap_info = &kctx->csf.sched.heap_info; |
| struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (WARN_ON(!list_empty(&heap_info->mgr_link))) |
| list_del_init(&heap_info->mgr_link); |
| |
| list_add_tail(&heap_info->mgr_link, &scheduler->reclaim_mgr.candidate_ctxs); |
| |
	/* Read the kctx's estimate of tiler heap pages; this separates it from
	 * subsequent updates/changes on the kctx's tiler heap side. The value
	 * remains static for the duration this kctx stays on the reclaim
	 * manager's candidate_ctxs list.
	 */
| heap_info->nr_est_pages = (u32)atomic_read(&kctx->csf.tiler_heaps.est_count_pages); |
| atomic_add(heap_info->nr_est_pages, &scheduler->reclaim_mgr.est_cand_pages); |
| |
| heap_info->attach_jiffies = jiffies; |
| heap_info->flags = CSF_CTX_RECLAIM_CANDI_FLAG; |
| |
| dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_attach: ctx_%d_%d, est_count_pages = %u\n", |
| kctx->tgid, kctx->id, heap_info->nr_est_pages); |
| } |
| |
| static void update_kctx_heap_info_on_grp_on_slot(struct kbase_queue_group *group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info; |
| |
| lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); |
| |
| heap_info->on_slot_grps++; |
	/* If the kctx's on-slot CSG count transitioned 0 => 1, detach the kctx from scheduler->reclaim_mgr */
| if (heap_info->on_slot_grps == 1) { |
| dev_dbg(kctx->kbdev->dev, |
| "CSG_%d_%d_%d on-slot, remove kctx from reclaim manager\n", |
| group->kctx->tgid, group->kctx->id, group->handle); |
| |
| detach_from_sched_reclaim_mgr(kctx); |
| } |
| } |
| |
| static void update_kctx_heap_info_on_grp_evict(struct kbase_queue_group *group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_kctx_heap_info *const heap_info = &kctx->csf.sched.heap_info; |
| struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; |
| const u32 num_groups = kctx->kbdev->csf.global_iface.group_num; |
| u32 on_slot_grps = 0; |
| u32 i; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
	/* Group eviction from the scheduler is a bit more complex, but fairly
	 * infrequent in operation. Take the opportunity to actually count the
	 * on-slot CSGs of the given kctx, for robustness and clearer code logic.
	 */
| for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) { |
| struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; |
| struct kbase_queue_group *grp = csg_slot->resident_group; |
| |
| if (unlikely(!grp)) |
| continue; |
| |
| if (grp->kctx == kctx) |
| on_slot_grps++; |
| } |
| |
| heap_info->on_slot_grps = on_slot_grps; |
| |
| /* If the kctx has no other CSGs on-slot, handle the heap reclaim related actions */ |
| if (!heap_info->on_slot_grps) { |
| if (kctx->csf.sched.num_runnable_grps || kctx->csf.sched.num_idle_wait_grps) { |
| /* The kctx has other operational CSGs, attach it if not yet done */ |
| if (list_empty(&heap_info->mgr_link)) { |
| dev_dbg(kctx->kbdev->dev, |
| "CSG_%d_%d_%d evict, add kctx to reclaim manager\n", |
| group->kctx->tgid, group->kctx->id, group->handle); |
| |
| attach_to_sched_reclaim_mgr(kctx); |
| } |
| } else { |
| /* The kctx is a zombie after the group eviction, drop it out */ |
			dev_dbg(kctx->kbdev->dev,
				"CSG_%d_%d_%d evict leading to zombie kctx, detach from reclaim manager\n",
				group->kctx->tgid, group->kctx->id, group->handle);
| |
| detach_from_sched_reclaim_mgr(kctx); |
| } |
| } |
| } |
| |
| static void update_kctx_heap_info_on_grp_suspend(struct kbase_queue_group *group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info; |
| |
| lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); |
| |
| if (!WARN_ON(heap_info->on_slot_grps == 0)) |
| heap_info->on_slot_grps--; |
| /* If the kctx has no CSGs on-slot, attach it to scheduler's reclaim manager */ |
| if (heap_info->on_slot_grps == 0) { |
| dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d off-slot, add kctx to reclaim manager\n", |
| group->kctx->tgid, group->kctx->id, group->handle); |
| |
| attach_to_sched_reclaim_mgr(kctx); |
| } |
| } |
| |
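/**
 * save_csg_slot() - Save the state of a group whose CSG slot has just been
 *                   suspended or terminated.
 *
 * @group: Pointer to the queue group that was resident on the CSG slot.
 *
 * The group's run state is updated to one of the suspended states, depending
 * on whether the CSG was idle and whether any of its queues are blocked on a
 * sync wait.
 */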
| static void save_csg_slot(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| u32 state; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return; |
| |
| ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; |
| |
| state = |
| CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK)); |
| |
| if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) && |
| (state != CSG_ACK_STATE_TERMINATE))) { |
| u32 max_streams = ginfo->stream_num; |
| u32 i; |
| bool sync_wait = false; |
| bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & |
| CSG_STATUS_STATE_IDLE_MASK; |
| #if IS_ENABLED(CONFIG_MALI_NO_MALI) |
| for (i = 0; i < max_streams; i++) |
| update_hw_active(group->bound_queues[i], false); |
| #endif /* CONFIG_MALI_NO_MALI */ |
| for (i = 0; idle && i < max_streams; i++) { |
| struct kbase_queue *const queue = |
| group->bound_queues[i]; |
| |
| if (!queue || !queue->enabled) |
| continue; |
| |
| if (save_slot_cs(ginfo, queue)) |
| sync_wait = true; |
| else { |
				/* Need to confirm if the ring buffer of the
				 * GPU queue is empty or not. A race can arise
				 * between the flush of the GPU queue and the
				 * suspend of the CSG. If a queue is flushed
				 * after the FW has set the IDLE bit in
				 * CSG_STATUS_STATE, then the Scheduler will
				 * incorrectly consider the CSG as idle. And
				 * there may not be any further flush call for
				 * the GPU queue, which would have de-idled
				 * the CSG.
				 */
| idle = confirm_cmd_buf_empty(queue); |
| } |
| } |
| |
| if (idle) { |
| /* Take the suspended group out of the runnable_groups |
| * list of the context and move it to the |
| * idle_wait_groups list. |
| */ |
| if (sync_wait) |
| deschedule_idle_wait_group(scheduler, group); |
| else { |
| group->run_state = |
| KBASE_CSF_GROUP_SUSPENDED_ON_IDLE; |
| dev_dbg(kbdev->dev, "Group-%d suspended: idle", |
| group->handle); |
| } |
| } else { |
| group->run_state = KBASE_CSF_GROUP_SUSPENDED; |
| } |
| |
| update_offslot_non_idle_cnt_on_grp_suspend(group); |
| update_kctx_heap_info_on_grp_suspend(group); |
| } |
| } |
| |
/**
 * cleanup_csg_slot() - Clean up a CSG slot after it has been vacated, ready
 *                      for the next CSG run.
 *
 * @group: Pointer to the queue group that was resident on the slot.
 *
 * Return: true if there is a kctx address space fault associated with the
 *         group for which the clean-up is done.
 */
| static bool cleanup_csg_slot(struct kbase_queue_group *group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| s8 slot; |
| struct kbase_csf_csg_slot *csg_slot; |
| unsigned long flags; |
| u32 i; |
| bool as_fault = false; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return as_fault; |
| |
| slot = group->csg_nr; |
| csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; |
| ginfo = &global_iface->groups[slot]; |
| |
| /* Now loop through all the bound CSs, and clean them via a stop */ |
| for (i = 0; i < ginfo->stream_num; i++) { |
| struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[i]; |
| |
| if (group->bound_queues[i]) { |
| if (group->bound_queues[i]->enabled) { |
| kbase_csf_firmware_cs_input_mask(stream, |
| CS_REQ, CS_REQ_STATE_STOP, |
| CS_REQ_STATE_MASK); |
| } |
| |
| unassign_user_doorbell_from_queue(kbdev, |
| group->bound_queues[i]); |
| } |
| } |
| |
| unassign_user_doorbell_from_group(kbdev, group); |
| |
	/* The CSG does not need any cleanup other than dropping its AS */
| spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); |
| as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT); |
| kbase_ctx_sched_release_ctx(kctx); |
| if (unlikely(group->faulted)) |
| as_fault = true; |
| spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); |
| |
	/* Now mark the slot as vacant */
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL; |
| clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, |
| kbdev->csf.scheduler.csg_slots_idle_mask[0]); |
| |
| group->csg_nr = KBASEP_CSG_NR_INVALID; |
| set_bit(slot, kbdev->csf.scheduler.csgs_events_enable_mask); |
| clear_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| csg_slot->trigger_jiffies = jiffies; |
| atomic_set(&csg_slot->state, CSG_SLOT_READY); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, slot); |
| dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n", |
| group->handle, slot); |
| |
| KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev, |
| kbdev->gpu_props.props.raw_props.gpu_id, slot); |
| |
| return as_fault; |
| } |
| |
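/**
 * update_csg_slot_priority() - Update the dynamic priority of a group
 *                              resident on a CSG slot.
 *
 * @group: Pointer to the queue group resident on the CSG slot.
 * @prio:  The new dynamic priority for the CSG slot.
 */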
| static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_csg_slot *csg_slot; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| s8 slot; |
| u8 prev_prio; |
| u32 ep_cfg; |
| u32 csg_req; |
| unsigned long flags; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return; |
| |
| slot = group->csg_nr; |
| csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; |
| ginfo = &kbdev->csf.global_iface.groups[slot]; |
| |
| /* CSGs remaining on-slot can be either idle or runnable. |
| * This also applies in protected mode. |
| */ |
| WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) || |
| (group->run_state == KBASE_CSF_GROUP_IDLE))); |
| |
| /* Update consumes a group from scanout */ |
| update_offslot_non_idle_cnt_for_onslot_grp(group); |
| |
| if (csg_slot->priority == prio) |
| return; |
| |
| /* Read the csg_ep_cfg back for updating the priority field */ |
| ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ); |
| prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg); |
| ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); |
| kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); |
| csg_req ^= CSG_REQ_EP_CFG_MASK; |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, |
| CSG_REQ_EP_CFG_MASK); |
| kbase_csf_ring_csg_doorbell(kbdev, slot); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| csg_slot->priority = prio; |
| |
| dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n", |
| group->handle, group->kctx->tgid, group->kctx->id, slot, |
| prev_prio, prio); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio); |
| |
| set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update); |
| } |
| |
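/**
 * program_csg_slot() - Program a queue group on a vacant CSG slot and request
 *                      firmware to start or resume it.
 *
 * @group: Pointer to the queue group to program.
 * @slot:  The vacant CSG slot to program the group on.
 * @prio:  The dynamic priority to assign to the CSG slot.
 */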
| static void program_csg_slot(struct kbase_queue_group *group, s8 slot, |
| u8 prio) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; |
| const u64 shader_core_mask = |
| kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); |
| const u64 tiler_core_mask = |
| kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER); |
| const u64 compute_mask = shader_core_mask & group->compute_mask; |
| const u64 fragment_mask = shader_core_mask & group->fragment_mask; |
| const u64 tiler_mask = tiler_core_mask & group->tiler_mask; |
| const u8 num_cores = kbdev->gpu_props.num_cores; |
| const u8 compute_max = min(num_cores, group->compute_max); |
| const u8 fragment_max = min(num_cores, group->fragment_max); |
| const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max); |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| u32 ep_cfg = 0; |
| u32 csg_req; |
| u32 state; |
| int i; |
| unsigned long flags; |
| const u64 normal_suspend_buf = |
| group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT; |
| struct kbase_csf_csg_slot *csg_slot = |
| &kbdev->csf.scheduler.csg_slots[slot]; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
	if (WARN_ON(slot < 0) ||
	    WARN_ON(slot >= global_iface->group_num))
| return; |
| |
| WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY); |
| |
| ginfo = &global_iface->groups[slot]; |
| |
| /* Pick an available address space for this context */ |
| mutex_lock(&kbdev->mmu_hw_mutex); |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| kbase_ctx_sched_retain_ctx(kctx); |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| mutex_unlock(&kbdev->mmu_hw_mutex); |
| |
| if (kctx->as_nr == KBASEP_AS_NR_INVALID) { |
| dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", |
| group->handle, kctx->tgid, kctx->id, slot); |
| return; |
| } |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); |
| kbdev->csf.scheduler.csg_slots[slot].resident_group = group; |
| group->csg_nr = slot; |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| assign_user_doorbell_to_group(kbdev, group); |
| |
| /* Now loop through all the bound & kicked CSs, and program them */ |
| for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { |
| struct kbase_queue *queue = group->bound_queues[i]; |
| |
| if (queue) |
| program_cs(kbdev, queue, false); |
| } |
| |
| |
| /* Endpoint programming for CSG */ |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO, |
| compute_mask & U32_MAX); |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI, |
| compute_mask >> 32); |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO, |
| fragment_mask & U32_MAX); |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI, |
| fragment_mask >> 32); |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, |
| tiler_mask & U32_MAX); |
| |
| /* Register group UID with firmware */ |
| kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG, |
| group->group_uid); |
| |
| ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max); |
| ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); |
| ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); |
| ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); |
| kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); |
| |
| /* Program the address space number assigned to the context */ |
| kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr); |
| |
| kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO, |
| normal_suspend_buf & U32_MAX); |
| kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, |
| normal_suspend_buf >> 32); |
| |
| if (group->protected_suspend_buf.reg) { |
| const u64 protm_suspend_buf = |
| group->protected_suspend_buf.reg->start_pfn << |
| PAGE_SHIFT; |
| kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, |
| protm_suspend_buf & U32_MAX); |
| kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, |
| protm_suspend_buf >> 32); |
| } |
| |
| |
| /* Enable all interrupts for now */ |
| kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0)); |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); |
| csg_req ^= CSG_REQ_EP_CFG_MASK; |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, |
| CSG_REQ_EP_CFG_MASK); |
| |
| /* Set state to START/RESUME */ |
| if (queue_group_suspended_locked(group)) { |
| state = CSG_REQ_STATE_RESUME; |
| } else { |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE); |
| state = CSG_REQ_STATE_START; |
| } |
| |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, |
| state, CSG_REQ_STATE_MASK); |
| kbase_csf_ring_csg_doorbell(kbdev, slot); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
	/* Update the status before ringing the doorbell, marking ready => run */
| atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN); |
| csg_slot->trigger_jiffies = jiffies; |
| csg_slot->priority = prio; |
| |
| /* Trace the programming of the CSG on the slot */ |
| KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( |
| kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id, |
| group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0); |
| |
| dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n", |
| group->handle, kctx->tgid, kctx->id, slot, prio); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START_REQ, group, |
| (((u64)ep_cfg) << 32) | ((((u32)kctx->as_nr) & 0xF) << 16) | |
| (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT))); |
| |
| /* Update the heap reclaim manager */ |
| update_kctx_heap_info_on_grp_on_slot(group); |
| |
| /* Programming a slot consumes a group from scanout */ |
| update_offslot_non_idle_cnt_for_onslot_grp(group); |
| } |
| |
| static void remove_scheduled_group(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| WARN_ON(group->prepared_seq_num == |
| KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID); |
| WARN_ON(list_empty(&group->link_to_schedule)); |
| |
| list_del_init(&group->link_to_schedule); |
| scheduler->ngrp_to_schedule--; |
| group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; |
| group->kctx->csf.sched.ngrp_to_schedule--; |
| } |
| |
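/**
 * sched_evict_group() - Evict a queue group from the scheduler.
 *
 * @group: Pointer to the queue group to evict.
 * @fault: true if the eviction is due to a fault, in which case the group is
 *         placed in the FAULT_EVICTED state.
 * @update_non_idle_offslot_grps_cnt: true if the count of non-idle off-slot
 *                                    groups may need to be decremented for
 *                                    this group.
 */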
| static void sched_evict_group(struct kbase_queue_group *group, bool fault, |
| bool update_non_idle_offslot_grps_cnt) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (queue_group_scheduled_locked(group)) { |
| u32 i; |
| |
| if (update_non_idle_offslot_grps_cnt && |
| (group->run_state == KBASE_CSF_GROUP_SUSPENDED || |
| group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { |
| int new_val = atomic_dec_return( |
| &scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, |
| new_val); |
| } |
| |
| for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { |
| if (group->bound_queues[i]) |
| group->bound_queues[i]->enabled = false; |
| } |
| |
| if (group->prepared_seq_num != |
| KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) |
| remove_scheduled_group(kbdev, group); |
| |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) |
| remove_group_from_idle_wait(group); |
| else { |
| remove_group_from_runnable(scheduler, group, |
| KBASE_CSF_GROUP_INACTIVE); |
| } |
| |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); |
| |
| if (fault) |
| group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED; |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group, |
| (((u64)scheduler->total_runnable_grps) << 32) | |
| ((u32)group->run_state)); |
| dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n", |
| group->handle, scheduler->total_runnable_grps); |
| /* Notify a group has been evicted */ |
| wake_up_all(&kbdev->csf.event_wait); |
| } |
| |
| update_kctx_heap_info_on_grp_evict(group); |
| } |
| |
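/**
 * term_group_sync() - Terminate a group's CSG slot and wait for the
 *                     termination to complete.
 *
 * @group: Pointer to the queue group to terminate.
 *
 * Return: 0 on success, or -ETIMEDOUT if the firmware did not acknowledge the
 *         termination in time (in which case a GPU reset is initiated).
 */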
| static int term_group_sync(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| int err = 0; |
| |
| term_csg_slot(group); |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr), |
| remaining); |
| |
| if (unlikely(!remaining)) { |
| enum dumpfault_error_type error_type = DF_CSG_TERMINATE_TIMEOUT; |
| |
| dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d", |
| kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, |
| group->handle, group->kctx->tgid, |
| group->kctx->id, group->csg_nr); |
| if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) |
| error_type = DF_PING_REQUEST_TIMEOUT; |
| kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type); |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| |
| |
| err = -ETIMEDOUT; |
| } |
| |
| return err; |
| } |
| |
| void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| bool wait_for_termination = true; |
| bool on_slot; |
| |
| kbase_reset_gpu_assert_failed_or_prevented(kbdev); |
| lockdep_assert_held(&group->kctx->csf.lock); |
| mutex_lock(&scheduler->lock); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state); |
| wait_for_dump_complete_on_group_deschedule(group); |
| if (!queue_group_scheduled_locked(group)) |
| goto unlock; |
| |
| on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group); |
| |
| #ifdef KBASE_PM_RUNTIME |
	/* If the queue group is on-slot and the Scheduler is in the SLEEPING
	 * state, then we need to wake up the Scheduler to exit the sleep state
	 * rather than waiting for the runtime suspend or power down of the GPU.
	 * The group termination is usually triggered in the context of an
	 * Application thread and it has been seen that certain Apps can destroy
	 * groups at random points and not necessarily when the App is exiting.
	 */
| if (on_slot && (scheduler->state == SCHED_SLEEPING)) { |
| scheduler_wakeup(kbdev, true); |
| |
| /* Wait for MCU firmware to start running */ |
| if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { |
| dev_warn( |
| kbdev->dev, |
| "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d", |
| kbase_backend_get_cycle_cnt(kbdev), |
| group->handle, group->kctx->tgid, |
| group->kctx->id, group->csg_nr); |
| /* No point in waiting for CSG termination if MCU didn't |
| * become active. |
| */ |
| wait_for_termination = false; |
| } |
| } |
| #endif |
| if (!on_slot) { |
| sched_evict_group(group, false, true); |
| } else { |
| bool as_faulty; |
| |
| if (likely(wait_for_termination)) |
| term_group_sync(group); |
| else |
| term_csg_slot(group); |
| |
		/* Treat the CSG as terminated */
| as_faulty = cleanup_csg_slot(group); |
| /* remove from the scheduler list */ |
| sched_evict_group(group, as_faulty, false); |
| } |
| |
| WARN_ON(queue_group_scheduled_locked(group)); |
| |
| unlock: |
| mutex_unlock(&scheduler->lock); |
| } |
| |
| /** |
| * scheduler_group_schedule() - Schedule a GPU command queue group on firmware |
| * |
| * @group: Pointer to the queue group to be scheduled. |
| * |
 * This function enables the scheduling of a GPU command queue group on the
 * firmware.
| * |
| * Return: 0 on success, or negative on failure. |
| */ |
| static int scheduler_group_schedule(struct kbase_queue_group *group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&kctx->csf.lock); |
| lockdep_assert_held(&scheduler->lock); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state); |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) |
| update_idle_suspended_group_state(group); |
| else if (queue_group_idle_locked(group)) { |
| WARN_ON(kctx->csf.sched.num_runnable_grps == 0); |
| WARN_ON(kbdev->csf.scheduler.total_runnable_grps == 0); |
| |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) |
| update_idle_suspended_group_state(group); |
| else { |
| struct kbase_queue_group *protm_grp; |
| unsigned long flags; |
| |
| WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked( |
| group)); |
| |
| group->run_state = KBASE_CSF_GROUP_RUNNABLE; |
| |
			/* A normal mode CSG could be idle on-slot during
			 * protected mode. In this case, clear the
			 * appropriate bit in csg_slots_idle_mask.
			 */
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| protm_grp = scheduler->active_protm_grp; |
| if (protm_grp && protm_grp != group) { |
| clear_bit((unsigned int)group->csg_nr, |
| scheduler->csg_slots_idle_mask); |
| /* Request the update to confirm the condition inferred. */ |
| group->reevaluate_idle_status = true; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, |
| scheduler->csg_slots_idle_mask[0]); |
| } |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, |
| flags); |
| |
			/* If the GPU is in protected mode then any doorbells
			 * rung would have no effect. Check if the GPU is in
			 * protected mode and if this group has a higher
			 * priority than the active protected mode group. If
			 * so, prompt the FW to exit protected mode.
			 */
| if (protm_grp && |
| group->scan_seq_num < protm_grp->scan_seq_num) { |
| /* Prompt the FW to exit protected mode */ |
| scheduler_force_protm_exit(kbdev); |
| } |
| } |
| } els
|