| // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
| /* |
| * |
| * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. |
| * |
| * This program is free software and is provided to you under the terms of the |
| * GNU General Public License version 2 as published by the Free Software |
| * Foundation, and any use by you of this program is subject to the terms |
| * of such GNU license. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, you can access it online at |
| * http://www.gnu.org/licenses/gpl-2.0.html. |
| * |
| */ |
| |
| #include <mali_kbase.h> |
| #include "mali_kbase_config_defaults.h" |
| #include <mali_kbase_ctx_sched.h> |
| #include <mali_kbase_reset_gpu.h> |
| #include <mali_kbase_as_fault_debugfs.h> |
| #include "mali_kbase_csf.h" |
| #include <tl/mali_kbase_tracepoints.h> |
| #include <backend/gpu/mali_kbase_pm_internal.h> |
| #include <linux/export.h> |
| #include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> |
| #include <uapi/gpu/arm/midgard/mali_base_kernel.h> |
| |
/* Value to indicate that a queue group is not on the groups_to_schedule list */
| #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) |
| |
| /* Waiting timeout for scheduler state change for descheduling a CSG */ |
| #define CSG_SCHED_STOP_TIMEOUT_MS (50) |
| |
| #define CSG_SUSPEND_ON_RESET_WAIT_TIMEOUT_MS DEFAULT_RESET_TIMEOUT_MS |
| |
| /* Maximum number of endpoints which may run tiler jobs. */ |
| #define CSG_TILER_MAX ((u8)1) |
| |
| /* Maximum dynamic CSG slot priority value */ |
| #define MAX_CSG_SLOT_PRIORITY ((u8)15) |
| |
| /* CSF scheduler time slice value */ |
| #define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */ |
| |
| /* |
| * CSF scheduler time threshold for converting "tock" requests into "tick" if |
| * they come too close to the end of a tick interval. This avoids scheduling |
| * twice in a row. |
| */ |
| #define CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS \ |
| CSF_SCHEDULER_TIME_TICK_MS |
| |
| #define CSF_SCHEDULER_TIME_TICK_THRESHOLD_JIFFIES \ |
| msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS) |
| |
| /* Nanoseconds per millisecond */ |
| #define NS_PER_MS ((u64)1000 * 1000) |
| |
| /* |
| * CSF minimum time to reschedule for a new "tock" request. Bursts of "tock" |
| * requests are not serviced immediately, but shall wait for a minimum time in |
| * order to reduce load on the CSF scheduler thread. |
| */ |
| #define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffies-time */ |
| |
| /* CS suspended and is idle (empty ring buffer) */ |
| #define CS_IDLE_FLAG (1 << 0) |
| |
/* CS suspended and is waiting for a CQS condition */
| #define CS_WAIT_SYNC_FLAG (1 << 1) |
| |
/* 2 GPU address space slots are reserved for the MCU and the privileged
 * context used for HW counter dumping. TODO: remove the slot reserved for
 * the latter in GPUCORE-26293.
 */
| #define NUM_RESERVED_AS_SLOTS (2) |
| |
| static int scheduler_group_schedule(struct kbase_queue_group *group); |
| static void remove_group_from_idle_wait(struct kbase_queue_group *const group); |
| static |
| void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, |
| struct kbase_queue_group *const group, |
| enum kbase_csf_group_state run_state); |
| static struct kbase_queue_group *scheduler_get_protm_enter_async_group( |
| struct kbase_device *const kbdev, |
| struct kbase_queue_group *const group); |
| static struct kbase_queue_group *get_tock_top_group( |
| struct kbase_csf_scheduler *const scheduler); |
| static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev); |
| static int suspend_active_queue_groups(struct kbase_device *kbdev, |
| unsigned long *slot_mask); |
| static void schedule_in_cycle(struct kbase_queue_group *group, bool force); |
| |
| #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) |
| |
| /** |
| * tick_timer_callback() - Callback function for the scheduling tick hrtimer |
| * |
 * @timer: Pointer to the scheduling tick hrtimer
| * |
| * This function will enqueue the scheduling tick work item for immediate |
| * execution, if it has not been queued already. |
| * |
| * Return: enum value to indicate that timer should not be restarted. |
| */ |
| static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer) |
| { |
| struct kbase_device *kbdev = container_of(timer, struct kbase_device, |
| csf.scheduler.tick_timer); |
| |
| kbase_csf_scheduler_advance_tick(kbdev); |
| return HRTIMER_NORESTART; |
| } |
| |
| /** |
| * start_tick_timer() - Start the scheduling tick hrtimer. |
| * |
| * @kbdev: Pointer to the device |
| * |
 * This function will start the scheduling tick hrtimer and is supposed to
 * be called only from the tick work item function. The tick hrtimer should
 * not be active already.
| */ |
| static void start_tick_timer(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| WARN_ON(scheduler->tick_timer_active); |
| if (likely(!work_pending(&scheduler->tick_work))) { |
| scheduler->tick_timer_active = true; |
| |
| hrtimer_start(&scheduler->tick_timer, |
| HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), |
| HRTIMER_MODE_REL); |
| } |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| } |
| |
| /** |
| * cancel_tick_timer() - Cancel the scheduling tick hrtimer |
| * |
| * @kbdev: Pointer to the device |
| */ |
| static void cancel_tick_timer(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| scheduler->tick_timer_active = false; |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| hrtimer_cancel(&scheduler->tick_timer); |
| } |
| |
| /** |
| * enqueue_tick_work() - Enqueue the scheduling tick work item |
| * |
| * @kbdev: Pointer to the device |
| * |
| * This function will queue the scheduling tick work item for immediate |
| * execution. This shall only be called when both the tick hrtimer and tick |
| * work item are not active/pending. |
| */ |
| static void enqueue_tick_work(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| WARN_ON(scheduler->tick_timer_active); |
| queue_work(scheduler->wq, &scheduler->tick_work); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| } |
| |
| static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr) |
| { |
| WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| clear_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); |
| } |
| |
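/**
 * acquire_doorbell() - Acquire a free user doorbell page.
 *
 * @kbdev: Pointer to the device
 *
 * This function finds the first doorbell that is not marked as in-use in
 * the scheduler's doorbell bitmap and marks it as in-use.
 *
 * Return: doorbell number on success, otherwise KBASEP_USER_DB_NR_INVALID
 *         if all doorbells are already in use.
 *
 * Note: Caller must hold the scheduler lock.
 */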
| static int acquire_doorbell(struct kbase_device *kbdev) |
| { |
| int doorbell_nr; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| doorbell_nr = find_first_zero_bit( |
| kbdev->csf.scheduler.doorbell_inuse_bitmap, |
| CSF_NUM_DOORBELL); |
| |
| if (doorbell_nr >= CSF_NUM_DOORBELL) |
| return KBASEP_USER_DB_NR_INVALID; |
| |
| set_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); |
| |
| return doorbell_nr; |
| } |
| |
| static void unassign_user_doorbell_from_group(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (group->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { |
| release_doorbell(kbdev, group->doorbell_nr); |
| group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; |
| } |
| } |
| |
| static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev, |
| struct kbase_queue *queue) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| mutex_lock(&kbdev->csf.reg_lock); |
| |
| if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { |
| queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; |
| /* After this the dummy page would be mapped in */ |
| unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping, |
| queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1); |
| } |
| |
| mutex_unlock(&kbdev->csf.reg_lock); |
| } |
| |
| static void assign_user_doorbell_to_group(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (group->doorbell_nr == KBASEP_USER_DB_NR_INVALID) |
| group->doorbell_nr = acquire_doorbell(kbdev); |
| } |
| |
| static void assign_user_doorbell_to_queue(struct kbase_device *kbdev, |
| struct kbase_queue *const queue) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| mutex_lock(&kbdev->csf.reg_lock); |
| |
	/* If the bind operation for the queue hasn't completed yet, then the
	 * CSI can't be programmed for the queue (even in stopped state) and
	 * so the doorbell also can't be assigned to it.
	 */
| if ((queue->bind_state == KBASE_CSF_QUEUE_BOUND) && |
| (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)) { |
| WARN_ON(queue->group->doorbell_nr == KBASEP_USER_DB_NR_INVALID); |
| queue->doorbell_nr = queue->group->doorbell_nr; |
| |
		/* After this the real HW doorbell page would be mapped in */
| unmap_mapping_range( |
| kbdev->csf.db_filp->f_inode->i_mapping, |
| queue->db_file_offset << PAGE_SHIFT, |
| PAGE_SIZE, 1); |
| } |
| |
| mutex_unlock(&kbdev->csf.reg_lock); |
| } |
| |
| static void scheduler_doorbell_init(struct kbase_device *kbdev) |
| { |
| int doorbell_nr; |
| |
| bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap, |
| CSF_NUM_DOORBELL); |
| |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| /* Reserve doorbell 0 for use by kernel driver */ |
| doorbell_nr = acquire_doorbell(kbdev); |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| |
| WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR); |
| } |
| |
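/**
 * get_nr_active_csgs() - Count the CSG slots currently in use.
 *
 * @kbdev: Pointer to the device
 *
 * Return: number of CSG slots that have a queue group programmed on them,
 *         as per the scheduler's csg_inuse_bitmap.
 *
 * Note: Caller must hold the scheduler lock.
 */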
| static u32 get_nr_active_csgs(struct kbase_device *kbdev) |
| { |
| u32 nr_active_csgs; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap, |
| kbdev->csf.global_iface.group_num); |
| |
| return nr_active_csgs; |
| } |
| |
| /** |
| * csgs_active - returns true if any of CSG slots are in use |
| * |
| * @kbdev: Instance of a GPU platform device that implements a CSF interface. |
| * |
 * Return: true if any CSG slot is in use, otherwise false.
| */ |
| static bool csgs_active(struct kbase_device *kbdev) |
| { |
| u32 nr_active_csgs; |
| |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| nr_active_csgs = get_nr_active_csgs(kbdev); |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| |
	/* Right now, if any of the CSG interfaces are in use then we need to
	 * assume that there is some work pending.
	 * In the future, when IDLE notifications from the firmware are
	 * implemented, we will have a better idea of the pending work.
	 */
| return (nr_active_csgs != 0); |
| } |
| |
| /** |
| * csg_slot_in_use - returns true if a queue group has been programmed on a |
| * given CSG slot. |
| * |
| * @kbdev: Instance of a GPU platform device that implements a CSF interface. |
| * @slot: Index/number of the CSG slot in question. |
| * |
 * Return: true if the given CSG slot is in use, otherwise false.
| * |
| * Note: Caller must hold the scheduler lock. |
| */ |
| static inline bool csg_slot_in_use(struct kbase_device *kbdev, int slot) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| return (kbdev->csf.scheduler.csg_slots[slot].resident_group != NULL); |
| } |
| |
| static bool queue_group_suspended_locked(struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); |
| |
| return (group->run_state == KBASE_CSF_GROUP_SUSPENDED || |
| group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE || |
| group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); |
| } |
| |
| static bool queue_group_idle_locked(struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); |
| |
| return (group->run_state == KBASE_CSF_GROUP_IDLE || |
| group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE); |
| } |
| |
| static bool queue_group_scheduled(struct kbase_queue_group *group) |
| { |
| return (group->run_state != KBASE_CSF_GROUP_INACTIVE && |
| group->run_state != KBASE_CSF_GROUP_TERMINATED && |
| group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED); |
| } |
| |
| static bool queue_group_scheduled_locked(struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); |
| |
| return queue_group_scheduled(group); |
| } |
| |
| /** |
| * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode. |
| * |
| * @kbdev: Pointer to the GPU device |
| * |
| * This function waits for the GPU to exit protected mode which is confirmed |
| * when active_protm_grp is set to NULL. |
| */ |
| static void scheduler_wait_protm_quit(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| long remaining; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT, NULL, |
| jiffies_to_msecs(wt)); |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); |
| |
| if (!remaining) |
| dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped"); |
| |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT_DONE, NULL, |
| jiffies_to_msecs(remaining)); |
| } |
| |
| /** |
| * scheduler_force_protm_exit() - Force GPU to exit protected mode. |
| * |
| * @kbdev: Pointer to the GPU device |
| * |
| * This function sends a ping request to the firmware and waits for the GPU |
| * to exit protected mode. |
| */ |
| static void scheduler_force_protm_exit(struct kbase_device *kbdev) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| kbase_csf_firmware_ping(kbdev); |
| scheduler_wait_protm_quit(kbdev); |
| } |
| |
| /** |
| * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up |
| * automatically for periodic tasks. |
| * |
| * @kbdev: Pointer to the device |
| * |
 * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes
 * the CSF scheduler lock is already held.
| * |
| * Return: true if the scheduler is configured to wake up periodically |
| */ |
| static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| return kbdev->csf.scheduler.timer_enabled; |
| } |
| |
| static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (scheduler->gpu_idle_fw_timer_enabled) |
| return; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| |
	/* Updating the timer_enabled flag requires holding the interrupt_lock */
| scheduler->gpu_idle_fw_timer_enabled = true; |
| kbase_csf_firmware_enable_gpu_idle_timer(kbdev); |
| |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| } |
| |
| static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| lockdep_assert_held(&scheduler->interrupt_lock); |
| |
| /* Update of the timer_enabled flag requires holding interrupt_lock */ |
| if (scheduler->gpu_idle_fw_timer_enabled) { |
| scheduler->gpu_idle_fw_timer_enabled = false; |
| kbase_csf_firmware_disable_gpu_idle_timer(kbdev); |
| } |
| } |
| |
| static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (!scheduler->gpu_idle_fw_timer_enabled) |
| return; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| disable_gpu_idle_fw_timer_locked(kbdev); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| } |
| |
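/**
 * scheduler_wakeup() - Re-activate the scheduler if it is suspended.
 *
 * @kbdev: Pointer to the device
 * @kick:  If true, also enable the scheduling tick timer so that a new
 *         scheduling cycle gets started.
 *
 * If the scheduler is in the suspended state, this function makes it PM
 * active again and moves it to the inactive state.
 */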
| static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (scheduler->state == SCHED_SUSPENDED) { |
| dev_dbg(kbdev->dev, "Re-activating the Scheduler"); |
| kbase_csf_scheduler_pm_active(kbdev); |
| scheduler->state = SCHED_INACTIVE; |
| |
| if (kick) |
| scheduler_enable_tick_timer_nolock(kbdev); |
| } |
| } |
| |
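/**
 * scheduler_suspend() - Put the scheduler into the suspended state.
 *
 * @kbdev: Pointer to the device
 *
 * This function marks the scheduler as PM idle and sets its state to
 * SCHED_SUSPENDED, unless it is already suspended.
 */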
| static void scheduler_suspend(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) { |
| dev_dbg(kbdev->dev, "Suspending the Scheduler"); |
| kbase_csf_scheduler_pm_idle(kbdev); |
| scheduler->state = SCHED_SUSPENDED; |
| } |
| } |
| |
| /** |
| * update_idle_suspended_group_state() - Move the queue group to a non-idle |
| * suspended state. |
| * @group: Pointer to the queue group. |
| * |
 * This function is called to change the state of a queue group to the
 * non-idle suspended state, if the group was suspended when all the queues
 * bound to it became empty or when some queues got blocked on a sync wait
 * and others became empty. In the latter case, the group is also moved from
 * the idle wait list to the runnable list.
 * So the function gets called when a queue is kicked or a sync wait
 * condition gets satisfied.
| */ |
| static void update_idle_suspended_group_state(struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| int new_val; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) { |
| remove_group_from_idle_wait(group); |
| insert_group_to_runnable(scheduler, group, |
| KBASE_CSF_GROUP_SUSPENDED); |
| } else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) { |
| group->run_state = KBASE_CSF_GROUP_SUSPENDED; |
| |
| /* If scheduler is not suspended and the given group's |
| * static priority (reflected by the scan_seq_num) is inside |
| * the current tick slot-range, schedules an async tock. |
| */ |
| if (scheduler->state != SCHED_SUSPENDED && |
| group->scan_seq_num < scheduler->num_csg_slots_for_tick) |
| schedule_in_cycle(group, true); |
| } else |
| return; |
| |
| new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, |
| group, new_val); |
| } |
| |
| int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| int slot_num = group->csg_nr; |
| |
| lockdep_assert_held(&scheduler->interrupt_lock); |
| |
| if (slot_num >= 0) { |
| if (WARN_ON(scheduler->csg_slots[slot_num].resident_group != |
| group)) |
| return -1; |
| } |
| |
| return slot_num; |
| } |
| |
| int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| unsigned long flags; |
| int slot_num; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| slot_num = kbase_csf_scheduler_group_get_slot_locked(group); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| |
| return slot_num; |
| } |
| |
| static bool kbasep_csf_scheduler_group_is_on_slot_locked( |
| struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| int slot_num = group->csg_nr; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (slot_num >= 0) { |
| if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group != |
| group)) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| int slot_num = group->csg_nr; |
| |
| lockdep_assert_held(&scheduler->interrupt_lock); |
| |
| if (WARN_ON(slot_num < 0)) |
| return false; |
| |
| return test_bit(slot_num, scheduler->csgs_events_enable_mask); |
| } |
| |
| struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( |
| struct kbase_device *kbdev, int slot) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); |
| |
| return kbdev->csf.scheduler.csg_slots[slot].resident_group; |
| } |
| |
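/**
 * halt_stream_sync() - Stop a GPU queue running on a CSG slot.
 *
 * @queue: Pointer to the GPU queue to stop.
 *
 * This function requests the STOP state for the CSI bound to the queue and
 * waits for the firmware to acknowledge it. If a START request was still
 * being processed, it first waits for that request to be acknowledged.
 * A timeout on either wait triggers a GPU reset.
 *
 * Return: 0 on success, or -ETIMEDOUT if the firmware did not respond in
 *         time.
 */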
| static int halt_stream_sync(struct kbase_queue *queue) |
| { |
| struct kbase_queue_group *group = queue->group; |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| struct kbase_csf_cmd_stream_info *stream; |
| int csi_index = queue->csi_index; |
| long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| |
| if (WARN_ON(!group) || |
| WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return -EINVAL; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| ginfo = &global_iface->groups[group->csg_nr]; |
| stream = &ginfo->streams[csi_index]; |
| |
| if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) == |
| CS_REQ_STATE_START) { |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) |
| == CS_ACK_STATE_START), remaining); |
| |
| if (!remaining) { |
| dev_warn(kbdev->dev, "Timed out waiting for queue to start on csi %d bound to group %d on slot %d", |
| csi_index, group->handle, group->csg_nr); |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| |
| return -ETIMEDOUT; |
| } |
| |
| remaining = |
| kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| } |
| |
| /* Set state to STOP */ |
| kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP, |
| CS_REQ_STATE_MASK); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQUESTED, group, queue, 0u); |
| kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true); |
| |
| /* Timed wait */ |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) |
| == CS_ACK_STATE_STOP), remaining); |
| |
| if (!remaining) { |
| dev_warn(kbdev->dev, "Timed out waiting for queue to stop on csi %d bound to group %d on slot %d", |
| queue->csi_index, group->handle, group->csg_nr); |
| |
| /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU |
| * will be reset as a work-around. |
| */ |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| } |
| return (remaining) ? 0 : -ETIMEDOUT; |
| } |
| |
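/**
 * can_halt_stream() - Check whether a queue bound to a group can be stopped.
 *
 * @kbdev: Pointer to the device
 * @group: Pointer to the queue group the queue is bound to.
 *
 * Return: true if the group is no longer scheduled, or if it is resident on
 *         a CSG slot that is in the running state.
 */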
| static bool can_halt_stream(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| struct kbase_csf_csg_slot *const csg_slot = |
| kbdev->csf.scheduler.csg_slots; |
| unsigned long flags; |
| bool can_halt; |
| int slot; |
| |
| if (!queue_group_scheduled(group)) |
| return true; |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| slot = kbase_csf_scheduler_group_get_slot_locked(group); |
| can_halt = (slot >= 0) && |
| (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, |
| flags); |
| |
| return can_halt; |
| } |
| |
| /** |
| * sched_halt_stream() - Stop a GPU queue when its queue group is not running |
| * on a CSG slot. |
| * @queue: Pointer to the GPU queue to stop. |
| * |
 * This function handles stopping GPU queues for groups that are either not on
| * a CSG slot or are on the slot but undergoing transition to |
| * resume or suspend states. |
| * It waits until the queue group is scheduled on a slot and starts running, |
| * which is needed as groups that were suspended may need to resume all queues |
| * that were enabled and running at the time of suspension. |
| * |
| * Return: 0 on success, or negative on failure. |
| */ |
| static int sched_halt_stream(struct kbase_queue *queue) |
| { |
| struct kbase_queue_group *group = queue->group; |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = |
| &kbdev->csf.scheduler; |
| struct kbase_csf_csg_slot *const csg_slot = |
| kbdev->csf.scheduler.csg_slots; |
| bool retry_needed = false; |
| bool retried = false; |
| long remaining; |
| int slot; |
| int err = 0; |
| |
| if (WARN_ON(!group)) |
| return -EINVAL; |
| |
| lockdep_assert_held(&queue->kctx->csf.lock); |
| lockdep_assert_held(&scheduler->lock); |
| |
| slot = kbase_csf_scheduler_group_get_slot(group); |
| |
| if (slot >= 0) { |
| WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); |
| |
| if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { |
| dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is in under transition to running state", |
| queue->csi_index, group->handle); |
| retry_needed = true; |
| } |
| } |
| retry: |
| /* Update the group state so that it can get scheduled soon */ |
| update_idle_suspended_group_state(group); |
| |
| mutex_unlock(&scheduler->lock); |
| |
| /* This function is called when the queue group is either not on a CSG |
| * slot or is on the slot but undergoing transition. |
| * |
| * To stop the queue, the function needs to wait either for the queue |
| * group to be assigned a CSG slot (and that slot has to reach the |
| * running state) or for the eviction of the queue group from the |
| * scheduler's list. |
| * |
| * In order to evaluate the latter condition, the function doesn't |
| * really need to lock the scheduler, as any update to the run_state |
| * of the queue group by sched_evict_group() would be visible due |
| * to implicit barriers provided by the kernel waitqueue macros. |
| * |
| * The group pointer cannot disappear meanwhile, as the high level |
| * CSF context is locked. Therefore, the scheduler would be |
| * the only one to update the run_state of the group. |
| */ |
| remaining = wait_event_timeout( |
| kbdev->csf.event_wait, can_halt_stream(kbdev, group), |
| kbase_csf_timeout_in_jiffies( |
| 20 * kbdev->csf.scheduler.csg_scheduling_period_ms)); |
| |
| mutex_lock(&scheduler->lock); |
| |
| if (remaining && queue_group_scheduled_locked(group)) { |
| slot = kbase_csf_scheduler_group_get_slot(group); |
| |
| /* If the group is still on slot and slot is in running state |
| * then explicitly stop the CSI of the |
| * queue. Otherwise there are different cases to consider |
| * |
| * - If the queue group was already undergoing transition to |
| * resume/start state when this function was entered then it |
| * would not have disabled the CSI of the |
| * queue being stopped and the previous wait would have ended |
| * once the slot was in a running state with CS |
| * interface still enabled. |
| * Now the group is going through another transition either |
| * to a suspend state or to a resume state (it could have |
| * been suspended before the scheduler lock was grabbed). |
		 * In both scenarios we need to wait again for the group to
| * come on a slot and that slot to reach the running state, |
| * as that would guarantee that firmware will observe the |
| * CSI as disabled. |
| * |
| * - If the queue group was either off the slot or was |
| * undergoing transition to suspend state on entering this |
| * function, then the group would have been resumed with the |
| * queue's CSI in disabled state. |
| * So now if the group is undergoing another transition |
		 * (after the resume) then we just need to wait for the state
| * bits in the ACK register of CSI to be |
| * set to STOP value. It is expected that firmware will |
| * process the stop/disable request of the CS |
| * interface after resuming the group before it processes |
| * another state change request of the group. |
| */ |
| if ((slot >= 0) && |
| (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) { |
| err = halt_stream_sync(queue); |
| } else if (retry_needed && !retried) { |
| retried = true; |
| goto retry; |
| } else if (slot >= 0) { |
| struct kbase_csf_global_iface *global_iface = |
| &kbdev->csf.global_iface; |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &global_iface->groups[slot]; |
| struct kbase_csf_cmd_stream_info *stream = |
| &ginfo->streams[queue->csi_index]; |
| u32 cs_req = |
| kbase_csf_firmware_cs_input_read(stream, CS_REQ); |
| |
| if (!WARN_ON(CS_REQ_STATE_GET(cs_req) != |
| CS_REQ_STATE_STOP)) { |
| /* Timed wait */ |
| remaining = wait_event_timeout( |
| kbdev->csf.event_wait, |
| (CS_ACK_STATE_GET( |
| kbase_csf_firmware_cs_output( |
| stream, CS_ACK)) == |
| CS_ACK_STATE_STOP), |
| kbdev->csf.fw_timeout_ms); |
| |
| if (!remaining) { |
| dev_warn(kbdev->dev, |
| "Timed out waiting for queue stop ack on csi %d bound to group %d on slot %d", |
| queue->csi_index, |
| group->handle, group->csg_nr); |
| err = -ETIMEDOUT; |
| } |
| } |
| } |
| } else if (!remaining) { |
| dev_warn(kbdev->dev, "Group-%d failed to get a slot for stopping the queue on csi %d", |
| group->handle, queue->csi_index); |
| err = -ETIMEDOUT; |
| } |
| |
| return err; |
| } |
| |
| int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) |
| { |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| struct kbase_queue_group *group = queue->group; |
| bool const cs_enabled = queue->enabled; |
| int err = 0; |
| |
| if (WARN_ON(!group)) |
| return -EINVAL; |
| |
| kbase_reset_gpu_assert_failed_or_prevented(kbdev); |
| lockdep_assert_held(&queue->kctx->csf.lock); |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| |
| queue->enabled = false; |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled); |
| |
| if (cs_enabled && queue_group_scheduled_locked(group)) { |
| struct kbase_csf_csg_slot *const csg_slot = |
| kbdev->csf.scheduler.csg_slots; |
| int slot = kbase_csf_scheduler_group_get_slot(group); |
| |
| /* Since the group needs to be resumed in order to stop the queue, |
| * check if GPU needs to be powered up. |
| */ |
| scheduler_wakeup(kbdev, true); |
| |
| if ((slot >= 0) && |
| (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) |
| err = halt_stream_sync(queue); |
| else |
| err = sched_halt_stream(queue); |
| |
| unassign_user_doorbell_from_queue(kbdev, queue); |
| } |
| |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| return err; |
| } |
| |
| static void update_hw_active(struct kbase_queue *queue, bool active) |
| { |
| CSTD_UNUSED(queue); |
| CSTD_UNUSED(active); |
| } |
| |
| static void program_cs_extract_init(struct kbase_queue *queue) |
| { |
| u64 *input_addr = (u64 *)queue->user_io_addr; |
| u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); |
| |
| input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] = |
| output_addr[CS_EXTRACT_LO / sizeof(u64)]; |
| } |
| |
| static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, |
| struct kbase_queue *queue) |
| { |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| u32 const glb_version = kbdev->csf.global_iface.version; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| /* If cs_trace_command not supported, nothing to program */ |
| if (glb_version < kbase_csf_interface_version(1, 1, 0)) |
| return; |
| |
	/* Program for cs_trace if enabled. In the current arrangement, it is
	 * possible for the context to enable cs_trace after some queues have
	 * been registered with cs_trace in the disabled state. This is tracked
	 * by the queue's trace buffer base address, which was validated at the
	 * queue's register_ex call.
| */ |
| if (kbase_csf_scheduler_queue_has_trace(queue)) { |
| u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET( |
| queue->trace_cfg, queue->kctx->as_nr); |
| |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg); |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, |
| queue->trace_buffer_size); |
| |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO, |
| queue->trace_buffer_base & U32_MAX); |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI, |
| queue->trace_buffer_base >> 32); |
| |
| kbase_csf_firmware_cs_input( |
| stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO, |
| queue->trace_offset_ptr & U32_MAX); |
| kbase_csf_firmware_cs_input( |
| stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI, |
| queue->trace_offset_ptr >> 32); |
| } else { |
| /* Place the configuration to the disabled condition */ |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0); |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, 0); |
| } |
| } |
| |
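/**
 * program_cs() - Program a GPU queue on the CSI of its resident group.
 *
 * @kbdev: Pointer to the device.
 * @queue: Pointer to the GPU queue.
 * @ring_csg_doorbell: Flag to indicate whether the CSG doorbell should be
 *                     rung after updating the CS request.
 *
 * This function assigns a user doorbell to the queue and programs the ring
 * buffer base address & size, the user input/output pages, the priority and
 * the cs_trace configuration on the CSI, then requests the START or STOP
 * state depending on whether the queue is enabled, and finally rings the
 * kernel doorbell for the CSI.
 */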
| static void program_cs(struct kbase_device *kbdev, |
| struct kbase_queue *queue, bool ring_csg_doorbell) |
| { |
| struct kbase_queue_group *group = queue->group; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| struct kbase_csf_cmd_stream_info *stream; |
| int csi_index = queue->csi_index; |
| u64 user_input; |
| u64 user_output; |
| |
| if (WARN_ON(!group)) |
| return; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return; |
| |
| ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; |
| |
| if (WARN_ON(csi_index < 0) || |
| WARN_ON(csi_index >= ginfo->stream_num)) |
| return; |
| |
| assign_user_doorbell_to_queue(kbdev, queue); |
| if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) |
| return; |
| |
| WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr); |
| |
| if (queue->enabled && queue_group_suspended_locked(group)) |
| program_cs_extract_init(queue); |
| |
| stream = &ginfo->streams[csi_index]; |
| |
| kbase_csf_firmware_cs_input(stream, CS_BASE_LO, |
| queue->base_addr & 0xFFFFFFFF); |
| kbase_csf_firmware_cs_input(stream, CS_BASE_HI, |
| queue->base_addr >> 32); |
| kbase_csf_firmware_cs_input(stream, CS_SIZE, |
| queue->size); |
| |
| user_input = (queue->reg->start_pfn << PAGE_SHIFT); |
| kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, |
| user_input & 0xFFFFFFFF); |
| kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, |
| user_input >> 32); |
| |
| user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT); |
| kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, |
| user_output & 0xFFFFFFFF); |
| kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, |
| user_output >> 32); |
| |
| kbase_csf_firmware_cs_input(stream, CS_CONFIG, |
| (queue->doorbell_nr << 8) | (queue->priority & 0xF)); |
| |
| /* Program the queue's cs_trace configuration */ |
| program_cs_trace_cfg(stream, queue); |
| |
| /* Enable all interrupts for now */ |
| kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0)); |
| |
| /* |
| * Enable the CSG idle notification once the CS's ringbuffer |
| * becomes empty or the CS becomes sync_idle, waiting sync update |
| * or protected mode switch. |
| */ |
| kbase_csf_firmware_cs_input_mask(stream, CS_REQ, |
| CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK, |
| CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK); |
| |
| /* Set state to START/STOP */ |
| kbase_csf_firmware_cs_input_mask(stream, CS_REQ, |
| queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP, |
| CS_REQ_STATE_MASK); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled); |
| |
| kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, |
| ring_csg_doorbell); |
| update_hw_active(queue, true); |
| } |
| |
| int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) |
| { |
| struct kbase_queue_group *group = queue->group; |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| bool const cs_enabled = queue->enabled; |
| int err = 0; |
| bool evicted = false; |
| |
| kbase_reset_gpu_assert_prevented(kbdev); |
| lockdep_assert_held(&queue->kctx->csf.lock); |
| |
| if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) |
| return -EINVAL; |
| |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue, |
| group->run_state); |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, queue->group, |
| queue, queue->status_wait); |
| |
| if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) { |
| err = -EIO; |
| evicted = true; |
| } else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) |
| && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { |
| dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked", |
| queue->csi_index, group->handle); |
| } else { |
| err = scheduler_group_schedule(group); |
| |
| if (!err) { |
| queue->enabled = true; |
| if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) { |
| if (cs_enabled) { |
					/* In the normal case, when a queue
					 * is already running, a queue update
					 * would be a doorbell kick on the
					 * user side. However, if such a kick
					 * shortly follows a start or resume,
					 * the queue may actually be in
					 * transition, and the kick would
					 * then enter the kernel as the
					 * hw_active flag is yet to be set.
					 * The scheduler needs to give a kick
					 * to the corresponding user doorbell
					 * in such a case.
					 */
| kbase_csf_ring_cs_user_doorbell(kbdev, queue); |
| } else |
| program_cs(kbdev, queue, true); |
| } |
| queue_delayed_work(system_long_wq, |
| &kbdev->csf.scheduler.ping_work, |
| msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS)); |
| } |
| } |
| |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| |
| if (evicted) |
| kbase_csf_term_descheduled_queue_group(group); |
| |
| return err; |
| } |
| |
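/**
 * update_csg_slot_status() - Refresh the state tracked for a CSG slot.
 *
 * @kbdev: Pointer to the device
 * @slot:  Index/number of the CSG slot in question.
 *
 * This function reads the acknowledged CSG state from the firmware
 * interface and, if a pending READY2RUN or DOWN2STOP transition has been
 * acknowledged, updates the tracked slot state to RUNNING or STOPPED
 * respectively.
 *
 * Return: the (possibly updated) state of the CSG slot.
 *
 * Note: Caller must hold the scheduler lock.
 */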
| static enum kbase_csf_csg_slot_state update_csg_slot_status( |
| struct kbase_device *kbdev, s8 slot) |
| { |
| struct kbase_csf_csg_slot *csg_slot = |
| &kbdev->csf.scheduler.csg_slots[slot]; |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &kbdev->csf.global_iface.groups[slot]; |
| u32 state; |
| enum kbase_csf_csg_slot_state slot_state; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, |
| CSG_ACK)); |
| slot_state = atomic_read(&csg_slot->state); |
| |
| switch (slot_state) { |
| case CSG_SLOT_READY2RUN: |
| if ((state == CSG_ACK_STATE_START) || |
| (state == CSG_ACK_STATE_RESUME)) { |
| slot_state = CSG_SLOT_RUNNING; |
| atomic_set(&csg_slot->state, slot_state); |
| csg_slot->trigger_jiffies = jiffies; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STARTED, csg_slot->resident_group, state); |
| dev_dbg(kbdev->dev, "Group %u running on slot %d\n", |
| csg_slot->resident_group->handle, slot); |
| } |
| break; |
| case CSG_SLOT_DOWN2STOP: |
| if ((state == CSG_ACK_STATE_SUSPEND) || |
| (state == CSG_ACK_STATE_TERMINATE)) { |
| slot_state = CSG_SLOT_STOPPED; |
| atomic_set(&csg_slot->state, slot_state); |
| csg_slot->trigger_jiffies = jiffies; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state); |
| dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n", |
| csg_slot->resident_group->handle, slot); |
| } |
| break; |
| case CSG_SLOT_DOWN2STOP_TIMEDOUT: |
| case CSG_SLOT_READY2RUN_TIMEDOUT: |
| case CSG_SLOT_READY: |
| case CSG_SLOT_RUNNING: |
| case CSG_SLOT_STOPPED: |
| break; |
| default: |
| dev_warn(kbdev->dev, "Unknown CSG slot state %d", slot_state); |
| break; |
| } |
| |
| return slot_state; |
| } |
| |
| static bool csg_slot_running(struct kbase_device *kbdev, s8 slot) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| return (update_csg_slot_status(kbdev, slot) == CSG_SLOT_RUNNING); |
| } |
| |
| static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot) |
| { |
| enum kbase_csf_csg_slot_state slot_state; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| slot_state = update_csg_slot_status(kbdev, slot); |
| |
| return (slot_state == CSG_SLOT_STOPPED || |
| slot_state == CSG_SLOT_READY); |
| } |
| |
| static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot) |
| { |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &kbdev->csf.global_iface.groups[slot]; |
| u32 state; |
| |
| state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, |
| CSG_ACK)); |
| |
| if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) { |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state); |
| dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot); |
| return true; |
| } |
| |
| return false; |
| } |
| |
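/**
 * halt_csg_slot() - Suspend or terminate the group resident on a CSG slot.
 *
 * @group:   Pointer to the queue group occupying the slot.
 * @suspend: If true request a suspend, otherwise request a terminate.
 *
 * If the slot is still transitioning to the running state, this function
 * first waits for that transition to complete. It then issues the
 * SUSPEND/TERMINATE request for the slot and rings the CSG doorbell.
 */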
| static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; |
| struct kbase_csf_csg_slot *csg_slot = |
| kbdev->csf.scheduler.csg_slots; |
| s8 slot; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return; |
| |
| slot = group->csg_nr; |
| |
| /* When in transition, wait for it to complete */ |
| if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { |
| long remaining = |
| kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| |
| dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot); |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| csg_slot_running(kbdev, slot), remaining); |
| if (!remaining) |
| dev_warn(kbdev->dev, |
| "slot %d timed out on up-running\n", slot); |
| } |
| |
| if (csg_slot_running(kbdev, slot)) { |
| unsigned long flags; |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &global_iface->groups[slot]; |
| u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND : |
| CSG_REQ_STATE_TERMINATE; |
| |
| dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d", |
| suspend, group->handle, group->kctx->tgid, group->kctx->id, slot); |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| /* Set state to SUSPEND/TERMINATE */ |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd, |
| CSG_REQ_STATE_MASK); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, |
| flags); |
| atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP); |
| csg_slot[slot].trigger_jiffies = jiffies; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP, group, halt_cmd); |
| |
| kbase_csf_ring_csg_doorbell(kbdev, slot); |
| } |
| } |
| |
| static void term_csg_slot(struct kbase_queue_group *group) |
| { |
| halt_csg_slot(group, false); |
| } |
| |
| static void suspend_csg_slot(struct kbase_queue_group *group) |
| { |
| halt_csg_slot(group, true); |
| } |
| |
| /** |
| * evaluate_sync_update() - Evaluate the sync wait condition the GPU command |
| * queue has been blocked on. |
| * |
| * @queue: Pointer to the GPU command queue |
| * |
| * Return: true if sync wait condition is satisfied. |
| */ |
| static bool evaluate_sync_update(struct kbase_queue *queue) |
| { |
| struct kbase_vmap_struct *mapping; |
| bool updated = false; |
| u32 *sync_ptr; |
| u32 sync_wait_cond; |
| u32 sync_current_val; |
| struct kbase_device *kbdev; |
| |
| if (WARN_ON(!queue)) |
| return false; |
| |
| kbdev = queue->kctx->kbdev; |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, |
| &mapping); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE, queue->group, |
| queue, queue->sync_ptr); |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_BLOCKED_REASON, |
| queue->group, queue, queue->blocked_reason); |
| |
| if (!sync_ptr) { |
| dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed", |
| queue->sync_ptr); |
| goto out; |
| } |
| |
| sync_wait_cond = |
| CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait); |
| |
| WARN_ON((sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && |
| (sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE)); |
| |
| sync_current_val = READ_ONCE(*sync_ptr); |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_CURRENT_VAL, queue->group, |
| queue, sync_current_val); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_TEST_VAL, queue->group, |
| queue, queue->sync_value); |
| |
| if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && |
| (sync_current_val > queue->sync_value)) || |
| ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) && |
| (sync_current_val <= queue->sync_value))) { |
		/* The sync wait condition is satisfied, so the group to which
		 * the queue is bound can be re-scheduled.
| */ |
| updated = true; |
| } else { |
| dev_dbg(queue->kctx->kbdev->dev, |
| "sync memory not updated yet(%u)", sync_current_val); |
| } |
| |
| kbase_phy_alloc_mapping_put(queue->kctx, mapping); |
| out: |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVALUATED, |
| queue->group, queue, updated); |
| return updated; |
| } |
| |
| /** |
| * save_slot_cs() - Save the state for blocked GPU command queue. |
| * |
| * @ginfo: Pointer to the CSG interface used by the group |
| * the queue is bound to. |
| * @queue: Pointer to the GPU command queue. |
| * |
 * This function will check if the GPU command queue is blocked on a sync wait
 * and evaluate the wait condition. If the wait condition isn't satisfied, it
 * will save the state needed to re-evaluate the condition in the future.
 * The group to which the queue is bound shall be in the idle state.
| * |
| * Return: true if the queue is blocked on a sync wait operation. |
| */ |
| static |
| bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, |
| struct kbase_queue *queue) |
| { |
| struct kbase_csf_cmd_stream_info *const stream = |
| &ginfo->streams[queue->csi_index]; |
| u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT); |
| bool is_waiting = false; |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT, |
| queue->group, queue, status); |
| |
| if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) { |
| queue->status_wait = status; |
| queue->sync_ptr = kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_WAIT_SYNC_POINTER_LO); |
| queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; |
| queue->sync_value = kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_WAIT_SYNC_VALUE); |
| |
| queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( |
| kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_SCOREBOARDS)); |
| queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET( |
| kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_BLOCKED_REASON)); |
| |
| if (!evaluate_sync_update(queue)) { |
| is_waiting = true; |
| } else { |
			/* The sync object already got updated and met the
			 * condition, thus it doesn't need to be re-evaluated
			 * and so 'status_wait' can be cleared here.
			 */
| queue->status_wait = 0; |
| } |
| } else { |
| /* Invalidate wait status info that would have been recorded if |
| * this queue was blocked when the group (in idle state) was |
| * suspended previously. After that the group could have been |
| * unblocked due to the kicking of another queue bound to it & |
| * so the wait status info would have stuck with this queue. |
| */ |
| queue->status_wait = 0; |
| } |
| |
| return is_waiting; |
| } |
| |
| /** |
| * Calculate how far in the future an event should be scheduled. |
| * |
| * The objective of this function is making sure that a minimum period of |
| * time is guaranteed between handling two consecutive events. |
| * |
| * This function guarantees a minimum period of time between two consecutive |
| * events: given the minimum period and the distance between the current time |
| * and the last event, the function returns the difference between the two. |
| * However, if more time than the minimum period has already elapsed |
| * since the last event, the function will return 0 to schedule work to handle |
| * the event with the lowest latency possible. |
| * |
| * @last_event: Timestamp of the last event, in jiffies. |
| * @time_now: Timestamp of the new event to handle, in jiffies. |
| * Must be successive to last_event. |
| * @period: Minimum period between two events, in jiffies. |
| * |
| * Return: Time to delay work to handle the current event, in jiffies |
| */ |
| static unsigned long get_schedule_delay(unsigned long last_event, |
| unsigned long time_now, |
| unsigned long period) |
| { |
| const unsigned long t_distance = time_now - last_event; |
| const unsigned long delay_t = (t_distance < period) ? |
| (period - t_distance) : 0; |
| |
| return delay_t; |
| } |
| |
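/**
 * schedule_in_cycle() - Schedule an asynchronous scheduling "tock" for a
 *                       group.
 *
 * @group: Pointer to the queue group.
 * @force: If true, the tock is scheduled even when the scheduler tick timer
 *         is disabled.
 *
 * This function queues the tock work item after a minimum delay since the
 * last scheduling action, unless a tock request is already pending.
 */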
| static void schedule_in_cycle(struct kbase_queue_group *group, bool force) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
	/* Only try to schedule work for this event if no tock request is
	 * already pending, otherwise the function would end up canceling the
	 * previously queued work. The work is scheduled if the scheduler is
	 * configured to wake up periodically, or if the scheduling of work
	 * needs to be enforced (e.g. when entering protected mode).
	 */
| if ((likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) && |
| !scheduler->tock_pending_request) { |
| const unsigned long delay = |
| get_schedule_delay(scheduler->last_schedule, jiffies, |
| CSF_SCHEDULER_TIME_TOCK_JIFFIES); |
| scheduler->tock_pending_request = true; |
| dev_dbg(kbdev->dev, "Kicking async for group %d\n", |
| group->handle); |
| mod_delayed_work(scheduler->wq, &scheduler->tock_work, delay); |
| } |
| } |
| |
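/**
 * insert_group_to_runnable() - Add a group to its context's runnable list.
 *
 * @scheduler: Pointer to the scheduler instance.
 * @group:     Pointer to the queue group.
 * @run_state: New run state to set for the group.
 *
 * The group is appended to the per-priority runnable list of its context.
 * If it is the first runnable group of the context, the context is also
 * added to the scheduler's runnable contexts. A scheduling tick or tock is
 * kicked off as appropriate and the scheduler is woken up, as the new
 * runnable group may require the GPU to be powered up.
 */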
| static |
| void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, |
| struct kbase_queue_group *const group, |
| enum kbase_csf_group_state run_state) |
| { |
| struct kbase_context *const kctx = group->kctx; |
| struct kbase_device *const kbdev = kctx->kbdev; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); |
| |
| if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) |
| return; |
| |
| group->run_state = run_state; |
| |
| if (run_state == KBASE_CSF_GROUP_RUNNABLE) |
| group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; |
| |
| list_add_tail(&group->link, |
| &kctx->csf.sched.runnable_groups[group->priority]); |
| kctx->csf.sched.num_runnable_grps++; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_INSERT_RUNNABLE, group, |
| kctx->csf.sched.num_runnable_grps); |
| |
| /* Add the kctx if not yet in runnable kctxs */ |
| if (kctx->csf.sched.num_runnable_grps == 1) { |
| /* First runnable csg, adds to the runnable_kctxs */ |
| INIT_LIST_HEAD(&kctx->csf.link); |
| list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs); |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_INSERT_RUNNABLE, kctx, 0u); |
| } |
| |
| scheduler->total_runnable_grps++; |
| |
| if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && |
| (scheduler->total_runnable_grps == 1 || |
| scheduler->state == SCHED_SUSPENDED)) { |
| dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n"); |
| /* Fire a scheduling to start the time-slice */ |
| enqueue_tick_work(kbdev); |
| } else |
| schedule_in_cycle(group, false); |
| |
| /* Since a new group has become runnable, check if GPU needs to be |
| * powered up. |
| */ |
| scheduler_wakeup(kbdev, false); |
| } |
| |
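/**
 * remove_group_from_runnable() - Remove a group from its context's runnable
 *                                list.
 *
 * @scheduler: Pointer to the scheduler instance.
 * @group:     Pointer to the queue group.
 * @run_state: New run state to set for the group.
 *
 * If the group was the scheduler's top group, the top group is cleared and
 * a tock may be triggered for a group waiting to enter protected mode. If
 * the context is left with no runnable groups it is dropped from the
 * runnable contexts, and if no runnable groups remain at all the tick timer
 * is cancelled and the GPU idle work is queued.
 */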
| static |
| void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, |
| struct kbase_queue_group *group, |
| enum kbase_csf_group_state run_state) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_queue_group *new_head_grp; |
| struct list_head *list = |
| &kctx->csf.sched.runnable_groups[group->priority]; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| WARN_ON(!queue_group_scheduled_locked(group)); |
| |
| group->run_state = run_state; |
| list_del_init(&group->link); |
| |
| if (scheduler->top_grp == group) { |
| /* |
| * Note: this disables explicit rotation in the next scheduling |
| * cycle. However, removing the top_grp is the same as an |
| * implicit rotation (e.g. if we instead rotated the top_ctx |
| * and then remove top_grp) |
| * |
| * This implicit rotation is assumed by the scheduler rotate |
| * functions. |
| */ |
| scheduler->top_grp = NULL; |
| |
| /* |
| * Trigger a scheduling tock for a CSG containing protected |
| * content in case there has been any in order to minimise |
| * latency. |
| */ |
| group = scheduler_get_protm_enter_async_group(kctx->kbdev, |
| NULL); |
| if (group) |
| schedule_in_cycle(group, true); |
| } |
| |
| kctx->csf.sched.num_runnable_grps--; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_RUNNABLE, group, |
| kctx->csf.sched.num_runnable_grps); |
| new_head_grp = (!list_empty(list)) ? |
| list_first_entry(list, struct kbase_queue_group, link) : |
| NULL; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_RUNNABLE, new_head_grp, |
| 0u); |
| |
| if (kctx->csf.sched.num_runnable_grps == 0) { |
| struct kbase_context *new_head_kctx; |
| struct list_head *kctx_list = &scheduler->runnable_kctxs; |
| /* drop the kctx */ |
| list_del_init(&kctx->csf.link); |
| if (scheduler->top_ctx == kctx) |
| scheduler->top_ctx = NULL; |
| KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_REMOVE_RUNNABLE, kctx, |
| 0u); |
| new_head_kctx = (!list_empty(kctx_list)) ? |
| list_first_entry(kctx_list, struct kbase_context, csf.link) : |
| NULL; |
| KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_HEAD_RUNNABLE, |
| new_head_kctx, 0u); |
| } |
| |
| WARN_ON(scheduler->total_runnable_grps == 0); |
| scheduler->total_runnable_grps--; |
| if (!scheduler->total_runnable_grps) { |
| dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups"); |
| cancel_tick_timer(kctx->kbdev); |
| WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps)); |
| if (scheduler->state != SCHED_SUSPENDED) |
| queue_work(system_wq, &scheduler->gpu_idle_work); |
| } |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, |
| scheduler->num_active_address_spaces | |
| (((u64)scheduler->total_runnable_grps) << 32)); |
| } |
| |
| static void insert_group_to_idle_wait(struct kbase_queue_group *const group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| |
| lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); |
| |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_IDLE); |
| |
| list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups); |
| kctx->csf.sched.num_idle_wait_grps++; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_INSERT_IDLE_WAIT, group, |
| kctx->csf.sched.num_idle_wait_grps); |
| group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC; |
| dev_dbg(kctx->kbdev->dev, |
| "Group-%d suspended on sync_wait, total wait_groups: %u\n", |
| group->handle, kctx->csf.sched.num_idle_wait_grps); |
| } |
| |
| static void remove_group_from_idle_wait(struct kbase_queue_group *const group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct list_head *list = &kctx->csf.sched.idle_wait_groups; |
| struct kbase_queue_group *new_head_grp; |
| |
| lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); |
| |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); |
| |
| list_del_init(&group->link); |
| WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0); |
| kctx->csf.sched.num_idle_wait_grps--; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_IDLE_WAIT, group, |
| kctx->csf.sched.num_idle_wait_grps); |
| new_head_grp = (!list_empty(list)) ? |
| list_first_entry(list, struct kbase_queue_group, link) : |
| NULL; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_IDLE_WAIT, |
| new_head_grp, 0u); |
| group->run_state = KBASE_CSF_GROUP_INACTIVE; |
| } |
| |
| static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, |
| struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (WARN_ON(!group)) |
| return; |
| |
| remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_IDLE); |
| insert_group_to_idle_wait(group); |
| } |
| |
| static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { |
| int new_val = |
| atomic_dec_return(&scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, |
| group, new_val); |
| } |
| } |
| |
| static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| WARN_ON(group->csg_nr < 0); |
| |
| if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { |
| int new_val = |
| atomic_dec_return(&scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, |
| group, new_val); |
| } |
| } |
| |
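| /** |
| * update_offslot_non_idle_cnt_on_grp_suspend() - Update the count of non-idle |
| * off-slot groups when a group is suspended. |
| * |
| * @group: Pointer to the queue group that has been suspended. |
| * |
| * Depending on whether the Scheduler is in the active (SCHED_BUSY) phase or |
| * an asynchronous phase, and on whether the group was tagged as on-slot idle |
| * at scan-out, the non_idle_offslot_grps counter is incremented or |
| * decremented so that it reflects the run state the group was suspended with. |
| */ |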
| static void update_offslot_non_idle_cnt_on_grp_suspend( |
| struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (scheduler->state == SCHED_BUSY) { |
| /* active phase or asynchronous entry into protected mode */ |
| if (group->prepared_seq_num >= |
| scheduler->non_idle_scanout_grps) { |
| /* At scanout, it was tagged as on-slot idle */ |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { |
| int new_val = atomic_inc_return( |
| &scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, |
| group, new_val); |
| } |
| } else { |
| if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) { |
| int new_val = atomic_dec_return( |
| &scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, |
| group, new_val); |
| } |
| } |
| } else { |
| /* async phases */ |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { |
| int new_val = atomic_inc_return( |
| &scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, |
| group, new_val); |
| } |
| } |
| } |
| |
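| /** |
| * confirm_cmd_buf_empty() - Check if the ring buffer of a GPU queue is empty |
| * and the corresponding CS is idle. |
| * |
| * @queue: Pointer to the GPU queue to check. |
| * |
| * The ring buffer is considered empty when the CS_INSERT and CS_EXTRACT |
| * pointers in the queue's user I/O pages match. From CSF interface version |
| * 1.0 onwards the CS_STATUS_SCOREBOARDS output is also taken into account, |
| * so the CS is reported idle only if no scoreboard entries are in use. |
| * |
| * Return: true if the ring buffer is empty and the CS is idle. |
| */ |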
| static bool confirm_cmd_buf_empty(struct kbase_queue *queue) |
| { |
| bool cs_empty; |
| bool cs_idle; |
| u32 sb_status = 0; |
| |
| struct kbase_device const *const kbdev = queue->group->kctx->kbdev; |
| struct kbase_csf_global_iface const *const iface = |
| &kbdev->csf.global_iface; |
| |
| u32 glb_version = iface->version; |
| |
| u64 *input_addr = (u64 *)queue->user_io_addr; |
| u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); |
| |
| if (glb_version >= kbase_csf_interface_version(1, 0, 0)) { |
| /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */ |
| struct kbase_csf_cmd_stream_group_info const *const ginfo = |
| &kbdev->csf.global_iface.groups[queue->group->csg_nr]; |
| struct kbase_csf_cmd_stream_info const *const stream = |
| &ginfo->streams[queue->csi_index]; |
| |
| sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( |
| kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_SCOREBOARDS)); |
| } |
| |
| cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] == |
| output_addr[CS_EXTRACT_LO / sizeof(u64)]); |
| cs_idle = cs_empty && (!sb_status); |
| |
| return cs_idle; |
| } |
| |
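| /** |
| * save_csg_slot() - Update the run state of a group after its CSG slot has |
| * been suspended or terminated. |
| * |
| * @group: Pointer to the queue group resident on the CSG slot. |
| * |
| * Called once the firmware has acknowledged a suspend or terminate request |
| * for the slot. If the CSG was reported idle, the bound queues are examined |
| * to decide whether the group is waiting on a sync condition (in which case |
| * it is moved to the idle_wait_groups list of its context) or is simply idle |
| * (suspended on idle). Otherwise the group is marked as suspended. The |
| * non-idle off-slot group counter is updated accordingly. |
| */ |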
| static void save_csg_slot(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| u32 state; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return; |
| |
| ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; |
| |
| state = |
| CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK)); |
| |
| if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) && |
| (state != CSG_ACK_STATE_TERMINATE))) { |
| u32 max_streams = ginfo->stream_num; |
| u32 i; |
| bool sync_wait = false; |
| bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & |
| CSG_STATUS_STATE_IDLE_MASK; |
| for (i = 0; idle && i < max_streams; i++) { |
| struct kbase_queue *const queue = |
| group->bound_queues[i]; |
| |
| if (!queue || !queue->enabled) |
| continue; |
| |
| if (save_slot_cs(ginfo, queue)) |
| sync_wait = true; |
| else { |
| /* Need to confirm whether the ring buffer of |
| * the GPU queue is empty or not. A race can |
| * arise between the flush of a GPU queue and |
| * the suspension of a CSG. If a queue is |
| * flushed after the FW has set the IDLE bit in |
| * CSG_STATUS_STATE, then the Scheduler would |
| * incorrectly consider the CSG as idle, and |
| * there may not be any further flush call for |
| * the GPU queue which would have de-idled the |
| * CSG. |
| */ |
| idle = confirm_cmd_buf_empty(queue); |
| } |
| } |
| |
| if (idle) { |
| /* Take the suspended group out of the runnable_groups |
| * list of the context and move it to the |
| * idle_wait_groups list. |
| */ |
| if (sync_wait) |
| deschedule_idle_wait_group(scheduler, group); |
| else { |
| group->run_state = |
| KBASE_CSF_GROUP_SUSPENDED_ON_IDLE; |
| dev_dbg(kbdev->dev, "Group-%d suspended: idle", |
| group->handle); |
| } |
| } else { |
| group->run_state = KBASE_CSF_GROUP_SUSPENDED; |
| } |
| |
| update_offslot_non_idle_cnt_on_grp_suspend(group); |
| } |
| } |
| |
| /* Clean up a CSG slot after it has been vacated, ready for the next CSG run. |
| * Return whether there is a kctx address fault associated with the group |
| * for which the clean-up is done. |
| */ |
| static bool cleanup_csg_slot(struct kbase_queue_group *group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| s8 slot; |
| struct kbase_csf_csg_slot *csg_slot; |
| unsigned long flags; |
| u32 i; |
| bool as_fault = false; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return as_fault; |
| |
| slot = group->csg_nr; |
| csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; |
| ginfo = &global_iface->groups[slot]; |
| |
| /* Now loop through all the bound CSs, and clean them via a stop */ |
| for (i = 0; i < ginfo->stream_num; i++) { |
| struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[i]; |
| |
| if (group->bound_queues[i]) { |
| if (group->bound_queues[i]->enabled) { |
| kbase_csf_firmware_cs_input_mask(stream, |
| CS_REQ, CS_REQ_STATE_STOP, |
| CS_REQ_STATE_MASK); |
| } |
| |
| unassign_user_doorbell_from_queue(kbdev, |
| group->bound_queues[i]); |
| } |
| } |
| |
| unassign_user_doorbell_from_group(kbdev, group); |
| |
| /* The CSG does not need cleanup other than dropping its AS */ |
| spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); |
| as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT); |
| kbase_ctx_sched_release_ctx(kctx); |
| if (unlikely(group->faulted)) |
| as_fault = true; |
| spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); |
| |
| /* now mark the slot as vacant */ |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL; |
| clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, |
| kbdev->csf.scheduler.csg_slots_idle_mask[0]); |
| |
| group->csg_nr = KBASEP_CSG_NR_INVALID; |
| set_bit(slot, kbdev->csf.scheduler.csgs_events_enable_mask); |
| clear_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| csg_slot->trigger_jiffies = jiffies; |
| atomic_set(&csg_slot->state, CSG_SLOT_READY); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, slot); |
| dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n", |
| group->handle, slot); |
| |
| KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev, |
| kbdev->gpu_props.props.raw_props.gpu_id, slot); |
| |
| return as_fault; |
| } |
| |
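| /** |
| * update_csg_slot_priority() - Update the priority of a queue group resident |
| * on a CSG slot. |
| * |
| * @group: Pointer to the on-slot queue group. |
| * @prio:  The dynamic priority to be assigned to the CSG slot. |
| * |
| * If the new priority differs from the one currently programmed, the |
| * priority field of CSG_EP_REQ is updated and an EP_CFG request is raised to |
| * the firmware, followed by a ring of the CSG doorbell. The slot is recorded |
| * in the csg_slots_prio_update bitmap so that the acknowledgement can be |
| * awaited later via wait_csg_slots_finish_prio_update(). |
| */ |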
| static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_csg_slot *csg_slot; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| s8 slot; |
| u8 prev_prio; |
| u32 ep_cfg; |
| u32 csg_req; |
| unsigned long flags; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return; |
| |
| slot = group->csg_nr; |
| csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; |
| ginfo = &kbdev->csf.global_iface.groups[slot]; |
| |
| /* CSGs remaining on-slot can be either idle or runnable. |
| * This also applies in protected mode. |
| */ |
| WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) || |
| (group->run_state == KBASE_CSF_GROUP_IDLE))); |
| |
| /* Update consumes a group from scanout */ |
| update_offslot_non_idle_cnt_for_onslot_grp(group); |
| |
| if (csg_slot->priority == prio) |
| return; |
| |
| /* Read the csg_ep_cfg back for updating the priority field */ |
| ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ); |
| prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg); |
| ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); |
| kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); |
| csg_req ^= CSG_REQ_EP_CFG_MASK; |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, |
| CSG_REQ_EP_CFG_MASK); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| csg_slot->priority = prio; |
| |
| dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n", |
| group->handle, group->kctx->tgid, group->kctx->id, slot, |
| prev_prio, prio); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PRIO_UPDATE, group, prev_prio); |
| |
| kbase_csf_ring_csg_doorbell(kbdev, slot); |
| set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update); |
| } |
| |
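| /** |
| * program_csg_slot() - Program a queue group on a vacant CSG slot. |
| * |
| * @group: Pointer to the queue group to be programmed. |
| * @slot:  The vacant CSG slot number. |
| * @prio:  The dynamic priority to be assigned to the CSG slot. |
| * |
| * This function retains a GPU address space for the group's context, |
| * programs the bound queues, the endpoint configuration, the suspend buffers |
| * and the assigned address space on the given slot, and then requests the |
| * firmware to start or resume the group, followed by a ring of the CSG |
| * doorbell. |
| */ |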
| static void program_csg_slot(struct kbase_queue_group *group, s8 slot, |
| u8 prio) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; |
| const u64 shader_core_mask = |
| kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); |
| const u64 tiler_core_mask = |
| kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER); |
| const u64 compute_mask = shader_core_mask & group->compute_mask; |
| const u64 fragment_mask = shader_core_mask & group->fragment_mask; |
| const u64 tiler_mask = tiler_core_mask & group->tiler_mask; |
| const u8 num_cores = kbdev->gpu_props.num_cores; |
| const u8 compute_max = min(num_cores, group->compute_max); |
| const u8 fragment_max = min(num_cores, group->fragment_max); |
| const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max); |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| u32 ep_cfg = 0; |
| u32 csg_req; |
| u32 state; |
| int i; |
| unsigned long flags; |
| const u64 normal_suspend_buf = |
| group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT; |
| struct kbase_csf_csg_slot *csg_slot = |
| &kbdev->csf.scheduler.csg_slots[slot]; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(slot < 0) || |
| WARN_ON(slot >= global_iface->group_num)) |
| return; |
| |
| WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY); |
| |
| ginfo = &global_iface->groups[slot]; |
| |
| /* Pick an available address space for this context */ |
| mutex_lock(&kbdev->mmu_hw_mutex); |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| kbase_ctx_sched_retain_ctx(kctx); |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| mutex_unlock(&kbdev->mmu_hw_mutex); |
| |
| if (kctx->as_nr == KBASEP_AS_NR_INVALID) { |
| dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", |
| group->handle, kctx->tgid, kctx->id, slot); |
| return; |
| } |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); |
| kbdev->csf.scheduler.csg_slots[slot].resident_group = group; |
| group->csg_nr = slot; |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| assign_user_doorbell_to_group(kbdev, group); |
| |
| /* Now loop through all the bound & kicked CSs, and program them */ |
| for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { |
| struct kbase_queue *queue = group->bound_queues[i]; |
| |
| if (queue) |
| program_cs(kbdev, queue, false); |
| } |
| |
| /* Endpoint programming for CSG */ |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO, |
| compute_mask & U32_MAX); |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI, |
| compute_mask >> 32); |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO, |
| fragment_mask & U32_MAX); |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI, |
| fragment_mask >> 32); |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, |
| tiler_mask & U32_MAX); |
| |
| ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max); |
| ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); |
| ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); |
| ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); |
| kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); |
| |
| /* Program the address space number assigned to the context */ |
| kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr); |
| |
| kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO, |
| normal_suspend_buf & U32_MAX); |
| kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, |
| normal_suspend_buf >> 32); |
| |
| if (group->protected_suspend_buf.reg) { |
| const u64 protm_suspend_buf = |
| group->protected_suspend_buf.reg->start_pfn << |
| PAGE_SHIFT; |
| kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, |
| protm_suspend_buf & U32_MAX); |
| kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, |
| protm_suspend_buf >> 32); |
| } |
| |
| /* Enable all interrupts for now */ |
| kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0)); |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); |
| csg_req ^= CSG_REQ_EP_CFG_MASK; |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, |
| CSG_REQ_EP_CFG_MASK); |
| |
| /* Set state to START/RESUME */ |
| if (queue_group_suspended_locked(group)) { |
| state = CSG_REQ_STATE_RESUME; |
| } else { |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE); |
| state = CSG_REQ_STATE_START; |
| } |
| |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, |
| state, CSG_REQ_STATE_MASK); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| /* Update status before ringing the doorbell, marking ready => run */ |
| atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN); |
| csg_slot->trigger_jiffies = jiffies; |
| csg_slot->priority = prio; |
| |
| /* Trace the programming of the CSG on the slot */ |
| KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(kbdev, |
| kbdev->gpu_props.props.raw_props.gpu_id, group->handle, slot); |
| |
| dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n", |
| group->handle, kctx->tgid, kctx->id, slot, prio); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START, group, |
| (((u64)ep_cfg) << 32) | |
| ((((u32)kctx->as_nr) & 0xF) << 16) | |
| (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT))); |
| |
| kbase_csf_ring_csg_doorbell(kbdev, slot); |
| |
| /* Programming a slot consumes a group from scanout */ |
| update_offslot_non_idle_cnt_for_onslot_grp(group); |
| } |
| |
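| /** |
| * remove_scheduled_group() - Remove a group from the list of groups prepared |
| * for scheduling in the current tick/tock. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * @group: Pointer to the queue group to be removed. |
| * |
| * The group is dropped from the groups_to_schedule list and the scheduler |
| * and context level counts of groups left to schedule are decremented. |
| */ |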
| static void remove_scheduled_group(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| WARN_ON(group->prepared_seq_num == |
| KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID); |
| WARN_ON(list_empty(&group->link_to_schedule)); |
| |
| list_del_init(&group->link_to_schedule); |
| scheduler->ngrp_to_schedule--; |
| group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; |
| group->kctx->csf.sched.ngrp_to_schedule--; |
| } |
| |
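| /** |
| * sched_evict_group() - Evict a queue group from the scheduler. |
| * |
| * @group: Pointer to the queue group to be evicted. |
| * @fault: True if the eviction is due to a fault, in which case the group is |
| *         placed in the FAULT_EVICTED state instead of INACTIVE. |
| * @update_non_idle_offslot_grps_cnt: True if the non-idle off-slot group |
| *         counter should be decremented for a suspended or runnable group. |
| * |
| * All the queues bound to the group are disabled and the group is removed |
| * from the runnable or idle-wait list of its context, as well as from the |
| * list of groups prepared for scheduling. Waiters on the CSF event wait |
| * queue are woken up to notify them of the eviction. |
| */ |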
| static void sched_evict_group(struct kbase_queue_group *group, bool fault, |
| bool update_non_idle_offslot_grps_cnt) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (queue_group_scheduled_locked(group)) { |
| u32 i; |
| |
| if (update_non_idle_offslot_grps_cnt && |
| (group->run_state == KBASE_CSF_GROUP_SUSPENDED || |
| group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { |
| int new_val = atomic_dec_return( |
| &scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, |
| group, new_val); |
| } |
| |
| for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { |
| if (group->bound_queues[i]) |
| group->bound_queues[i]->enabled = false; |
| } |
| |
| if (group->prepared_seq_num != |
| KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) |
| remove_scheduled_group(kbdev, group); |
| |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) |
| remove_group_from_idle_wait(group); |
| else { |
| remove_group_from_runnable(scheduler, group, |
| KBASE_CSF_GROUP_INACTIVE); |
| } |
| |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); |
| |
| if (fault) |
| group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED; |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT_SCHED, group, |
| (((u64)scheduler->total_runnable_grps) << 32) | |
| ((u32)group->run_state)); |
| dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n", |
| group->handle, scheduler->total_runnable_grps); |
| /* Notify a group has been evicted */ |
| wake_up_all(&kbdev->csf.event_wait); |
| } |
| } |
| |
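| /** |
| * term_group_sync() - Terminate a group on its CSG slot and wait for the |
| * termination to complete. |
| * |
| * @group: Pointer to the on-slot queue group to be terminated. |
| * |
| * A GPU reset is initiated if the termination request is not acknowledged by |
| * the firmware within the timeout. |
| * |
| * Return: 0 on success, or -ETIMEDOUT if the wait timed out. |
| */ |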
| static int term_group_sync(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| int err = 0; |
| |
| term_csg_slot(group); |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| csg_slot_stopped_locked(kbdev, group->csg_nr), remaining); |
| |
| if (!remaining) { |
| dev_warn(kbdev->dev, "term request timed out for group %d of context %d_%d on slot %d", |
| group->handle, group->kctx->tgid, |
| group->kctx->id, group->csg_nr); |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| err = -ETIMEDOUT; |
| } |
| |
| return err; |
| } |
| |
| void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| long remaining = |
| kbase_csf_timeout_in_jiffies(CSG_SCHED_STOP_TIMEOUT_MS); |
| bool force = false; |
| |
| kbase_reset_gpu_assert_failed_or_prevented(kbdev); |
| lockdep_assert_held(&group->kctx->csf.lock); |
| mutex_lock(&scheduler->lock); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state); |
| while (queue_group_scheduled_locked(group)) { |
| u32 saved_state = scheduler->state; |
| |
| if (!kbasep_csf_scheduler_group_is_on_slot_locked(group)) { |
| sched_evict_group(group, false, true); |
| } else if (saved_state == SCHED_INACTIVE || force) { |
| bool as_faulty; |
| |
| term_group_sync(group); |
| /* Treat the CSG as having been terminated */ |
| as_faulty = cleanup_csg_slot(group); |
| /* remove from the scheduler list */ |
| sched_evict_group(group, as_faulty, false); |
| } |
| |
| /* wait for the scheduler state to change */ |
| if (queue_group_scheduled_locked(group)) { |
| mutex_unlock(&scheduler->lock); |
| remaining = wait_event_timeout( |
| kbdev->csf.event_wait, |
| saved_state != scheduler->state, |
| remaining); |
| if (!remaining) { |
| dev_warn(kbdev->dev, "Scheduler state change wait timed out for group %d on slot %d", |
| group->handle, group->csg_nr); |
| force = true; |
| } |
| mutex_lock(&scheduler->lock); |
| } |
| } |
| |
| mutex_unlock(&scheduler->lock); |
| } |
| |
| /** |
| * scheduler_group_schedule() - Schedule a GPU command queue group on firmware |
| * |
| * @group: Pointer to the queue group to be scheduled. |
| * |
| * This function enables the scheduling of a GPU command queue group on the |
| * firmware. |
| * |
| * Return: 0 on success, or negative on failure. |
| */ |
| static int scheduler_group_schedule(struct kbase_queue_group *group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&kctx->csf.lock); |
| lockdep_assert_held(&scheduler->lock); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state); |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) |
| update_idle_suspended_group_state(group); |
| else if (queue_group_idle_locked(group)) { |
| WARN_ON(kctx->csf.sched.num_runnable_grps == 0); |
| WARN_ON(kbdev->csf.scheduler.total_runnable_grps == 0); |
| |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) |
| update_idle_suspended_group_state(group); |
| else { |
| struct kbase_queue_group *protm_grp; |
| unsigned long flags; |
| |
| WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked( |
| group)); |
| |
| group->run_state = KBASE_CSF_GROUP_RUNNABLE; |
| |
| /* A normal mode CSG could be idle onslot during |
| * protected mode. In this case clear the |
| * appropriate bit in csg_slots_idle_mask. |
| */ |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| protm_grp = scheduler->active_protm_grp; |
| if (protm_grp && protm_grp != group) { |
| clear_bit((unsigned int)group->csg_nr, |
| scheduler->csg_slots_idle_mask); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, |
| scheduler->csg_slots_idle_mask[0]); |
| } |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, |
| flags); |
| |
| /* If the GPU is in protected mode then any doorbells rung |
| * would have no effect. Check if the GPU is in protected |
| * mode and if this group has a higher priority than the |
| * active protected mode group. If so, prompt the FW |
| * to exit protected mode. |
| */ |
| if (protm_grp && |
| group->scan_seq_num < protm_grp->scan_seq_num) { |
| /* Prompt the FW to exit protected mode */ |
| scheduler_force_protm_exit(kbdev); |
| } |
| } |
| } else if (!queue_group_scheduled_locked(group)) { |
| int new_val; |
| insert_group_to_runnable(&kbdev->csf.scheduler, group, |
| KBASE_CSF_GROUP_RUNNABLE); |
| /* A new group into the scheduler */ |
| new_val = atomic_inc_return( |
| &kbdev->csf.scheduler.non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, |
| group, new_val); |
| } |
| |
| /* Since a group has become active now, check if GPU needs to be |
| * powered up. Also rekick the Scheduler. |
| */ |
| scheduler_wakeup(kbdev, true); |
| |
| return 0; |
| } |
| |
| /** |
| * set_max_csg_slots() - Set the number of available CSG slots |
| * |
| * @kbdev: Pointer of the GPU device. |
| * |
| * This function sets/limits the number of CSG slots that can be used in the |
| * given tick/tock. The limit is less than the total number of CSG slots |
| * supported by the firmware if the number of GPU address space slots |
| * required to utilize all the CSG slots exceeds the number of available |
| * address space slots. |
| */ |
| static inline void set_max_csg_slots(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| unsigned int total_csg_slots = kbdev->csf.global_iface.group_num; |
| unsigned int max_address_space_slots = |
| kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; |
| |
| WARN_ON(scheduler->num_active_address_spaces > total_csg_slots); |
| |
| if (likely(scheduler->num_active_address_spaces <= |
| max_address_space_slots)) |
| scheduler->num_csg_slots_for_tick = total_csg_slots; |
| } |
| |
| /** |
| * count_active_address_space() - Count the number of GPU address space slots |
| * |
| * @kbdev: Pointer of the GPU device. |
| * @kctx: Pointer of the Kbase context. |
| * |
| * This function updates the counter that tracks the number of GPU address |
| * space slots required to program the CSG slots for the groups at the head |
| * of the groups_to_schedule list. |
| */ |
| static inline void count_active_address_space(struct kbase_device *kbdev, |
| struct kbase_context *kctx) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| unsigned int total_csg_slots = kbdev->csf.global_iface.group_num; |
| unsigned int max_address_space_slots = |
| kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; |
| |
| if (scheduler->ngrp_to_schedule <= total_csg_slots) { |
| if (kctx->csf.sched.ngrp_to_schedule == 1) |
| scheduler->num_active_address_spaces++; |
| |
| if (scheduler->num_active_address_spaces <= |
| max_address_space_slots) |
| scheduler->num_csg_slots_for_tick++; |
| } |
| } |
| |
| /* Two schemes are used to assign a priority to the CSG slot of a given CSG |
| * from the 'groups_to_schedule' list. |
| * This is needed because an idle on-slot group is deprioritized by moving it |
| * to the tail of the 'groups_to_schedule' list. As a result it can either get |
| * evicted from its CSG slot in the current tick/tock, or end up placed after |
| * lower priority non-idle groups in the 'groups_to_schedule' list. The latter |
| * case can result in the on-slot subset containing both non-idle and idle |
| * CSGs, and is handled through the second scheme described below. |
| * |
| * First scheme :- If all the slots are going to be occupied by either |
| * non-idle or idle groups, then the priority is simply assigned according to |
| * the position of a group in the 'groups_to_schedule' list, so the maximum |
| * priority gets assigned to the slot of the group at the head of the list. |
| * Here the 'groups_to_schedule' list is effectively ordered by the static |
| * priority of the groups. |
| * |
| * Second scheme :- If the slots are going to be occupied by a mix of idle and |
| * non-idle groups, then the priority assignment needs to ensure that the |
| * priority of a slot belonging to a higher priority idle group is always |
| * greater than the priority of a slot belonging to a lower priority non-idle |
| * group, reflecting the original position of a group in the scan order (i.e. |
| * its static priority) 'scan_seq_num', which is set during the prepare phase |
| * of a tick/tock, before the group is moved to the 'idle_groups_to_schedule' |
| * list if it is idle. |
| * The priority range [MAX_CSG_SLOT_PRIORITY, 0] is partitioned so that the |
| * first 'slots_for_tick' groups in the original scan order are assigned a |
| * priority in the subrange |
| * [MAX_CSG_SLOT_PRIORITY, MAX_CSG_SLOT_PRIORITY - slots_for_tick), whereas |
| * the rest of the groups are assigned a priority in the subrange |
| * [MAX_CSG_SLOT_PRIORITY - slots_for_tick, 0]. This way, even if an idle |
| * higher priority group ends up after the non-idle lower priority groups in |
| * the 'groups_to_schedule' list, it will get a higher slot priority, which |
| * enables the FW to quickly start the execution of the higher priority group |
| * when it gets de-idled. |
| */ |
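| /* As an illustration (with assumed example values): if MAX_CSG_SLOT_PRIORITY |
| * is 15, slots_for_tick is 4 and there are 2 non-idle scanned-out groups, the |
| * second scheme applies. A group with scan_seq_num 1 is given slot priority |
| * 14 (15 - 1), whereas a deprioritized idle group with scan_seq_num 6 that is |
| * placed when used_slots is 2 gets priority 9 (15 - (4 + 2)). |
| */ |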
| static u8 get_slot_priority(struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| u8 slot_prio; |
| u32 slots_for_tick = scheduler->num_csg_slots_for_tick; |
| u32 used_slots = slots_for_tick - scheduler->remaining_tick_slots; |
| /* Check if all the slots are going to be occupied by the non-idle or |
| * idle groups. |
| */ |
| if (scheduler->non_idle_scanout_grps >= slots_for_tick || |
| !scheduler->non_idle_scanout_grps) { |
| slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - used_slots); |
| } else { |
| /* There will be a mix of idle and non-idle groups. */ |
| if (group->scan_seq_num < slots_for_tick) |
| slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - |
| group->scan_seq_num); |
| else if (MAX_CSG_SLOT_PRIORITY > (slots_for_tick + used_slots)) |
| slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - (slots_for_tick + used_slots)); |
| else |
| slot_prio = 0; |
| } |
| return slot_prio; |
| } |
| |
| /** |
| * update_resident_groups_priority() - Update the priority of resident groups |
| * |
| * @kbdev: The GPU device. |
| * |
| * This function will update the priority of all resident queue groups |
| * that are at the head of groups_to_schedule list, preceding the first |
| * non-resident group. |
| * |
| * This function will also adjust kbase_csf_scheduler.remaining_tick_slots on |
| * the priority update. |
| */ |
| static void update_resident_groups_priority(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| u32 num_groups = scheduler->num_csg_slots_for_tick; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| while (!list_empty(&scheduler->groups_to_schedule)) { |
| struct kbase_queue_group *group = |
| list_first_entry(&scheduler->groups_to_schedule, |
| struct kbase_queue_group, |
| link_to_schedule); |
| bool resident = |
| kbasep_csf_scheduler_group_is_on_slot_locked(group); |
| |
| if ((group->prepared_seq_num >= num_groups) || !resident) |
| break; |
| |
| update_csg_slot_priority(group, |
| get_slot_priority(group)); |
| |
| /* Drop the head group from the list */ |
| remove_scheduled_group(kbdev, group); |
| scheduler->remaining_tick_slots--; |
| } |
| } |
| |
| /** |
| * program_group_on_vacant_csg_slot() - Program a non-resident group on the |
| * given vacant CSG slot. |
| * @kbdev: Pointer to the GPU device. |
| * @slot: Vacant CSG slot number. |
| * |
| * This function will program a non-resident group at the head of |
| * kbase_csf_scheduler.groups_to_schedule list on the given vacant |
| * CSG slot, provided the initial position of the non-resident |
| * group in the list is less than the number of CSG slots and there is |
| * an available GPU address space slot. |
| * kbase_csf_scheduler.remaining_tick_slots would also be adjusted after |
| * programming the slot. |
| */ |
| static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, |
| s8 slot) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| struct kbase_queue_group *const group = |
| list_empty(&scheduler->groups_to_schedule) ? NULL : |
| list_first_entry(&scheduler->groups_to_schedule, |
| struct kbase_queue_group, |
| link_to_schedule); |
| u32 num_groups = scheduler->num_csg_slots_for_tick; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| if (group && (group->prepared_seq_num < num_groups)) { |
| bool ret = kbasep_csf_scheduler_group_is_on_slot_locked(group); |
| |
| if (!WARN_ON(ret)) { |
| if (kctx_as_enabled(group->kctx) && !group->faulted) { |
| program_csg_slot(group, slot, |
| get_slot_priority(group)); |
| |
| if (likely(csg_slot_in_use(kbdev, slot))) { |
| /* Drop the head group from the list */ |
| remove_scheduled_group(kbdev, group); |
| scheduler->remaining_tick_slots--; |
| } |
| } else { |
| update_offslot_non_idle_cnt_for_faulty_grp( |
| group); |
| remove_scheduled_group(kbdev, group); |
| } |
| } |
| } |
| } |
| |
| /** |
| * program_vacant_csg_slot() - Program the vacant CSG slot with a non-resident |
| * group and update the priority of resident groups. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * @slot: Vacant CSG slot number. |
| * |
| * This function will first update the priority of all resident queue groups |
| * that are at the head of the groups_to_schedule list, preceding the first |
| * non-resident group. It will then try to program the given CSG slot with |
| * the non-resident group, and finally update the priority of all resident |
| * queue groups following the non-resident group. |
| * |
| * kbase_csf_scheduler.remaining_tick_slots would also be adjusted. |
| */ |
| static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| struct kbase_csf_csg_slot *const csg_slot = |
| scheduler->csg_slots; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| WARN_ON(atomic_read(&csg_slot[slot].state) != CSG_SLOT_READY); |
| |
| /* First update priority for already resident groups (if any) |
| * before the non-resident group |
| */ |
| update_resident_groups_priority(kbdev); |
| |
| /* Now consume the vacant slot for the non-resident group */ |
| program_group_on_vacant_csg_slot(kbdev, slot); |
| |
| /* Now update priority for already resident groups (if any) |
| * following the non-resident group |
| */ |
| update_resident_groups_priority(kbdev); |
| } |
| |
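| /** |
| * slots_state_changed() - Check a set of CSG slots for a state change. |
| * |
| * @kbdev:            Pointer to the GPU device. |
| * @slots_mask:       Bitmap of the slots to check. If any slot changed its |
| *                    state, the bitmap is updated to hold only the changed |
| *                    slots. |
| * @state_check_func: Function used to check the state of an individual slot. |
| * |
| * Return: true if at least one of the specified slots changed its state. |
| */ |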
| static bool slots_state_changed(struct kbase_device *kbdev, |
| unsigned long *slots_mask, |
| bool (*state_check_func)(struct kbase_device *, s8)) |
| { |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| DECLARE_BITMAP(changed_slots, MAX_SUPPORTED_CSGS) = {0}; |
| bool changed = false; |
| u32 i; |
| |
| for_each_set_bit(i, slots_mask, num_groups) { |
| if (state_check_func(kbdev, (s8)i)) { |
| set_bit(i, changed_slots); |
| changed = true; |
| } |
| } |
| |
| if (changed) |
| bitmap_copy(slots_mask, changed_slots, MAX_SUPPORTED_CSGS); |
| |
| return changed; |
| } |
| |
| /** |
| * program_suspending_csg_slots() - Program the CSG slots vacated on suspension |
| * of queue groups running on them. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * |
| * This function will first wait for the ongoing suspension to complete on a |
| * CSG slot and will then program the vacant slot with the |
| * non-resident queue group inside the groups_to_schedule list. |
| * The programming of the non-resident queue group on the vacant slot could |
| * fail due to the unavailability of a free GPU address space slot, and so |
| * the programming is re-attempted after the ongoing suspension has completed |
| * for all the CSG slots. |
| * The priority of resident groups before and after the non-resident group |
| * in the groups_to_schedule list would also be updated. |
| * This would be repeated for all the slots undergoing suspension. |
| * GPU reset would be initiated if the wait for suspend times out. |
| */ |
| static void program_suspending_csg_slots(struct kbase_device *kbdev) |
| { |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS); |
| DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0}; |
| bool suspend_wait_failed = false; |
| long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| /* In the current implementation, csgs_events_enable_mask would be used |
| * only to indicate suspending CSGs. |
| */ |
| bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask, |
| MAX_SUPPORTED_CSGS); |
| |
| while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { |
| DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); |
| |
| bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| slots_state_changed(kbdev, changed, |
| csg_slot_stopped_raw), |
| remaining); |
| |
| if (remaining) { |
| u32 i; |
| |
| for_each_set_bit(i, changed, num_groups) { |
| struct kbase_queue_group *group = |
| scheduler->csg_slots[i].resident_group; |
| |
| if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) { |
| continue; |
| } |
| /* The on slot csg is now stopped */ |
| clear_bit(i, slot_mask); |
| |
| if (likely(group)) { |
| bool as_fault; |
| /* Only do save/cleanup if the |
| * group was not terminated during |
| * the sleep. |
| */ |
| save_csg_slot(group); |
| as_fault = cleanup_csg_slot(group); |
| /* If AS fault detected, evict it */ |
| if (as_fault) { |
| sched_evict_group(group, true, true); |
| set_bit(i, evicted_mask); |
| } |
| } |
| |
| program_vacant_csg_slot(kbdev, (s8)i); |
| } |
| } else { |
| u32 i; |
| |
| /* Groups that have failed to suspend in time shall |
| * raise a fatal error as they could no longer be |
| * safely resumed. |
| */ |
| for_each_set_bit(i, slot_mask, num_groups) { |
| struct kbase_queue_group *const group = |
| scheduler->csg_slots[i].resident_group; |
| |
| struct base_gpu_queue_group_error const |
| err_payload = { .error_type = |
| BASE_GPU_QUEUE_GROUP_ERROR_FATAL, |
| .payload = { |
| .fatal_group = { |
| .status = |
| GPU_EXCEPTION_TYPE_SW_FAULT_2, |
| } } }; |
| |
| if (unlikely(group == NULL)) |
| continue; |
| |
| kbase_csf_add_group_fatal_error(group, |
| &err_payload); |
| kbase_event_wakeup(group->kctx); |
| |
| /* TODO GPUCORE-25328: The CSG can't be |
| * terminated, the GPU will be reset as a |
| * work-around. |
| */ |
| dev_warn( |
| kbdev->dev, |
| "Group %d of context %d_%d on slot %u failed to suspend", |
| group->handle, group->kctx->tgid, |
| group->kctx->id, i); |
| |
| /* The group has failed suspension, stop |
| * further examination. |
| */ |
| clear_bit(i, slot_mask); |
| set_bit(i, scheduler->csgs_events_enable_mask); |
| update_offslot_non_idle_cnt_for_onslot_grp( |
| group); |
| } |
| |
| suspend_wait_failed = true; |
| } |
| } |
| |
| if (!bitmap_empty(evicted_mask, MAX_SUPPORTED_CSGS)) |
| dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n", |
| num_groups, evicted_mask); |
| |
| if (likely(!suspend_wait_failed)) { |
| u32 i; |
| |
| while (scheduler->ngrp_to_schedule && |
| scheduler->remaining_tick_slots) { |
| i = find_first_zero_bit(scheduler->csg_inuse_bitmap, |
| num_groups); |
| if (WARN_ON(i == num_groups)) |
| break; |
| program_vacant_csg_slot(kbdev, (s8)i); |
| if (!csg_slot_in_use(kbdev, (int)i)) { |
| dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i); |
| break; |
| } |
| } |
| } else { |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| } |
| } |
| |
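| /** |
| * suspend_queue_group() - Suspend or terminate a queue group on its CSG slot. |
| * |
| * @group: Pointer to the on-slot queue group. |
| * |
| * The slot's bit in csgs_events_enable_mask is cleared to mark the CSG as |
| * going through suspension. If an AS fault was detected for the group's |
| * context, or the group itself has faulted, the slot is terminated instead |
| * of being suspended. |
| */ |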
| static void suspend_queue_group(struct kbase_queue_group *group) |
| { |
| unsigned long flags; |
| struct kbase_csf_scheduler *const scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| /* This shall be used in program_suspending_csg_slots() where we |
| * assume that whilst CSGs are being suspended, this bitmask is not |
| * used by anything else i.e., it indicates only the CSGs going |
| * through suspension. |
| */ |
| clear_bit(group->csg_nr, scheduler->csgs_events_enable_mask); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| |
| /* If AS fault detected, terminate the group */ |
| if (!kctx_as_enabled(group->kctx) || group->faulted) |
| term_csg_slot(group); |
| else |
| suspend_csg_slot(group); |
| } |
| |
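| /** |
| * wait_csg_slots_start() - Wait for the newly programmed CSG slots to start |
| * running. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * |
| * Waits for every slot in the READY2RUN state to transition to the running |
| * state and marks the resident groups as runnable. A GPU reset is initiated |
| * if the wait times out. |
| */ |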
| static void wait_csg_slots_start(struct kbase_device *kbdev) |
| { |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; |
| u32 i; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| /* extract start slot flags for check */ |
| for (i = 0; i < num_groups; i++) { |
| if (atomic_read(&scheduler->csg_slots[i].state) == |
| CSG_SLOT_READY2RUN) |
| set_bit(i, slot_mask); |
| } |
| |
| while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { |
| DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); |
| |
| bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| slots_state_changed(kbdev, changed, csg_slot_running), |
| remaining); |
| |
| if (remaining) { |
| for_each_set_bit(i, changed, num_groups) { |
| struct kbase_queue_group *group = |
| scheduler->csg_slots[i].resident_group; |
| |
| /* The on slot csg is now running */ |
| clear_bit(i, slot_mask); |
| group->run_state = KBASE_CSF_GROUP_RUNNABLE; |
| } |
| } else { |
| dev_warn(kbdev->dev, "Timed out waiting for CSG slots to start, slots: 0x%*pb\n", |
| num_groups, slot_mask); |
| |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| break; |
| } |
| } |
| } |
| |
| /** |
| * group_on_slot_is_idle() - Check if the given slot has a CSG-idle state |
| * flagged after the completion of a CSG status |
| * update command |
| * |
| * This function is called at the start of scheduling tick to check the |
| * idle status of a queue group resident on a CSG slot. |
| * The caller must make sure the corresponding status update command has |
| * been called and completed before checking this status. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * @slot: The given slot for checking an occupying resident group's idle |
| * state. |
| * |
| * Return: true if the group resident on slot is idle, otherwise false. |
| */ |
| static bool group_on_slot_is_idle(struct kbase_device *kbdev, |
| unsigned long slot) |
| { |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &kbdev->csf.global_iface.groups[slot]; |
| bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & |
| CSG_STATUS_STATE_IDLE_MASK; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| return idle; |
| } |
| |
| /** |
| * slots_update_state_changed() - Check the handshake state of a subset of |
| * command group slots. |
| * |
| * Checks the state of a subset of slots selected through the slots_mask |
| * bit_map. Records which slots' handshake completed and sends it back in the |
| * slots_done bit_map. |
| * |
| * @kbdev: The GPU device. |
| * @field_mask: The field mask for checking the state in the csg_req/ack. |
| * @slots_mask: A bit_map specifying the slots to check. |
| * @slots_done: A cleared bit_map for returning the slots that |
| * have finished update. |
| * |
| * Return: true if the slots_done is set for at least one slot. |
| * Otherwise false. |
| */ |
| static |
| bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask, |
| const unsigned long *slots_mask, unsigned long *slots_done) |
| { |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| bool changed = false; |
| u32 i; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| for_each_set_bit(i, slots_mask, num_groups) { |
| struct kbase_csf_cmd_stream_group_info const *const ginfo = |
| &kbdev->csf.global_iface.groups[i]; |
| u32 state = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); |
| |
| state ^= kbase_csf_firmware_csg_output(ginfo, CSG_ACK); |
| |
| if (!(state & field_mask)) { |
| set_bit(i, slots_done); |
| changed = true; |
| } |
| } |
| |
| return changed; |
| } |
| |
| /** |
| * wait_csg_slots_handshake_ack - Wait for the req/ack handshakes to complete |
| * on the specified groups. |
| * |
| * This function waits for the acknowledgement of the requests that have |
| * already been placed for the CSG slots by the caller. Currently used for |
| * the CSG priority update and status update requests. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * @field_mask: The field mask for checking the state in the csg_req/ack. |
| * @slot_mask: Bitmap reflecting the slots, the function will modify |
| * the acknowledged slots by clearing their corresponding |
| * bits. |
| * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out. |
| * |
| * Return: 0 if all specified slots are acknowledged; otherwise -ETIMEDOUT. |
| * On a timed-out condition, the bits of the unacknowledged slots remain |
| * set in the slot_mask. |
| */ |
| static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev, |
| u32 field_mask, unsigned long *slot_mask, long wait_in_jiffies) |
| { |
| const u32 num_groups = kbdev->csf.global_iface.group_num; |
| long remaining = wait_in_jiffies; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| while (!bitmap_empty(slot_mask, num_groups) && |
| !kbase_reset_gpu_is_active(kbdev)) { |
| DECLARE_BITMAP(dones, MAX_SUPPORTED_CSGS) = { 0 }; |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| slots_update_state_changed(kbdev, field_mask, |
| slot_mask, dones), |
| remaining); |
| |
| if (remaining) |
| bitmap_andnot(slot_mask, slot_mask, dones, num_groups); |
| else |
| /* Timed-out on the wait */ |
| return -ETIMEDOUT; |
| } |
| |
| return 0; |
| } |
| |
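| /** |
| * wait_csg_slots_finish_prio_update() - Wait for the pending CSG priority |
| * update requests to be acknowledged. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * |
| * A timeout is only warned about, as it is not regarded as a serious issue; |
| * the bits of the unacknowledged slots remain set in the |
| * csg_slots_prio_update bitmap. |
| */ |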
| static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev) |
| { |
| unsigned long *slot_mask = |
| kbdev->csf.scheduler.csg_slots_prio_update; |
| long wait_time = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK, |
| slot_mask, wait_time); |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (ret != 0) { |
| /* The update timeout is not regarded as a serious |
| * issue, no major consequences are expected as a |
| * result, so just warn the case. |
| */ |
| dev_warn( |
| kbdev->dev, |
| "Timeout on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx", |
| slot_mask[0]); |
| } |
| } |
| |
| void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, |
| struct kbase_context *kctx, struct list_head *evicted_groups) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| struct kbase_queue_group *group; |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| u32 slot; |
| DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; |
| |
| lockdep_assert_held(&kctx->csf.lock); |
| mutex_lock(&scheduler->lock); |
| |
| /* This code is only called during reset, so we don't wait for the CSG |
| * slots to be stopped |
| */ |
| WARN_ON(!kbase_reset_gpu_is_active(kbdev)); |
| |
| KBASE_KTRACE_ADD(kbdev, EVICT_CTX_SLOTS, kctx, 0u); |
| for (slot = 0; slot < num_groups; slot++) { |
| group = kbdev->csf.scheduler.csg_slots[slot].resident_group; |
| if (group && group->kctx == kctx) { |
| bool as_fault; |
| |
| term_csg_slot(group); |
| as_fault = cleanup_csg_slot(group); |
| /* remove the group from the scheduler list */ |
| sched_evict_group(group, as_fault, false); |
| /* return the evicted group to the caller */ |
| list_add_tail(&group->link, evicted_groups); |
| set_bit(slot, slot_mask); |
| } |
| } |
| |
| dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n", |
| kctx->tgid, kctx->id, num_groups, slot_mask); |
| |
| mutex_unlock(&scheduler->lock); |
| } |
| |
| /** |
| * scheduler_slot_protm_ack - Acknowledging the protected region requests |
| * from the resident group on a given slot. |
| * |
| * The function assumes that the given slot is in a stable running state and |
| * that the caller has already determined that any pending protected region |
| * requests of the resident group should be acknowledged. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * @group: Pointer to the resident group on the given slot. |
| * @slot: The slot that the given group is actively operating on. |
| * |
| * Return: true if the group has pending protm request(s) and they were |
| * acknowledged; the caller should then arrange to enter protected mode to |
| * service them. Otherwise false, indicating the group has no pending protm |
| * request. |
| */ |
| static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, |
| struct kbase_queue_group *const group, |
| const int slot) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| bool protm_ack = false; |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &kbdev->csf.global_iface.groups[slot]; |
| u32 max_csi; |
| int i; |
| |
| if (WARN_ON(scheduler->csg_slots[slot].resident_group != group)) |
| return protm_ack; |
| |
| lockdep_assert_held(&scheduler->lock); |
| lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.interrupt_lock); |
| |
| max_csi = ginfo->stream_num; |
| for (i = find_first_bit(group->protm_pending_bitmap, max_csi); |
| i < max_csi; |
| i = find_next_bit(group->protm_pending_bitmap, max_csi, i + 1)) { |
| struct kbase_queue *queue = group->bound_queues[i]; |
| |
| clear_bit(i, group->protm_pending_bitmap); |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_CLEAR, group, |
| queue, group->protm_pending_bitmap[0]); |
| |
| if (!WARN_ON(!queue) && queue->enabled) { |
| struct kbase_csf_cmd_stream_info *stream = |
| &ginfo->streams[i]; |
| u32 cs_protm_ack = kbase_csf_firmware_cs_output( |
| stream, CS_ACK) & |
| CS_ACK_PROTM_PEND_MASK; |
| u32 cs_protm_req = kbase_csf_firmware_cs_input_read( |
| stream, CS_REQ) & |
| CS_REQ_PROTM_PEND_MASK; |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_ACK, group, |
| queue, cs_protm_ack ^ cs_protm_req); |
| |
| if (cs_protm_ack == cs_protm_req) { |
| dev_dbg(kbdev->dev, |
| "PROTM-ack already done for queue-%d group-%d slot-%d", |
| queue->csi_index, group->handle, slot); |
| continue; |
| } |
| |
| kbase_csf_firmware_cs_input_mask(stream, CS_REQ, |
| cs_protm_ack, |
| CS_ACK_PROTM_PEND_MASK); |
| protm_ack = true; |
| dev_dbg(kbdev->dev, |
| "PROTM-ack for queue-%d, group-%d slot-%d", |
| queue->csi_index, group->handle, slot); |
| } |
| } |
| |
| return protm_ack; |
| } |
| |
| /** |
| * scheduler_group_check_protm_enter - Request the given group to be evaluated |
| * for triggering the protected mode. |
| * |
| * The function assumes the given group is either an active running group or |
| * the scheduler internally maintained field scheduler->top_grp. |
| * |
| * If the GPU is not already running in protected mode and the input group |
| * has protected region requests from its bound queues, the requests are |
| * acknowledged and the GPU is instructed to enter the protected mode. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * @input_grp: Pointer to the GPU queue group. |
| */ |
| static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, |
| struct kbase_queue_group *const input_grp) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| bool protm_in_use; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| |
| protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_CHECK_PROTM_ENTER, input_grp, |
| protm_in_use); |
| |
| /* Firmware samples the PROTM_PEND ACK bit for CSs when |
| * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit |
| * is set for a CS after Host has sent the PROTM_ENTER |
| * Global request, then there is no guarantee that firmware will |
| * notice that prior to switching to protected mode. And firmware |
| * may not again raise the PROTM_PEND interrupt for that CS |
| * later on. To avoid that uncertainty PROTM_PEND ACK bit |
| * is not set for a CS if the request to enter protected |
| * mode has already been sent. It will be set later (after the exit |
| * from protected mode has taken place) when the group to which |
| * CS is bound becomes the top group. |
| * |
| * The actual decision to enter protected mode hinges on the input group |
| * being the top priority group or, in case the previous top group was |
| * evicted from the scheduler during the tick, its would-be replacement, |
| * and on it currently being in a stable state (i.e. the slot state is |
| * running). |
| */ |
| if (!protm_in_use && !WARN_ON(!input_grp)) { |
| const int slot = |
| kbase_csf_scheduler_group_get_slot_locked(input_grp); |
| |
| /* check the input_grp is running and requesting protected mode |
| */ |
| if (slot >= 0 && |
| atomic_read(&scheduler->csg_slots[slot].state) == |
| CSG_SLOT_RUNNING) { |
| if (kctx_as_enabled(input_grp->kctx) && |
| scheduler_slot_protm_ack(kbdev, input_grp, slot)) { |
| /* Option of acknowledging to multiple |
| * CSGs from the same kctx is dropped, |
| * after consulting with the |
| * architecture team. See the comment in |
| * GPUCORE-21394. |
| */ |
| |
| /* Disable the idle timer */ |
| disable_gpu_idle_fw_timer_locked(kbdev |