| // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
| /* |
| * |
| * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. |
| * |
| * This program is free software and is provided to you under the terms of the |
| * GNU General Public License version 2 as published by the Free Software |
| * Foundation, and any use by you of this program is subject to the terms |
| * of such GNU license. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, you can access it online at |
| * http://www.gnu.org/licenses/gpl-2.0.html. |
| * |
| */ |
| |
| #include <mali_kbase.h> |
| #include "mali_kbase_config_defaults.h" |
| #include <mali_kbase_ctx_sched.h> |
| #include <mali_kbase_reset_gpu.h> |
| #include <mali_kbase_as_fault_debugfs.h> |
| #include "mali_kbase_csf.h" |
| #include <tl/mali_kbase_tracepoints.h> |
| #include <backend/gpu/mali_kbase_pm_internal.h> |
| #include <linux/export.h> |
| #include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> |
| #include <uapi/gpu/arm/midgard/mali_base_kernel.h> |
| |
| /* Value to indicate that a queue group is not on the groups_to_schedule list */ |
| #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) |
| |
| /* Waiting timeout for scheduler state change for descheduling a CSG */ |
| #define CSG_SCHED_STOP_TIMEOUT_MS (50) |
| |
| #define CSG_SUSPEND_ON_RESET_WAIT_TIMEOUT_MS DEFAULT_RESET_TIMEOUT_MS |
| |
| /* Maximum number of endpoints which may run tiler jobs. */ |
| #define CSG_TILER_MAX ((u8)1) |
| |
| /* Maximum dynamic CSG slot priority value */ |
| #define MAX_CSG_SLOT_PRIORITY ((u8)15) |
| |
| /* CSF scheduler time slice value */ |
| #define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */ |
| |
| /* |
| * CSF scheduler time threshold for converting "tock" requests into "tick" if |
| * they come too close to the end of a tick interval. This avoids scheduling |
| * twice in a row. |
| */ |
| #define CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS \ |
| CSF_SCHEDULER_TIME_TICK_MS |
| |
| #define CSF_SCHEDULER_TIME_TICK_THRESHOLD_JIFFIES \ |
| msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS) |
| |
| /* Nanoseconds per millisecond */ |
| #define NS_PER_MS ((u64)1000 * 1000) |
| |
| /* |
| * CSF minimum time to reschedule for a new "tock" request. Bursts of "tock" |
| * requests are not serviced immediately, but shall wait for a minimum time in |
| * order to reduce load on the CSF scheduler thread. |
| */ |
| #define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffies-time */ |
| |
| /* CS suspended and is idle (empty ring buffer) */ |
| #define CS_IDLE_FLAG (1 << 0) |
| |
| /* CS suspended and is waiting for a CQS condition */ |
| #define CS_WAIT_SYNC_FLAG (1 << 1) |
| |
| /* 2 GPU address space slots are reserved for the MCU and the privileged |
| * context used for HW counter dumping. TODO: remove the slot reserved for |
| * the latter in GPUCORE-26293. |
| */ |
| #define NUM_RESERVED_AS_SLOTS (2) |
| |
| static int scheduler_group_schedule(struct kbase_queue_group *group); |
| static void remove_group_from_idle_wait(struct kbase_queue_group *const group); |
| static |
| void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, |
| struct kbase_queue_group *const group, |
| enum kbase_csf_group_state run_state); |
| static struct kbase_queue_group *scheduler_get_protm_enter_async_group( |
| struct kbase_device *const kbdev, |
| struct kbase_queue_group *const group); |
| static struct kbase_queue_group *get_tock_top_group( |
| struct kbase_csf_scheduler *const scheduler); |
| static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev); |
| static int suspend_active_queue_groups(struct kbase_device *kbdev, |
| unsigned long *slot_mask); |
| static void schedule_in_cycle(struct kbase_queue_group *group, bool force); |
| |
| #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) |
| |
| /** |
| * tick_timer_callback() - Callback function for the scheduling tick hrtimer |
| * |
| * @timer: Pointer to the scheduling tick hrtimer |
| * |
| * This function will enqueue the scheduling tick work item for immediate |
| * execution, if it has not been queued already. |
| * |
| * Return: enum value to indicate that the timer should not be restarted. |
| */ |
| static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer) |
| { |
| struct kbase_device *kbdev = container_of(timer, struct kbase_device, |
| csf.scheduler.tick_timer); |
| |
| kbase_csf_scheduler_advance_tick(kbdev); |
| return HRTIMER_NORESTART; |
| } |
| |
| /** |
| * start_tick_timer() - Start the scheduling tick hrtimer. |
| * |
| * @kbdev: Pointer to the device |
| * |
| * This function will start the scheduling tick hrtimer and is supposed to |
| * be called only from the tick work item function. The tick hrtimer |
| * should not be active already. |
| */ |
| static void start_tick_timer(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| WARN_ON(scheduler->tick_timer_active); |
| if (likely(!work_pending(&scheduler->tick_work))) { |
| scheduler->tick_timer_active = true; |
| |
| hrtimer_start(&scheduler->tick_timer, |
| HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), |
| HRTIMER_MODE_REL); |
| } |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| } |
| |
| /** |
| * cancel_tick_timer() - Cancel the scheduling tick hrtimer |
| * |
| * @kbdev: Pointer to the device |
| */ |
| static void cancel_tick_timer(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| scheduler->tick_timer_active = false; |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| hrtimer_cancel(&scheduler->tick_timer); |
| } |
| |
| /** |
| * enqueue_tick_work() - Enqueue the scheduling tick work item |
| * |
| * @kbdev: Pointer to the device |
| * |
| * This function will queue the scheduling tick work item for immediate |
| * execution. This shall only be called when both the tick hrtimer and tick |
| * work item are not active/pending. |
| */ |
| static void enqueue_tick_work(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| WARN_ON(scheduler->tick_timer_active); |
| queue_work(scheduler->wq, &scheduler->tick_work); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| } |
| |
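| /** |
| * release_doorbell() - Release a doorbell page back to the scheduler. |
| * |
| * @kbdev:       Pointer to the device |
| * @doorbell_nr: Index of the doorbell page to release |
| * |
| * The doorbell page is marked as free again in the scheduler's |
| * doorbell_inuse_bitmap. |
| */ |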
| static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr) |
| { |
| WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| clear_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); |
| } |
| |
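| /** |
| * acquire_doorbell() - Acquire a free doorbell page. |
| * |
| * @kbdev: Pointer to the device |
| * |
| * Return: the index of the doorbell page that was marked as in use, or |
| * KBASEP_USER_DB_NR_INVALID if all doorbell pages are already in use. |
| */ |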
| static int acquire_doorbell(struct kbase_device *kbdev) |
| { |
| int doorbell_nr; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| doorbell_nr = find_first_zero_bit( |
| kbdev->csf.scheduler.doorbell_inuse_bitmap, |
| CSF_NUM_DOORBELL); |
| |
| if (doorbell_nr >= CSF_NUM_DOORBELL) |
| return KBASEP_USER_DB_NR_INVALID; |
| |
| set_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); |
| |
| return doorbell_nr; |
| } |
| |
| static void unassign_user_doorbell_from_group(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (group->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { |
| release_doorbell(kbdev, group->doorbell_nr); |
| group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; |
| } |
| } |
| |
| static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev, |
| struct kbase_queue *queue) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| mutex_lock(&kbdev->csf.reg_lock); |
| |
| if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { |
| queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; |
| /* After this the dummy page would be mapped in */ |
| unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping, |
| queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1); |
| } |
| |
| mutex_unlock(&kbdev->csf.reg_lock); |
| } |
| |
| static void assign_user_doorbell_to_group(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (group->doorbell_nr == KBASEP_USER_DB_NR_INVALID) |
| group->doorbell_nr = acquire_doorbell(kbdev); |
| } |
| |
| static void assign_user_doorbell_to_queue(struct kbase_device *kbdev, |
| struct kbase_queue *const queue) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| mutex_lock(&kbdev->csf.reg_lock); |
| |
| /* If the bind operation for the queue hasn't completed yet, then the |
| * CSI can't be programmed for the queue (even in stopped state) and so |
| * the doorbell also can't be assigned to it. |
| */ |
| if ((queue->bind_state == KBASE_CSF_QUEUE_BOUND) && |
| (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)) { |
| WARN_ON(queue->group->doorbell_nr == KBASEP_USER_DB_NR_INVALID); |
| queue->doorbell_nr = queue->group->doorbell_nr; |
| |
| /* After this the real HW doorbell page would be mapped in */ |
| unmap_mapping_range( |
| kbdev->csf.db_filp->f_inode->i_mapping, |
| queue->db_file_offset << PAGE_SHIFT, |
| PAGE_SIZE, 1); |
| } |
| |
| mutex_unlock(&kbdev->csf.reg_lock); |
| } |
| |
| static void scheduler_doorbell_init(struct kbase_device *kbdev) |
| { |
| int doorbell_nr; |
| |
| bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap, |
| CSF_NUM_DOORBELL); |
| |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| /* Reserve doorbell 0 for use by kernel driver */ |
| doorbell_nr = acquire_doorbell(kbdev); |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| |
| WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR); |
| } |
| |
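| /** |
| * get_nr_active_csgs() - Count the number of CSG slots currently in use. |
| * |
| * @kbdev: Pointer to the device |
| * |
| * Return: the number of bits set in the scheduler's csg_inuse_bitmap. |
| */ |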
| static u32 get_nr_active_csgs(struct kbase_device *kbdev) |
| { |
| u32 nr_active_csgs; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap, |
| kbdev->csf.global_iface.group_num); |
| |
| return nr_active_csgs; |
| } |
| |
| /** |
| * csgs_active() - Check if any of the CSG slots are in use |
| * |
| * @kbdev: Instance of a GPU platform device that implements a CSF interface. |
| * |
| * Return: true if any CSG slot is in use, otherwise false. |
| */ |
| static bool csgs_active(struct kbase_device *kbdev) |
| { |
| u32 nr_active_csgs; |
| |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| nr_active_csgs = get_nr_active_csgs(kbdev); |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| |
| /* Right now if any of the CSG interfaces are in use |
| * then we need to assume that there is some work pending. |
| * In future when we have IDLE notifications from firmware implemented |
| * then we would have a better idea of the pending work. |
| */ |
| return (nr_active_csgs != 0); |
| } |
| |
| /** |
| * csg_slot_in_use - returns true if a queue group has been programmed on a |
| * given CSG slot. |
| * |
| * @kbdev: Instance of a GPU platform device that implements a CSF interface. |
| * @slot: Index/number of the CSG slot in question. |
| * |
| * Return: true if a queue group is programmed on the given CSG slot. |
| * |
| * Note: Caller must hold the scheduler lock. |
| */ |
| static inline bool csg_slot_in_use(struct kbase_device *kbdev, int slot) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| return (kbdev->csf.scheduler.csg_slots[slot].resident_group != NULL); |
| } |
| |
| static bool queue_group_suspended_locked(struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); |
| |
| return (group->run_state == KBASE_CSF_GROUP_SUSPENDED || |
| group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE || |
| group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); |
| } |
| |
| static bool queue_group_idle_locked(struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); |
| |
| return (group->run_state == KBASE_CSF_GROUP_IDLE || |
| group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE); |
| } |
| |
| static bool queue_group_scheduled(struct kbase_queue_group *group) |
| { |
| return (group->run_state != KBASE_CSF_GROUP_INACTIVE && |
| group->run_state != KBASE_CSF_GROUP_TERMINATED && |
| group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED); |
| } |
| |
| static bool queue_group_scheduled_locked(struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); |
| |
| return queue_group_scheduled(group); |
| } |
| |
| /** |
| * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode. |
| * |
| * @kbdev: Pointer to the GPU device |
| * |
| * This function waits for the GPU to exit protected mode which is confirmed |
| * when active_protm_grp is set to NULL. |
| */ |
| static void scheduler_wait_protm_quit(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| long remaining; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT, NULL, |
| jiffies_to_msecs(wt)); |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); |
| |
| if (!remaining) |
| dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped"); |
| |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT_DONE, NULL, |
| jiffies_to_msecs(remaining)); |
| } |
| |
| /** |
| * scheduler_force_protm_exit() - Force GPU to exit protected mode. |
| * |
| * @kbdev: Pointer to the GPU device |
| * |
| * This function sends a ping request to the firmware and waits for the GPU |
| * to exit protected mode. |
| */ |
| static void scheduler_force_protm_exit(struct kbase_device *kbdev) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| kbase_csf_firmware_ping(kbdev); |
| scheduler_wait_protm_quit(kbdev); |
| } |
| |
| /** |
| * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up |
| * automatically for periodic tasks. |
| * |
| * @kbdev: Pointer to the device |
| * |
| * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes |
| * the CSF scheduler lock is already held. |
| * |
| * Return: true if the scheduler is configured to wake up periodically |
| */ |
| static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| return kbdev->csf.scheduler.timer_enabled; |
| } |
| |
| static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (scheduler->gpu_idle_fw_timer_enabled) |
| return; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| |
| /* Updating the gpu_idle_fw_timer_enabled flag requires holding interrupt_lock */ |
| scheduler->gpu_idle_fw_timer_enabled = true; |
| kbase_csf_firmware_enable_gpu_idle_timer(kbdev); |
| |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| } |
| |
| static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| lockdep_assert_held(&scheduler->interrupt_lock); |
| |
| /* Updating the gpu_idle_fw_timer_enabled flag requires holding interrupt_lock */ |
| if (scheduler->gpu_idle_fw_timer_enabled) { |
| scheduler->gpu_idle_fw_timer_enabled = false; |
| kbase_csf_firmware_disable_gpu_idle_timer(kbdev); |
| } |
| } |
| |
| static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (!scheduler->gpu_idle_fw_timer_enabled) |
| return; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| disable_gpu_idle_fw_timer_locked(kbdev); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| } |
| |
| static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (scheduler->state == SCHED_SUSPENDED) { |
| dev_dbg(kbdev->dev, "Re-activating the Scheduler"); |
| kbase_csf_scheduler_pm_active(kbdev); |
| scheduler->state = SCHED_INACTIVE; |
| |
| if (kick) |
| scheduler_enable_tick_timer_nolock(kbdev); |
| } |
| } |
| |
| static void scheduler_suspend(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) { |
| dev_dbg(kbdev->dev, "Suspending the Scheduler"); |
| kbase_csf_scheduler_pm_idle(kbdev); |
| scheduler->state = SCHED_SUSPENDED; |
| } |
| } |
| |
| /** |
| * update_idle_suspended_group_state() - Move the queue group to a non-idle |
| * suspended state. |
| * @group: Pointer to the queue group. |
| * |
| * This function is called to change the state of queue group to non-idle |
| * suspended state, if the group was suspended when all the queues bound to it |
| * became empty or when some queues got blocked on a sync wait & others became |
| * empty. The group is also moved to the runnable list from idle wait list in |
| * the latter case. |
| * So the function gets called when a queue is kicked or sync wait condition |
| * gets satisfied. |
| */ |
| static void update_idle_suspended_group_state(struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| int new_val; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) { |
| remove_group_from_idle_wait(group); |
| insert_group_to_runnable(scheduler, group, |
| KBASE_CSF_GROUP_SUSPENDED); |
| } else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) { |
| group->run_state = KBASE_CSF_GROUP_SUSPENDED; |
| |
| /* If the scheduler is not suspended and the given group's |
| * static priority (reflected by the scan_seq_num) is inside |
| * the current tick slot-range, then schedule an async tock. |
| */ |
| if (scheduler->state != SCHED_SUSPENDED && |
| group->scan_seq_num < scheduler->num_csg_slots_for_tick) |
| schedule_in_cycle(group, true); |
| } else |
| return; |
| |
| new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, |
| group, new_val); |
| } |
| |
| int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| int slot_num = group->csg_nr; |
| |
| lockdep_assert_held(&scheduler->interrupt_lock); |
| |
| if (slot_num >= 0) { |
| if (WARN_ON(scheduler->csg_slots[slot_num].resident_group != |
| group)) |
| return -1; |
| } |
| |
| return slot_num; |
| } |
| |
| int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| unsigned long flags; |
| int slot_num; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| slot_num = kbase_csf_scheduler_group_get_slot_locked(group); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| |
| return slot_num; |
| } |
| |
| static bool kbasep_csf_scheduler_group_is_on_slot_locked( |
| struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| int slot_num = group->csg_nr; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (slot_num >= 0) { |
| if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group != |
| group)) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| int slot_num = group->csg_nr; |
| |
| lockdep_assert_held(&scheduler->interrupt_lock); |
| |
| if (WARN_ON(slot_num < 0)) |
| return false; |
| |
| return test_bit(slot_num, scheduler->csgs_events_enable_mask); |
| } |
| |
| struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( |
| struct kbase_device *kbdev, int slot) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); |
| |
| return kbdev->csf.scheduler.csg_slots[slot].resident_group; |
| } |
| |
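| /** |
| * halt_stream_sync() - Stop a GPU queue that is bound to a group resident |
| * on a CSG slot and wait for the firmware to acknowledge it. |
| * |
| * @queue: Pointer to the GPU queue to stop |
| * |
| * If a START request for the queue is still in flight, the function first |
| * waits for it to be acknowledged, then issues a STOP request and waits for |
| * the STOP acknowledgement. A GPU reset is initiated if the firmware does |
| * not respond in time. |
| * |
| * Return: 0 on success, or a negative error code on failure. |
| */ |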
| static int halt_stream_sync(struct kbase_queue *queue) |
| { |
| struct kbase_queue_group *group = queue->group; |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| struct kbase_csf_cmd_stream_info *stream; |
| int csi_index = queue->csi_index; |
| long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| |
| if (WARN_ON(!group) || |
| WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return -EINVAL; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| ginfo = &global_iface->groups[group->csg_nr]; |
| stream = &ginfo->streams[csi_index]; |
| |
| if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) == |
| CS_REQ_STATE_START) { |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) |
| == CS_ACK_STATE_START), remaining); |
| |
| if (!remaining) { |
| dev_warn(kbdev->dev, "Timed out waiting for queue to start on csi %d bound to group %d on slot %d", |
| csi_index, group->handle, group->csg_nr); |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| |
| return -ETIMEDOUT; |
| } |
| |
| remaining = |
| kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| } |
| |
| /* Set state to STOP */ |
| kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP, |
| CS_REQ_STATE_MASK); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQUESTED, group, queue, 0u); |
| kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true); |
| |
| /* Timed wait */ |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) |
| == CS_ACK_STATE_STOP), remaining); |
| |
| if (!remaining) { |
| dev_warn(kbdev->dev, "Timed out waiting for queue to stop on csi %d bound to group %d on slot %d", |
| queue->csi_index, group->handle, group->csg_nr); |
| |
| /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU |
| * will be reset as a work-around. |
| */ |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| } |
| return (remaining) ? 0 : -ETIMEDOUT; |
| } |
| |
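| /** |
| * can_halt_stream() - Check whether a queue of the given group can be |
| * stopped without further waiting. |
| * |
| * @kbdev: Pointer to the device |
| * @group: Pointer to the queue group the queue is bound to |
| * |
| * Return: true if the group is no longer scheduled, or if it is resident on |
| * a CSG slot that is in the running state. |
| */ |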
| static bool can_halt_stream(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| struct kbase_csf_csg_slot *const csg_slot = |
| kbdev->csf.scheduler.csg_slots; |
| unsigned long flags; |
| bool can_halt; |
| int slot; |
| |
| if (!queue_group_scheduled(group)) |
| return true; |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| slot = kbase_csf_scheduler_group_get_slot_locked(group); |
| can_halt = (slot >= 0) && |
| (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, |
| flags); |
| |
| return can_halt; |
| } |
| |
| /** |
| * sched_halt_stream() - Stop a GPU queue when its queue group is not running |
| * on a CSG slot. |
| * @queue: Pointer to the GPU queue to stop. |
| * |
| * This function handles stopping GPU queues for groups that are either not on |
| * a CSG slot or are on the slot but undergoing transition to |
| * resume or suspend states. |
| * It waits until the queue group is scheduled on a slot and starts running, |
| * which is needed as groups that were suspended may need to resume all queues |
| * that were enabled and running at the time of suspension. |
| * |
| * Return: 0 on success, or negative on failure. |
| */ |
| static int sched_halt_stream(struct kbase_queue *queue) |
| { |
| struct kbase_queue_group *group = queue->group; |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = |
| &kbdev->csf.scheduler; |
| struct kbase_csf_csg_slot *const csg_slot = |
| kbdev->csf.scheduler.csg_slots; |
| bool retry_needed = false; |
| bool retried = false; |
| long remaining; |
| int slot; |
| int err = 0; |
| |
| if (WARN_ON(!group)) |
| return -EINVAL; |
| |
| lockdep_assert_held(&queue->kctx->csf.lock); |
| lockdep_assert_held(&scheduler->lock); |
| |
| slot = kbase_csf_scheduler_group_get_slot(group); |
| |
| if (slot >= 0) { |
| WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); |
| |
| if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { |
| dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is under transition to running state", |
| queue->csi_index, group->handle); |
| retry_needed = true; |
| } |
| } |
| retry: |
| /* Update the group state so that it can get scheduled soon */ |
| update_idle_suspended_group_state(group); |
| |
| mutex_unlock(&scheduler->lock); |
| |
| /* This function is called when the queue group is either not on a CSG |
| * slot or is on the slot but undergoing transition. |
| * |
| * To stop the queue, the function needs to wait either for the queue |
| * group to be assigned a CSG slot (and that slot has to reach the |
| * running state) or for the eviction of the queue group from the |
| * scheduler's list. |
| * |
| * In order to evaluate the latter condition, the function doesn't |
| * really need to lock the scheduler, as any update to the run_state |
| * of the queue group by sched_evict_group() would be visible due |
| * to implicit barriers provided by the kernel waitqueue macros. |
| * |
| * The group pointer cannot disappear meanwhile, as the high level |
| * CSF context is locked. Therefore, the scheduler would be |
| * the only one to update the run_state of the group. |
| */ |
| remaining = wait_event_timeout( |
| kbdev->csf.event_wait, can_halt_stream(kbdev, group), |
| kbase_csf_timeout_in_jiffies( |
| 20 * kbdev->csf.scheduler.csg_scheduling_period_ms)); |
| |
| mutex_lock(&scheduler->lock); |
| |
| if (remaining && queue_group_scheduled_locked(group)) { |
| slot = kbase_csf_scheduler_group_get_slot(group); |
| |
| /* If the group is still on slot and slot is in running state |
| * then explicitly stop the CSI of the |
| * queue. Otherwise there are different cases to consider |
| * |
| * - If the queue group was already undergoing transition to |
| * resume/start state when this function was entered then it |
| * would not have disabled the CSI of the |
| * queue being stopped and the previous wait would have ended |
| * once the slot was in a running state with CS |
| * interface still enabled. |
| * Now the group is going through another transition either |
| * to a suspend state or to a resume state (it could have |
| * been suspended before the scheduler lock was grabbed). |
| * In both scenarios we need to wait again for the group to |
| * come on a slot and that slot to reach the running state, |
| * as that would guarantee that firmware will observe the |
| * CSI as disabled. |
| * |
| * - If the queue group was either off the slot or was |
| * undergoing transition to suspend state on entering this |
| * function, then the group would have been resumed with the |
| * queue's CSI in disabled state. |
| * So now if the group is undergoing another transition |
| * (after the resume) then we just need to wait for the state |
| * bits in the ACK register of CSI to be |
| * set to STOP value. It is expected that firmware will |
| * process the stop/disable request of the CS |
| * interface after resuming the group before it processes |
| * another state change request of the group. |
| */ |
| if ((slot >= 0) && |
| (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) { |
| err = halt_stream_sync(queue); |
| } else if (retry_needed && !retried) { |
| retried = true; |
| goto retry; |
| } else if (slot >= 0) { |
| struct kbase_csf_global_iface *global_iface = |
| &kbdev->csf.global_iface; |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &global_iface->groups[slot]; |
| struct kbase_csf_cmd_stream_info *stream = |
| &ginfo->streams[queue->csi_index]; |
| u32 cs_req = |
| kbase_csf_firmware_cs_input_read(stream, CS_REQ); |
| |
| if (!WARN_ON(CS_REQ_STATE_GET(cs_req) != |
| CS_REQ_STATE_STOP)) { |
| /* Timed wait */ |
| remaining = wait_event_timeout( |
| kbdev->csf.event_wait, |
| (CS_ACK_STATE_GET( |
| kbase_csf_firmware_cs_output( |
| stream, CS_ACK)) == |
| CS_ACK_STATE_STOP), |
| kbdev->csf.fw_timeout_ms); |
| |
| if (!remaining) { |
| dev_warn(kbdev->dev, |
| "Timed out waiting for queue stop ack on csi %d bound to group %d on slot %d", |
| queue->csi_index, |
| group->handle, group->csg_nr); |
| err = -ETIMEDOUT; |
| } |
| } |
| } |
| } else if (!remaining) { |
| dev_warn(kbdev->dev, "Group-%d failed to get a slot for stopping the queue on csi %d", |
| group->handle, queue->csi_index); |
| err = -ETIMEDOUT; |
| } |
| |
| return err; |
| } |
| |
| int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) |
| { |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| struct kbase_queue_group *group = queue->group; |
| bool const cs_enabled = queue->enabled; |
| int err = 0; |
| |
| if (WARN_ON(!group)) |
| return -EINVAL; |
| |
| kbase_reset_gpu_assert_failed_or_prevented(kbdev); |
| lockdep_assert_held(&queue->kctx->csf.lock); |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| |
| queue->enabled = false; |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled); |
| |
| if (cs_enabled && queue_group_scheduled_locked(group)) { |
| struct kbase_csf_csg_slot *const csg_slot = |
| kbdev->csf.scheduler.csg_slots; |
| int slot = kbase_csf_scheduler_group_get_slot(group); |
| |
| /* Since the group needs to be resumed in order to stop the queue, |
| * check if GPU needs to be powered up. |
| */ |
| scheduler_wakeup(kbdev, true); |
| |
| if ((slot >= 0) && |
| (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) |
| err = halt_stream_sync(queue); |
| else |
| err = sched_halt_stream(queue); |
| |
| unassign_user_doorbell_from_queue(kbdev, queue); |
| } |
| |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| return err; |
| } |
| |
| static void update_hw_active(struct kbase_queue *queue, bool active) |
| { |
| CSTD_UNUSED(queue); |
| CSTD_UNUSED(active); |
| } |
| |
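| /** |
| * program_cs_extract_init() - Seed the CS_EXTRACT_INIT field in the queue's |
| * input page with the current CS_EXTRACT value from the output page. |
| * |
| * @queue: Pointer to the GPU queue |
| */ |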
| static void program_cs_extract_init(struct kbase_queue *queue) |
| { |
| u64 *input_addr = (u64 *)queue->user_io_addr; |
| u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); |
| |
| input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] = |
| output_addr[CS_EXTRACT_LO / sizeof(u64)]; |
| } |
| |
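| /** |
| * program_cs_trace_cfg() - Program the cs_trace configuration of a CS |
| * interface, or place it in the disabled condition if tracing is not |
| * enabled for the queue. |
| * |
| * @stream: Pointer to the CS interface to program |
| * @queue:  Pointer to the GPU queue bound to the CS interface |
| */ |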
| static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, |
| struct kbase_queue *queue) |
| { |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| u32 const glb_version = kbdev->csf.global_iface.version; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| /* If cs_trace_command not supported, nothing to program */ |
| if (glb_version < kbase_csf_interface_version(1, 1, 0)) |
| return; |
| |
| /* Program for cs_trace if enabled. In the current arrangement, it is |
| * possible for the context to enable the cs_trace after some queues |
| * have been registered in cs_trace in disabled state. This is tracked by |
| * the queue's trace buffer base address, which had been validated at the |
| * queue's register_ex call. |
| */ |
| if (kbase_csf_scheduler_queue_has_trace(queue)) { |
| u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET( |
| queue->trace_cfg, queue->kctx->as_nr); |
| |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg); |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, |
| queue->trace_buffer_size); |
| |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO, |
| queue->trace_buffer_base & U32_MAX); |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI, |
| queue->trace_buffer_base >> 32); |
| |
| kbase_csf_firmware_cs_input( |
| stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO, |
| queue->trace_offset_ptr & U32_MAX); |
| kbase_csf_firmware_cs_input( |
| stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI, |
| queue->trace_offset_ptr >> 32); |
| } else { |
| /* Place the configuration to the disabled condition */ |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0); |
| kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, 0); |
| } |
| } |
| |
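| /** |
| * program_cs() - Program the base address, size, user I/O pages and other |
| * configuration of a queue on its assigned CS interface, then kick the |
| * kernel doorbell to make the firmware aware of the change. |
| * |
| * @kbdev:             Pointer to the device |
| * @queue:             Pointer to the GPU queue to program |
| * @ring_csg_doorbell: Flag passed to kbase_csf_ring_cs_kernel_doorbell() to |
| *                     indicate whether the CSG doorbell should be rung |
| */ |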
| static void program_cs(struct kbase_device *kbdev, |
| struct kbase_queue *queue, bool ring_csg_doorbell) |
| { |
| struct kbase_queue_group *group = queue->group; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| struct kbase_csf_cmd_stream_info *stream; |
| int csi_index = queue->csi_index; |
| u64 user_input; |
| u64 user_output; |
| |
| if (WARN_ON(!group)) |
| return; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return; |
| |
| ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; |
| |
| if (WARN_ON(csi_index < 0) || |
| WARN_ON(csi_index >= ginfo->stream_num)) |
| return; |
| |
| assign_user_doorbell_to_queue(kbdev, queue); |
| if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) |
| return; |
| |
| WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr); |
| |
| if (queue->enabled && queue_group_suspended_locked(group)) |
| program_cs_extract_init(queue); |
| |
| stream = &ginfo->streams[csi_index]; |
| |
| kbase_csf_firmware_cs_input(stream, CS_BASE_LO, |
| queue->base_addr & 0xFFFFFFFF); |
| kbase_csf_firmware_cs_input(stream, CS_BASE_HI, |
| queue->base_addr >> 32); |
| kbase_csf_firmware_cs_input(stream, CS_SIZE, |
| queue->size); |
| |
| user_input = (queue->reg->start_pfn << PAGE_SHIFT); |
| kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, |
| user_input & 0xFFFFFFFF); |
| kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, |
| user_input >> 32); |
| |
| user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT); |
| kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, |
| user_output & 0xFFFFFFFF); |
| kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, |
| user_output >> 32); |
| |
| kbase_csf_firmware_cs_input(stream, CS_CONFIG, |
| (queue->doorbell_nr << 8) | (queue->priority & 0xF)); |
| |
| /* Program the queue's cs_trace configuration */ |
| program_cs_trace_cfg(stream, queue); |
| |
| /* Enable all interrupts for now */ |
| kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0)); |
| |
| /* |
| * Enable the CSG idle notification once the CS's ringbuffer |
| * becomes empty or the CS becomes sync_idle, waiting on a sync update |
| * or a protected mode switch. |
| */ |
| kbase_csf_firmware_cs_input_mask(stream, CS_REQ, |
| CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK, |
| CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK); |
| |
| /* Set state to START/STOP */ |
| kbase_csf_firmware_cs_input_mask(stream, CS_REQ, |
| queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP, |
| CS_REQ_STATE_MASK); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled); |
| |
| kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, |
| ring_csg_doorbell); |
| update_hw_active(queue, true); |
| } |
| |
| int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) |
| { |
| struct kbase_queue_group *group = queue->group; |
| struct kbase_device *kbdev = queue->kctx->kbdev; |
| bool const cs_enabled = queue->enabled; |
| int err = 0; |
| bool evicted = false; |
| |
| kbase_reset_gpu_assert_prevented(kbdev); |
| lockdep_assert_held(&queue->kctx->csf.lock); |
| |
| if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) |
| return -EINVAL; |
| |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue, |
| group->run_state); |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, queue->group, |
| queue, queue->status_wait); |
| |
| if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) { |
| err = -EIO; |
| evicted = true; |
| } else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) |
| && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { |
| dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked", |
| queue->csi_index, group->handle); |
| } else { |
| err = scheduler_group_schedule(group); |
| |
| if (!err) { |
| queue->enabled = true; |
| if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) { |
| if (cs_enabled) { |
| /* In a normal situation, when a queue is |
| * already running, the queue update |
| * would be a doorbell kick on the user |
| * side. However, if such a kick shortly |
| * follows a start or resume, the queue |
| * may actually be in transition, hence |
| * the said kick would enter the kernel |
| * as the hw_active flag is yet to be |
| * set. The scheduler then needs to give |
| * a kick to the corresponding user |
| * doorbell in such a case. |
| */ |
| kbase_csf_ring_cs_user_doorbell(kbdev, queue); |
| } else |
| program_cs(kbdev, queue, true); |
| } |
| queue_delayed_work(system_long_wq, |
| &kbdev->csf.scheduler.ping_work, |
| msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS)); |
| } |
| } |
| |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| |
| if (evicted) |
| kbase_csf_term_descheduled_queue_group(group); |
| |
| return err; |
| } |
| |
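| /** |
| * update_csg_slot_status() - Update the scheduler's view of a CSG slot's |
| * state from the CSG_ACK state reported by the firmware. |
| * |
| * @kbdev: Pointer to the device |
| * @slot:  Index/number of the CSG slot in question |
| * |
| * A slot in READY2RUN state moves to RUNNING once a START/RESUME request is |
| * acknowledged, and a slot in DOWN2STOP state moves to STOPPED once a |
| * SUSPEND/TERMINATE request is acknowledged. |
| * |
| * Return: the updated state of the CSG slot. |
| */ |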
| static enum kbase_csf_csg_slot_state update_csg_slot_status( |
| struct kbase_device *kbdev, s8 slot) |
| { |
| struct kbase_csf_csg_slot *csg_slot = |
| &kbdev->csf.scheduler.csg_slots[slot]; |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &kbdev->csf.global_iface.groups[slot]; |
| u32 state; |
| enum kbase_csf_csg_slot_state slot_state; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, |
| CSG_ACK)); |
| slot_state = atomic_read(&csg_slot->state); |
| |
| switch (slot_state) { |
| case CSG_SLOT_READY2RUN: |
| if ((state == CSG_ACK_STATE_START) || |
| (state == CSG_ACK_STATE_RESUME)) { |
| slot_state = CSG_SLOT_RUNNING; |
| atomic_set(&csg_slot->state, slot_state); |
| csg_slot->trigger_jiffies = jiffies; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STARTED, csg_slot->resident_group, state); |
| dev_dbg(kbdev->dev, "Group %u running on slot %d\n", |
| csg_slot->resident_group->handle, slot); |
| } |
| break; |
| case CSG_SLOT_DOWN2STOP: |
| if ((state == CSG_ACK_STATE_SUSPEND) || |
| (state == CSG_ACK_STATE_TERMINATE)) { |
| slot_state = CSG_SLOT_STOPPED; |
| atomic_set(&csg_slot->state, slot_state); |
| csg_slot->trigger_jiffies = jiffies; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state); |
| dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n", |
| csg_slot->resident_group->handle, slot); |
| } |
| break; |
| case CSG_SLOT_DOWN2STOP_TIMEDOUT: |
| case CSG_SLOT_READY2RUN_TIMEDOUT: |
| case CSG_SLOT_READY: |
| case CSG_SLOT_RUNNING: |
| case CSG_SLOT_STOPPED: |
| break; |
| default: |
| dev_warn(kbdev->dev, "Unknown CSG slot state %d", slot_state); |
| break; |
| } |
| |
| return slot_state; |
| } |
| |
| static bool csg_slot_running(struct kbase_device *kbdev, s8 slot) |
| { |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| return (update_csg_slot_status(kbdev, slot) == CSG_SLOT_RUNNING); |
| } |
| |
| static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot) |
| { |
| enum kbase_csf_csg_slot_state slot_state; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| slot_state = update_csg_slot_status(kbdev, slot); |
| |
| return (slot_state == CSG_SLOT_STOPPED || |
| slot_state == CSG_SLOT_READY); |
| } |
| |
| static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot) |
| { |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &kbdev->csf.global_iface.groups[slot]; |
| u32 state; |
| |
| state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, |
| CSG_ACK)); |
| |
| if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) { |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state); |
| dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot); |
| return true; |
| } |
| |
| return false; |
| } |
| |
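| /** |
| * halt_csg_slot() - Request the firmware to suspend or terminate the queue |
| * group resident on a CSG slot. |
| * |
| * @group:   Pointer to the queue group to halt |
| * @suspend: true to request a suspend, false to request a terminate |
| * |
| * If the slot is still transitioning to the running state, the function |
| * first waits for that transition to complete before issuing the |
| * SUSPEND/TERMINATE request and ringing the CSG doorbell. |
| */ |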
| static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; |
| struct kbase_csf_csg_slot *csg_slot = |
| kbdev->csf.scheduler.csg_slots; |
| s8 slot; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return; |
| |
| slot = group->csg_nr; |
| |
| /* When in transition, wait for it to complete */ |
| if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { |
| long remaining = |
| kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| |
| dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot); |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| csg_slot_running(kbdev, slot), remaining); |
| if (!remaining) |
| dev_warn(kbdev->dev, |
| "slot %d timed out on up-running\n", slot); |
| } |
| |
| if (csg_slot_running(kbdev, slot)) { |
| unsigned long flags; |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &global_iface->groups[slot]; |
| u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND : |
| CSG_REQ_STATE_TERMINATE; |
| |
| dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d", |
| suspend, group->handle, group->kctx->tgid, group->kctx->id, slot); |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| /* Set state to SUSPEND/TERMINATE */ |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd, |
| CSG_REQ_STATE_MASK); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, |
| flags); |
| atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP); |
| csg_slot[slot].trigger_jiffies = jiffies; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP, group, halt_cmd); |
| |
| kbase_csf_ring_csg_doorbell(kbdev, slot); |
| } |
| } |
| |
| static void term_csg_slot(struct kbase_queue_group *group) |
| { |
| halt_csg_slot(group, false); |
| } |
| |
| static void suspend_csg_slot(struct kbase_queue_group *group) |
| { |
| halt_csg_slot(group, true); |
| } |
| |
| /** |
| * evaluate_sync_update() - Evaluate the sync wait condition the GPU command |
| * queue has been blocked on. |
| * |
| * @queue: Pointer to the GPU command queue |
| * |
| * Return: true if sync wait condition is satisfied. |
| */ |
| static bool evaluate_sync_update(struct kbase_queue *queue) |
| { |
| struct kbase_vmap_struct *mapping; |
| bool updated = false; |
| u32 *sync_ptr; |
| u32 sync_wait_cond; |
| u32 sync_current_val; |
| struct kbase_device *kbdev; |
| |
| if (WARN_ON(!queue)) |
| return false; |
| |
| kbdev = queue->kctx->kbdev; |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, |
| &mapping); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE, queue->group, |
| queue, queue->sync_ptr); |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_BLOCKED_REASON, |
| queue->group, queue, queue->blocked_reason); |
| |
| if (!sync_ptr) { |
| dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed", |
| queue->sync_ptr); |
| goto out; |
| } |
| |
| sync_wait_cond = |
| CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait); |
| |
| WARN_ON((sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && |
| (sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE)); |
| |
| sync_current_val = READ_ONCE(*sync_ptr); |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_CURRENT_VAL, queue->group, |
| queue, sync_current_val); |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_TEST_VAL, queue->group, |
| queue, queue->sync_value); |
| |
| if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && |
| (sync_current_val > queue->sync_value)) || |
| ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) && |
| (sync_current_val <= queue->sync_value))) { |
| /* The sync wait condition is satisfied so the group to which |
| * queue is bound can be re-scheduled. |
| */ |
| updated = true; |
| } else { |
| dev_dbg(queue->kctx->kbdev->dev, |
| "sync memory not updated yet(%u)", sync_current_val); |
| } |
| |
| kbase_phy_alloc_mapping_put(queue->kctx, mapping); |
| out: |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVALUATED, |
| queue->group, queue, updated); |
| return updated; |
| } |
| |
| /** |
| * save_slot_cs() - Save the state for blocked GPU command queue. |
| * |
| * @ginfo: Pointer to the CSG interface used by the group |
| * the queue is bound to. |
| * @queue: Pointer to the GPU command queue. |
| * |
| * This function will check if the GPU command queue is blocked on a sync wait |
| * and evaluate the wait condition. If the wait condition isn't satisfied, it |
| * will save the state needed to reevaluate the condition in the future. |
| * The group to which the queue is bound shall be in idle state. |
| * |
| * Return: true if the queue is blocked on a sync wait operation. |
| */ |
| static |
| bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, |
| struct kbase_queue *queue) |
| { |
| struct kbase_csf_cmd_stream_info *const stream = |
| &ginfo->streams[queue->csi_index]; |
| u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT); |
| bool is_waiting = false; |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT, |
| queue->group, queue, status); |
| |
| if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) { |
| queue->status_wait = status; |
| queue->sync_ptr = kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_WAIT_SYNC_POINTER_LO); |
| queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; |
| queue->sync_value = kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_WAIT_SYNC_VALUE); |
| |
| queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( |
| kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_SCOREBOARDS)); |
| queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET( |
| kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_BLOCKED_REASON)); |
| |
| if (!evaluate_sync_update(queue)) { |
| is_waiting = true; |
| } else { |
| /* Sync object already got updated & met the condition |
| * thus it doesn't need to be reevaluated and so can |
| * clear the 'status_wait' here. |
| */ |
| queue->status_wait = 0; |
| } |
| } else { |
| /* Invalidate wait status info that would have been recorded if |
| * this queue was blocked when the group (in idle state) was |
| * suspended previously. After that the group could have been |
| * unblocked due to the kicking of another queue bound to it & |
| * so the wait status info would have stuck with this queue. |
| */ |
| queue->status_wait = 0; |
| } |
| |
| return is_waiting; |
| } |
| |
| /** |
| * get_schedule_delay() - Calculate how far in the future an event should be |
| * scheduled. |
| * |
| * @last_event: Timestamp of the last event, in jiffies. |
| * @time_now: Timestamp of the new event to handle, in jiffies. |
| * Must be successive to last_event. |
| * @period: Minimum period between two events, in jiffies. |
| * |
| * The objective of this function is to make sure that a minimum period of |
| * time is guaranteed between handling two consecutive events. |
| * |
| * Given the minimum period and the distance between the current time and the |
| * last event, the function returns the difference between the two. However, |
| * if more time than the minimum period has already elapsed since the last |
| * event, the function returns 0 so that the work to handle the event is |
| * scheduled with the lowest latency possible. |
| * |
| * Return: Time to delay work to handle the current event, in jiffies. |
| */ |
| static unsigned long get_schedule_delay(unsigned long last_event, |
| unsigned long time_now, |
| unsigned long period) |
| { |
| const unsigned long t_distance = time_now - last_event; |
| const unsigned long delay_t = (t_distance < period) ? |
| (period - t_distance) : 0; |
| |
| return delay_t; |
| } |
| |
| static void schedule_in_cycle(struct kbase_queue_group *group, bool force) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| /* Only try to schedule work for this event if no tock request is already |
| * pending (otherwise the function would end up canceling the previously |
| * scheduled work), and the scheduler is configured to wake up |
| * periodically (or the scheduling of work needs to be enforced, for |
| * example when entering protected mode). |
| */ |
| if ((likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) && |
| !scheduler->tock_pending_request) { |
| const unsigned long delay = |
| get_schedule_delay(scheduler->last_schedule, jiffies, |
| CSF_SCHEDULER_TIME_TOCK_JIFFIES); |
| scheduler->tock_pending_request = true; |
| dev_dbg(kbdev->dev, "Kicking async for group %d\n", |
| group->handle); |
| mod_delayed_work(scheduler->wq, &scheduler->tock_work, delay); |
| } |
| } |
| |
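| /** |
| * insert_group_to_runnable() - Add a queue group to the per context list of |
| * runnable groups and kick the scheduler if needed. |
| * |
| * @scheduler: Pointer to the scheduler instance |
| * @group:     Pointer to the queue group to insert |
| * @run_state: New run state of the group |
| * |
| * The group must currently be in the inactive state. If this is the first |
| * runnable group of its context, the context is also added to the list of |
| * runnable contexts. |
| */ |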
| static |
| void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, |
| struct kbase_queue_group *const group, |
| enum kbase_csf_group_state run_state) |
| { |
| struct kbase_context *const kctx = group->kctx; |
| struct kbase_device *const kbdev = kctx->kbdev; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); |
| |
| if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) |
| return; |
| |
| group->run_state = run_state; |
| |
| if (run_state == KBASE_CSF_GROUP_RUNNABLE) |
| group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; |
| |
| list_add_tail(&group->link, |
| &kctx->csf.sched.runnable_groups[group->priority]); |
| kctx->csf.sched.num_runnable_grps++; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_INSERT_RUNNABLE, group, |
| kctx->csf.sched.num_runnable_grps); |
| |
| /* Add the kctx if not yet in runnable kctxs */ |
| if (kctx->csf.sched.num_runnable_grps == 1) { |
| /* First runnable csg, adds to the runnable_kctxs */ |
| INIT_LIST_HEAD(&kctx->csf.link); |
| list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs); |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_INSERT_RUNNABLE, kctx, 0u); |
| } |
| |
| scheduler->total_runnable_grps++; |
| |
| if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && |
| (scheduler->total_runnable_grps == 1 || |
| scheduler->state == SCHED_SUSPENDED)) { |
| dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n"); |
| /* Fire a scheduling to start the time-slice */ |
| enqueue_tick_work(kbdev); |
| } else |
| schedule_in_cycle(group, false); |
| |
| /* Since a new group has become runnable, check if GPU needs to be |
| * powered up. |
| */ |
| scheduler_wakeup(kbdev, false); |
| } |
| |
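| /** |
| * remove_group_from_runnable() - Remove a queue group from the per context |
| * list of runnable groups. |
| * |
| * @scheduler: Pointer to the scheduler instance |
| * @group:     Pointer to the queue group to remove |
| * @run_state: New run state of the group |
| * |
| * If this was the last runnable group of its context, the context is also |
| * removed from the list of runnable contexts. If no runnable groups remain |
| * at all, the tick timer is cancelled and the GPU idle work is queued. |
| */ |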
| static |
| void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, |
| struct kbase_queue_group *group, |
| enum kbase_csf_group_state run_state) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_queue_group *new_head_grp; |
| struct list_head *list = |
| &kctx->csf.sched.runnable_groups[group->priority]; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| WARN_ON(!queue_group_scheduled_locked(group)); |
| |
| group->run_state = run_state; |
| list_del_init(&group->link); |
| |
| if (scheduler->top_grp == group) { |
| /* |
| * Note: this disables explicit rotation in the next scheduling |
| * cycle. However, removing the top_grp is the same as an |
| * implicit rotation (e.g. if we instead rotated the top_ctx |
| * and then removed top_grp). |
| * |
| * This implicit rotation is assumed by the scheduler rotate |
| * functions. |
| */ |
| scheduler->top_grp = NULL; |
| |
| /* |
| * Trigger a scheduling tock for a CSG containing protected |
| * content in case there has been any in order to minimise |
| * latency. |
| */ |
| group = scheduler_get_protm_enter_async_group(kctx->kbdev, |
| NULL); |
| if (group) |
| schedule_in_cycle(group, true); |
| } |
| |
| kctx->csf.sched.num_runnable_grps--; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_RUNNABLE, group, |
| kctx->csf.sched.num_runnable_grps); |
| new_head_grp = (!list_empty(list)) ? |
| list_first_entry(list, struct kbase_queue_group, link) : |
| NULL; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_RUNNABLE, new_head_grp, |
| 0u); |
| |
| if (kctx->csf.sched.num_runnable_grps == 0) { |
| struct kbase_context *new_head_kctx; |
| struct list_head *kctx_list = &scheduler->runnable_kctxs; |
| /* drop the kctx */ |
| list_del_init(&kctx->csf.link); |
| if (scheduler->top_ctx == kctx) |
| scheduler->top_ctx = NULL; |
| KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_REMOVE_RUNNABLE, kctx, |
| 0u); |
| new_head_kctx = (!list_empty(kctx_list)) ? |
| list_first_entry(kctx_list, struct kbase_context, csf.link) : |
| NULL; |
| KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_HEAD_RUNNABLE, |
| new_head_kctx, 0u); |
| } |
| |
| WARN_ON(scheduler->total_runnable_grps == 0); |
| scheduler->total_runnable_grps--; |
| if (!scheduler->total_runnable_grps) { |
| dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups"); |
| cancel_tick_timer(kctx->kbdev); |
| WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps)); |
| if (scheduler->state != SCHED_SUSPENDED) |
| queue_work(system_wq, &scheduler->gpu_idle_work); |
| } |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, |
| scheduler->num_active_address_spaces | |
| (((u64)scheduler->total_runnable_grps) << 32)); |
| } |
| |
| static void insert_group_to_idle_wait(struct kbase_queue_group *const group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| |
| lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); |
| |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_IDLE); |
| |
| list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups); |
| kctx->csf.sched.num_idle_wait_grps++; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_INSERT_IDLE_WAIT, group, |
| kctx->csf.sched.num_idle_wait_grps); |
| group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC; |
| dev_dbg(kctx->kbdev->dev, |
| "Group-%d suspended on sync_wait, total wait_groups: %u\n", |
| group->handle, kctx->csf.sched.num_idle_wait_grps); |
| } |
| |
| static void remove_group_from_idle_wait(struct kbase_queue_group *const group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct list_head *list = &kctx->csf.sched.idle_wait_groups; |
| struct kbase_queue_group *new_head_grp; |
| |
| lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); |
| |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); |
| |
| list_del_init(&group->link); |
| WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0); |
| kctx->csf.sched.num_idle_wait_grps--; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_IDLE_WAIT, group, |
| kctx->csf.sched.num_idle_wait_grps); |
| new_head_grp = (!list_empty(list)) ? |
| list_first_entry(list, struct kbase_queue_group, link) : |
| NULL; |
| KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_IDLE_WAIT, |
| new_head_grp, 0u); |
| group->run_state = KBASE_CSF_GROUP_INACTIVE; |
| } |
| |
| static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, |
| struct kbase_queue_group *group) |
| { |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (WARN_ON(!group)) |
| return; |
| |
| remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_IDLE); |
| insert_group_to_idle_wait(group); |
| } |
| |
| static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
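| /* Groups with a prepared sequence number below non_idle_scanout_grps were
| * counted as non-idle off-slot at scan-out, so the counter is decremented
| * when such a group is dropped.
| */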
| if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { |
| int new_val = |
| atomic_dec_return(&scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, |
| group, new_val); |
| } |
| } |
| |
| static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| WARN_ON(group->csg_nr < 0); |
| |
| if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { |
| int new_val = |
| atomic_dec_return(&scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, |
| group, new_val); |
| } |
| } |
| |
| static void update_offslot_non_idle_cnt_on_grp_suspend( |
| struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (scheduler->state == SCHED_BUSY) { |
| /* Active scheduling phase, or asynchronously entering protected mode */
| if (group->prepared_seq_num >= |
| scheduler->non_idle_scanout_grps) { |
| /* At scanout, it was tagged as on-slot idle */ |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { |
| int new_val = atomic_inc_return( |
| &scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, |
| group, new_val); |
| } |
| } else { |
| if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) { |
| int new_val = atomic_dec_return( |
| &scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, |
| group, new_val); |
| } |
| } |
| } else { |
| /* async phases */ |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { |
| int new_val = atomic_inc_return( |
| &scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, |
| group, new_val); |
| } |
| } |
| } |
| |
| static bool confirm_cmd_buf_empty(struct kbase_queue *queue) |
| { |
| bool cs_empty; |
| bool cs_idle; |
| u32 sb_status = 0; |
| |
| struct kbase_device const *const kbdev = queue->group->kctx->kbdev; |
| struct kbase_csf_global_iface const *const iface = |
| &kbdev->csf.global_iface; |
| |
| u32 glb_version = iface->version; |
| |
| u64 *input_addr = (u64 *)queue->user_io_addr; |
| u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); |
| |
| if (glb_version >= kbase_csf_interface_version(1, 0, 0)) { |
| /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */ |
| struct kbase_csf_cmd_stream_group_info const *const ginfo = |
| &kbdev->csf.global_iface.groups[queue->group->csg_nr]; |
| struct kbase_csf_cmd_stream_info const *const stream = |
| &ginfo->streams[queue->csi_index]; |
| |
| sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( |
| kbase_csf_firmware_cs_output(stream, |
| CS_STATUS_SCOREBOARDS)); |
| } |
| |
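| /* The queue is empty when the insert pointer in the input page equals the
| * extract pointer in the output page; it is idle only if, in addition, no
| * scoreboard entries are in use.
| */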
| cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] == |
| output_addr[CS_EXTRACT_LO / sizeof(u64)]); |
| cs_idle = cs_empty && (!sb_status); |
| |
| return cs_idle; |
| } |
| |
| static void save_csg_slot(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| u32 state; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return; |
| |
| ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; |
| |
| state = |
| CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK)); |
| |
| if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) && |
| (state != CSG_ACK_STATE_TERMINATE))) { |
| u32 max_streams = ginfo->stream_num; |
| u32 i; |
| bool sync_wait = false; |
| bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & |
| CSG_STATUS_STATE_IDLE_MASK; |
| for (i = 0; idle && i < max_streams; i++) { |
| struct kbase_queue *const queue = |
| group->bound_queues[i]; |
| |
| if (!queue || !queue->enabled) |
| continue; |
| |
| if (save_slot_cs(ginfo, queue)) |
| sync_wait = true; |
| else { |
| /* Need to confirm whether the ring buffer of
| * the GPU queue is empty. A race can arise
| * between the flush of a GPU queue and the
| * suspension of a CSG. If a queue is flushed
| * after FW has set the IDLE bit in
| * CSG_STATUS_STATE, the Scheduler would
| * incorrectly consider the CSG as idle, and
| * there may not be any further flush call for
| * the GPU queue that would have de-idled the
| * CSG.
| */
| idle = confirm_cmd_buf_empty(queue); |
| } |
| } |
| |
| if (idle) { |
| /* Take the suspended group out of the runnable_groups |
| * list of the context and move it to the |
| * idle_wait_groups list. |
| */ |
| if (sync_wait) |
| deschedule_idle_wait_group(scheduler, group); |
| else { |
| group->run_state = |
| KBASE_CSF_GROUP_SUSPENDED_ON_IDLE; |
| dev_dbg(kbdev->dev, "Group-%d suspended: idle", |
| group->handle); |
| } |
| } else { |
| group->run_state = KBASE_CSF_GROUP_SUSPENDED; |
| } |
| |
| update_offslot_non_idle_cnt_on_grp_suspend(group); |
| } |
| } |
| |
| /* Clean up a CSG slot after it has been vacated, ready for the next CSG run.
| * Return whether there is a kctx address fault associated with the group |
| * for which the clean-up is done. |
| */ |
| static bool cleanup_csg_slot(struct kbase_queue_group *group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| s8 slot; |
| struct kbase_csf_csg_slot *csg_slot; |
| unsigned long flags; |
| u32 i; |
| bool as_fault = false; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return as_fault; |
| |
| slot = group->csg_nr; |
| csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; |
| ginfo = &global_iface->groups[slot]; |
| |
| /* Now loop through all the bound CSs, and clean them via a stop */ |
| for (i = 0; i < ginfo->stream_num; i++) { |
| struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[i]; |
| |
| if (group->bound_queues[i]) { |
| if (group->bound_queues[i]->enabled) { |
| kbase_csf_firmware_cs_input_mask(stream, |
| CS_REQ, CS_REQ_STATE_STOP, |
| CS_REQ_STATE_MASK); |
| } |
| |
| unassign_user_doorbell_from_queue(kbdev, |
| group->bound_queues[i]); |
| } |
| } |
| |
| unassign_user_doorbell_from_group(kbdev, group); |
| |
| /* The CSG does not need cleanup other than dropping its AS */
| spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); |
| as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT); |
| kbase_ctx_sched_release_ctx(kctx); |
| if (unlikely(group->faulted)) |
| as_fault = true; |
| spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); |
| |
| /* Now mark the slot as vacant */
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL; |
| clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, |
| kbdev->csf.scheduler.csg_slots_idle_mask[0]); |
| |
| group->csg_nr = KBASEP_CSG_NR_INVALID; |
| set_bit(slot, kbdev->csf.scheduler.csgs_events_enable_mask); |
| clear_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| csg_slot->trigger_jiffies = jiffies; |
| atomic_set(&csg_slot->state, CSG_SLOT_READY); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, slot); |
| dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n", |
| group->handle, slot); |
| |
| KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev, |
| kbdev->gpu_props.props.raw_props.gpu_id, slot); |
| |
| return as_fault; |
| } |
| |
| static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_csg_slot *csg_slot; |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| s8 slot; |
| u8 prev_prio; |
| u32 ep_cfg; |
| u32 csg_req; |
| unsigned long flags; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) |
| return; |
| |
| slot = group->csg_nr; |
| csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; |
| ginfo = &kbdev->csf.global_iface.groups[slot]; |
| |
| /* CSGs remaining on-slot can be either idle or runnable. |
| * This also applies in protected mode. |
| */ |
| WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) || |
| (group->run_state == KBASE_CSF_GROUP_IDLE))); |
| |
| /* Update consumes a group from scanout */ |
| update_offslot_non_idle_cnt_for_onslot_grp(group); |
| |
| if (csg_slot->priority == prio) |
| return; |
| |
| /* Read the csg_ep_cfg back for updating the priority field */ |
| ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ); |
| prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg); |
| ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); |
| kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); |
| |
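| /* Toggle the EP_CFG bit of CSG_REQ relative to the current CSG_ACK value:
| * firmware treats a request as pending while the REQ and ACK bits for the
| * field differ.
| */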
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); |
| csg_req ^= CSG_REQ_EP_CFG_MASK; |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, |
| CSG_REQ_EP_CFG_MASK); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| csg_slot->priority = prio; |
| |
| dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n", |
| group->handle, group->kctx->tgid, group->kctx->id, slot, |
| prev_prio, prio); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PRIO_UPDATE, group, prev_prio); |
| |
| kbase_csf_ring_csg_doorbell(kbdev, slot); |
| set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update); |
| } |
| |
| static void program_csg_slot(struct kbase_queue_group *group, s8 slot, |
| u8 prio) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; |
| const u64 shader_core_mask = |
| kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); |
| const u64 tiler_core_mask = |
| kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER); |
| const u64 compute_mask = shader_core_mask & group->compute_mask; |
| const u64 fragment_mask = shader_core_mask & group->fragment_mask; |
| const u64 tiler_mask = tiler_core_mask & group->tiler_mask; |
| const u8 num_cores = kbdev->gpu_props.num_cores; |
| const u8 compute_max = min(num_cores, group->compute_max); |
| const u8 fragment_max = min(num_cores, group->fragment_max); |
| const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max); |
| struct kbase_csf_cmd_stream_group_info *ginfo; |
| u32 ep_cfg = 0; |
| u32 csg_req; |
| u32 state; |
| int i; |
| unsigned long flags; |
| const u64 normal_suspend_buf = |
| group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT; |
| struct kbase_csf_csg_slot *csg_slot = |
| &kbdev->csf.scheduler.csg_slots[slot]; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (WARN_ON(slot < 0) ||
| WARN_ON(slot >= global_iface->group_num)) |
| return; |
| |
| WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY); |
| |
| ginfo = &global_iface->groups[slot]; |
| |
| /* Pick an available address space for this context */ |
| mutex_lock(&kbdev->mmu_hw_mutex); |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| kbase_ctx_sched_retain_ctx(kctx); |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| mutex_unlock(&kbdev->mmu_hw_mutex); |
| |
| if (kctx->as_nr == KBASEP_AS_NR_INVALID) { |
| dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", |
| group->handle, kctx->tgid, kctx->id, slot); |
| return; |
| } |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); |
| kbdev->csf.scheduler.csg_slots[slot].resident_group = group; |
| group->csg_nr = slot; |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| assign_user_doorbell_to_group(kbdev, group); |
| |
| /* Now loop through all the bound & kicked CSs, and program them */ |
| for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { |
| struct kbase_queue *queue = group->bound_queues[i]; |
| |
| if (queue) |
| program_cs(kbdev, queue, false); |
| } |
| |
| |
| /* Endpoint programming for CSG */ |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO, |
| compute_mask & U32_MAX); |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI, |
| compute_mask >> 32); |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO, |
| fragment_mask & U32_MAX); |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI, |
| fragment_mask >> 32); |
| kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, |
| tiler_mask & U32_MAX); |
| |
| |
| ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max); |
| ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); |
| ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); |
| ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); |
| kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); |
| |
| /* Program the address space number assigned to the context */ |
| kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr); |
| |
| kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO, |
| normal_suspend_buf & U32_MAX); |
| kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, |
| normal_suspend_buf >> 32); |
| |
| if (group->protected_suspend_buf.reg) { |
| const u64 protm_suspend_buf = |
| group->protected_suspend_buf.reg->start_pfn << |
| PAGE_SHIFT; |
| kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, |
| protm_suspend_buf & U32_MAX); |
| kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, |
| protm_suspend_buf >> 32); |
| } |
| |
| /* Enable all interrupts for now */ |
| kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0)); |
| |
| spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); |
| csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); |
| csg_req ^= CSG_REQ_EP_CFG_MASK; |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, |
| CSG_REQ_EP_CFG_MASK); |
| |
| /* Set state to START/RESUME */ |
| if (queue_group_suspended_locked(group)) { |
| state = CSG_REQ_STATE_RESUME; |
| } else { |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE); |
| state = CSG_REQ_STATE_START; |
| } |
| |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, |
| state, CSG_REQ_STATE_MASK); |
| spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); |
| |
| /* Update status before ringing the doorbell, marking ready => run */
| atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN); |
| csg_slot->trigger_jiffies = jiffies; |
| csg_slot->priority = prio; |
| |
| /* Trace the programming of the CSG on the slot */ |
| KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(kbdev, |
| kbdev->gpu_props.props.raw_props.gpu_id, group->handle, slot); |
| |
| dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n", |
| group->handle, kctx->tgid, kctx->id, slot, prio); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START, group, |
| (((u64)ep_cfg) << 32) | |
| ((((u32)kctx->as_nr) & 0xF) << 16) | |
| (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT))); |
| |
| kbase_csf_ring_csg_doorbell(kbdev, slot); |
| |
| /* Programming a slot consumes a group from scanout */ |
| update_offslot_non_idle_cnt_for_onslot_grp(group); |
| } |
| |
| static void remove_scheduled_group(struct kbase_device *kbdev, |
| struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| WARN_ON(group->prepared_seq_num == |
| KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID); |
| WARN_ON(list_empty(&group->link_to_schedule)); |
| |
| list_del_init(&group->link_to_schedule); |
| scheduler->ngrp_to_schedule--; |
| group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; |
| group->kctx->csf.sched.ngrp_to_schedule--; |
| } |
| |
| static void sched_evict_group(struct kbase_queue_group *group, bool fault, |
| bool update_non_idle_offslot_grps_cnt) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (queue_group_scheduled_locked(group)) { |
| u32 i; |
| |
| if (update_non_idle_offslot_grps_cnt && |
| (group->run_state == KBASE_CSF_GROUP_SUSPENDED || |
| group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { |
| int new_val = atomic_dec_return( |
| &scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, |
| group, new_val); |
| } |
| |
| for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { |
| if (group->bound_queues[i]) |
| group->bound_queues[i]->enabled = false; |
| } |
| |
| if (group->prepared_seq_num != |
| KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) |
| remove_scheduled_group(kbdev, group); |
| |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) |
| remove_group_from_idle_wait(group); |
| else { |
| remove_group_from_runnable(scheduler, group, |
| KBASE_CSF_GROUP_INACTIVE); |
| } |
| |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); |
| |
| if (fault) |
| group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED; |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT_SCHED, group, |
| (((u64)scheduler->total_runnable_grps) << 32) | |
| ((u32)group->run_state)); |
| dev_dbg(kbdev->dev, "group %d exited scheduler, total_runnable_grps %d\n",
| group->handle, scheduler->total_runnable_grps); |
| /* Notify a group has been evicted */ |
| wake_up_all(&kbdev->csf.event_wait); |
| } |
| } |
| |
| static int term_group_sync(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| int err = 0; |
| |
| term_csg_slot(group); |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| csg_slot_stopped_locked(kbdev, group->csg_nr), remaining); |
| |
| if (!remaining) { |
| dev_warn(kbdev->dev, "term request timed out for group %d of context %d_%d on slot %d", |
| group->handle, group->kctx->tgid, |
| group->kctx->id, group->csg_nr); |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| err = -ETIMEDOUT; |
| } |
| |
| return err; |
| } |
| |
| void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) |
| { |
| struct kbase_device *kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| long remaining = |
| kbase_csf_timeout_in_jiffies(CSG_SCHED_STOP_TIMEOUT_MS); |
| bool force = false; |
| |
| kbase_reset_gpu_assert_failed_or_prevented(kbdev); |
| lockdep_assert_held(&group->kctx->csf.lock); |
| mutex_lock(&scheduler->lock); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state); |
| while (queue_group_scheduled_locked(group)) { |
| u32 saved_state = scheduler->state; |
| |
| if (!kbasep_csf_scheduler_group_is_on_slot_locked(group)) { |
| sched_evict_group(group, false, true); |
| } else if (saved_state == SCHED_INACTIVE || force) { |
| bool as_faulty; |
| |
| term_group_sync(group); |
| /* Treat the CSG as terminated */
| as_faulty = cleanup_csg_slot(group); |
| /* remove from the scheduler list */ |
| sched_evict_group(group, as_faulty, false); |
| } |
| |
| /* Wait for the scheduler state to change */
| if (queue_group_scheduled_locked(group)) { |
| mutex_unlock(&scheduler->lock); |
| remaining = wait_event_timeout( |
| kbdev->csf.event_wait, |
| saved_state != scheduler->state, |
| remaining); |
| if (!remaining) { |
| dev_warn(kbdev->dev, "Scheduler state change wait timed out for group %d on slot %d", |
| group->handle, group->csg_nr); |
| force = true; |
| } |
| mutex_lock(&scheduler->lock); |
| } |
| } |
| |
| mutex_unlock(&scheduler->lock); |
| } |
| |
| /** |
| * scheduler_group_schedule() - Schedule a GPU command queue group on firmware |
| * |
| * @group: Pointer to the queue group to be scheduled. |
| * |
| * This function would enable the scheduling of a GPU command queue group on
| * firmware. |
| * |
| * Return: 0 on success, or negative on failure. |
| */ |
| static int scheduler_group_schedule(struct kbase_queue_group *group) |
| { |
| struct kbase_context *kctx = group->kctx; |
| struct kbase_device *kbdev = kctx->kbdev; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&kctx->csf.lock); |
| lockdep_assert_held(&scheduler->lock); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state); |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) |
| update_idle_suspended_group_state(group); |
| else if (queue_group_idle_locked(group)) { |
| WARN_ON(kctx->csf.sched.num_runnable_grps == 0); |
| WARN_ON(kbdev->csf.scheduler.total_runnable_grps == 0); |
| |
| if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) |
| update_idle_suspended_group_state(group); |
| else { |
| struct kbase_queue_group *protm_grp; |
| unsigned long flags; |
| |
| WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked( |
| group)); |
| |
| group->run_state = KBASE_CSF_GROUP_RUNNABLE; |
| |
| /* A normal mode CSG could be idle onslot during |
| * protected mode. In this case clear the |
| * appropriate bit in csg_slots_idle_mask. |
| */ |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| protm_grp = scheduler->active_protm_grp; |
| if (protm_grp && protm_grp != group) { |
| clear_bit((unsigned int)group->csg_nr, |
| scheduler->csg_slots_idle_mask); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, |
| scheduler->csg_slots_idle_mask[0]); |
| } |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, |
| flags); |
| |
| /* If GPU is in protected mode then any doorbells rang |
| * would have no effect. Check if GPU is in protected |
| * mode and if this group has higher priority than the |
| * active protected mode group. If so prompt the FW |
| * to exit protected mode. |
| */ |
| if (protm_grp && |
| group->scan_seq_num < protm_grp->scan_seq_num) { |
| /* Prompt the FW to exit protected mode */ |
| scheduler_force_protm_exit(kbdev); |
| } |
| } |
| } else if (!queue_group_scheduled_locked(group)) { |
| int new_val; |
| insert_group_to_runnable(&kbdev->csf.scheduler, group, |
| KBASE_CSF_GROUP_RUNNABLE); |
| /* A new group into the scheduler */ |
| new_val = atomic_inc_return( |
| &kbdev->csf.scheduler.non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, |
| group, new_val); |
| } |
| |
| /* Since a group has become active now, check if GPU needs to be |
| * powered up. Also rekick the Scheduler. |
| */ |
| scheduler_wakeup(kbdev, true); |
| |
| return 0; |
| } |
| |
| /** |
| * set_max_csg_slots() - Set the number of available CSG slots |
| * |
| * @kbdev: Pointer of the GPU device. |
| * |
| * This function would set/limit the number of CSG slots that |
| * can be used in the given tick/tock. It would be less than the total CSG |
| * slots supported by firmware if the number of GPU address space slots |
| * required to utilize all the CSG slots is more than the available |
| * address space slots. |
| */ |
| static inline void set_max_csg_slots(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| unsigned int total_csg_slots = kbdev->csf.global_iface.group_num; |
| unsigned int max_address_space_slots = |
| kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; |
| |
| WARN_ON(scheduler->num_active_address_spaces > total_csg_slots); |
| |
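| /* If more address space slots would be needed than are available, then
| * num_csg_slots_for_tick is not raised here and keeps the smaller value
| * accumulated by count_active_address_space() during scan-out.
| */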
| if (likely(scheduler->num_active_address_spaces <= |
| max_address_space_slots)) |
| scheduler->num_csg_slots_for_tick = total_csg_slots; |
| } |
| |
| /** |
| * count_active_address_space() - Count the number of GPU address space slots |
| * |
| * @kbdev: Pointer of the GPU device. |
| * @kctx: Pointer of the Kbase context. |
| * |
| * This function would update the counter that is tracking the number of GPU |
| * address space slots that would be required to program the CS |
| * group slots from the groups at the head of groups_to_schedule list. |
| */ |
| static inline void count_active_address_space(struct kbase_device *kbdev, |
| struct kbase_context *kctx) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| unsigned int total_csg_slots = kbdev->csf.global_iface.group_num; |
| unsigned int max_address_space_slots = |
| kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; |
| |
| if (scheduler->ngrp_to_schedule <= total_csg_slots) { |
| if (kctx->csf.sched.ngrp_to_schedule == 1) |
| scheduler->num_active_address_spaces++; |
| |
| if (scheduler->num_active_address_spaces <= |
| max_address_space_slots) |
| scheduler->num_csg_slots_for_tick++; |
| } |
| } |
| |
| /* Two schemes are used in assigning the priority to CSG slots for a given |
| * CSG from the 'groups_to_schedule' list. |
| * This is needed as an idle on-slot group is deprioritized by moving it to
| * the tail of the 'groups_to_schedule' list. As a result it can either get
| * evicted from its CSG slot in the current tick/tock, or it can end up
| * behind the lower priority non-idle groups in the 'groups_to_schedule'
| * list. The latter case can result in the on-slot subset containing both
| * non-idle and idle CSGs, and is handled through the 2nd scheme described
| * below.
| * |
| * First scheme :- If all the slots are going to be occupied by the non-idle or |
| * idle groups, then a simple assignment of the priority is done as per the |
| * position of a group in the 'groups_to_schedule' list. So maximum priority |
| * gets assigned to the slot of a group which is at the head of the list. |
| * Here the 'groups_to_schedule' list would effectively be ordered as per the |
| * static priority of groups. |
| * |
| * Second scheme :- If the slots are going to be occupied by a mix of idle and |
| * non-idle groups then the priority assignment needs to ensure that the |
| * priority of a slot belonging to a higher priority idle group will always be |
| * greater than the priority of a slot belonging to a lower priority non-idle |
| * group, reflecting the original position of a group in the scan order (i.e |
| * static priority) 'scan_seq_num', which is set during the prepare phase of a |
| * tick/tock before the group is moved to 'idle_groups_to_schedule' list if it |
| * is idle. |
| * The priority range [MAX_CSG_SLOT_PRIORITY, 0] is partitioned so that the
| * first 'slots_for_tick' groups in the original scan order are assigned a
| * priority in the subrange
| * [MAX_CSG_SLOT_PRIORITY, MAX_CSG_SLOT_PRIORITY - slots_for_tick), whereas
| * the rest of the groups are assigned a priority in the subrange
| * [MAX_CSG_SLOT_PRIORITY - slots_for_tick, 0]. This way, even if an idle
| * higher priority group ends up after the non-idle lower priority groups in
| * the 'groups_to_schedule' list, it will get a higher slot priority, which
| * enables the FW to quickly start executing the higher priority group when
| * it gets de-idled.
| */ |
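| /* A purely illustrative example of the second scheme, with assumed values
| * (only MAX_CSG_SLOT_PRIORITY = 15 comes from this file): if
| * slots_for_tick = 6, a group with scan_seq_num = 2 gets slot priority
| * 15 - 2 = 13, whereas a group with scan_seq_num = 8 that is programmed
| * when used_slots = 3 gets 15 - (6 + 3) = 6, so the group that was higher
| * in the scan order keeps the higher slot priority.
| */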
| static u8 get_slot_priority(struct kbase_queue_group *group) |
| { |
| struct kbase_csf_scheduler *scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| u8 slot_prio; |
| u32 slots_for_tick = scheduler->num_csg_slots_for_tick; |
| u32 used_slots = slots_for_tick - scheduler->remaining_tick_slots; |
| /* Check if all the slots are going to be occupied by the non-idle or |
| * idle groups. |
| */ |
| if (scheduler->non_idle_scanout_grps >= slots_for_tick || |
| !scheduler->non_idle_scanout_grps) { |
| slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - used_slots); |
| } else { |
| /* There will be a mix of idle and non-idle groups. */ |
| if (group->scan_seq_num < slots_for_tick) |
| slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - |
| group->scan_seq_num); |
| else if (MAX_CSG_SLOT_PRIORITY > (slots_for_tick + used_slots)) |
| slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - (slots_for_tick + used_slots)); |
| else |
| slot_prio = 0; |
| } |
| return slot_prio; |
| } |
| |
| /** |
| * update_resident_groups_priority() - Update the priority of resident groups |
| * |
| * @kbdev: The GPU device. |
| * |
| * This function will update the priority of all resident queue groups |
| * that are at the head of groups_to_schedule list, preceding the first |
| * non-resident group. |
| * |
| * This function will also adjust kbase_csf_scheduler.remaining_tick_slots on |
| * the priority update. |
| */ |
| static void update_resident_groups_priority(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| u32 num_groups = scheduler->num_csg_slots_for_tick; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| while (!list_empty(&scheduler->groups_to_schedule)) { |
| struct kbase_queue_group *group = |
| list_first_entry(&scheduler->groups_to_schedule, |
| struct kbase_queue_group, |
| link_to_schedule); |
| bool resident = |
| kbasep_csf_scheduler_group_is_on_slot_locked(group); |
| |
| if ((group->prepared_seq_num >= num_groups) || !resident) |
| break; |
| |
| update_csg_slot_priority(group, |
| get_slot_priority(group)); |
| |
| /* Drop the head group from the list */ |
| remove_scheduled_group(kbdev, group); |
| scheduler->remaining_tick_slots--; |
| } |
| } |
| |
| /** |
| * program_group_on_vacant_csg_slot() - Program a non-resident group on the |
| * given vacant CSG slot. |
| * @kbdev: Pointer to the GPU device. |
| * @slot: Vacant CSG slot number. |
| * |
| * This function will program a non-resident group at the head of |
| * kbase_csf_scheduler.groups_to_schedule list on the given vacant |
| * CSG slot, provided the initial position of the non-resident |
| * group in the list is less than the number of CSG slots and there is |
| * an available GPU address space slot. |
| * kbase_csf_scheduler.remaining_tick_slots would also be adjusted after |
| * programming the slot. |
| */ |
| static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, |
| s8 slot) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| struct kbase_queue_group *const group = |
| list_empty(&scheduler->groups_to_schedule) ? NULL : |
| list_first_entry(&scheduler->groups_to_schedule, |
| struct kbase_queue_group, |
| link_to_schedule); |
| u32 num_groups = scheduler->num_csg_slots_for_tick; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| if (group && (group->prepared_seq_num < num_groups)) { |
| bool ret = kbasep_csf_scheduler_group_is_on_slot_locked(group); |
| |
| if (!WARN_ON(ret)) { |
| if (kctx_as_enabled(group->kctx) && !group->faulted) { |
| program_csg_slot(group, slot, |
| get_slot_priority(group)); |
| |
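| /* Programming can fail if no GPU address space slot
| * was available; in that case the group is left at
| * the head of the list to be re-attempted later.
| */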
| if (likely(csg_slot_in_use(kbdev, slot))) { |
| /* Drop the head group from the list */ |
| remove_scheduled_group(kbdev, group); |
| scheduler->remaining_tick_slots--; |
| } |
| } else { |
| update_offslot_non_idle_cnt_for_faulty_grp( |
| group); |
| remove_scheduled_group(kbdev, group); |
| } |
| } |
| } |
| } |
| |
| /** |
| * program_vacant_csg_slot() - Program the vacant CSG slot with a non-resident |
| * group and update the priority of resident groups. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * @slot: Vacant CSG slot number. |
| * |
| * This function will first update the priority of all resident queue groups |
| * that are at the head of groups_to_schedule list, preceding the first |
| * non-resident group, it will then try to program the given CS |
| * group slot with the non-resident group. Finally update the priority of all |
| * resident queue groups following the non-resident group. |
| * |
| * kbase_csf_scheduler.remaining_tick_slots would also be adjusted. |
| */ |
| static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| struct kbase_csf_csg_slot *const csg_slot = |
| scheduler->csg_slots; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| WARN_ON(atomic_read(&csg_slot[slot].state) != CSG_SLOT_READY); |
| |
| /* First update priority for already resident groups (if any) |
| * before the non-resident group |
| */ |
| update_resident_groups_priority(kbdev); |
| |
| /* Now consume the vacant slot for the non-resident group */ |
| program_group_on_vacant_csg_slot(kbdev, slot); |
| |
| /* Now update priority for already resident groups (if any) |
| * following the non-resident group |
| */ |
| update_resident_groups_priority(kbdev); |
| } |
| |
| static bool slots_state_changed(struct kbase_device *kbdev, |
| unsigned long *slots_mask, |
| bool (*state_check_func)(struct kbase_device *, s8)) |
| { |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| DECLARE_BITMAP(changed_slots, MAX_SUPPORTED_CSGS) = {0}; |
| bool changed = false; |
| u32 i; |
| |
| for_each_set_bit(i, slots_mask, num_groups) { |
| if (state_check_func(kbdev, (s8)i)) { |
| set_bit(i, changed_slots); |
| changed = true; |
| } |
| } |
| |
| if (changed) |
| bitmap_copy(slots_mask, changed_slots, MAX_SUPPORTED_CSGS); |
| |
| return changed; |
| } |
| |
| /** |
| * program_suspending_csg_slots() - Program the CSG slots vacated on suspension |
| * of queue groups running on them. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * |
| * This function will first wait for the ongoing suspension to complete on a |
| * CSG slot and will then program the vacant slot with the |
| * non-resident queue group inside the groups_to_schedule list. |
| * The programming of the non-resident queue group on the vacant slot could |
| * fail due to the unavailability of a free GPU address space slot, and so the
| * programming is re-attempted after the ongoing suspension has completed |
| * for all the CSG slots. |
| * The priority of resident groups before and after the non-resident group |
| * in the groups_to_schedule list would also be updated. |
| * This would be repeated for all the slots undergoing suspension. |
| * GPU reset would be initiated if the wait for suspend times out. |
| */ |
| static void program_suspending_csg_slots(struct kbase_device *kbdev) |
| { |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS); |
| DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0}; |
| bool suspend_wait_failed = false; |
| long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| /* In the current implementation, csgs_events_enable_mask would be used |
| * only to indicate suspending CSGs. |
| */ |
| bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask, |
| MAX_SUPPORTED_CSGS); |
| |
| while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { |
| DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); |
| |
| bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| slots_state_changed(kbdev, changed, |
| csg_slot_stopped_raw), |
| remaining); |
| |
| if (remaining) { |
| u32 i; |
| |
| for_each_set_bit(i, changed, num_groups) { |
| struct kbase_queue_group *group = |
| scheduler->csg_slots[i].resident_group; |
| |
| if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) { |
| continue; |
| } |
| /* The on slot csg is now stopped */ |
| clear_bit(i, slot_mask); |
| |
| if (likely(group)) { |
| bool as_fault; |
| /* Only do save/cleanup if the |
| * group is not terminated during |
| * the sleep. |
| */ |
| save_csg_slot(group); |
| as_fault = cleanup_csg_slot(group); |
| /* If AS fault detected, evict it */ |
| if (as_fault) { |
| sched_evict_group(group, true, true); |
| set_bit(i, evicted_mask); |
| } |
| } |
| |
| program_vacant_csg_slot(kbdev, (s8)i); |
| } |
| } else { |
| u32 i; |
| |
| /* Groups that have failed to suspend in time shall |
| * raise a fatal error as they could no longer be |
| * safely resumed. |
| */ |
| for_each_set_bit(i, slot_mask, num_groups) { |
| struct kbase_queue_group *const group = |
| scheduler->csg_slots[i].resident_group; |
| |
| struct base_gpu_queue_group_error const |
| err_payload = { .error_type = |
| BASE_GPU_QUEUE_GROUP_ERROR_FATAL, |
| .payload = { |
| .fatal_group = { |
| .status = |
| GPU_EXCEPTION_TYPE_SW_FAULT_2, |
| } } }; |
| |
| if (unlikely(group == NULL)) |
| continue; |
| |
| kbase_csf_add_group_fatal_error(group, |
| &err_payload); |
| kbase_event_wakeup(group->kctx); |
| |
| /* TODO GPUCORE-25328: The CSG can't be |
| * terminated, the GPU will be reset as a |
| * work-around. |
| */ |
| dev_warn( |
| kbdev->dev, |
| "Group %d of context %d_%d on slot %u failed to suspend", |
| group->handle, group->kctx->tgid, |
| group->kctx->id, i); |
| |
| /* The group has failed suspension, stop |
| * further examination. |
| */ |
| clear_bit(i, slot_mask); |
| set_bit(i, scheduler->csgs_events_enable_mask); |
| update_offslot_non_idle_cnt_for_onslot_grp( |
| group); |
| } |
| |
| suspend_wait_failed = true; |
| } |
| } |
| |
| if (!bitmap_empty(evicted_mask, MAX_SUPPORTED_CSGS)) |
| dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n", |
| num_groups, evicted_mask); |
| |
| if (likely(!suspend_wait_failed)) { |
| u32 i; |
| |
| while (scheduler->ngrp_to_schedule && |
| scheduler->remaining_tick_slots) { |
| i = find_first_zero_bit(scheduler->csg_inuse_bitmap, |
| num_groups); |
| if (WARN_ON(i == num_groups)) |
| break; |
| program_vacant_csg_slot(kbdev, (s8)i); |
| if (!csg_slot_in_use(kbdev, (int)i)) { |
| dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i); |
| break; |
| } |
| } |
| } else { |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| } |
| } |
| |
| static void suspend_queue_group(struct kbase_queue_group *group) |
| { |
| unsigned long flags; |
| struct kbase_csf_scheduler *const scheduler = |
| &group->kctx->kbdev->csf.scheduler; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| /* This shall be used in program_suspending_csg_slots() where we |
| * assume that whilst CSGs are being suspended, this bitmask is not |
| * used by anything else i.e., it indicates only the CSGs going |
| * through suspension. |
| */ |
| clear_bit(group->csg_nr, scheduler->csgs_events_enable_mask); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| |
| /* If AS fault detected, terminate the group */ |
| if (!kctx_as_enabled(group->kctx) || group->faulted) |
| term_csg_slot(group); |
| else |
| suspend_csg_slot(group); |
| } |
| |
| static void wait_csg_slots_start(struct kbase_device *kbdev) |
| { |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; |
| u32 i; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| /* extract start slot flags for check */ |
| for (i = 0; i < num_groups; i++) { |
| if (atomic_read(&scheduler->csg_slots[i].state) == |
| CSG_SLOT_READY2RUN) |
| set_bit(i, slot_mask); |
| } |
| |
| while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { |
| DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); |
| |
| bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| slots_state_changed(kbdev, changed, csg_slot_running), |
| remaining); |
| |
| if (remaining) { |
| for_each_set_bit(i, changed, num_groups) { |
| struct kbase_queue_group *group = |
| scheduler->csg_slots[i].resident_group; |
| |
| /* The on slot csg is now running */ |
| clear_bit(i, slot_mask); |
| group->run_state = KBASE_CSF_GROUP_RUNNABLE; |
| } |
| } else { |
| dev_warn(kbdev->dev, "Timed out waiting for CSG slots to start, slots: 0x%*pb\n", |
| num_groups, slot_mask); |
| |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| break; |
| } |
| } |
| } |
| |
| /** |
| * group_on_slot_is_idle() - Check if the given slot has a CSG-idle state |
| * flagged after the completion of a CSG status |
| * update command |
| * |
| * This function is called at the start of scheduling tick to check the |
| * idle status of a queue group resident on a CSG slot. |
| * The caller must make sure the corresponding status update command has |
| * been called and completed before checking this status. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * @slot: The given slot for checking an occupying resident group's idle |
| * state. |
| * |
| * Return: true if the group resident on slot is idle, otherwise false. |
| */ |
| static bool group_on_slot_is_idle(struct kbase_device *kbdev, |
| unsigned long slot) |
| { |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &kbdev->csf.global_iface.groups[slot]; |
| bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & |
| CSG_STATUS_STATE_IDLE_MASK; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| return idle; |
| } |
| |
| /** |
| * slots_update_state_changed() - Check the handshake state of a subset of |
| * command group slots. |
| * |
| * Checks the state of a subset of slots selected through the slots_mask |
| * bit_map. Records which slots' handshake completed and sends them back in the
| * slots_done bit_map. |
| * |
| * @kbdev: The GPU device. |
| * @field_mask: The field mask for checking the state in the csg_req/ack. |
| * @slots_mask: A bit_map specifying the slots to check. |
| * @slots_done: A cleared bit_map for returning the slots that |
| * have finished update. |
| * |
| * Return: true if the slots_done is set for at least one slot. |
| * Otherwise false. |
| */ |
| static |
| bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask, |
| const unsigned long *slots_mask, unsigned long *slots_done) |
| { |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| bool changed = false; |
| u32 i; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| for_each_set_bit(i, slots_mask, num_groups) { |
| struct kbase_csf_cmd_stream_group_info const *const ginfo = |
| &kbdev->csf.global_iface.groups[i]; |
| u32 state = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); |
| |
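| /* A field's handshake is complete when its REQ and ACK bits match, i.e.
| * their XOR is zero for that field.
| */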
| state ^= kbase_csf_firmware_csg_output(ginfo, CSG_ACK); |
| |
| if (!(state & field_mask)) { |
| set_bit(i, slots_done); |
| changed = true; |
| } |
| } |
| |
| return changed; |
| } |
| |
| /** |
| * wait_csg_slots_handshake_ack - Wait for the req/ack handshakes to complete on
| * the specified groups. |
| * |
| * This function waits for the acknowledgement of the requests that have
| * already been placed for the CSG slots by the caller. Currently used for |
| * the CSG priority update and status update requests. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * @field_mask: The field mask for checking the state in the csg_req/ack. |
| * @slot_mask: Bitmap reflecting the slots, the function will modify |
| * the acknowledged slots by clearing their corresponding |
| * bits. |
| * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out. |
| * |
| * Return: 0 if all specified slots are acknowledged; otherwise -ETIMEDOUT.
| * On timeout, the bits of the unacknowledged slots remain set in the
| * slot_mask.
| */ |
| static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev, |
| u32 field_mask, unsigned long *slot_mask, long wait_in_jiffies) |
| { |
| const u32 num_groups = kbdev->csf.global_iface.group_num; |
| long remaining = wait_in_jiffies; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| while (!bitmap_empty(slot_mask, num_groups) && |
| !kbase_reset_gpu_is_active(kbdev)) { |
| DECLARE_BITMAP(dones, MAX_SUPPORTED_CSGS) = { 0 }; |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| slots_update_state_changed(kbdev, field_mask, |
| slot_mask, dones), |
| remaining); |
| |
| if (remaining) |
| bitmap_andnot(slot_mask, slot_mask, dones, num_groups); |
| else |
| /* Timed-out on the wait */ |
| return -ETIMEDOUT; |
| } |
| |
| return 0; |
| } |
| |
| static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev) |
| { |
| unsigned long *slot_mask = |
| kbdev->csf.scheduler.csg_slots_prio_update; |
| long wait_time = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK, |
| slot_mask, wait_time); |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (ret != 0) { |
| /* The update timeout is not regarded as a serious |
| * issue, no major consequences are expected as a |
| * result, so just warn about it.
| */ |
| dev_warn( |
| kbdev->dev, |
| "Timeout on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx", |
| slot_mask[0]); |
| } |
| } |
| |
| void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, |
| struct kbase_context *kctx, struct list_head *evicted_groups) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| struct kbase_queue_group *group; |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| u32 slot; |
| DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; |
| |
| lockdep_assert_held(&kctx->csf.lock); |
| mutex_lock(&scheduler->lock); |
| |
| /* This code is only called during reset, so we don't wait for the CSG |
| * slots to be stopped |
| */ |
| WARN_ON(!kbase_reset_gpu_is_active(kbdev)); |
| |
| KBASE_KTRACE_ADD(kbdev, EVICT_CTX_SLOTS, kctx, 0u); |
| for (slot = 0; slot < num_groups; slot++) { |
| group = kbdev->csf.scheduler.csg_slots[slot].resident_group; |
| if (group && group->kctx == kctx) { |
| bool as_fault; |
| |
| term_csg_slot(group); |
| as_fault = cleanup_csg_slot(group); |
| /* remove the group from the scheduler list */ |
| sched_evict_group(group, as_fault, false); |
| /* return the evicted group to the caller */ |
| list_add_tail(&group->link, evicted_groups); |
| set_bit(slot, slot_mask); |
| } |
| } |
| |
| dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n", |
| kctx->tgid, kctx->id, num_groups, slot_mask); |
| |
| mutex_unlock(&scheduler->lock); |
| } |
| |
| /** |
| * scheduler_slot_protm_ack - Acknowledging the protected region requests |
| * from the resident group on a given slot. |
| * |
| * The function assumes that the given slot is in a stable running state and
| * that the caller has already determined that any pending protected region
| * requests of the resident group should be acknowledged.
| * |
| * @kbdev: Pointer to the GPU device. |
| * @group: Pointer to the resident group on the given slot. |
| * @slot: The slot that the given group is actively operating on. |
| * |
| * Return: true if the group has pending protm request(s) and is acknowledged. |
| * The caller should arrange to enter the protected mode for servicing |
| * it. Otherwise return false, indicating the group has no pending protm |
| * request. |
| */ |
| static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, |
| struct kbase_queue_group *const group, |
| const int slot) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| bool protm_ack = false; |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &kbdev->csf.global_iface.groups[slot]; |
| u32 max_csi; |
| int i; |
| |
| if (WARN_ON(scheduler->csg_slots[slot].resident_group != group)) |
| return protm_ack; |
| |
| lockdep_assert_held(&scheduler->lock); |
| lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.interrupt_lock); |
| |
| max_csi = ginfo->stream_num; |
| for (i = find_first_bit(group->protm_pending_bitmap, max_csi); |
| i < max_csi; |
| i = find_next_bit(group->protm_pending_bitmap, max_csi, i + 1)) { |
| struct kbase_queue *queue = group->bound_queues[i]; |
| |
| clear_bit(i, group->protm_pending_bitmap); |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_CLEAR, group, |
| queue, group->protm_pending_bitmap[0]); |
| |
| if (!WARN_ON(!queue) && queue->enabled) { |
| struct kbase_csf_cmd_stream_info *stream = |
| &ginfo->streams[i]; |
| u32 cs_protm_ack = kbase_csf_firmware_cs_output( |
| stream, CS_ACK) & |
| CS_ACK_PROTM_PEND_MASK; |
| u32 cs_protm_req = kbase_csf_firmware_cs_input_read( |
| stream, CS_REQ) & |
| CS_REQ_PROTM_PEND_MASK; |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_ACK, group, |
| queue, cs_protm_ack ^ cs_protm_req); |
| |
| if (cs_protm_ack == cs_protm_req) { |
| dev_dbg(kbdev->dev, |
| "PROTM-ack already done for queue-%d group-%d slot-%d", |
| queue->csi_index, group->handle, slot); |
| continue; |
| } |
| |
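| /* Acknowledge by making CS_REQ match CS_ACK in the
| * PROTM_PEND field.
| */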
| kbase_csf_firmware_cs_input_mask(stream, CS_REQ, |
| cs_protm_ack, |
| CS_ACK_PROTM_PEND_MASK); |
| protm_ack = true; |
| dev_dbg(kbdev->dev, |
| "PROTM-ack for queue-%d, group-%d slot-%d", |
| queue->csi_index, group->handle, slot); |
| } |
| } |
| |
| return protm_ack; |
| } |
| |
| /** |
| * scheduler_group_check_protm_enter - Request the given group to be evaluated |
| * for triggering the protected mode. |
| * |
| * The function assumes the given group is either an active running group or |
| * the scheduler internally maintained field scheduler->top_grp. |
| * |
| * If the GPU is not already running in protected mode and the input group |
| * has protected region requests from its bound queues, the requests are |
| * acknowledged and the GPU is instructed to enter the protected mode. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * @input_grp: Pointer to the GPU queue group. |
| */ |
| static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, |
| struct kbase_queue_group *const input_grp) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| bool protm_in_use; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| |
| protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_CHECK_PROTM_ENTER, input_grp, |
| protm_in_use); |
| |
| /* Firmware samples the PROTM_PEND ACK bit for CSs when |
| * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit |
| * is set for a CS after Host has sent the PROTM_ENTER |
| * Global request, then there is no guarantee that firmware will |
| * notice that prior to switching to protected mode. And firmware |
| * may not again raise the PROTM_PEND interrupt for that CS |
| * later on. To avoid that uncertainty PROTM_PEND ACK bit |
| * is not set for a CS if the request to enter protected |
| * mode has already been sent. It will be set later (after the exit |
| * from protected mode has taken place) when the group to which |
| * CS is bound becomes the top group. |
| * |
| * The actual decision to enter protected mode hinges on the input group
| * being the top priority group or, in case the previous top group was
| * evicted from the scheduler during the tick, its would-be replacement,
| * and on the group currently being in a stable state (i.e. the slot state
| * is running).
| */ |
| if (!protm_in_use && !WARN_ON(!input_grp)) { |
| const int slot = |
| kbase_csf_scheduler_group_get_slot_locked(input_grp); |
| |
| /* check the input_grp is running and requesting protected mode |
| */ |
| if (slot >= 0 && |
| atomic_read(&scheduler->csg_slots[slot].state) == |
| CSG_SLOT_RUNNING) { |
| if (kctx_as_enabled(input_grp->kctx) && |
| scheduler_slot_protm_ack(kbdev, input_grp, slot)) { |
| /* Option of acknowledging to multiple |
| * CSGs from the same kctx is dropped, |
| * after consulting with the |
| * architecture team. See the comment in |
| * GPUCORE-21394. |
| */ |
| |
| /* Disable the idle timer */ |
| disable_gpu_idle_fw_timer_locked(kbdev); |
| |
| /* Switch to protected mode */ |
| scheduler->active_protm_grp = input_grp; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM, |
| input_grp, 0u); |
| |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| kbase_csf_enter_protected_mode(kbdev); |
| return; |
| } |
| } |
| } |
| |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| } |
| |
| static void scheduler_apply(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| const u32 total_csg_slots = kbdev->csf.global_iface.group_num; |
| const u32 available_csg_slots = scheduler->num_csg_slots_for_tick; |
| u32 suspend_cnt = 0; |
| u32 remain_cnt = 0; |
| u32 resident_cnt = 0; |
| struct kbase_queue_group *group; |
| u32 i; |
| u32 spare; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| /* Suspend those resident groups not in the run list */ |
| for (i = 0; i < total_csg_slots; i++) { |
| group = scheduler->csg_slots[i].resident_group; |
| if (group) { |
| resident_cnt++; |
| if (group->prepared_seq_num >= available_csg_slots) { |
| suspend_queue_group(group); |
| suspend_cnt++; |
| } else |
| remain_cnt++; |
| } |
| } |
| |
| /* Initialize the remaining available CSG slots for the tick/tock */
| scheduler->remaining_tick_slots = available_csg_slots; |
| |
| /* If there are spare slots, apply heads in the list */ |
| spare = (available_csg_slots > resident_cnt) ? |
| (available_csg_slots - resident_cnt) : 0; |
| while (!list_empty(&scheduler->groups_to_schedule)) { |
| group = list_first_entry(&scheduler->groups_to_schedule, |
| struct kbase_queue_group, |
| link_to_schedule); |
| |
| if (kbasep_csf_scheduler_group_is_on_slot_locked(group) && |
| group->prepared_seq_num < available_csg_slots) { |
| /* One of the resident remainders */ |
| update_csg_slot_priority(group, |
| get_slot_priority(group)); |
| } else if (spare != 0) { |
| s8 slot = (s8)find_first_zero_bit( |
| kbdev->csf.scheduler.csg_inuse_bitmap, |
| total_csg_slots); |
| |
| if (WARN_ON(slot >= (s8)total_csg_slots)) |
| break; |
| |
| if (!kctx_as_enabled(group->kctx) || group->faulted) { |
| /* Drop the head group and continue */ |
| update_offslot_non_idle_cnt_for_faulty_grp( |
| group); |
| remove_scheduled_group(kbdev, group); |
| continue; |
| } |
| program_csg_slot(group, slot, |
| get_slot_priority(group)); |
| if (unlikely(!csg_slot_in_use(kbdev, slot))) |
| break; |
| |
| spare--; |
| } else |
| break; |
| |
| /* Drop the head csg from the list */ |
| remove_scheduled_group(kbdev, group); |
| if (!WARN_ON(!scheduler->remaining_tick_slots)) |
| scheduler->remaining_tick_slots--; |
| } |
| |
| /* Dealing with groups currently going through suspend */ |
| program_suspending_csg_slots(kbdev); |
| } |
| |
| static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, |
| struct kbase_context *kctx, int priority) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| struct kbase_queue_group *group; |
| |
| lockdep_assert_held(&scheduler->lock); |
| if (WARN_ON(priority < 0) || |
| WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) |
| return; |
| |
| if (!kctx_as_enabled(kctx)) |
| return; |
| |
| list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], |
| link) { |
| if (WARN_ON(!list_empty(&group->link_to_schedule))) |
| /* This would be a bug */ |
| list_del_init(&group->link_to_schedule); |
| |
| if (unlikely(group->faulted)) |
| continue; |
| |
| /* Set the scanout sequence number, starting from 0 */ |
| group->scan_seq_num = scheduler->csg_scan_count_for_tick++; |
| |
| if (queue_group_idle_locked(group)) { |
| list_add_tail(&group->link_to_schedule, |
| &scheduler->idle_groups_to_schedule); |
| continue; |
| } |
| |
| if (!scheduler->ngrp_to_schedule) { |
| /* keep the top csg's origin */ |
| scheduler->top_ctx = kctx; |
| scheduler->top_grp = group; |
| } |
| |
| list_add_tail(&group->link_to_schedule, |
| &scheduler->groups_to_schedule); |
| group->prepared_seq_num = scheduler->ngrp_to_schedule++; |
| |
| kctx->csf.sched.ngrp_to_schedule++; |
| count_active_address_space(kbdev, kctx); |
| } |
| } |
| |
| /** |
| * scheduler_rotate_groups() - Rotate the runnable queue groups to provide |
| * fairness of scheduling within a single |
| * kbase_context. |
| * |
| * Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned |
| * the highest slot priority) is guaranteed to get the resources that it |
| * needs we only rotate the kbase_context corresponding to it - |
| * kbase_csf_scheduler's top_ctx. |
| * |
| * The priority level chosen for rotation is the one containing the previous |
| * scheduling cycle's kbase_csf_scheduler's top_grp. |
| * |
| * In a 'fresh-slice-cycle' this always corresponds to the highest group |
| * priority in use by kbase_csf_scheduler's top_ctx. That is, it's the priority |
| * level of the previous scheduling cycle's first runnable kbase_context. |
| * |
| * We choose this priority level because when higher priority work is |
| * scheduled, we should always cause the scheduler to run and do a scan. The |
| * scan always enumerates the highest priority work first (whether that be |
| * based on process priority or group priority), and thus |
| * kbase_csf_scheduler's top_grp will point to the first of those high priority |
| * groups, which necessarily must be the highest priority group in |
| * kbase_csf_scheduler's top_ctx. The fresh-slice-cycle will run later and pick |
| * up that group appropriately. |
| * |
| * If kbase_csf_scheduler's top_grp was instead evicted (and thus is NULL), |
| * then no explicit rotation occurs on the next fresh-slice-cycle schedule, |
| * which will nevertheless set up kbase_csf_scheduler's top_ctx again for |
| * the next scheduling cycle. Implicitly, a rotation has already occurred by |
| * removing the kbase_csf_scheduler's top_grp. |
| * |
| * If kbase_csf_scheduler's top_grp became idle and all other groups belonging |
| * to kbase_csf_scheduler's top_grp's priority level in kbase_csf_scheduler's |
| * top_ctx are also idle, then the effect of this will be to rotate idle |
| * groups, which might not actually become resident in the next |
| * scheduling slice. However this is acceptable since a queue group becoming |
| * idle is implicitly a rotation (as above with evicted queue groups), as it |
| * automatically allows a new queue group to take the maximum slot priority |
| * whilst the idle kbase_csf_scheduler's top_grp ends up near the back of |
| * the kbase_csf_scheduler's groups_to_schedule list. In this example, it will |
| * be for a group in the next lowest priority level or in absence of those the |
| * next kbase_context's queue groups. |
| * |
| * @kbdev: Pointer to the GPU device. |
| */ |
| static void scheduler_rotate_groups(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| struct kbase_context *const top_ctx = scheduler->top_ctx; |
| struct kbase_queue_group *const top_grp = scheduler->top_grp; |
| |
| lockdep_assert_held(&scheduler->lock); |
| if (top_ctx && top_grp) { |
| struct list_head *list = |
| &top_ctx->csf.sched.runnable_groups[top_grp->priority]; |
| |
| WARN_ON(top_grp->kctx != top_ctx); |
| if (!WARN_ON(list_empty(list))) { |
| struct kbase_queue_group *new_head_grp; |
| list_move_tail(&top_grp->link, list); |
| new_head_grp = (!list_empty(list)) ? |
| list_first_entry(list, struct kbase_queue_group, link) : |
| NULL; |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_ROTATE_RUNNABLE, |
| top_grp, top_ctx->csf.sched.num_runnable_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_HEAD_RUNNABLE, |
| new_head_grp, 0u); |
| dev_dbg(kbdev->dev, |
| "groups rotated for a context, num_runnable_groups: %u\n", |
| scheduler->top_ctx->csf.sched.num_runnable_grps); |
| } |
| } |
| } |
| |
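| /** |
| * scheduler_rotate_ctxs() - Rotate the runnable kbase contexts to provide |
| *                           fairness of scheduling across contexts. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * |
| * The context recorded as kbase_csf_scheduler's top_ctx in the previous |
| * scheduling cycle is moved to the tail of the runnable contexts list, so |
| * that other contexts get the chance to supply the top group in later |
| * cycles. |
| */ |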
| static void scheduler_rotate_ctxs(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| struct list_head *list = &scheduler->runnable_kctxs; |
| |
| lockdep_assert_held(&scheduler->lock); |
| if (scheduler->top_ctx) { |
| if (!WARN_ON(list_empty(list))) { |
| struct kbase_context *pos; |
| bool found = false; |
| |
| /* Locate the ctx on the list */ |
| list_for_each_entry(pos, list, csf.link) { |
| if (scheduler->top_ctx == pos) { |
| found = true; |
| break; |
| } |
| } |
| |
| if (!WARN_ON(!found)) { |
| struct kbase_context *new_head_kctx; |
| list_move_tail(&pos->csf.link, list); |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos, |
| 0u); |
| new_head_kctx = (!list_empty(list)) ? |
| list_first_entry(list, struct kbase_context, csf.link) : |
| NULL; |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_HEAD_RUNNABLE, |
| new_head_kctx, 0u); |
| dev_dbg(kbdev->dev, "contexts rotated\n"); |
| } |
| } |
| } |
| } |
| |
| /** |
| * scheduler_update_idle_slots_status() - Get the status update for the CSG |
| * slots for which the IDLE notification was received |
| * previously. |
| * |
| * This function sends a CSG status update request for all the CSG slots |
| * present in the bitmap scheduler->csg_slots_idle_mask and waits for the |
| * request to complete. |
| * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by |
| * this function. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * @csg_bitmap: Bitmap of the CSG slots for which |
| * the status update request completed successfully. |
| * @failed_csg_bitmap: Bitmap of the CSG slots for which |
| * the status update request timed out. |
| */ |
| static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, |
| unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| const u32 num_groups = kbdev->csf.global_iface.group_num; |
| struct kbase_csf_global_iface *const global_iface = |
| &kbdev->csf.global_iface; |
| unsigned long flags, i; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { |
| struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; |
| struct kbase_queue_group *group = csg_slot->resident_group; |
| struct kbase_csf_cmd_stream_group_info *const ginfo = |
| &global_iface->groups[i]; |
| u32 csg_req; |
| |
| clear_bit(i, scheduler->csg_slots_idle_mask); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, |
| scheduler->csg_slots_idle_mask[0]); |
| if (WARN_ON(!group)) |
| continue; |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STATUS_UPDATE, group, |
| i); |
| |
| csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); |
| csg_req ^= CSG_REQ_STATUS_UPDATE_MASK; |
| kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, |
| CSG_REQ_STATUS_UPDATE_MASK); |
| |
| set_bit(i, csg_bitmap); |
| } |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| |
| /* The groups are aggregated into a single kernel doorbell request */ |
| if (!bitmap_empty(csg_bitmap, num_groups)) { |
| long wt = |
| kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); |
| u32 db_slots = (u32)csg_bitmap[0]; |
| |
| kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots); |
| |
| if (wait_csg_slots_handshake_ack(kbdev, |
| CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) { |
| dev_warn( |
| kbdev->dev, |
| "Timeout on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx", |
| csg_bitmap[0]); |
| |
| /* Store the bitmap of timed out slots */ |
| bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups); |
| csg_bitmap[0] = ~csg_bitmap[0] & db_slots; |
| } else { |
| KBASE_KTRACE_ADD(kbdev, SLOTS_STATUS_UPDATE_ACK, NULL, |
| db_slots); |
| csg_bitmap[0] = db_slots; |
| } |
| } |
| } |
| |
| /** |
| * scheduler_handle_idle_slots() - Update the idle status of queue groups |
| * resident on CSG slots for which the |
| * IDLE notification was received previously. |
| * |
| * This function is called at the start of scheduling tick/tock to reconfirm |
| * the idle status of queue groups resident on CSG slots for |
| * which idle notification was received previously, i.e. all the CSG slots |
| * present in the bitmap scheduler->csg_slots_idle_mask. |
| * The confirmation is done by sending the CSG status update request to the |
| * firmware. On completion, the firmware will mark the idleness at the |
| * slot's interface CSG_STATUS_STATE register accordingly. |
| * |
| * The run state of the groups resident on still idle CSG slots is changed to |
| * KBASE_CSF_GROUP_IDLE and the bitmap scheduler->csg_slots_idle_mask is |
| * updated accordingly. |
| * The bits corresponding to slots for which the status update request timed out |
| * remain set in scheduler->csg_slots_idle_mask. |
| * |
| * @kbdev: Pointer to the GPU device. |
| */ |
| static void scheduler_handle_idle_slots(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| unsigned long flags, i; |
| DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 }; |
| DECLARE_BITMAP(failed_csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 }; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| scheduler_update_idle_slots_status(kbdev, csg_bitmap, |
| failed_csg_bitmap); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| for_each_set_bit(i, csg_bitmap, num_groups) { |
| struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; |
| struct kbase_queue_group *group = csg_slot->resident_group; |
| |
| if (WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_RUNNING)) |
| continue; |
| if (WARN_ON(!group)) |
| continue; |
| if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE && |
| group->run_state != KBASE_CSF_GROUP_IDLE)) |
| continue; |
| if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) |
| continue; |
| |
| if (group_on_slot_is_idle(kbdev, i)) { |
| group->run_state = KBASE_CSF_GROUP_IDLE; |
| set_bit(i, scheduler->csg_slots_idle_mask); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, |
| group, scheduler->csg_slots_idle_mask[0]); |
| } else |
| group->run_state = KBASE_CSF_GROUP_RUNNABLE; |
| } |
| |
| bitmap_or(scheduler->csg_slots_idle_mask, |
| scheduler->csg_slots_idle_mask, |
| failed_csg_bitmap, num_groups); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, NULL, |
| scheduler->csg_slots_idle_mask[0]); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| } |
| |
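| /** |
| * scheduler_scan_idle_groups() - Append the idle but runnable groups, found |
| *                                during the scan of contexts, to the tail of |
| *                                the scan-out list for this tick/tock. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * |
| * Idle groups receive prepared sequence numbers after all the non-idle |
| * groups, so they only get CSG slots if spare capacity remains. If no |
| * non-idle group was found then the first idle group establishes the |
| * scheduler's top_ctx/top_grp. |
| */ |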
| static void scheduler_scan_idle_groups(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| struct kbase_queue_group *group, *n; |
| |
| list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule, |
| link_to_schedule) { |
| |
| WARN_ON(!queue_group_idle_locked(group)); |
| |
| if (!scheduler->ngrp_to_schedule) { |
| /* keep the top csg's origin */ |
| scheduler->top_ctx = group->kctx; |
| scheduler->top_grp = group; |
| } |
| |
| group->prepared_seq_num = scheduler->ngrp_to_schedule++; |
| list_move_tail(&group->link_to_schedule, |
| &scheduler->groups_to_schedule); |
| |
| group->kctx->csf.sched.ngrp_to_schedule++; |
| count_active_address_space(kbdev, group->kctx); |
| } |
| } |
| |
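| /** |
| * scheduler_rotate() - Perform the per-tick rotation of the runnable groups |
| *                      and the runnable contexts. |
| * |
| * @kbdev: Pointer to the GPU device. |
| */ |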
| static void scheduler_rotate(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| /* Dealing with rotation */ |
| scheduler_rotate_groups(kbdev); |
| scheduler_rotate_ctxs(kbdev); |
| } |
| |
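| /** |
| * get_tock_top_group() - Find the highest priority non-idle runnable group |
| *                        without performing any rotation. |
| * |
| * @scheduler: Pointer to the CSF scheduler. |
| * |
| * Return: pointer to the highest priority non-idle runnable group, or NULL |
| *         if all the runnable groups are idle. |
| */ |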
| static struct kbase_queue_group *get_tock_top_group( |
| struct kbase_csf_scheduler *const scheduler) |
| { |
| struct kbase_context *kctx; |
| int i; |
| |
| lockdep_assert_held(&scheduler->lock); |
| for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) { |
| list_for_each_entry(kctx, |
| &scheduler->runnable_kctxs, csf.link) { |
| struct kbase_queue_group *group; |
| |
| list_for_each_entry(group, |
| &kctx->csf.sched.runnable_groups[i], |
| link) { |
| if (queue_group_idle_locked(group)) |
| continue; |
| |
| return group; |
| } |
| } |
| } |
| |
| return NULL; |
| } |
| |
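| /** |
| * suspend_active_groups_on_powerdown() - Suspend all on-slot groups before |
| *                                        a scheduler suspend or power down. |
| * |
| * @kbdev:      Pointer to the GPU device. |
| * @is_suspend: True if called on the system suspend path, false if called |
| *              from the GPU idle power down path. |
| * |
| * If the suspension of the on-slot groups times out then a GPU reset is |
| * triggered and, for the system suspend case, the reset is waited for. |
| * |
| * Return: 0 if all groups were suspended (and, for the power down path, no |
| *         non-idle off-slot groups appeared meanwhile), otherwise -1. |
| */ |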
| static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, |
| bool is_suspend) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; |
| |
| int ret = suspend_active_queue_groups(kbdev, slot_mask); |
| |
| if (ret) { |
| /* The suspend of CSGs failed, trigger the GPU reset and wait |
| * for it to complete to be in a deterministic state. |
| */ |
| dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n", |
| kbdev->csf.global_iface.group_num, slot_mask); |
| |
| if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) |
| kbase_reset_gpu(kbdev); |
| |
| if (is_suspend) { |
| mutex_unlock(&scheduler->lock); |
| kbase_reset_gpu_wait(kbdev); |
| mutex_lock(&scheduler->lock); |
| } |
| return -1; |
| } |
| |
| /* Check if the groups became active whilst the suspend was ongoing, |
| * but only for the case where the system suspend is not in progress |
| */ |
| if (!is_suspend && atomic_read(&scheduler->non_idle_offslot_grps)) |
| return -1; |
| |
| return 0; |
| } |
| |
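| /** |
| * scheduler_idle_suspendable() - Check whether the scheduler can be |
| *                                suspended because everything is idle. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * |
| * Return: true if both the on-slot and off-slot groups are idle (or there |
| *         are no runnable groups at all) and power management allows the |
| *         suspend, otherwise false. False is also returned if the scheduler |
| *         is already suspended. |
| */ |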
| static bool scheduler_idle_suspendable(struct kbase_device *kbdev) |
| { |
| bool suspend; |
| unsigned long flags; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (scheduler->state == SCHED_SUSPENDED) |
| return false; |
| |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| if (scheduler->total_runnable_grps) { |
| spin_lock(&scheduler->interrupt_lock); |
| |
| /* Check both on-slots and off-slots groups idle status */ |
| suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) && |
| !atomic_read(&scheduler->non_idle_offslot_grps) && |
| kbase_pm_idle_groups_sched_suspendable(kbdev); |
| |
| spin_unlock(&scheduler->interrupt_lock); |
| } else |
| suspend = kbase_pm_no_runnables_sched_suspendable(kbdev); |
| |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| |
| return suspend; |
| } |
| |
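| /** |
| * gpu_idle_worker() - Worker handling the GPU idle notification. |
| * |
| * @work: Pointer to the scheduler's gpu_idle_work work item. |
| * |
| * If the scheduler is found to be idle-suspendable and no reset is active, |
| * the on-slot groups are suspended and the scheduler transitions to the |
| * suspended state, otherwise the next scheduling tick is brought forward. |
| */ |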
| static void gpu_idle_worker(struct work_struct *work) |
| { |
| struct kbase_device *kbdev = container_of( |
| work, struct kbase_device, csf.scheduler.gpu_idle_work); |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| bool reset_active = false; |
| bool scheduler_is_idle_suspendable = false; |
| bool all_groups_suspended = false; |
| |
| KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_BEGIN, NULL, 0u); |
| |
| #define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \ |
| (((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8)) |
| |
| if (kbase_reset_gpu_try_prevent(kbdev)) { |
| dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n"); |
| KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL, |
| __ENCODE_KTRACE_INFO(true, false, false)); |
| return; |
| } |
| mutex_lock(&scheduler->lock); |
| |
| /* Cycle completed, disable the firmware idle timer */ |
| disable_gpu_idle_fw_timer(kbdev); |
| scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev); |
| reset_active = kbase_reset_gpu_is_active(kbdev); |
| if (scheduler_is_idle_suspendable && !reset_active) { |
| all_groups_suspended = |
| !suspend_active_groups_on_powerdown(kbdev, false); |
| |
| if (all_groups_suspended) { |
| dev_dbg(kbdev->dev, "Scheduler becomes idle suspended now"); |
| scheduler_suspend(kbdev); |
| cancel_tick_timer(kbdev); |
| } else { |
| dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)", |
| atomic_read(&scheduler->non_idle_offslot_grps)); |
| /* Bring forward the next tick */ |
| kbase_csf_scheduler_advance_tick(kbdev); |
| } |
| } |
| |
| mutex_unlock(&scheduler->lock); |
| kbase_reset_gpu_allow(kbdev); |
| KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL, |
| __ENCODE_KTRACE_INFO(reset_active, scheduler_is_idle_suspendable, all_groups_suspended)); |
| #undef __ENCODE_KTRACE_INFO |
| } |
| |
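| /** |
| * scheduler_prepare() - Scan out the runnable groups of all contexts and |
| *                       build the scan-out list for the tick/tock. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * |
| * The previous scan-out list is emptied and the per-tick scheduler fields |
| * are reset before all runnable contexts are scanned in priority order. The |
| * count of non-idle off-slot groups is re-initialized from the scan result. |
| * |
| * Return: always 0. |
| */ |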
| static int scheduler_prepare(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| int i; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| /* Empty the groups_to_schedule */ |
| while (!list_empty(&scheduler->groups_to_schedule)) { |
| struct kbase_queue_group *grp = |
| list_first_entry(&scheduler->groups_to_schedule, |
| struct kbase_queue_group, |
| link_to_schedule); |
| |
| remove_scheduled_group(kbdev, grp); |
| } |
| |
| /* Pre-scan init scheduler fields */ |
| if (WARN_ON(scheduler->ngrp_to_schedule != 0)) |
| scheduler->ngrp_to_schedule = 0; |
| scheduler->top_ctx = NULL; |
| scheduler->top_grp = NULL; |
| scheduler->csg_scan_count_for_tick = 0; |
| WARN_ON(!list_empty(&scheduler->idle_groups_to_schedule)); |
| scheduler->num_active_address_spaces = 0; |
| scheduler->num_csg_slots_for_tick = 0; |
| bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS); |
| |
| /* Scan out to run groups */ |
| for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) { |
| struct kbase_context *kctx; |
| |
| list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link) |
| scheduler_ctx_scan_groups(kbdev, kctx, i); |
| } |
| |
| /* Update this tick's non-idle groups */ |
| scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule; |
| |
| /* Initial number of non-idle off-slot groups, before the scheduler's |
| * scheduler_apply() operation. This gives a sensible starting-point view |
| * of the tick. It will be subject to ups and downs during the scheduler's |
| * active phase. |
| */ |
| atomic_set(&scheduler->non_idle_offslot_grps, |
| scheduler->non_idle_scanout_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, NULL, |
| scheduler->non_idle_scanout_grps); |
| |
| /* Adds those idle but runnable groups to the scanout list */ |
| scheduler_scan_idle_groups(kbdev); |
| |
| /* After adding the idle CSGs, the two counts should be the same */ |
| WARN_ON(scheduler->csg_scan_count_for_tick != scheduler->ngrp_to_schedule); |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, |
| scheduler->num_active_address_spaces | |
| (((u64)scheduler->ngrp_to_schedule) << 32)); |
| set_max_csg_slots(kbdev); |
| dev_dbg(kbdev->dev, "prepared groups length: %u, num_active_address_spaces: %u\n", |
| scheduler->ngrp_to_schedule, scheduler->num_active_address_spaces); |
| return 0; |
| } |
| |
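| /** |
| * scheduler_handle_idle_timer_onoff() - Enable or disable the GPU idle timer |
| *                                       in firmware after the apply phase. |
| * |
| * @kbdev: Pointer to the GPU device. |
| */ |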
| static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| /* After the scheduler apply operation, the internal variable |
| * scheduler->non_idle_offslot_grps reflects the end-point view |
| * of the count at the end of the active phase. |
| * |
| * Any changes that follow (after the scheduler has dropped the |
| * scheduler->lock) reflect async operations on the scheduler, |
| * such as a group getting killed (evicted), a new group being |
| * inserted, or a CQS wait-sync triggered state transition. |
| * |
| * The condition for enabling the idle timer is that there are no |
| * non-idle groups off-slot. If there is a non-idle group off-slot, |
| * the timer should be disabled. |
| */ |
| if (atomic_read(&scheduler->non_idle_offslot_grps)) |
| disable_gpu_idle_fw_timer(kbdev); |
| else |
| enable_gpu_idle_fw_timer(kbdev); |
| } |
| |
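| /** |
| * schedule_actions() - Perform the scheduling actions for a tick/tock. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * |
| * The idle status of on-slot groups is re-confirmed (unless the GPU is in |
| * protected mode), the scan-out list is prepared and the prepared groups |
| * are then applied to the CSG slots. Protected mode entry or exit is |
| * arranged as required by the new top group. |
| */ |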
| static void schedule_actions(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| struct kbase_queue_group *protm_grp; |
| int ret; |
| bool skip_idle_slots_update; |
| bool new_protm_top_grp = false; |
| |
| kbase_reset_gpu_assert_prevented(kbdev); |
| lockdep_assert_held(&scheduler->lock); |
| |
| ret = kbase_pm_wait_for_desired_state(kbdev); |
| if (ret) { |
| dev_err(kbdev->dev, "Wait for MCU power on failed"); |
| return; |
| } |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| |
| /* Skip updating on-slot idle CSGs if GPU is in protected mode. */ |
| if (!skip_idle_slots_update) |
| scheduler_handle_idle_slots(kbdev); |
| |
| scheduler_prepare(kbdev); |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| protm_grp = scheduler->active_protm_grp; |
| |
| /* Avoid the update if the top-group remains unchanged and in protected |
| * mode. In that case, any update to the slots would effectively be |
| * competing against the active protected mode group (typically the |
| * top-group). If we update other slots, even whilst leaving the |
| * top-group slot untouched, the firmware would exit protected mode |
| * to interact with the host-driver. After that, as the top-group |
| * would again raise the request for entering protected mode, we would |
| * be actively switching over twice without progressing the |
| * queue jobs. |
| */ |
| if (protm_grp && scheduler->top_grp == protm_grp) { |
| int new_val; |
| dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", |
| protm_grp->handle); |
| new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, |
| protm_grp, new_val); |
| } else if (scheduler->top_grp) { |
| if (protm_grp) |
| dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d", |
| protm_grp->handle); |
| |
| if (!bitmap_empty(scheduler->top_grp->protm_pending_bitmap, |
| kbdev->csf.global_iface.groups[0].stream_num)) { |
| dev_dbg(kbdev->dev, "Scheduler prepare protm exec: group-%d of context %d_%d", |
| scheduler->top_grp->handle, |
| scheduler->top_grp->kctx->tgid, |
| scheduler->top_grp->kctx->id); |
| |
| /* When entering protected mode all CSG slots can be occupied |
| * but only the protected mode CSG will be running. Any event |
| * that would trigger the execution of an on-slot idle CSG will |
| * need to be handled by the host during protected mode. |
| */ |
| new_protm_top_grp = true; |
| } |
| |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| |
| scheduler_apply(kbdev); |
| |
| /* Post-apply, all the committed groups in this tick are on |
| * slots, time to arrange the idle timer on/off decision. |
| */ |
| scheduler_handle_idle_timer_onoff(kbdev); |
| |
| /* Scheduler is dropping the exec of the previous protm_grp. |
| * Until the protm quit completes, the GPU is effectively |
| * locked in secure mode. |
| */ |
| if (protm_grp) |
| scheduler_force_protm_exit(kbdev); |
| |
| wait_csg_slots_start(kbdev); |
| wait_csg_slots_finish_prio_update(kbdev); |
| |
| if (new_protm_top_grp) { |
| scheduler_group_check_protm_enter(kbdev, |
| scheduler->top_grp); |
| } |
| |
| return; |
| } |
| |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| return; |
| } |
| |
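| /** |
| * schedule_on_tock() - Worker servicing an asynchronous "tock" schedule |
| *                      request. |
| * |
| * @work: Pointer to the scheduler's tock_work work item. |
| */ |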
| static void schedule_on_tock(struct work_struct *work) |
| { |
| struct kbase_device *kbdev = container_of(work, struct kbase_device, |
| csf.scheduler.tock_work.work); |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| int err = kbase_reset_gpu_try_prevent(kbdev); |
| /* Regardless of whether reset failed or is currently happening, exit |
| * early |
| */ |
| if (err) |
| return; |
| |
| mutex_lock(&scheduler->lock); |
| if (scheduler->state == SCHED_SUSPENDED) |
| goto exit_no_schedule_unlock; |
| |
| WARN_ON(!(scheduler->state == SCHED_INACTIVE)); |
| scheduler->state = SCHED_BUSY; |
| |
| /* Undertaking schedule action steps */ |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK, NULL, 0u); |
| schedule_actions(kbdev); |
| |
| /* Record time information */ |
| scheduler->last_schedule = jiffies; |
| |
| /* Tock is serviced */ |
| scheduler->tock_pending_request = false; |
| |
| scheduler->state = SCHED_INACTIVE; |
| mutex_unlock(&scheduler->lock); |
| kbase_reset_gpu_allow(kbdev); |
| |
| dev_dbg(kbdev->dev, |
| "Waking up for event after schedule-on-tock completes."); |
| wake_up_all(&kbdev->csf.event_wait); |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u); |
| return; |
| |
| exit_no_schedule_unlock: |
| mutex_unlock(&scheduler->lock); |
| kbase_reset_gpu_allow(kbdev); |
| } |
| |
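| /** |
| * schedule_on_tick() - Worker servicing the regular scheduling "tick". |
| * |
| * @work: Pointer to the scheduler's tick_work work item. |
| */ |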
| static void schedule_on_tick(struct work_struct *work) |
| { |
| struct kbase_device *kbdev = container_of(work, struct kbase_device, |
| csf.scheduler.tick_work); |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| int err = kbase_reset_gpu_try_prevent(kbdev); |
| /* Regardless of whether reset failed or is currently happening, exit |
| * early |
| */ |
| if (err) |
| return; |
| |
| mutex_lock(&scheduler->lock); |
| |
| WARN_ON(scheduler->tick_timer_active); |
| if (scheduler->state == SCHED_SUSPENDED) |
| goto exit_no_schedule_unlock; |
| |
| scheduler->state = SCHED_BUSY; |
| /* Do scheduling stuff */ |
| scheduler_rotate(kbdev); |
| |
| /* Undertaking schedule action steps */ |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK, NULL, |
| scheduler->total_runnable_grps); |
| schedule_actions(kbdev); |
| |
| /* Record time information */ |
| scheduler->last_schedule = jiffies; |
| |
| /* Kicking next scheduling if needed */ |
| if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && |
| (scheduler->total_runnable_grps > 0)) { |
| start_tick_timer(kbdev); |
| dev_dbg(kbdev->dev, |
| "scheduling for next tick, num_runnable_groups:%u\n", |
| scheduler->total_runnable_grps); |
| } |
| |
| scheduler->state = SCHED_INACTIVE; |
| mutex_unlock(&scheduler->lock); |
| kbase_reset_gpu_allow(kbdev); |
| |
| dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes."); |
| wake_up_all(&kbdev->csf.event_wait); |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL, |
| scheduler->total_runnable_grps); |
| return; |
| |
| exit_no_schedule_unlock: |
| mutex_unlock(&scheduler->lock); |
| kbase_reset_gpu_allow(kbdev); |
| } |
| |
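| /** |
| * wait_csg_slots_suspend() - Wait for the suspension of the given CSG slots |
| *                            to complete. |
| * |
| * @kbdev:      Pointer to the GPU device. |
| * @slot_mask:  Bitmap of the CSG slots for which suspension was requested. |
| * @timeout_ms: Timeout, in milliseconds, to wait for the suspension. |
| * |
| * For every slot that stops within the timeout the resident group's suspend |
| * state is saved and the CSG slot is cleaned up, evicting the group if the |
| * slot cleanup requires it. |
| * |
| * Return: 0 on success, otherwise -ETIMEDOUT. |
| */ |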
| static int wait_csg_slots_suspend(struct kbase_device *kbdev, |
| const unsigned long *slot_mask, |
| unsigned int timeout_ms) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| long remaining = kbase_csf_timeout_in_jiffies(timeout_ms); |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| int err = 0; |
| DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS); |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS); |
| |
| while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) |
| && remaining) { |
| DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); |
| |
| bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS); |
| |
| remaining = wait_event_timeout(kbdev->csf.event_wait, |
| slots_state_changed(kbdev, changed, |
| csg_slot_stopped_locked), |
| remaining); |
| |
| if (remaining) { |
| u32 i; |
| |
| for_each_set_bit(i, changed, num_groups) { |
| struct kbase_queue_group *group; |
| |
| if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) |
| continue; |
| |
| /* The on slot csg is now stopped */ |
| clear_bit(i, slot_mask_local); |
| |
| group = scheduler->csg_slots[i].resident_group; |
| if (likely(group)) { |
| /* Only do save/cleanup if the |
| * group is not terminated during |
| * the sleep. |
| */ |
| save_csg_slot(group); |
| if (cleanup_csg_slot(group)) |
| sched_evict_group(group, true, true); |
| } |
| } |
| } else { |
| dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend, slot_mask: 0x%*pb\n", |
| num_groups, slot_mask_local); |
| err = -ETIMEDOUT; |
| } |
| } |
| |
| return err; |
| } |
| |
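| /** |
| * suspend_active_queue_groups() - Request suspension of all on-slot queue |
| *                                 groups and wait for it to complete. |
| * |
| * @kbdev:     Pointer to the GPU device. |
| * @slot_mask: Bitmap updated with the slots for which suspension was |
| *             requested. |
| * |
| * Return: 0 on success, otherwise the error from wait_csg_slots_suspend(). |
| */ |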
| static int suspend_active_queue_groups(struct kbase_device *kbdev, |
| unsigned long *slot_mask) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| u32 slot_num; |
| int ret; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| for (slot_num = 0; slot_num < num_groups; slot_num++) { |
| struct kbase_queue_group *group = |
| scheduler->csg_slots[slot_num].resident_group; |
| |
| if (group) { |
| suspend_queue_group(group); |
| set_bit(slot_num, slot_mask); |
| } |
| } |
| |
| ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms); |
| return ret; |
| } |
| |
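| /** |
| * suspend_active_queue_groups_on_reset() - Suspend the on-slot queue groups |
| *                                          and flush the GPU cache before a |
| *                                          GPU reset. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * |
| * Return: 0 on success, otherwise an error code from the suspension or the |
| *         cache clean operation. |
| */ |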
| static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; |
| int ret; |
| int ret2; |
| |
| mutex_lock(&scheduler->lock); |
| |
| ret = suspend_active_queue_groups(kbdev, slot_mask); |
| |
| if (ret) { |
| dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n", |
| kbdev->csf.global_iface.group_num, slot_mask); |
| } |
| |
| /* Need to flush the GPU cache to ensure suspend buffer |
| * contents are not lost on reset of GPU. |
| * Do this even if suspend operation had timed out for some of |
| * the CSG slots. |
| * In case the scheduler is already in the suspended state, the |
| * cache clean is still required, as the async reset request from |
| * debugfs may race against the scheduler suspend operation |
| * due to the extra context ref-count, which prevents the |
| * cache clean that the L2 power down would otherwise perform |
| * in the non-racing case. |
| */ |
| kbase_gpu_start_cache_clean(kbdev); |
| ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, |
| kbdev->reset_timeout_ms); |
| if (ret2) { |
| dev_warn(kbdev->dev, "Timed out waiting for cache clean to complete before reset"); |
| if (!ret) |
| ret = ret2; |
| } |
| |
| mutex_unlock(&scheduler->lock); |
| |
| return ret; |
| } |
| |
| /** |
| * scheduler_handle_reset_in_protected_mode() - Update the state of normal mode |
| * groups when reset is done during |
| * protected mode execution. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * |
| * This function is called at the time of GPU reset, before the suspension of |
| * queue groups, to handle the case when the reset is getting performed whilst |
| * GPU is in protected mode. |
| * On entry to protected mode all the groups, except the top group that executes |
| * in protected mode, are implicitly suspended by the FW. Thus this function |
| * simply marks the normal mode groups as suspended (and cleans up the |
| * corresponding CSG slots) to prevent their potential forceful eviction from |
| * the Scheduler. So if GPU was in protected mode and there was no fault, then |
| * only the protected mode group would be suspended in the regular way post exit |
| * from this function. And if GPU was in normal mode, then all on-slot groups |
| * will get suspended in the regular way. |
| * |
| * Return: true if the groups remaining on the CSG slots need to be suspended in |
| * the regular way by sending CSG SUSPEND reqs to FW, otherwise false. |
| */ |
| static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| u32 const num_groups = kbdev->csf.global_iface.group_num; |
| struct kbase_queue_group *protm_grp; |
| bool suspend_on_slot_groups; |
| unsigned long flags; |
| u32 csg_nr; |
| |
| mutex_lock(&scheduler->lock); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| protm_grp = scheduler->active_protm_grp; |
| |
| /* If GPU wasn't in protected mode or had exited it before the GPU reset |
| * then all the on-slot groups can be suspended in the regular way by |
| * sending CSG SUSPEND requests to FW. |
| * If there wasn't a fault for protected mode group, then it would |
| * also need to be suspended in the regular way before the reset. |
| */ |
| suspend_on_slot_groups = !(protm_grp && protm_grp->faulted); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| |
| if (!protm_grp) |
| goto unlock; |
| |
| /* GPU is in protected mode, so all the on-slot groups barring the |
| * protected mode group can be marked as suspended right away. |
| */ |
| for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { |
| struct kbase_queue_group *const group = |
| kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; |
| int new_val; |
| |
| if (!group || (group == protm_grp)) |
| continue; |
| |
| cleanup_csg_slot(group); |
| group->run_state = KBASE_CSF_GROUP_SUSPENDED; |
| |
| /* Simply treat the normal mode groups as non-idle. The tick |
| * scheduled after the reset will re-initialize the counter |
| * anyways. |
| */ |
| new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, |
| group, new_val); |
| } |
| |
| unlock: |
| mutex_unlock(&scheduler->lock); |
| return suspend_on_slot_groups; |
| } |
| |
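| /** |
| * scheduler_inner_reset() - Clear out the scheduler's internal state once |
| *                           all groups have left the CSG slots. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * |
| * Pending scheduler work items and the tick timer are cancelled, the CSG |
| * slot bookkeeping is cleared and any record of a group active in protected |
| * mode is dropped. |
| */ |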
| static void scheduler_inner_reset(struct kbase_device *kbdev) |
| { |
| u32 const num_groups = kbdev->csf.global_iface.group_num; |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| unsigned long flags; |
| |
| WARN_ON(csgs_active(kbdev)); |
| |
| /* Cancel any potential queued delayed work(s) */ |
| cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); |
| cancel_tick_timer(kbdev); |
| cancel_work_sync(&scheduler->tick_work); |
| cancel_delayed_work_sync(&scheduler->tock_work); |
| cancel_delayed_work_sync(&scheduler->ping_work); |
| |
| mutex_lock(&scheduler->lock); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); |
| if (scheduler->active_protm_grp) |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, |
| scheduler->active_protm_grp, 0u); |
| scheduler->active_protm_grp = NULL; |
| memset(kbdev->csf.scheduler.csg_slots, 0, |
| num_groups * sizeof(struct kbase_csf_csg_slot)); |
| bitmap_zero(kbdev->csf.scheduler.csg_inuse_bitmap, num_groups); |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| |
| scheduler->top_ctx = NULL; |
| scheduler->top_grp = NULL; |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, |
| scheduler->num_active_address_spaces | |
| (((u64)scheduler->total_runnable_grps) << 32)); |
| |
| mutex_unlock(&scheduler->lock); |
| } |
| |
| void kbase_csf_scheduler_reset(struct kbase_device *kbdev) |
| { |
| struct kbase_context *kctx; |
| |
| WARN_ON(!kbase_reset_gpu_is_active(kbdev)); |
| |
| KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u); |
| |
| if (scheduler_handle_reset_in_protected_mode(kbdev) && |
| !suspend_active_queue_groups_on_reset(kbdev)) { |
| /* As all groups have been successfully evicted from the CSG |
| * slots, clear out the scheduler data fields and return. |
| */ |
| scheduler_inner_reset(kbdev); |
| return; |
| } |
| |
| mutex_lock(&kbdev->kctx_list_lock); |
| |
| /* The loop to iterate over the kbase contexts is present due to lock |
| * ordering issue between kctx->csf.lock & kbdev->csf.scheduler.lock. |
| * CSF ioctls first take kctx->csf.lock which is context-specific and |
| * then take kbdev->csf.scheduler.lock for global actions like assigning |
| * a CSG slot. |
| * If the lock ordering constraint was not there then we could have |
| * directly looped over the active queue groups. |
| */ |
| list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { |
| /* Firmware reload would reinitialize the CSG & CS interface IO |
| * pages, so just need to internally mark the currently active |
| * queue groups as terminated (similar to the unexpected OoM |
| * event case). |
| * No further work can now get executed for the active groups |
| * (new groups would have to be created to execute work) and |
| * in near future Clients would be duly informed of this |
| * reset. The resources (like User IO pages, GPU queue memory) |
| * allocated for the associated queues would be freed when the |
| * Clients do the teardown when they become aware of the reset. |
| */ |
| kbase_csf_active_queue_groups_reset(kbdev, kctx); |
| } |
| |
| mutex_unlock(&kbdev->kctx_list_lock); |
| |
| /* After the reset of queue groups, clear out the scheduler data fields */ |
| scheduler_inner_reset(kbdev); |
| } |
| |
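| /** |
| * firmware_aliveness_monitor() - Worker that periodically pings the firmware |
| *                                when exactly one CSG slot is in use. |
| * |
| * @work: Pointer to the scheduler's ping_work work item. |
| * |
| * If the ping times out then a GPU reset is triggered, otherwise the ping is |
| * re-queued whilst a single CSG slot remains in use. |
| */ |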
| static void firmware_aliveness_monitor(struct work_struct *work) |
| { |
| struct kbase_device *kbdev = container_of(work, struct kbase_device, |
| csf.scheduler.ping_work.work); |
| int err; |
| |
| /* Ensure that reset will not be occurring while this function is being |
| * executed as otherwise calling kbase_reset_gpu when reset is already |
| * occurring is a programming error. |
| * |
| * We must use the 'try' variant as the Reset worker can try to flush |
| * this workqueue, which would otherwise deadlock here if we tried to |
| * wait for the reset (and thus ourselves) to complete. |
| */ |
| err = kbase_reset_gpu_try_prevent(kbdev); |
| if (err) { |
| /* It doesn't matter whether the value was -EAGAIN or a fatal |
| * error, just stop processing. In case of -EAGAIN, the Reset |
| * worker will restart the scheduler later to resume ping |
| */ |
| return; |
| } |
| |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| |
| #ifdef CONFIG_MALI_DEBUG |
| if (fw_debug) { |
| /* ping requests cause distraction in firmware debugging */ |
| goto exit; |
| } |
| #endif |
| |
| if (kbdev->csf.scheduler.state == SCHED_SUSPENDED) |
| goto exit; |
| |
| if (get_nr_active_csgs(kbdev) != 1) |
| goto exit; |
| |
| if (kbase_csf_scheduler_protected_mode_in_use(kbdev)) |
| goto exit; |
| |
| if (kbase_pm_context_active_handle_suspend(kbdev, |
| KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { |
| /* Suspend pending - no real need to ping */ |
| goto exit; |
| } |
| |
| kbase_pm_wait_for_desired_state(kbdev); |
| |
| err = kbase_csf_firmware_ping_wait(kbdev); |
| |
| if (err) { |
| /* It is acceptable to enqueue a reset whilst we've prevented |
| * them, it will happen after we've allowed them again |
| */ |
| if (kbase_prepare_to_reset_gpu( |
| kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) |
| kbase_reset_gpu(kbdev); |
| } else if (get_nr_active_csgs(kbdev) == 1) { |
| queue_delayed_work(system_long_wq, |
| &kbdev->csf.scheduler.ping_work, |
| msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS)); |
| } |
| |
| kbase_pm_context_idle(kbdev); |
| exit: |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| kbase_reset_gpu_allow(kbdev); |
| return; |
| } |
| |
| int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, |
| struct kbase_suspend_copy_buffer *sus_buf) |
| { |
| struct kbase_context *const kctx = group->kctx; |
| struct kbase_device *const kbdev = kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| int err = 0; |
| |
| kbase_reset_gpu_assert_prevented(kbdev); |
| lockdep_assert_held(&kctx->csf.lock); |
| mutex_lock(&scheduler->lock); |
| |
| if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) { |
| DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; |
| |
| set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask); |
| |
| if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) |
| suspend_queue_group(group); |
| err = wait_csg_slots_suspend(kbdev, slot_mask, |
| kbdev->csf.fw_timeout_ms); |
| if (err) { |
| dev_warn(kbdev->dev, "Timed out waiting for the group %d to suspend on slot %d", |
| group->handle, group->csg_nr); |
| goto exit; |
| } |
| } |
| |
| if (queue_group_suspended_locked(group)) { |
| unsigned int target_page_nr = 0, i = 0; |
| u64 offset = sus_buf->offset; |
| size_t to_copy = sus_buf->size; |
| |
| if (scheduler->state != SCHED_SUSPENDED) { |
| /* Similar to the case of HW counters, need to flush |
| * the GPU cache before reading from the suspend buffer |
| * pages as they are mapped and cached on GPU side. |
| */ |
| kbase_gpu_start_cache_clean(kbdev); |
| kbase_gpu_wait_cache_clean(kbdev); |
| } else { |
| /* Make sure power down transitions have completed, |
| * i.e. L2 has been powered off as that would ensure |
| * its contents are flushed to memory. |
| * This is needed as Scheduler doesn't wait for the |
| * power down to finish. |
| */ |
| kbase_pm_wait_for_desired_state(kbdev); |
| } |
| |
| for (i = 0; i < PFN_UP(sus_buf->size) && |
| target_page_nr < sus_buf->nr_pages; i++) { |
| struct page *pg = |
| as_page(group->normal_suspend_buf.phy[i]); |
| void *sus_page = kmap(pg); |
| |
| if (sus_page) { |
| kbase_sync_single_for_cpu(kbdev, |
| kbase_dma_addr(pg), |
| PAGE_SIZE, DMA_BIDIRECTIONAL); |
| |
| err = kbase_mem_copy_to_pinned_user_pages( |
| sus_buf->pages, sus_page, |
| &to_copy, sus_buf->nr_pages, |
| &target_page_nr, offset); |
| kunmap(pg); |
| if (err) |
| break; |
| } else { |
| err = -ENOMEM; |
| break; |
| } |
| } |
| schedule_in_cycle(group, false); |
| } else { |
| /* If addr-space fault, the group may have been evicted */ |
| err = -EIO; |
| } |
| |
| exit: |
| mutex_unlock(&scheduler->lock); |
| return err; |
| } |
| |
| KBASE_EXPORT_TEST_API(kbase_csf_scheduler_group_copy_suspend_buf); |
| |
| /** |
| * group_sync_updated() - Evaluate sync wait condition of all blocked command |
| * queues of the group. |
| * |
| * @group: Pointer to the command queue group that has blocked command queue(s) |
| * bound to it. |
| * |
| * Return: true if sync wait condition is satisfied for at least one blocked |
| * queue of the group. |
| */ |
| static bool group_sync_updated(struct kbase_queue_group *group) |
| { |
| bool updated = false; |
| int stream; |
| |
| /* Groups can also be blocked on-slot during protected mode. */ |
| WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC && |
| group->run_state != KBASE_CSF_GROUP_IDLE); |
| |
| for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) { |
| struct kbase_queue *const queue = group->bound_queues[stream]; |
| |
| /* To check the necessity of sync-wait evaluation, |
| * we rely on the cached 'status_wait' instead of reading it |
| * directly from shared memory as the CSG has been already |
| * evicted from the CSG slot, thus this CSG doesn't have |
| * valid information in the shared memory. |
| */ |
| if (queue && queue->enabled && |
| CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) |
| if (evaluate_sync_update(queue)) { |
| updated = true; |
| queue->status_wait = 0; |
| } |
| } |
| |
| return updated; |
| } |
| |
| /** |
| * scheduler_get_protm_enter_async_group() - Check if the GPU queue group |
| * can be now allowed to execute in protected mode. |
| * |
| * @kbdev: Pointer to the GPU device. |
| * @group: Pointer to the GPU queue group. |
| * |
| * This function is called outside the scheduling tick/tock to determine |
| * if the given GPU queue group can now execute in protected mode or not. |
| * If the group pointer passed is NULL then the evaluation is done for the |
| * highest priority group on the scheduler maintained group lists without |
| * tick associated rotation actions. This is referred to as the 'top-group' |
| * in a tock action sense. |
| * |
| * It returns the same group pointer, that was passed as an argument, if that |
| * group matches the highest priority group and has pending protected region |
| * requests otherwise NULL is returned. |
| * |
| * If the group pointer passed is NULL then the internal evaluated highest |
| * priority group is returned if that has pending protected region requests |
| * otherwise NULL is returned. |
| * |
| * The evaluated highest priority group may not necessarily be the same as the |
| * scheduler->top_grp. This can happen if there is dynamic de-idle update |
| * during the tick interval for some on-slots groups that were idle during the |
| * scheduler normal scheduling action, where the scheduler->top_grp was set. |
| * The recorded scheduler->top_grp is untouched by this evaluation, so it will not |
| * affect the scheduler context/priority list rotation arrangement. |
| * |
| * Return: the pointer to queue group that can currently execute in protected |
| * mode or NULL. |
| */ |
| static struct kbase_queue_group *scheduler_get_protm_enter_async_group( |
| struct kbase_device *const kbdev, |
| struct kbase_queue_group *const group) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| struct kbase_queue_group *match_grp, *input_grp; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| if (scheduler->state != SCHED_INACTIVE) |
| return NULL; |
| |
| match_grp = get_tock_top_group(scheduler); |
| input_grp = group ? group : match_grp; |
| |
| if (input_grp && (input_grp == match_grp)) { |
| struct kbase_csf_cmd_stream_group_info *ginfo = |
| &kbdev->csf.global_iface.groups[0]; |
| unsigned long *pending = |
| input_grp->protm_pending_bitmap; |
| unsigned long flags; |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| |
| if (kbase_csf_scheduler_protected_mode_in_use(kbdev) || |
| bitmap_empty(pending, ginfo->stream_num)) |
| input_grp = NULL; |
| |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| } else { |
| input_grp = NULL; |
| } |
| |
| return input_grp; |
| } |
| |
| void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) |
| { |
| struct kbase_device *const kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| int err = kbase_reset_gpu_try_prevent(kbdev); |
| /* Regardless of whether reset failed or is currently happening, exit |
| * early |
| */ |
| if (err) |
| return; |
| |
| mutex_lock(&scheduler->lock); |
| |
| /* Check if the group is now eligible for execution in protected mode. */ |
| if (scheduler_get_protm_enter_async_group(kbdev, group)) |
| scheduler_group_check_protm_enter(kbdev, group); |
| |
| mutex_unlock(&scheduler->lock); |
| kbase_reset_gpu_allow(kbdev); |
| } |
| |
| /** |
| * check_sync_update_for_idle_group_protm() - Check the sync wait condition |
| * for all the queues bound to |
| * the given group. |
| * |
| * @group: Pointer to the group that requires evaluation. |
| * |
| * This function is called if the GPU is in protected mode and there are on |
| * slot idle groups with higher priority than the active protected mode group. |
| * This function will evaluate the sync condition, if any, of all the queues |
| * bound to the given group. |
| * |
| * Return: true if the sync condition of at least one queue has been satisfied. |
| */ |
| static bool check_sync_update_for_idle_group_protm( |
| struct kbase_queue_group *group) |
| { |
| struct kbase_device *const kbdev = group->kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = |
| &kbdev->csf.scheduler; |
| bool sync_update_done = false; |
| int i; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { |
| struct kbase_queue *queue = group->bound_queues[i]; |
| |
| if (queue && queue->enabled && !sync_update_done) { |
| struct kbase_csf_cmd_stream_group_info *const ginfo = |
| &kbdev->csf.global_iface.groups[group->csg_nr]; |
| struct kbase_csf_cmd_stream_info *const stream = |
| &ginfo->streams[queue->csi_index]; |
| u32 status = kbase_csf_firmware_cs_output( |
| stream, CS_STATUS_WAIT); |
| unsigned long flags; |
| |
| KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, |
| queue->group, queue, status); |
| |
| if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status)) |
| continue; |
| |
| /* Save the information of sync object of the command |
| * queue so the callback function, 'group_sync_updated' |
| * can evaluate the sync object when it gets updated |
| * later. |
| */ |
| queue->status_wait = status; |
| queue->sync_ptr = kbase_csf_firmware_cs_output( |
| stream, CS_STATUS_WAIT_SYNC_POINTER_LO); |
| queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output( |
| stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; |
| queue->sync_value = kbase_csf_firmware_cs_output( |
| stream, CS_STATUS_WAIT_SYNC_VALUE); |
| queue->blocked_reason = |
| CS_STATUS_BLOCKED_REASON_REASON_GET( |
| kbase_csf_firmware_cs_output( |
| stream, |
| CS_STATUS_BLOCKED_REASON)); |
| |
| if (!evaluate_sync_update(queue)) |
| continue; |
| |
| /* Update csg_slots_idle_mask and group's run_state */ |
| if (group->run_state != KBASE_CSF_GROUP_RUNNABLE) { |
| /* Only clear the group's idle flag if it has been dealt |
| * with by the scheduler's tick/tock action, otherwise |
| * leave it untouched. |
| */ |
| spin_lock_irqsave(&scheduler->interrupt_lock, |
| flags); |
| clear_bit((unsigned int)group->csg_nr, |
| scheduler->csg_slots_idle_mask); |
| KBASE_KTRACE_ADD_CSF_GRP( |
| kbdev, CSG_SLOT_IDLE_CLEAR, group, |
| scheduler->csg_slots_idle_mask[0]); |
| spin_unlock_irqrestore( |
| &scheduler->interrupt_lock, flags); |
| group->run_state = KBASE_CSF_GROUP_RUNNABLE; |
| } |
| |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); |
| sync_update_done = true; |
| } |
| } |
| |
| return sync_update_done; |
| } |
| |
| /** |
| * check_sync_update_for_idle_groups_protm() - Check the sync wait condition |
| * for the idle groups on slot |
| * during protected mode. |
| * |
| * @kbdev: Pointer to the GPU device |
| * |
| * This function checks the GPU queues of all the on-slot idle groups that, |
| * during protected mode, have a higher priority than the active protected |
| * mode group. |
| * |
| * Return: true if the sync condition of at least one queue in a group has been |
| * satisfied. |
| */ |
| static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| struct kbase_queue_group *protm_grp; |
| bool exit_protm = false; |
| unsigned long flags; |
| u32 num_groups; |
| u32 i; |
| |
| lockdep_assert_held(&scheduler->lock); |
| |
| spin_lock_irqsave(&scheduler->interrupt_lock, flags); |
| protm_grp = scheduler->active_protm_grp; |
| spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); |
| |
| if (!protm_grp) |
| return exit_protm; |
| |
| num_groups = kbdev->csf.global_iface.group_num; |
| |
| for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { |
| struct kbase_csf_csg_slot *csg_slot = |
| &scheduler->csg_slots[i]; |
| struct kbase_queue_group *group = csg_slot->resident_group; |
| |
| if (group->scan_seq_num < protm_grp->scan_seq_num) { |
| /* If sync update has been performed for the group that |
| * has a higher priority than the protm group, then we |
| * need to exit protected mode. |
| */ |
| if (check_sync_update_for_idle_group_protm(group)) |
| exit_protm = true; |
| } |
| } |
| |
| return exit_protm; |
| } |
| |
| /** |
| * check_group_sync_update_worker() - Check the sync wait condition for all the |
| * blocked queue groups |
| * |
| * @work: Pointer to the context-specific work item for evaluating the wait |
| * condition for all the queue groups in idle_wait_groups list. |
| * |
| * This function checks the GPU queues of all the groups present in the |
| * idle_wait_groups list of a context, as well as all the on-slot idle |
| * groups (if the GPU is in protected mode). |
| * If the sync wait condition for at least one queue bound to the group has |
| * been satisfied then the group is moved to the per context list of |
| * runnable groups so that Scheduler can consider scheduling the group |
| * in next tick or exit protected mode. |
| */ |
| static void check_group_sync_update_worker(struct work_struct *work) |
| { |
| struct kbase_context *const kctx = container_of(work, |
| struct kbase_context, csf.sched.sync_update_work); |
| struct kbase_device *const kbdev = kctx->kbdev; |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| |
| mutex_lock(&scheduler->lock); |
| |
| KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_BEGIN, kctx, 0u); |
| if (kctx->csf.sched.num_idle_wait_grps != 0) { |
| struct kbase_queue_group *group, *temp; |
| |
| list_for_each_entry_safe(group, temp, |
| &kctx->csf.sched.idle_wait_groups, link) { |
| if (group_sync_updated(group)) { |
| /* Move this group back in to the runnable |
| * groups list of the context. |
| */ |
| update_idle_suspended_group_state(group); |
| KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); |
| } |
| } |
| } else { |
| WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups)); |
| } |
| |
| if (check_sync_update_for_idle_groups_protm(kbdev)) |
| scheduler_force_protm_exit(kbdev); |
| KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u); |
| |
| mutex_unlock(&scheduler->lock); |
| } |
| |
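| /** |
| * check_group_sync_update_cb() - Callback invoked on a sync update event to |
| *                                queue the evaluation of blocked groups. |
| * |
| * @param: Pointer to the kbase context registered for the callback. |
| * |
| * Return: KBASE_CSF_EVENT_CALLBACK_KEEP so that the callback stays |
| *         registered. |
| */ |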
| static |
| enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param) |
| { |
| struct kbase_context *const kctx = param; |
| |
| KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u); |
| queue_work(kctx->csf.sched.sync_update_wq, |
| &kctx->csf.sched.sync_update_work); |
| |
| return KBASE_CSF_EVENT_CALLBACK_KEEP; |
| } |
| |
| int kbase_csf_scheduler_context_init(struct kbase_context *kctx) |
| { |
| int priority; |
| int err; |
| |
| for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT; |
| ++priority) { |
| INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]); |
| } |
| |
| kctx->csf.sched.num_runnable_grps = 0; |
| INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups); |
| kctx->csf.sched.num_idle_wait_grps = 0; |
| kctx->csf.sched.ngrp_to_schedule = 0; |
| |
| kctx->csf.sched.sync_update_wq = |
| alloc_ordered_workqueue("mali_kbase_csf_sync_update_wq", |
| WQ_HIGHPRI); |
| if (!kctx->csf.sched.sync_update_wq) { |
| dev_err(kctx->kbdev->dev, |
| "Failed to initialize scheduler context workqueue"); |
| return -ENOMEM; |
| } |
| |
| INIT_WORK(&kctx->csf.sched.sync_update_work, |
| check_group_sync_update_worker); |
| |
| err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx); |
| |
| if (err) { |
| dev_err(kctx->kbdev->dev, |
| "Failed to register a sync update callback"); |
| destroy_workqueue(kctx->csf.sched.sync_update_wq); |
| } |
| |
| return err; |
| } |
| |
| void kbase_csf_scheduler_context_term(struct kbase_context *kctx) |
| { |
| kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); |
| cancel_work_sync(&kctx->csf.sched.sync_update_work); |
| destroy_workqueue(kctx->csf.sched.sync_update_wq); |
| } |
| |
| int kbase_csf_scheduler_init(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| u32 num_groups = kbdev->csf.global_iface.group_num; |
| |
| bitmap_zero(scheduler->csg_inuse_bitmap, num_groups); |
| bitmap_zero(scheduler->csg_slots_idle_mask, num_groups); |
| |
| scheduler->csg_slots = kcalloc(num_groups, |
| sizeof(*scheduler->csg_slots), GFP_KERNEL); |
| if (!scheduler->csg_slots) { |
| dev_err(kbdev->dev, |
| "Failed to allocate memory for csg slot status array\n"); |
| return -ENOMEM; |
| } |
| |
| return 0; |
| } |
| |
| int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| scheduler->timer_enabled = true; |
| |
| scheduler->wq = alloc_ordered_workqueue("csf_scheduler_wq", WQ_HIGHPRI); |
| if (!scheduler->wq) { |
| dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n"); |
| return -ENOMEM; |
| } |
| |
| INIT_WORK(&scheduler->tick_work, schedule_on_tick); |
| INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock); |
| |
| INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); |
| BUILD_BUG_ON(CSF_FIRMWARE_TIMEOUT_MS >= FIRMWARE_PING_INTERVAL_MS); |
| |
| mutex_init(&scheduler->lock); |
| spin_lock_init(&scheduler->interrupt_lock); |
| |
| /* Internal lists */ |
| INIT_LIST_HEAD(&scheduler->runnable_kctxs); |
| INIT_LIST_HEAD(&scheduler->groups_to_schedule); |
| INIT_LIST_HEAD(&scheduler->idle_groups_to_schedule); |
| |
| BUILD_BUG_ON(MAX_SUPPORTED_CSGS > |
| (sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE)); |
| bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); |
| scheduler->state = SCHED_SUSPENDED; |
| scheduler->pm_active_count = 0; |
| scheduler->ngrp_to_schedule = 0; |
| scheduler->total_runnable_grps = 0; |
| scheduler->top_ctx = NULL; |
| scheduler->top_grp = NULL; |
| scheduler->last_schedule = 0; |
| scheduler->tock_pending_request = false; |
| scheduler->active_protm_grp = NULL; |
| scheduler->gpu_idle_fw_timer_enabled = false; |
| scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS; |
| scheduler_doorbell_init(kbdev); |
| |
| INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker); |
| atomic_set(&scheduler->non_idle_offslot_grps, 0); |
| |
| hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
| scheduler->tick_timer.function = tick_timer_callback; |
| scheduler->tick_timer_active = false; |
| |
| return 0; |
| } |
| |
| void kbase_csf_scheduler_term(struct kbase_device *kbdev) |
| { |
| if (kbdev->csf.scheduler.csg_slots) { |
| WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps)); |
| WARN_ON(csgs_active(kbdev)); |
| flush_work(&kbdev->csf.scheduler.gpu_idle_work); |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| if (WARN_ON(kbdev->csf.scheduler.state != SCHED_SUSPENDED)) |
| scheduler_suspend(kbdev); |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work); |
| cancel_tick_timer(kbdev); |
| cancel_work_sync(&kbdev->csf.scheduler.tick_work); |
| cancel_delayed_work_sync(&kbdev->csf.scheduler.tock_work); |
| mutex_destroy(&kbdev->csf.scheduler.lock); |
| kfree(kbdev->csf.scheduler.csg_slots); |
| kbdev->csf.scheduler.csg_slots = NULL; |
| } |
| } |
| |
| void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) |
| { |
| if (kbdev->csf.scheduler.wq) |
| destroy_workqueue(kbdev->csf.scheduler.wq); |
| } |
| |
| /** |
| * scheduler_enable_tick_timer_nolock - Enable the scheduler tick timer. |
| * |
| * @kbdev: Instance of a GPU platform device that implements a CSF interface. |
| * |
| * This function will restart the scheduler tick so that regular scheduling can |
| * be resumed without any explicit trigger (like kicking GPU queues). It is a |
| * variant of kbase_csf_scheduler_enable_tick_timer() that assumes the CSF |
| * scheduler lock is already held. |
| */ |
| static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| lockdep_assert_held(&kbdev->csf.scheduler.lock); |
| |
| if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev))) |
| return; |
| |
| WARN_ON((scheduler->state != SCHED_INACTIVE) && |
| (scheduler->state != SCHED_SUSPENDED)); |
| |
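| /* Restart periodic ticks only if there is something to schedule; |
| * otherwise queue the GPU idle worker so idle handling (and eventual |
| * suspension) can proceed. |
| */ |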
| if (scheduler->total_runnable_grps > 0) { |
| enqueue_tick_work(kbdev); |
| dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n"); |
| } else if (scheduler->state != SCHED_SUSPENDED) { |
| queue_work(system_wq, &scheduler->gpu_idle_work); |
| } |
| } |
| |
| void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev) |
| { |
| mutex_lock(&kbdev->csf.scheduler.lock); |
| scheduler_enable_tick_timer_nolock(kbdev); |
| mutex_unlock(&kbdev->csf.scheduler.lock); |
| } |
| |
| bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| bool enabled; |
| |
| mutex_lock(&scheduler->lock); |
| enabled = scheduler_timer_is_enabled_nolock(kbdev); |
| mutex_unlock(&scheduler->lock); |
| |
| return enabled; |
| } |
| |
| void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, |
| bool enable) |
| { |
| struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; |
| bool currently_enabled; |
| |
| mutex_lock(&scheduler->lock); |
| |
| currently_enabled = scheduler_timer_is_enabled_nolock(kbdev); |
| if (currently_enabled && !enable) { |
| scheduler->timer_enabled = false; |
| cancel_tick_timer(kbdev); |
| cancel_delayed_work(&scheduler->tock_work); |
| mutex_unlock(&scheduler->lock); |
| /* The non-sync version to cancel a normal work item is not |
| * available, so the lock must be dropped before the synchronous |
| * cancellation. |
| */ |
| cancel_work_sync(&scheduler->tick_work); |
| } else if (!currently_enabled && enable) { |
| scheduler->timer_enabled = true; |
|  |
| scheduler_enable_tick_timer_nolock(kbdev); |
| mutex_unlock(&scheduler->lock); |
| } else { |
| /* No change of state requested, just release the lock */ |
| mutex_unlock(&scheduler->lock); |
| } |
| } |
| |
| void kbase_csf_scheduler_kick(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| mutex_lock(&scheduler->lock); |
| |
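| /* A manual kick is only needed when the periodic tick timer is |
| * disabled; with the timer enabled, ticks are already scheduled. |
| */ |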
| if (unlikely(scheduler_timer_is_enabled_nolock(kbdev))) |
| goto out; |
| |
| if (scheduler->total_runnable_grps > 0) { |
| enqueue_tick_work(kbdev); |
| dev_dbg(kbdev->dev, "Kicking the scheduler manually\n"); |
| } |
| |
| out: |
| mutex_unlock(&scheduler->lock); |
| } |
| |
| void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| /* Cancel any pending tick work or (delayed) tock work items */ |
| cancel_work_sync(&scheduler->tick_work); |
| cancel_delayed_work_sync(&scheduler->tock_work); |
| |
| if (kbase_reset_gpu_prevent_and_wait(kbdev)) { |
| dev_warn(kbdev->dev, |
| "Aborting PM suspend: failed to prevent GPU reset.\n"); |
| return; |
| } |
| |
| mutex_lock(&scheduler->lock); |
| |
| disable_gpu_idle_fw_timer(kbdev); |
| |
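| /* Suspend all on-slot groups and move the scheduler to the |
| * SCHED_SUSPENDED state before the system suspend proceeds. |
| */ |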
| if (scheduler->state != SCHED_SUSPENDED) { |
| suspend_active_groups_on_powerdown(kbdev, true); |
| dev_info(kbdev->dev, "Scheduler PM suspend"); |
| scheduler_suspend(kbdev); |
| cancel_tick_timer(kbdev); |
| } |
| mutex_unlock(&scheduler->lock); |
| |
| kbase_reset_gpu_allow(kbdev); |
| } |
| KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend); |
| |
| void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev) |
| { |
| struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
| |
| mutex_lock(&scheduler->lock); |
| |
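| /* Wake the scheduler only if there are runnable groups; it is |
| * expected to have been left suspended by the matching |
| * kbase_csf_scheduler_pm_suspend() call. |
| */ |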
| if (scheduler->total_runnable_grps > 0) { |
| WARN_ON(scheduler->state != SCHED_SUSPENDED); |
| dev_info(kbdev->dev, "Scheduler PM resume"); |
| scheduler_wakeup(kbdev, true); |
| } |
| mutex_unlock(&scheduler->lock); |
| } |
| KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume); |
| |
| void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev) |
| { |
| unsigned long flags; |
| u32 prev_count; |
| |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| prev_count = kbdev->csf.scheduler.pm_active_count++; |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| |
| /* On 0 => 1, make a pm_ctx_active request */ |
| if (!prev_count) |
| kbase_pm_context_active(kbdev); |
| else |
| WARN_ON(prev_count == U32_MAX); |
| } |
| KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active); |
| |
| void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev) |
| { |
| unsigned long flags; |
| u32 prev_count; |
| |
| spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
| prev_count = kbdev->csf.scheduler.pm_active_count--; |
| spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
| |
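| /* On 1 => 0, drop the earlier pm_ctx_active request; a decrement |
| * from 0 indicates an unbalanced call and is flagged below. |
| */ |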
| if (prev_count == 1) |
| kbase_pm_context_idle(kbdev); |
| else |
| WARN_ON(prev_count == 0); |
| } |
| KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle); |