// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include <mali_kbase.h>
#include "mali_kbase_config_defaults.h"
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_as_fault_debugfs.h>
#include "mali_kbase_csf.h"
#include <tl/mali_kbase_tracepoints.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <linux/export.h>
#include <csf/mali_kbase_csf_registers.h>
#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
#include <mali_kbase_hwaccess_time.h>
#include "mali_kbase_csf_tiler_heap.h"
/* Value to indicate that a queue group is not on the groups_to_schedule list */
#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
/* This decides the upper limit on the waiting time for the Scheduler
* to exit the sleep state. Usually the value of autosuspend_delay is
* expected to be around 100 milliseconds.
*/
#define MAX_AUTO_SUSPEND_DELAY_MS (5000)
/* Maximum number of endpoints which may run tiler jobs. */
#define CSG_TILER_MAX ((u8)1)
/* Maximum dynamic CSG slot priority value */
#define MAX_CSG_SLOT_PRIORITY ((u8)15)
/* CSF scheduler time slice value */
#define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */
/*
* CSF scheduler time threshold for converting "tock" requests into "tick" if
* they come too close to the end of a tick interval. This avoids scheduling
* twice in a row.
*/
#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS \
CSF_SCHEDULER_TIME_TICK_MS
#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_JIFFIES \
msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS)
/* Nanoseconds per millisecond */
#define NS_PER_MS ((u64)1000 * 1000)
/*
* CSF minimum time to reschedule for a new "tock" request. Bursts of "tock"
* requests are not serviced immediately, but shall wait for a minimum time in
* order to reduce load on the CSF scheduler thread.
*/
#define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffy */
/* CS suspended and is idle (empty ring buffer) */
#define CS_IDLE_FLAG (1 << 0)
/* CS suspended and is waiting for a CQS condition */
#define CS_WAIT_SYNC_FLAG (1 << 1)
/* A GPU address space slot is reserved for MCU. */
#define NUM_RESERVED_AS_SLOTS (1)
/* Time to wait for completion of PING req before considering MCU as hung */
#define FW_PING_AFTER_ERROR_TIMEOUT_MS (10)
/* Heap deferral time in ms from a CSG suspend to be included in the reclaim scan list. The
* value corresponds to realtime priority CSGs. Other priorities derive their time value
* from this one, with the realtime case having the highest delay.
*/
#define HEAP_RECLAIM_PRIO_DEFERRAL_MS (1000)
/* Additional heap deferral time in ms if a suspended CSG is in the WAIT_SYNC state */
#define HEAP_RECLAIM_WAIT_SYNC_DEFERRAL_MS (200)
/* Tiler heap reclaim count size for limiting a count run length */
#define HEAP_RECLAIM_COUNT_BATCH_SIZE (HEAP_SHRINKER_BATCH << 6)
/* Tiler heap reclaim scan (free) method size for limiting a scan run length */
#define HEAP_RECLAIM_SCAN_BATCH_SIZE (HEAP_SHRINKER_BATCH << 7)
static int scheduler_group_schedule(struct kbase_queue_group *group);
static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
static
void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
struct kbase_queue_group *const group,
enum kbase_csf_group_state run_state);
static struct kbase_queue_group *scheduler_get_protm_enter_async_group(
struct kbase_device *const kbdev,
struct kbase_queue_group *const group);
static struct kbase_queue_group *get_tock_top_group(
struct kbase_csf_scheduler *const scheduler);
static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev);
static int suspend_active_queue_groups(struct kbase_device *kbdev,
unsigned long *slot_mask);
static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
bool system_suspend);
static void schedule_in_cycle(struct kbase_queue_group *group, bool force);
static bool queue_group_scheduled_locked(struct kbase_queue_group *group);
#define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
/**
* wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and
* scheduling tick/tock to complete before the group deschedule.
*
* @group: Pointer to the group that is being descheduled.
*
* This function blocks the descheduling of the group until the dump on fault is
* completed and scheduling tick/tock has completed.
* To deschedule an on-slot group, a CSG termination request would be sent and that
* might time out if the fault had occurred, and it could also affect the state
* being dumped. Moreover the scheduler lock would be held, so the access to debugfs
* files would get blocked.
* Scheduler lock and 'kctx->csf.lock' are released before this function starts
* to wait. When a request sent by the Scheduler to the FW times out, the Scheduler
* would also wait for the dumping to complete and release the Scheduler lock
* before the wait. Meanwhile Userspace can try to delete the group; this function
* would ensure that the group doesn't exit the Scheduler until the scheduling
* tick/tock has completed. Though very unlikely, group deschedule can be triggered
* from multiple threads around the same time. After the wait, the Userspace thread
* can win the race, get the group descheduled and free the memory for the group
* pointer before the other threads wake up and notice that the group has already been
* descheduled. To avoid the freeing in such a case, a sort of refcount is used
* for the group which is incremented & decremented across the wait.
*/
static void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group)
{
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_context *kctx = group->kctx;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&kctx->csf.lock);
lockdep_assert_held(&scheduler->lock);
if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev)))
return;
while ((!kbase_debug_csf_fault_dump_complete(kbdev) || (scheduler->state == SCHED_BUSY)) &&
queue_group_scheduled_locked(group)) {
group->deschedule_deferred_cnt++;
mutex_unlock(&scheduler->lock);
mutex_unlock(&kctx->csf.lock);
kbase_debug_csf_fault_wait_completion(kbdev);
mutex_lock(&kctx->csf.lock);
mutex_lock(&scheduler->lock);
group->deschedule_deferred_cnt--;
}
#endif
}
/**
* schedule_actions_trigger_df() - Notify the client about the fault and
* wait for the dumping to complete.
*
* @kbdev: Pointer to the device
* @kctx: Pointer to the context associated with the CSG slot for which
* the timeout was seen.
* @error: Error code indicating the type of timeout that occurred.
*
* This function notifies the Userspace client waiting for the faults and waits
* for the Client to complete the dumping.
* The function is called only from Scheduling tick/tock when a request sent by
* the Scheduler to FW times out or from the protm event work item of the group
* when the protected mode entry request times out.
* In the latter case no wait is done, as the scheduler lock would be released
* immediately. In the former case the function releases the scheduler
* lock before the wait. It has been ensured that the Scheduler view of the groups
* won't change meanwhile, so no group can enter/exit the Scheduler, become
* runnable or go off slot.
*/
static void schedule_actions_trigger_df(struct kbase_device *kbdev, struct kbase_context *kctx,
enum dumpfault_error_type error)
{
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->lock);
if (!kbase_debug_csf_fault_notify(kbdev, kctx, error))
return;
if (unlikely(scheduler->state != SCHED_BUSY)) {
WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE);
return;
}
mutex_unlock(&scheduler->lock);
kbase_debug_csf_fault_wait_completion(kbdev);
mutex_lock(&scheduler->lock);
WARN_ON(scheduler->state != SCHED_BUSY);
#endif
}
#ifdef KBASE_PM_RUNTIME
/**
* wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the
* sleeping state.
*
* @kbdev: Pointer to the device
*
* This function waits until the Scheduler has exited the sleep state. It is
* called when an on-slot group is terminated or when the suspend
* buffer of an on-slot group needs to be captured.
*
* Return: 0 when the wait is successful, otherwise an error code.
*/
static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
int autosuspend_delay = kbdev->dev->power.autosuspend_delay;
unsigned int sleep_exit_wait_time;
long remaining;
int ret = 0;
lockdep_assert_held(&scheduler->lock);
WARN_ON(scheduler->state != SCHED_SLEEPING);
/* No point in waiting if the autosuspend_delay value is negative.
* For a negative value of autosuspend_delay the Driver will directly
* go for the suspend of the Scheduler, but the autosuspend_delay value
* could have been changed after the sleep was initiated.
*/
if (autosuspend_delay < 0)
return -EINVAL;
if (autosuspend_delay > MAX_AUTO_SUSPEND_DELAY_MS)
autosuspend_delay = MAX_AUTO_SUSPEND_DELAY_MS;
/* Usually Scheduler would remain in sleeping state until the
* auto-suspend timer expires and all active CSGs are suspended.
*/
sleep_exit_wait_time = autosuspend_delay + kbdev->reset_timeout_ms;
remaining = kbase_csf_timeout_in_jiffies(sleep_exit_wait_time);
while ((scheduler->state == SCHED_SLEEPING) && !ret) {
mutex_unlock(&scheduler->lock);
remaining = wait_event_timeout(
kbdev->csf.event_wait,
(scheduler->state != SCHED_SLEEPING),
remaining);
mutex_lock(&scheduler->lock);
if (!remaining && (scheduler->state == SCHED_SLEEPING))
ret = -ETIMEDOUT;
}
return ret;
}
/**
* force_scheduler_to_exit_sleep() - Force scheduler to exit sleep state
*
* @kbdev: Pointer to the device
*
* This function will force the Scheduler to exit the sleep state by doing the
* wake up of MCU and suspension of on-slot groups. It is called at the time of
* system suspend.
*
* Return: 0 on success.
*/
static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
unsigned long flags;
int ret = 0;
lockdep_assert_held(&scheduler->lock);
WARN_ON(scheduler->state != SCHED_SLEEPING);
WARN_ON(!kbdev->pm.backend.gpu_sleep_mode_active);
kbase_pm_lock(kbdev);
ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
kbase_pm_unlock(kbdev);
if (ret) {
dev_warn(kbdev->dev,
"[%llu] Wait for MCU wake up failed on forced scheduler suspend",
kbase_backend_get_cycle_cnt(kbdev));
goto out;
}
ret = suspend_active_groups_on_powerdown(kbdev, true);
if (ret)
goto out;
kbase_pm_lock(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.gpu_sleep_mode_active = false;
kbdev->pm.backend.gpu_wakeup_override = false;
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
ret = kbase_pm_wait_for_desired_state(kbdev);
kbase_pm_unlock(kbdev);
if (ret) {
dev_warn(kbdev->dev,
"[%llu] Wait for pm state change failed on forced scheduler suspend",
kbase_backend_get_cycle_cnt(kbdev));
goto out;
}
scheduler->state = SCHED_SUSPENDED;
return 0;
out:
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.exit_gpu_sleep_mode = true;
kbdev->pm.backend.gpu_wakeup_override = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
kbase_csf_scheduler_invoke_tick(kbdev);
return ret;
}
#endif
/**
* tick_timer_callback() - Callback function for the scheduling tick hrtimer
*
* @timer: Pointer to the scheduling tick hrtimer
*
* This function will enqueue the scheduling tick work item for immediate
* execution, if it has not been queued already.
*
* Return: enum value to indicate that timer should not be restarted.
*/
static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
{
struct kbase_device *kbdev = container_of(timer, struct kbase_device,
csf.scheduler.tick_timer);
kbase_csf_scheduler_tick_advance(kbdev);
return HRTIMER_NORESTART;
}
/**
* start_tick_timer() - Start the scheduling tick hrtimer.
*
* @kbdev: Pointer to the device
*
* This function will start the scheduling tick hrtimer and is supposed to
* be called only from the tick work item function. The tick hrtimer should
* not be active already.
*/
static void start_tick_timer(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
lockdep_assert_held(&scheduler->lock);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
WARN_ON(scheduler->tick_timer_active);
if (likely(!work_pending(&scheduler->tick_work))) {
scheduler->tick_timer_active = true;
hrtimer_start(&scheduler->tick_timer,
HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms),
HRTIMER_MODE_REL);
}
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
/**
* cancel_tick_timer() - Cancel the scheduling tick hrtimer
*
* @kbdev: Pointer to the device
*/
static void cancel_tick_timer(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
scheduler->tick_timer_active = false;
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
hrtimer_cancel(&scheduler->tick_timer);
}
/**
* enqueue_tick_work() - Enqueue the scheduling tick work item
*
* @kbdev: Pointer to the device
*
* This function will queue the scheduling tick work item for immediate
* execution. This shall only be called when both the tick hrtimer and tick
* work item are not active/pending.
*/
static void enqueue_tick_work(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->lock);
kbase_csf_scheduler_invoke_tick(kbdev);
}
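/**
* release_doorbell() - Mark a user doorbell page as free again.
*
* @kbdev: Pointer to the device
* @doorbell_nr: Doorbell page number to release
*
* Clears the corresponding bit in the scheduler's doorbell in-use bitmap.
* The caller must hold the scheduler lock.
*/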
static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr)
{
WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
lockdep_assert_held(&kbdev->csf.scheduler.lock);
clear_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap);
}
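/**
* acquire_doorbell() - Reserve the first free user doorbell page.
*
* @kbdev: Pointer to the device
*
* The caller must hold the scheduler lock.
*
* Return: the acquired doorbell number, or KBASEP_USER_DB_NR_INVALID if all
* doorbell pages are already in use.
*/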
static int acquire_doorbell(struct kbase_device *kbdev)
{
int doorbell_nr;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
doorbell_nr = find_first_zero_bit(
kbdev->csf.scheduler.doorbell_inuse_bitmap,
CSF_NUM_DOORBELL);
if (doorbell_nr >= CSF_NUM_DOORBELL)
return KBASEP_USER_DB_NR_INVALID;
set_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap);
return doorbell_nr;
}
static void unassign_user_doorbell_from_group(struct kbase_device *kbdev,
struct kbase_queue_group *group)
{
lockdep_assert_held(&kbdev->csf.scheduler.lock);
if (group->doorbell_nr != KBASEP_USER_DB_NR_INVALID) {
release_doorbell(kbdev, group->doorbell_nr);
group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
}
}
static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev,
struct kbase_queue *queue)
{
lockdep_assert_held(&kbdev->csf.scheduler.lock);
mutex_lock(&kbdev->csf.reg_lock);
if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) {
queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
/* After this the dummy page would be mapped in */
unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping,
queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
}
mutex_unlock(&kbdev->csf.reg_lock);
}
static void assign_user_doorbell_to_group(struct kbase_device *kbdev,
struct kbase_queue_group *group)
{
lockdep_assert_held(&kbdev->csf.scheduler.lock);
if (group->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
group->doorbell_nr = acquire_doorbell(kbdev);
}
static void assign_user_doorbell_to_queue(struct kbase_device *kbdev,
struct kbase_queue *const queue)
{
lockdep_assert_held(&kbdev->csf.scheduler.lock);
mutex_lock(&kbdev->csf.reg_lock);
/* If bind operation for the queue hasn't completed yet, then the
* CSI can't be programmed for the queue
* (even in stopped state) and so the doorbell also can't be assigned
* to it.
*/
if ((queue->bind_state == KBASE_CSF_QUEUE_BOUND) &&
(queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)) {
WARN_ON(queue->group->doorbell_nr == KBASEP_USER_DB_NR_INVALID);
queue->doorbell_nr = queue->group->doorbell_nr;
/* After this the real Hw doorbell page would be mapped in */
unmap_mapping_range(
kbdev->csf.db_filp->f_inode->i_mapping,
queue->db_file_offset << PAGE_SHIFT,
PAGE_SIZE, 1);
}
mutex_unlock(&kbdev->csf.reg_lock);
}
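/**
* scheduler_doorbell_init() - Initialise the scheduler's doorbell bitmap.
*
* @kbdev: Pointer to the device
*
* Clears the doorbell in-use bitmap and then acquires doorbell 0, which is
* reserved for use by the kernel driver (CSF_KERNEL_DOORBELL_NR).
*/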
static void scheduler_doorbell_init(struct kbase_device *kbdev)
{
int doorbell_nr;
bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap,
CSF_NUM_DOORBELL);
mutex_lock(&kbdev->csf.scheduler.lock);
/* Reserve doorbell 0 for use by kernel driver */
doorbell_nr = acquire_doorbell(kbdev);
mutex_unlock(&kbdev->csf.scheduler.lock);
WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR);
}
/**
* update_on_slot_queues_offsets - Update active queues' EXTRACT offsets
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function updates the EXTRACT offset for all queues whose groups have
* been assigned a physical slot. These values could be used to detect a
* queue's true idleness status. This is intended to be an additional check
* on top of the GPU idle notification to account for race conditions.
* This function is supposed to be called only when the GPU idle notification
* interrupt is received.
*/
static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
/* All CSGs have the same number of CSs */
size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
size_t i;
lockdep_assert_held(&scheduler->interrupt_lock);
/* csg_slots_idle_mask is not used here for the looping, as it could get
* updated concurrently when Scheduler re-evaluates the idle status of
* the CSGs for which idle notification was received previously.
*/
for_each_set_bit(i, scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num) {
struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
size_t j;
if (WARN_ON(!group))
continue;
for (j = 0; j < max_streams; ++j) {
struct kbase_queue *const queue = group->bound_queues[j];
if (queue && queue->user_io_addr) {
u64 const *const output_addr =
(u64 const *)(queue->user_io_addr + PAGE_SIZE);
queue->extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
}
}
}
}
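/**
* enqueue_gpu_idle_work() - Queue the GPU idle work item.
*
* @scheduler: Pointer to the CSF scheduler.
*
* Clears the gpu_no_longer_idle flag and queues gpu_idle_work on the
* scheduler's idle workqueue.
*/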
static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler)
{
atomic_set(&scheduler->gpu_no_longer_idle, false);
queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work);
}
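/**
* kbase_csf_scheduler_process_gpu_idle_event() - Handle the GPU idle
* notification received from the firmware.
*
* @kbdev: Pointer to the device
*
* If there are no non-idle off-slot groups and the Scheduler is allowed to
* suspend on idle, the GPU idle worker is queued (and, unless fast idle
* handling applies, the EXTRACT offsets of on-slot queues are also updated).
* If there are non-idle off-slot groups, the scheduling tick is advanced
* instead so that those groups get loaded soon.
*/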
void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
int non_idle_offslot_grps;
bool can_suspend_on_idle;
lockdep_assert_held(&scheduler->interrupt_lock);
non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL,
((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
if (!non_idle_offslot_grps) {
if (can_suspend_on_idle) {
/* fast_gpu_idle_handling is protected by the
* interrupt_lock, which would prevent this from being
* updated whilst gpu_idle_worker() is executing.
*/
scheduler->fast_gpu_idle_handling =
(kbdev->csf.gpu_idle_hysteresis_ms == 0) ||
!kbase_csf_scheduler_all_csgs_idle(kbdev);
/* The GPU idle worker relies on update_on_slot_queues_offsets() to have
* finished. It is queued beforehand to reduce the time until its execution,
* but it will eventually be blocked by the scheduler->interrupt_lock.
*/
enqueue_gpu_idle_work(scheduler);
/* The extract offsets are unused in fast GPU idle handling */
if (!scheduler->fast_gpu_idle_handling)
update_on_slot_queues_offsets(kbdev);
}
} else {
/* Advance the scheduling tick to get the non-idle suspended groups loaded soon */
kbase_csf_scheduler_tick_advance_nolock(kbdev);
}
}
u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
{
u32 nr_active_csgs;
lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap,
kbdev->csf.global_iface.group_num);
return nr_active_csgs;
}
u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev)
{
u32 nr_active_csgs;
unsigned long flags;
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
nr_active_csgs = kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
return nr_active_csgs;
}
/**
* csg_slot_in_use - returns true if a queue group has been programmed on a
* given CSG slot.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @slot: Index/number of the CSG slot in question.
*
* Return: true if the CSG slot is in use (has a resident group), false otherwise.
*
* Note: Caller must hold the scheduler lock.
*/
static inline bool csg_slot_in_use(struct kbase_device *kbdev, int slot)
{
lockdep_assert_held(&kbdev->csf.scheduler.lock);
return (kbdev->csf.scheduler.csg_slots[slot].resident_group != NULL);
}
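/**
* queue_group_suspended_locked() - Check if a queue group is in one of the
* suspended states.
*
* @group: Pointer to the queue group.
*
* The caller must hold the scheduler lock.
*
* Return: true if the group is suspended, suspended-on-idle or
* suspended-on-wait-sync.
*/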
static bool queue_group_suspended_locked(struct kbase_queue_group *group)
{
lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
return (group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE ||
group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
}
static bool queue_group_idle_locked(struct kbase_queue_group *group)
{
lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
return (group->run_state == KBASE_CSF_GROUP_IDLE ||
group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE);
}
static bool on_slot_group_idle_locked(struct kbase_queue_group *group)
{
lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
return (group->run_state == KBASE_CSF_GROUP_IDLE);
}
static bool can_schedule_idle_group(struct kbase_queue_group *group)
{
return (on_slot_group_idle_locked(group) ||
(group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME));
}
static bool queue_group_scheduled(struct kbase_queue_group *group)
{
return (group->run_state != KBASE_CSF_GROUP_INACTIVE &&
group->run_state != KBASE_CSF_GROUP_TERMINATED &&
group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED);
}
static bool queue_group_scheduled_locked(struct kbase_queue_group *group)
{
lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
return queue_group_scheduled(group);
}
/**
* scheduler_protm_wait_quit() - Wait for GPU to exit protected mode.
*
* @kbdev: Pointer to the GPU device
*
* This function waits for the GPU to exit protected mode which is confirmed
* when active_protm_grp is set to NULL.
*
* Return: true on success, false otherwise.
*/
static bool scheduler_protm_wait_quit(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
long remaining;
bool success = true;
lockdep_assert_held(&scheduler->lock);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_START, NULL, jiffies_to_msecs(wt));
remaining = wait_event_timeout(kbdev->csf.event_wait,
!kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
if (unlikely(!remaining)) {
struct kbase_queue_group *group = kbdev->csf.scheduler.active_protm_grp;
struct kbase_context *kctx = group ? group->kctx : NULL;
dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped",
kbase_backend_get_cycle_cnt(kbdev),
kbdev->csf.fw_timeout_ms);
schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT);
success = false;
}
KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_END, NULL, jiffies_to_msecs(remaining));
return success;
}
/**
* scheduler_force_protm_exit() - Force GPU to exit protected mode.
*
* @kbdev: Pointer to the GPU device
*
* This function sends a ping request to the firmware and waits for the GPU
* to exit protected mode.
*
* If the GPU does not exit protected mode, it is considered to have hung.
* A GPU reset would then be triggered.
*/
static void scheduler_force_protm_exit(struct kbase_device *kbdev)
{
unsigned long flags;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
kbase_csf_firmware_ping(kbdev);
if (scheduler_protm_wait_quit(kbdev))
return;
dev_err(kbdev->dev, "Possible GPU hang in Protected mode");
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
if (kbdev->csf.scheduler.active_protm_grp) {
dev_err(kbdev->dev,
"Group-%d of context %d_%d ran in protected mode for too long on slot %d",
kbdev->csf.scheduler.active_protm_grp->handle,
kbdev->csf.scheduler.active_protm_grp->kctx->tgid,
kbdev->csf.scheduler.active_protm_grp->kctx->id,
kbdev->csf.scheduler.active_protm_grp->csg_nr);
}
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
/* The GPU could be stuck in Protected mode. To prevent a hang,
* a GPU reset is performed.
*/
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
}
/**
* scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up
* automatically for periodic tasks.
*
* @kbdev: Pointer to the device
*
* This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the
* CSF scheduler lock to already have been held.
*
* Return: true if the scheduler is configured to wake up periodically
*/
static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->csf.scheduler.lock);
return kbdev->csf.scheduler.timer_enabled;
}
/**
* scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
* Scheduler
*
* @kbdev: Pointer to the device
* @suspend_handler: Handler code for how to handle a suspend that might occur.
*
* This function is usually called when Scheduler needs to be activated.
* The PM reference count is acquired for the Scheduler and the power on
* of GPU is initiated.
*
* Return: 0 if successful or a negative error code on failure.
*/
static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
enum kbase_pm_suspend_handler suspend_handler)
{
unsigned long flags;
u32 prev_count;
int ret = 0;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
prev_count = kbdev->csf.scheduler.pm_active_count;
if (!WARN_ON(prev_count == U32_MAX))
kbdev->csf.scheduler.pm_active_count++;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/* On 0 => 1, make a pm_ctx_active request */
if (!prev_count) {
ret = kbase_pm_context_active_handle_suspend(kbdev,
suspend_handler);
/* Invoke the PM state machines again as the change in MCU
* desired status, due to the update of scheduler.pm_active_count,
* may be missed by the thread that called pm_wait_for_desired_state()
*/
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (ret)
kbdev->csf.scheduler.pm_active_count--;
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
return ret;
}
#ifdef KBASE_PM_RUNTIME
/**
* scheduler_pm_active_after_sleep() - Acquire the PM reference count for
* Scheduler
*
* @kbdev: Pointer to the device
* @flags: flags containing previous interrupt state
*
* This function is called when Scheduler needs to be activated from the
* sleeping state.
* The PM reference count is acquired for the Scheduler and the wake up of
* MCU is initiated. It resets the flag that indicates to the MCU state
* machine that MCU needs to be put in sleep state.
*
* Note: This function shall be called with hwaccess lock held and it will
* release that lock.
*
* Return: zero when the PM reference was taken and non-zero when the
* system is being suspended.
*/
static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev,
unsigned long flags)
{
u32 prev_count;
int ret = 0;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
lockdep_assert_held(&kbdev->hwaccess_lock);
prev_count = kbdev->csf.scheduler.pm_active_count;
if (!WARN_ON(prev_count == U32_MAX))
kbdev->csf.scheduler.pm_active_count++;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/* On 0 => 1, make a pm_ctx_active request */
if (!prev_count) {
ret = kbase_pm_context_active_handle_suspend(kbdev,
KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (ret)
kbdev->csf.scheduler.pm_active_count--;
else
kbdev->pm.backend.gpu_sleep_mode_active = false;
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
return ret;
}
#endif
/**
* scheduler_pm_idle() - Release the PM reference count held by Scheduler
*
* @kbdev: Pointer to the device
*
* This function is usually called after Scheduler is suspended.
* The PM reference count held by the Scheduler is released to trigger the
* power down of GPU.
*/
static void scheduler_pm_idle(struct kbase_device *kbdev)
{
unsigned long flags;
u32 prev_count;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
prev_count = kbdev->csf.scheduler.pm_active_count;
if (!WARN_ON(prev_count == 0))
kbdev->csf.scheduler.pm_active_count--;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (prev_count == 1) {
kbase_pm_context_idle(kbdev);
/* Invoke the PM state machines again as the change in MCU
* desired status, due to the update of scheduler.pm_active_count,
* may be missed by the thread that called pm_wait_for_desired_state()
*/
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
}
#ifdef KBASE_PM_RUNTIME
/**
* scheduler_pm_idle_before_sleep() - Release the PM reference count and
* trigger the transition to sleep state.
*
* @kbdev: Pointer to the device
*
* This function is called on the GPU idle notification. It releases the
* Scheduler's PM reference count and sets the flag to indicate to the
* MCU state machine that MCU needs to be put in sleep state.
*/
static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
{
unsigned long flags;
u32 prev_count;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
prev_count = kbdev->csf.scheduler.pm_active_count;
if (!WARN_ON(prev_count == 0))
kbdev->csf.scheduler.pm_active_count--;
kbdev->pm.backend.gpu_sleep_mode_active = true;
kbdev->pm.backend.exit_gpu_sleep_mode = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (prev_count == 1) {
kbase_pm_context_idle(kbdev);
/* Invoke the PM state machines again as the change in MCU
* desired status, due to the update of scheduler.pm_active_count,
* may be missed by the thread that called pm_wait_for_desired_state()
*/
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
}
#endif
static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
int ret;
lockdep_assert_held(&scheduler->lock);
if ((scheduler->state != SCHED_SUSPENDED) &&
(scheduler->state != SCHED_SLEEPING))
return;
if (scheduler->state == SCHED_SUSPENDED) {
dev_dbg(kbdev->dev,
"Re-activating the Scheduler after suspend");
ret = scheduler_pm_active_handle_suspend(kbdev,
KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
} else {
#ifdef KBASE_PM_RUNTIME
unsigned long flags;
dev_dbg(kbdev->dev,
"Re-activating the Scheduler out of sleep");
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
ret = scheduler_pm_active_after_sleep(kbdev, flags);
/* hwaccess_lock is released in the previous function call. */
#endif
}
if (ret) {
/* GPUCORE-29850 would add the handling for the case where
* Scheduler could not be activated due to system suspend.
*/
dev_info(kbdev->dev,
"Couldn't wakeup Scheduler due to system suspend");
return;
}
scheduler->state = SCHED_INACTIVE;
if (kick)
scheduler_enable_tick_timer_nolock(kbdev);
}
static void scheduler_suspend(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->lock);
if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) {
dev_dbg(kbdev->dev, "Suspending the Scheduler");
scheduler_pm_idle(kbdev);
scheduler->state = SCHED_SUSPENDED;
}
}
/**
* update_idle_suspended_group_state() - Move the queue group to a non-idle
* suspended state.
* @group: Pointer to the queue group.
*
* This function is called to change the state of queue group to non-idle
* suspended state, if the group was suspended when all the queues bound to it
* became empty or when some queues got blocked on a sync wait & others became
* empty. The group is also moved to the runnable list from idle wait list in
* the latter case.
* So the function gets called when a queue is kicked or sync wait condition
* gets satisfied.
*/
static void update_idle_suspended_group_state(struct kbase_queue_group *group)
{
struct kbase_csf_scheduler *scheduler =
&group->kctx->kbdev->csf.scheduler;
int new_val;
lockdep_assert_held(&scheduler->lock);
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) {
remove_group_from_idle_wait(group);
insert_group_to_runnable(scheduler, group,
KBASE_CSF_GROUP_SUSPENDED);
} else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) {
group->run_state = KBASE_CSF_GROUP_SUSPENDED;
/* If scheduler is not suspended and the given group's
* static priority (reflected by the scan_seq_num) is inside
* the current tick slot-range, or there are some on_slot
* idle groups, schedule an async tock.
*/
if (scheduler->state != SCHED_SUSPENDED) {
unsigned long flags;
int n_idle;
int n_used;
int n_slots =
group->kctx->kbdev->csf.global_iface.group_num;
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
n_idle = bitmap_weight(scheduler->csg_slots_idle_mask,
n_slots);
n_used = bitmap_weight(scheduler->csg_inuse_bitmap,
n_slots);
spin_unlock_irqrestore(&scheduler->interrupt_lock,
flags);
if (n_idle ||
n_used < scheduler->num_csg_slots_for_tick ||
group->scan_seq_num <
scheduler->num_csg_slots_for_tick)
schedule_in_cycle(group, true);
}
} else
return;
new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
new_val);
}
int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group)
{
struct kbase_csf_scheduler *scheduler =
&group->kctx->kbdev->csf.scheduler;
int slot_num = group->csg_nr;
lockdep_assert_held(&scheduler->interrupt_lock);
if (slot_num >= 0) {
if (WARN_ON(scheduler->csg_slots[slot_num].resident_group !=
group))
return -1;
}
return slot_num;
}
int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group)
{
struct kbase_csf_scheduler *scheduler =
&group->kctx->kbdev->csf.scheduler;
unsigned long flags;
int slot_num;
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
return slot_num;
}
/* kbasep_csf_scheduler_group_is_on_slot_locked() - Check if CSG is on slot.
*
* @group: GPU queue group to be checked
*
* This function needs to be called with scheduler's lock held
*
* Return: true if @group is on slot.
*/
static bool kbasep_csf_scheduler_group_is_on_slot_locked(
struct kbase_queue_group *group)
{
struct kbase_csf_scheduler *scheduler =
&group->kctx->kbdev->csf.scheduler;
int slot_num = group->csg_nr;
lockdep_assert_held(&scheduler->lock);
if (slot_num >= 0) {
if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group !=
group))
return true;
}
return false;
}
bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev,
struct kbase_queue_group *group)
{
struct kbase_csf_scheduler *scheduler =
&group->kctx->kbdev->csf.scheduler;
int slot_num = group->csg_nr;
lockdep_assert_held(&scheduler->interrupt_lock);
if (WARN_ON(slot_num < 0))
return false;
return test_bit(slot_num, scheduler->csgs_events_enable_mask);
}
struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot(
struct kbase_device *kbdev, int slot)
{
lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
return kbdev->csf.scheduler.csg_slots[slot].resident_group;
}
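/**
* halt_stream_sync() - Stop a GPU queue synchronously whilst its group is
* resident on a CSG slot.
*
* @queue: Pointer to the GPU queue to stop.
*
* If a START request is still pending for the CSI then this function first
* waits for the queue to start. It then requests the STOP state and waits for
* the firmware to acknowledge it, triggering a GPU reset if either wait times
* out.
*
* Return: 0 on success, or a negative error code on failure.
*/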
static int halt_stream_sync(struct kbase_queue *queue)
{
struct kbase_queue_group *group = queue->group;
struct kbase_device *kbdev = queue->kctx->kbdev;
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
struct kbase_csf_cmd_stream_group_info *ginfo;
struct kbase_csf_cmd_stream_info *stream;
int csi_index = queue->csi_index;
long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
unsigned long flags;
if (WARN_ON(!group) ||
WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
return -EINVAL;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
ginfo = &global_iface->groups[group->csg_nr];
stream = &ginfo->streams[csi_index];
if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) ==
CS_REQ_STATE_START) {
remaining = wait_event_timeout(kbdev->csf.event_wait,
(CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
== CS_ACK_STATE_START), remaining);
if (!remaining) {
dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d",
kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
csi_index, group->handle, group->csg_nr);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
return -ETIMEDOUT;
}
remaining =
kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
}
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
/* Set state to STOP */
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP,
CS_REQ_STATE_MASK);
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u);
/* Timed wait */
remaining = wait_event_timeout(kbdev->csf.event_wait,
(CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
== CS_ACK_STATE_STOP), remaining);
if (!remaining) {
dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d",
kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
queue->csi_index, group->handle, group->csg_nr);
/* TODO GPUCORE-25328: The CSG can't be terminated, the GPU
* will be reset as a work-around.
*/
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
}
return (remaining) ? 0 : -ETIMEDOUT;
}
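/**
* can_halt_stream() - Check if a queue's group is in a state where the queue
* can be stopped.
*
* @kbdev: Pointer to the device
* @group: Pointer to the queue group.
*
* Return: true if the group is no longer scheduled, or if it is resident on a
* CSG slot that is in the running state.
*/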
static bool can_halt_stream(struct kbase_device *kbdev,
struct kbase_queue_group *group)
{
struct kbase_csf_csg_slot *const csg_slot =
kbdev->csf.scheduler.csg_slots;
unsigned long flags;
bool can_halt;
int slot;
if (!queue_group_scheduled(group))
return true;
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
slot = kbase_csf_scheduler_group_get_slot_locked(group);
can_halt = (slot >= 0) &&
(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
flags);
return can_halt;
}
/**
* sched_halt_stream() - Stop a GPU queue when its queue group is not running
* on a CSG slot.
* @queue: Pointer to the GPU queue to stop.
*
* This function handles stopping GPU queues for groups that are either not on
* a CSG slot or are on the slot but undergoing transition to
* resume or suspend states.
* It waits until the queue group is scheduled on a slot and starts running,
* which is needed as groups that were suspended may need to resume all queues
* that were enabled and running at the time of suspension.
*
* Return: 0 on success, or negative on failure.
*/
static int sched_halt_stream(struct kbase_queue *queue)
{
struct kbase_queue_group *group = queue->group;
struct kbase_device *kbdev = queue->kctx->kbdev;
struct kbase_csf_scheduler *const scheduler =
&kbdev->csf.scheduler;
struct kbase_csf_csg_slot *const csg_slot =
kbdev->csf.scheduler.csg_slots;
bool retry_needed = false;
bool retried = false;
long remaining;
int slot;
int err = 0;
const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT);
if (WARN_ON(!group))
return -EINVAL;
lockdep_assert_held(&queue->kctx->csf.lock);
lockdep_assert_held(&scheduler->lock);
slot = kbase_csf_scheduler_group_get_slot(group);
if (slot >= 0) {
WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING);
if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) {
dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is under transition to running state",
queue->csi_index, group->handle);
retry_needed = true;
}
}
retry:
/* Update the group state so that it can get scheduled soon */
update_idle_suspended_group_state(group);
mutex_unlock(&scheduler->lock);
/* This function is called when the queue group is either not on a CSG
* slot or is on the slot but undergoing transition.
*
* To stop the queue, the function needs to wait either for the queue
* group to be assigned a CSG slot (and that slot has to reach the
* running state) or for the eviction of the queue group from the
* scheduler's list.
*
* In order to evaluate the latter condition, the function doesn't
* really need to lock the scheduler, as any update to the run_state
* of the queue group by sched_evict_group() would be visible due
* to implicit barriers provided by the kernel waitqueue macros.
*
* The group pointer cannot disappear meanwhile, as the high level
* CSF context is locked. Therefore, the scheduler would be
* the only one to update the run_state of the group.
*/
remaining = wait_event_timeout(
kbdev->csf.event_wait, can_halt_stream(kbdev, group),
kbase_csf_timeout_in_jiffies(group_schedule_timeout));
mutex_lock(&scheduler->lock);
if (remaining && queue_group_scheduled_locked(group)) {
slot = kbase_csf_scheduler_group_get_slot(group);
/* If the group is still on slot and slot is in running state
* then explicitly stop the CSI of the
* queue. Otherwise there are different cases to consider
*
* - If the queue group was already undergoing transition to
* resume/start state when this function was entered then it
* would not have disabled the CSI of the
* queue being stopped and the previous wait would have ended
* once the slot was in a running state with CS
* interface still enabled.
* Now the group is going through another transition either
* to a suspend state or to a resume state (it could have
* been suspended before the scheduler lock was grabbed).
* In both scenarios we need to wait again for the group to
* come on a slot and that slot to reach the running state,
* as that would guarantee that firmware will observe the
* CSI as disabled.
*
* - If the queue group was either off the slot or was
* undergoing transition to suspend state on entering this
* function, then the group would have been resumed with the
* queue's CSI in disabled state.
* So now if the group is undergoing another transition
* (after the resume) then we just need to wait for the state
* bits in the ACK register of CSI to be
* set to STOP value. It is expected that firmware will
* process the stop/disable request of the CS
* interface after resuming the group before it processes
* another state change request of the group.
*/
if ((slot >= 0) &&
(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) {
err = halt_stream_sync(queue);
} else if (retry_needed && !retried) {
retried = true;
goto retry;
} else if (slot >= 0) {
struct kbase_csf_global_iface *global_iface =
&kbdev->csf.global_iface;
struct kbase_csf_cmd_stream_group_info *ginfo =
&global_iface->groups[slot];
struct kbase_csf_cmd_stream_info *stream =
&ginfo->streams[queue->csi_index];
u32 cs_req =
kbase_csf_firmware_cs_input_read(stream, CS_REQ);
if (!WARN_ON(CS_REQ_STATE_GET(cs_req) !=
CS_REQ_STATE_STOP)) {
/* Timed wait */
remaining = wait_event_timeout(
kbdev->csf.event_wait,
(CS_ACK_STATE_GET(
kbase_csf_firmware_cs_output(
stream, CS_ACK)) ==
CS_ACK_STATE_STOP),
kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms));
if (!remaining) {
dev_warn(kbdev->dev,
"[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d",
kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
queue->csi_index,
group->handle, group->csg_nr);
err = -ETIMEDOUT;
}
}
}
} else if (!remaining) {
dev_warn(kbdev->dev, "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)",
kbase_backend_get_cycle_cnt(kbdev),
group->handle, queue->csi_index,
group_schedule_timeout);
err = -ETIMEDOUT;
}
return err;
}
/**
* scheduler_activate_on_queue_stop() - Activate the Scheduler when the GPU
* queue needs to be stopped.
*
* @queue: Pointer the GPU command queue
*
* This function is called when the CSI to which the GPU queue is bound needs to
* be stopped. For that the corresponding queue group needs to be resident on
* the CSG slot and MCU firmware should be running. So this function makes the
* Scheduler exit the sleeping or suspended state.
*/
static void scheduler_activate_on_queue_stop(struct kbase_queue *queue)
{
struct kbase_device *kbdev = queue->kctx->kbdev;
scheduler_wakeup(kbdev, true);
/* Wait for MCU firmware to start running */
if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
dev_warn(
kbdev->dev,
"[%llu] Wait for MCU active failed for stopping queue on csi %d bound to group %d of context %d_%d on slot %d",
kbase_backend_get_cycle_cnt(kbdev),
queue->csi_index, queue->group->handle,
queue->kctx->tgid, queue->kctx->id,
queue->group->csg_nr);
}
}
int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
{
struct kbase_device *kbdev = queue->kctx->kbdev;
struct kbase_queue_group *group = queue->group;
bool const cs_enabled = queue->enabled;
int err = 0;
if (WARN_ON(!group))
return -EINVAL;
kbase_reset_gpu_assert_failed_or_prevented(kbdev);
lockdep_assert_held(&queue->kctx->csf.lock);
mutex_lock(&kbdev->csf.scheduler.lock);
queue->enabled = false;
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled);
if (cs_enabled && queue_group_scheduled_locked(group)) {
struct kbase_csf_csg_slot *const csg_slot =
kbdev->csf.scheduler.csg_slots;
int slot = kbase_csf_scheduler_group_get_slot(group);
/* Since the group needs to be resumed in order to stop the queue,
* check if GPU needs to be powered up.
*/
scheduler_activate_on_queue_stop(queue);
if ((slot >= 0) &&
(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING))
err = halt_stream_sync(queue);
else
err = sched_halt_stream(queue);
unassign_user_doorbell_from_queue(kbdev, queue);
}
mutex_unlock(&kbdev->csf.scheduler.lock);
return err;
}
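/* Mirror the queue's active status into the CS_ACTIVE field of the user
* output page. Only has an effect when CONFIG_MALI_NO_MALI is enabled.
*/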
static void update_hw_active(struct kbase_queue *queue, bool active)
{
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
if (queue && queue->enabled) {
u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
output_addr[CS_ACTIVE / sizeof(u32)] = active;
}
#else
CSTD_UNUSED(queue);
CSTD_UNUSED(active);
#endif
}
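/* Initialise CS_EXTRACT_INIT in the user input page from the current
* CS_EXTRACT value in the user output page, so that the firmware continues
* from the queue's current extract position when it is resumed.
*/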
static void program_cs_extract_init(struct kbase_queue *queue)
{
u64 *input_addr = (u64 *)queue->user_io_addr;
u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] =
output_addr[CS_EXTRACT_LO / sizeof(u64)];
}
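/**
* program_cs_trace_cfg() - Program the cs_trace configuration for a queue.
*
* @stream: Pointer to the CSI firmware interface of the queue.
* @queue: Pointer to the GPU queue.
*
* Programs the CS_INSTR_* registers if cs_trace is enabled for the queue,
* otherwise leaves them in the disabled (zero) configuration. Nothing is
* programmed on firmware interfaces older than version 1.1.0.
*/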
static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream,
struct kbase_queue *queue)
{
struct kbase_device *kbdev = queue->kctx->kbdev;
u32 const glb_version = kbdev->csf.global_iface.version;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
/* If cs_trace_command not supported, nothing to program */
if (glb_version < kbase_csf_interface_version(1, 1, 0))
return;
/* Program for cs_trace if enabled. In the current arrangement, it is
* possible for the context to enable the cs_trace after some queues
* have been registered in cs_trace in disabled state. This is tracked by
* the queue's trace buffer base address, which had been validated at the
* queue's register_ex call.
*/
if (kbase_csf_scheduler_queue_has_trace(queue)) {
u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET(
queue->trace_cfg, queue->kctx->as_nr);
kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg);
kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE,
queue->trace_buffer_size);
kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO,
queue->trace_buffer_base & U32_MAX);
kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI,
queue->trace_buffer_base >> 32);
kbase_csf_firmware_cs_input(
stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO,
queue->trace_offset_ptr & U32_MAX);
kbase_csf_firmware_cs_input(
stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI,
queue->trace_offset_ptr >> 32);
} else {
/* Place the configuration to the disabled condition */
kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0);
kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, 0);
}
}
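/**
* program_cs() - Program a GPU queue on the CSI of its resident group.
*
* @kbdev: Pointer to the device
* @queue: Pointer to the GPU queue.
* @ring_csg_doorbell: Whether the CSG doorbell should be rung after
* programming the queue.
*
* Assigns a user doorbell to the queue, programs the ring buffer base & size,
* the user input/output pages, the cs_trace configuration and the interrupt
* masks, and then requests the START or STOP state depending on whether the
* queue is enabled.
*/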
static void program_cs(struct kbase_device *kbdev,
struct kbase_queue *queue, bool ring_csg_doorbell)
{
struct kbase_queue_group *group = queue->group;
struct kbase_csf_cmd_stream_group_info *ginfo;
struct kbase_csf_cmd_stream_info *stream;
int csi_index = queue->csi_index;
unsigned long flags;
u64 user_input;
u64 user_output;
if (WARN_ON(!group))
return;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
return;
ginfo = &kbdev->csf.global_iface.groups[group->csg_nr];
if (WARN_ON(csi_index < 0) ||
WARN_ON(csi_index >= ginfo->stream_num))
return;
assign_user_doorbell_to_queue(kbdev, queue);
if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
return;
WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
if (queue->enabled && queue_group_suspended_locked(group))
program_cs_extract_init(queue);
stream = &ginfo->streams[csi_index];
kbase_csf_firmware_cs_input(stream, CS_BASE_LO,
queue->base_addr & 0xFFFFFFFF);
kbase_csf_firmware_cs_input(stream, CS_BASE_HI,
queue->base_addr >> 32);
kbase_csf_firmware_cs_input(stream, CS_SIZE,
queue->size);
user_input = (queue->reg->start_pfn << PAGE_SHIFT);
kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO,
user_input & 0xFFFFFFFF);
kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI,
user_input >> 32);
user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT);
kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO,
user_output & 0xFFFFFFFF);
kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI,
user_output >> 32);
kbase_csf_firmware_cs_input(stream, CS_CONFIG,
(queue->doorbell_nr << 8) | (queue->priority & 0xF));
/* Program the queue's cs_trace configuration */
program_cs_trace_cfg(stream, queue);
/* Enable all interrupts for now */
kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0));
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
/* The fault bit could be misaligned between CS_REQ and CS_ACK if the
* acknowledgment was deferred due to dump on fault and the group was
* removed from the CSG slot before the fault could be acknowledged.
*/
if (queue->enabled) {
u32 const cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK);
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK);
}
/*
* Enable the CSG idle notification once the CS's ringbuffer
* becomes empty or the CS becomes sync_idle, waiting sync update
* or protected mode switch.
*/
kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK,
CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK);
/* Set state to START/STOP */
kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP,
CS_REQ_STATE_MASK);
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr,
ring_csg_doorbell);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled);
update_hw_active(queue, true);
}
int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
{
struct kbase_queue_group *group = queue->group;
struct kbase_device *kbdev = queue->kctx->kbdev;
bool const cs_enabled = queue->enabled;
int err = 0;
bool evicted = false;
kbase_reset_gpu_assert_prevented(kbdev);
lockdep_assert_held(&queue->kctx->csf.lock);
if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND))
return -EINVAL;
mutex_lock(&kbdev->csf.scheduler.lock);
#if IS_ENABLED(CONFIG_DEBUG_FS)
if (unlikely(kbdev->csf.scheduler.state == SCHED_BUSY)) {
mutex_unlock(&kbdev->csf.scheduler.lock);
return -EBUSY;
}
#endif
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue,
group->run_state);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue,
queue->status_wait);
if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) {
err = -EIO;
evicted = true;
} else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
&& CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) {
dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked",
queue->csi_index, group->handle);
} else {
err = scheduler_group_schedule(group);
if (!err) {
queue->enabled = true;
if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
if (cs_enabled) {
/* In normal situation, when a queue is
* already running, the queue update
* would be a doorbell kick on user
* side. However, if such a kick is
* shortly following a start or resume,
* the queue may actually be in transition,
* hence the said kick would enter the
* kernel as the hw_active flag is yet
* to be set. The scheduler needs to
* give a kick to the corresponding
* user doorbell in such a case.
*/
kbase_csf_ring_cs_user_doorbell(kbdev, queue);
} else
program_cs(kbdev, queue, true);
}
queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work,
msecs_to_jiffies(kbase_get_timeout_ms(
kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
}
}
mutex_unlock(&kbdev->csf.scheduler.lock);
if (evicted)
kbase_csf_term_descheduled_queue_group(group);
return err;
}
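/**
* update_csg_slot_status() - Refresh the driver's view of a CSG slot's state.
*
* @kbdev: Pointer to the device
* @slot: Index of the CSG slot.
*
* Reads the CSG_ACK state from the firmware interface and moves the slot from
* READY2RUN to RUNNING, or from DOWN2STOP to STOPPED, once the corresponding
* acknowledgment has been received.
*
* Return: the updated slot state.
*/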
static enum kbase_csf_csg_slot_state update_csg_slot_status(
struct kbase_device *kbdev, s8 slot)
{
struct kbase_csf_csg_slot *csg_slot =
&kbdev->csf.scheduler.csg_slots[slot];
struct kbase_csf_cmd_stream_group_info *ginfo =
&kbdev->csf.global_iface.groups[slot];
u32 state;
enum kbase_csf_csg_slot_state slot_state;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo,
CSG_ACK));
slot_state = atomic_read(&csg_slot->state);
switch (slot_state) {
case CSG_SLOT_READY2RUN:
if ((state == CSG_ACK_STATE_START) ||
(state == CSG_ACK_STATE_RESUME)) {
slot_state = CSG_SLOT_RUNNING;
atomic_set(&csg_slot->state, slot_state);
csg_slot->trigger_jiffies = jiffies;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_RUNNING, csg_slot->resident_group,
state);
dev_dbg(kbdev->dev, "Group %u running on slot %d\n",
csg_slot->resident_group->handle, slot);
}
break;
case CSG_SLOT_DOWN2STOP:
if ((state == CSG_ACK_STATE_SUSPEND) ||
(state == CSG_ACK_STATE_TERMINATE)) {
slot_state = CSG_SLOT_STOPPED;
atomic_set(&csg_slot->state, slot_state);
csg_slot->trigger_jiffies = jiffies;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state);
dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n",
csg_slot->resident_group->handle, slot);
}
break;
case CSG_SLOT_DOWN2STOP_TIMEDOUT:
case CSG_SLOT_READY2RUN_TIMEDOUT:
case CSG_SLOT_READY:
case CSG_SLOT_RUNNING:
case CSG_SLOT_STOPPED:
break;
default:
dev_warn(kbdev->dev, "Unknown CSG slot state %d", slot_state);
break;
}
return slot_state;
}
static bool csg_slot_running(struct kbase_device *kbdev, s8 slot)
{
lockdep_assert_held(&kbdev->csf.scheduler.lock);
return (update_csg_slot_status(kbdev, slot) == CSG_SLOT_RUNNING);
}
static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot)
{
enum kbase_csf_csg_slot_state slot_state;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
slot_state = update_csg_slot_status(kbdev, slot);
return (slot_state == CSG_SLOT_STOPPED ||
slot_state == CSG_SLOT_READY);
}
static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot)
{
struct kbase_csf_cmd_stream_group_info *ginfo =
&kbdev->csf.global_iface.groups[slot];
u32 state;
state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo,
CSG_ACK));
if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) {
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state);
dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot);
return true;
}
return false;
}
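/**
* halt_csg_slot() - Request suspension or termination of a resident group.
*
* @group: Pointer to the queue group to halt.
* @suspend: true to suspend the group, false to terminate it.
*
* If the slot is still transitioning to the running state, this function first
* waits for that transition to complete. If the slot is then running, it sends
* the SUSPEND or TERMINATE request to the firmware and marks the slot as
* DOWN2STOP.
*/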
static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
struct kbase_csf_csg_slot *csg_slot =
kbdev->csf.scheduler.csg_slots;
s8 slot;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
return;
slot = group->csg_nr;
/* When in transition, wait for it to complete */
if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) {
long remaining =
kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot);
remaining = wait_event_timeout(kbdev->csf.event_wait,
csg_slot_running(kbdev, slot), remaining);
if (!remaining)
dev_warn(kbdev->dev,
"[%llu] slot %d timeout (%d ms) on up-running\n",
kbase_backend_get_cycle_cnt(kbdev),
slot, kbdev->csf.fw_timeout_ms);
}
if (csg_slot_running(kbdev, slot)) {
unsigned long flags;
struct kbase_csf_cmd_stream_group_info *ginfo =
&global_iface->groups[slot];
u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
CSG_REQ_STATE_TERMINATE;
dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d",
suspend, group->handle, group->kctx->tgid, group->kctx->id, slot);
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
/* Set state to SUSPEND/TERMINATE */
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd,
CSG_REQ_STATE_MASK);
kbase_csf_ring_csg_doorbell(kbdev, slot);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
flags);
atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP);
csg_slot[slot].trigger_jiffies = jiffies;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot);
}
}
static void term_csg_slot(struct kbase_queue_group *group)
{
halt_csg_slot(group, false);
}
static void suspend_csg_slot(struct kbase_queue_group *group)
{
halt_csg_slot(group, true);
}
/**
* evaluate_sync_update() - Evaluate the sync wait condition the GPU command
* queue has been blocked on.
*
* @queue: Pointer to the GPU command queue
*
* Return: true if sync wait condition is satisfied.
*/
static bool evaluate_sync_update(struct kbase_queue *queue)
{
struct kbase_vmap_struct *mapping;
bool updated = false;
u32 *sync_ptr;
u32 sync_wait_cond;
u32 sync_current_val;
struct kbase_device *kbdev;
if (WARN_ON(!queue))
return false;
kbdev = queue->kctx->kbdev;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
&mapping);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_START, queue->group, queue,
queue->sync_ptr);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_BLOCKED_REASON, queue->group, queue,
queue->blocked_reason);
if (!sync_ptr) {
dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed",
queue->sync_ptr);
goto out;
}
sync_wait_cond =
CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait);
WARN_ON((sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
(sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE));
sync_current_val = READ_ONCE(*sync_ptr);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue,
sync_current_val);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_TEST_VAL, queue->group, queue,
queue->sync_value);
if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
(sync_current_val > queue->sync_value)) ||
((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) &&
(sync_current_val <= queue->sync_value))) {
/* The sync wait condition is satisfied, so the group to which
* the queue is bound can be re-scheduled.
*/
updated = true;
} else {
dev_dbg(queue->kctx->kbdev->dev,
"sync memory not updated yet(%u)", sync_current_val);
}
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
out:
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_END, queue->group, queue, updated);
return updated;
}
/**
* save_slot_cs() - Save the state for blocked GPU command queue.
*
* @ginfo: Pointer to the CSG interface used by the group
* the queue is bound to.
* @queue: Pointer to the GPU command queue.
*
* This function checks if the GPU command queue is blocked on a sync wait and
* evaluates the wait condition. If the wait condition isn't satisfied, it
* saves the state needed to reevaluate the condition in future.
* The group to which the queue is bound shall be in idle state.
*
* Return: true if the queue is blocked on a sync wait operation.
*/
static
bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
struct kbase_queue *queue)
{
struct kbase_csf_cmd_stream_info *const stream =
&ginfo->streams[queue->csi_index];
u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
bool is_waiting = false;
#if IS_ENABLED(CONFIG_DEBUG_FS)
u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO);
cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32;
queue->saved_cmd_ptr = cmd_ptr;
#endif
KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group,
queue, status);
if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) {
queue->status_wait = status;
queue->sync_ptr = kbase_csf_firmware_cs_output(stream,
CS_STATUS_WAIT_SYNC_POINTER_LO);
queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream,
CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
queue->sync_value = kbase_csf_firmware_cs_output(stream,
CS_STATUS_WAIT_SYNC_VALUE);
queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET(
kbase_csf_firmware_cs_output(stream,
CS_STATUS_SCOREBOARDS));
queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET(
kbase_csf_firmware_cs_output(stream,
CS_STATUS_BLOCKED_REASON));
if (!evaluate_sync_update(queue)) {
is_waiting = true;
} else {
/* Sync object already got updated & met the condition
* thus it doesn't need to be reevaluated and so can
* clear the 'status_wait' here.
*/
queue->status_wait = 0;
}
} else {
/* Invalidate the wait status info that would have been recorded
* if this queue was blocked when the group (in idle state) was
* suspended previously. The group could have since been unblocked
* due to the kicking of another queue bound to it, in which case
* the stale wait status info would have stuck with this queue.
*/
queue->status_wait = 0;
}
return is_waiting;
}
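/**
 * schedule_in_cycle() - Request an asynchronous scheduling action (tock) for
 * a group within the current scheduling cycle.
 *
 * @group: Pointer to the queue group for which the action is requested.
 * @force: true to invoke the tock even when the scheduler tick timer is
 *         disabled (e.g. when the schedule needs to be enforced for entering
 *         protected mode).
 */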
static void schedule_in_cycle(struct kbase_queue_group *group, bool force)
{
struct kbase_context *kctx = group->kctx;
struct kbase_device *kbdev = kctx->kbdev;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->lock);
/* Only try to schedule work for this event if no requests are
* pending (otherwise the function would end up canceling previous
* work requests), and only if the scheduler is configured to wake
* up periodically (or the scheduling of work needs to be enforced,
* such as when entering protected mode).
*/
if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) {
dev_dbg(kbdev->dev, "Kicking async for group %d\n",
group->handle);
kbase_csf_scheduler_invoke_tock(kbdev);
}
}
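/**
 * insert_group_to_runnable() - Add a queue group to the runnable list of its
 * context and account for it in the scheduler.
 *
 * @scheduler: Pointer to the CSF scheduler.
 * @group:     Pointer to the queue group becoming runnable.
 * @run_state: The run state to assign to the group.
 *
 * If this is the first runnable group of the context then the context is also
 * added to the scheduler's runnable_kctxs list. A scheduling tick or tock is
 * kicked as appropriate and the GPU is woken up if needed.
 */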
static
void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
struct kbase_queue_group *const group,
enum kbase_csf_group_state run_state)
{
struct kbase_context *const kctx = group->kctx;
struct kbase_device *const kbdev = kctx->kbdev;
lockdep_assert_held(&scheduler->lock);
WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
return;
group->run_state = run_state;
if (run_state == KBASE_CSF_GROUP_RUNNABLE)
group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
list_add_tail(&group->link,
&kctx->csf.sched.runnable_groups[group->priority]);
kctx->csf.sched.num_runnable_grps++;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_INSERT, group,
kctx->csf.sched.num_runnable_grps);
/* Add the kctx if not yet in runnable kctxs */
if (kctx->csf.sched.num_runnable_grps == 1) {
/* First runnable CSG, add the kctx to the runnable_kctxs list */
INIT_LIST_HEAD(&kctx->csf.link);
list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_INSERT, kctx, 0u);
}
scheduler->total_runnable_grps++;
if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
(scheduler->total_runnable_grps == 1 ||
scheduler->state == SCHED_SUSPENDED ||
scheduler->state == SCHED_SLEEPING)) {
dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n");
/* Fire a scheduling to start the time-slice */
enqueue_tick_work(kbdev);
} else
schedule_in_cycle(group, false);
/* Since a new group has become runnable, check if GPU needs to be
* powered up.
*/
scheduler_wakeup(kbdev, false);
}
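/**
 * remove_group_from_runnable() - Remove a queue group from the runnable list
 * of its context and update the scheduler state accordingly.
 *
 * @scheduler: Pointer to the CSF scheduler.
 * @group:     Pointer to the queue group being removed.
 * @run_state: The run state to assign to the group on removal.
 *
 * This also handles the group still being the active protected mode group
 * (by initiating a GPU reset), drops the context from the runnable_kctxs list
 * when its last runnable group goes away, and cancels the tick timer when no
 * runnable groups remain at all.
 */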
static
void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
struct kbase_queue_group *group,
enum kbase_csf_group_state run_state)
{
struct kbase_context *kctx = group->kctx;
struct kbase_queue_group *new_head_grp;
struct list_head *list =
&kctx->csf.sched.runnable_groups[group->priority];
unsigned long flags;
lockdep_assert_held(&scheduler->lock);
WARN_ON(!queue_group_scheduled_locked(group));
group->run_state = run_state;
list_del_init(&group->link);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
/* The below condition will be true when the group running in protected
* mode is being terminated but the protected mode exit interrupt wasn't
* received. This can happen if the FW got stuck during protected mode
* for some reason (like GPU page fault or some internal error).
* In normal cases FW is expected to send the protected mode exit
* interrupt before it handles the CSG termination request.
*/
if (unlikely(scheduler->active_protm_grp == group)) {
/* CSG slot cleanup should have happened for the pmode group */
WARN_ON(kbasep_csf_scheduler_group_is_on_slot_locked(group));
WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
/* Initiate a GPU reset, in case it wasn't initiated yet,
* in order to rectify the anomaly.
*/
if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kctx->kbdev);
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_PROTM_EXIT,
scheduler->active_protm_grp, 0u);
scheduler->active_protm_grp = NULL;
}
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
if (scheduler->top_grp == group) {
/*
* Note: this disables explicit rotation in the next scheduling
* cycle. However, removing the top_grp is the same as an
* implicit rotation (e.g. if we instead rotated the top_ctx
* and then removed the top_grp).
*
* This implicit rotation is assumed by the scheduler rotate
* functions.
*/
scheduler->top_grp = NULL;
/*
* Trigger a scheduling tock for a CSG containing protected
* content, in case there is any, in order to minimize
* latency.
*/
group = scheduler_get_protm_enter_async_group(kctx->kbdev,
NULL);
if (group)
schedule_in_cycle(group, true);
}
kctx->csf.sched.num_runnable_grps--;
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_REMOVE, group,
kctx->csf.sched.num_runnable_grps);
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
NULL;
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
if (kctx->csf.sched.num_runnable_grps == 0) {
struct kbase_context *new_head_kctx;
struct list_head *kctx_list = &scheduler->runnable_kctxs;
/* drop the kctx */
list_del_init(&kctx->csf.link);
if (scheduler->top_ctx == kctx)
scheduler->top_ctx = NULL;
KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_REMOVE, kctx, 0u);
new_head_kctx = (!list_empty(kctx_list)) ?
list_first_entry(kctx_list, struct kbase_context, csf.link) :
NULL;
KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u);
}
WARN_ON(scheduler->total_runnable_grps == 0);
scheduler->total_runnable_grps--;
if (!scheduler->total_runnable_grps) {
dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups");
cancel_tick_timer(kctx->kbdev);
WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
if (scheduler->state != SCHED_SUSPENDED)
enqueue_gpu_idle_work(scheduler);
}
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
scheduler->num_active_address_spaces |
(((u64)scheduler->total_runnable_grps) << 32));
}
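/**
 * insert_group_to_idle_wait() - Add an idle group that is blocked on a sync
 * wait to the idle_wait_groups list of its context.
 *
 * @group: Pointer to the queue group, which must be in the
 *         KBASE_CSF_GROUP_IDLE state on entry.
 */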
static void insert_group_to_idle_wait(struct kbase_queue_group *const group)
{
struct kbase_context *kctx = group->kctx;
lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
WARN_ON(group->run_state != KBASE_CSF_GROUP_IDLE);
list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups);
kctx->csf.sched.num_idle_wait_grps++;
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group,
kctx->csf.sched.num_idle_wait_grps);
group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC;
dev_dbg(kctx->kbdev->dev,
"Group-%d suspended on sync_wait, total wait_groups: %u\n",
group->handle, kctx->csf.sched.num_idle_wait_grps);
}
static void remove_group_from_idle_wait(struct kbase_queue_group *const group)
{
struct kbase_context *kctx = group->kctx;
struct list_head *list = &kctx->csf.sched.idle_wait_groups;
struct kbase_queue_group *new_head_grp;
lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
list_del_init(&group->link);
WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0);
kctx->csf.sched.num_idle_wait_grps--;
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_REMOVE, group,
kctx->csf.sched.num_idle_wait_grps);
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
NULL;
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u);
group->run_state = KBASE_CSF_GROUP_INACTIVE;
}
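/**
 * deschedule_idle_wait_group() - Move an idle group blocked on a sync wait
 * from the runnable list of its context to the idle_wait_groups list.
 *
 * @scheduler: Pointer to the CSF scheduler.
 * @group:     Pointer to the queue group to be descheduled.
 */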
static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler,
struct kbase_queue_group *group)
{
lockdep_assert_held(&scheduler->lock);
if (WARN_ON(!group))
return;
remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_IDLE);
insert_group_to_idle_wait(group);
}
static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->lock);
if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
int new_val =
atomic_dec_return(&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
}
}
static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->lock);
WARN_ON(group->csg_nr < 0);
if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
int new_val =
atomic_dec_return(&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
}
}
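/**
 * update_offslot_non_idle_cnt_on_grp_suspend() - Adjust the scheduler's count
 * of non-idle off-slot groups when a group is suspended.
 *
 * @group: Pointer to the queue group that has just been suspended.
 *
 * The adjustment depends on whether the suspension happened during the active
 * phase of a scheduling action (SCHED_BUSY) or asynchronously, and on whether
 * the group was tagged as on-slot idle at the last scanout.
 */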
static void update_offslot_non_idle_cnt_on_grp_suspend(
struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->lock);
if (scheduler->state == SCHED_BUSY) {
/* active phase or asynchronously entering protected mode */
if (group->prepared_seq_num >=
scheduler->non_idle_scanout_grps) {
/* At scanout, it was tagged as on-slot idle */
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
int new_val = atomic_inc_return(
&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC,
group, new_val);
}
} else {
if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) {
int new_val = atomic_dec_return(
&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC,
group, new_val);
}
}
} else {
/* async phases */
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
int new_val = atomic_inc_return(
&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
new_val);
}
}
}
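/**
 * confirm_cmd_buf_empty() - Check if the ring buffer of a GPU queue is empty
 * and, where reported by the firmware interface, that no scoreboard entries
 * are in use.
 *
 * @queue: Pointer to the GPU command queue.
 *
 * Return: true if the queue can be considered idle.
 */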
static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
{
bool cs_empty;
bool cs_idle;
u32 sb_status = 0;
struct kbase_device const *const kbdev = queue->group->kctx->kbdev;
struct kbase_csf_global_iface const *const iface =
&kbdev->csf.global_iface;
u32 glb_version = iface->version;
u64 const *input_addr = (u64 const *)queue->user_io_addr;
u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
/* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[queue->group->csg_nr];
struct kbase_csf_cmd_stream_info const *const stream =
&ginfo->streams[queue->csi_index];
sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET(
kbase_csf_firmware_cs_output(stream,
CS_STATUS_SCOREBOARDS));
}
cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] ==
output_addr[CS_EXTRACT_LO / sizeof(u64)]);
cs_idle = cs_empty && (!sb_status);
return cs_idle;
}
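/**
 * detach_from_sched_reclaim_mgr() - Remove a context from the scheduler's
 * tiler heap reclaim manager.
 *
 * @kctx: Pointer to the kbase context.
 *
 * The context's estimated and scan page counts are subtracted from the
 * reclaim manager's totals and its heap reclaim bookkeeping is cleared.
 */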
static void detach_from_sched_reclaim_mgr(struct kbase_context *kctx)
{
struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info;
lockdep_assert_held(&scheduler->lock);
if (!list_empty(&heap_info->mgr_link)) {
WARN_ON(!heap_info->flags);
list_del_init(&heap_info->mgr_link);
if (heap_info->flags & CSF_CTX_RECLAIM_CANDI_FLAG)
WARN_ON(atomic_sub_return(heap_info->nr_est_pages,
&scheduler->reclaim_mgr.est_cand_pages) < 0);
if (heap_info->flags & CSF_CTX_RECLAIM_SCAN_FLAG)
WARN_ON(atomic_sub_return(heap_info->nr_scan_pages,
&scheduler->reclaim_mgr.mgr_scan_pages) < 0);
dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_detach: ctx_%d_%d, flags = 0x%x\n",
kctx->tgid, kctx->id, heap_info->flags);
/* Clear on detaching */
heap_info->nr_est_pages = 0;
heap_info->nr_scan_pages = 0;
heap_info->flags = 0;
}
}
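/**
 * attach_to_sched_reclaim_mgr() - Add a context to the candidate list of the
 * scheduler's tiler heap reclaim manager.
 *
 * @kctx: Pointer to the kbase context.
 *
 * The context's current tiler heap page estimate is snapshotted and added to
 * the reclaim manager's estimated candidate pages.
 */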
static void attach_to_sched_reclaim_mgr(struct kbase_context *kctx)
{
struct kbase_kctx_heap_info *const heap_info = &kctx->csf.sched.heap_info;
struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->lock);
if (WARN_ON(!list_empty(&heap_info->mgr_link)))
list_del_init(&heap_info->mgr_link);
list_add_tail(&heap_info->mgr_link, &scheduler->reclaim_mgr.candidate_ctxs);
/* Read the kctx's estimated count of tiler heap pages; this decouples it
* from subsequent updates/changes on the kctx's tiler heap side. The value
* remains static for as long as this kctx stays on the reclaim manager's
* candidate_ctxs list.
*/
heap_info->nr_est_pages = (u32)atomic_read(&kctx->csf.tiler_heaps.est_count_pages);
atomic_add(heap_info->nr_est_pages, &scheduler->reclaim_mgr.est_cand_pages);
heap_info->attach_jiffies = jiffies;
heap_info->flags = CSF_CTX_RECLAIM_CANDI_FLAG;
dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_attach: ctx_%d_%d, est_count_pages = %u\n",
kctx->tgid, kctx->id, heap_info->nr_est_pages);
}
static void update_kctx_heap_info_on_grp_on_slot(struct kbase_queue_group *group)
{
struct kbase_context *kctx = group->kctx;
struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info;
lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
heap_info->on_slot_grps++;
/* If the kctx transitioned on-slot CSGs: 0 => 1, detach the kctx scheduler->reclaim_mgr */
if (heap_info->on_slot_grps == 1) {
dev_dbg(kctx->kbdev->dev,
"CSG_%d_%d_%d on-slot, remove kctx from reclaim manager\n",
group->kctx->tgid, group->kctx->id, group->handle);
detach_from_sched_reclaim_mgr(kctx);
}
}
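/**
 * update_kctx_heap_info_on_grp_evict() - Update the heap reclaim bookkeeping
 * of a context when one of its groups is evicted from the scheduler.
 *
 * @group: Pointer to the queue group being evicted.
 *
 * The context's on-slot CSG count is recounted from the slot bitmap. If no
 * CSGs remain on-slot, the context is either attached to the reclaim manager
 * (if it still has operational groups) or detached from it (if it has become
 * a zombie).
 */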
static void update_kctx_heap_info_on_grp_evict(struct kbase_queue_group *group)
{
struct kbase_context *kctx = group->kctx;
struct kbase_kctx_heap_info *const heap_info = &kctx->csf.sched.heap_info;
struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
const u32 num_groups = kctx->kbdev->csf.global_iface.group_num;
u32 on_slot_grps = 0;
u32 i;
lockdep_assert_held(&scheduler->lock);
/* Group eviction from the scheduler is a bit more complex, but fairly
* infrequent in operation. Take the opportunity to actually count the
* on-slot CSGs of the given kctx, for robustness and clearer code logic.
*/
for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) {
struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
struct kbase_queue_group *grp = csg_slot->resident_group;
if (unlikely(!grp))
continue;
if (grp->kctx == kctx)
on_slot_grps++;
}
heap_info->on_slot_grps = on_slot_grps;
/* If the kctx has no other CSGs on-slot, handle the heap reclaim related actions */
if (!heap_info->on_slot_grps) {
if (kctx->csf.sched.num_runnable_grps || kctx->csf.sched.num_idle_wait_grps) {
/* The kctx has other operational CSGs, attach it if not yet done */
if (list_empty(&heap_info->mgr_link)) {
dev_dbg(kctx->kbdev->dev,
"CSG_%d_%d_%d evict, add kctx to reclaim manager\n",
group->kctx->tgid, group->kctx->id, group->handle);
attach_to_sched_reclaim_mgr(kctx);
}
} else {
/* The kctx is a zombie after the group eviction, drop it out */
dev_dbg(kctx->kbdev->dev,
"CSG_%d_%d_%d evict leading to zombie kctx, dettach from reclaim manager\n",
group->kctx->tgid, group->kctx->id, group->handle);
detach_from_sched_reclaim_mgr(kctx);
}
}
}
static void update_kctx_heap_info_on_grp_suspend(struct kbase_queue_group *group)
{
struct kbase_context *kctx = group->kctx;
struct kbase_kctx_heap_info *heap_info = &kctx->csf.sched.heap_info;
lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
if (!WARN_ON(heap_info->on_slot_grps == 0))
heap_info->on_slot_grps--;
/* If the kctx has no CSGs on-slot, attach it to scheduler's reclaim manager */
if (heap_info->on_slot_grps == 0) {
dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d off-slot, add kctx to reclaim manager\n",
group->kctx->tgid, group->kctx->id, group->handle);
attach_to_sched_reclaim_mgr(kctx);
}
}
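/**
 * save_csg_slot() - Save the state of a CSG after it has been suspended or
 * terminated on its slot.
 *
 * @group: Pointer to the queue group whose slot state is to be saved.
 *
 * For a CSG reported as idle, each enabled queue is inspected: if any queue is
 * blocked on a sync wait the group is moved to the idle_wait_groups list of
 * its context, otherwise the group is marked as suspended-on-idle provided the
 * ring buffers of its queues are confirmed empty. A non-idle CSG is simply
 * marked as suspended.
 */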
static void save_csg_slot(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
struct kbase_csf_cmd_stream_group_info *ginfo;
u32 state;
lockdep_assert_held(&scheduler->lock);
if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
return;
ginfo = &kbdev->csf.global_iface.groups[group->csg_nr];
state =
CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK));
if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) &&
(state != CSG_ACK_STATE_TERMINATE))) {
u32 max_streams = ginfo->stream_num;
u32 i;
bool sync_wait = false;
bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
CSG_STATUS_STATE_IDLE_MASK;
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
for (i = 0; i < max_streams; i++)
update_hw_active(group->bound_queues[i], false);
#endif /* CONFIG_MALI_NO_MALI */
for (i = 0; idle && i < max_streams; i++) {
struct kbase_queue *const queue =
group->bound_queues[i];
if (!queue || !queue->enabled)
continue;
if (save_slot_cs(ginfo, queue))
sync_wait = true;
else {
/* Need to confirm if ringbuffer of the GPU
* queue is empty or not. A race can arise
* between the flush of GPU queue and suspend
* of CSG. If a queue is flushed after FW has
* set the IDLE bit in CSG_STATUS_STATE, then
* Scheduler will incorrectly consider CSG
* as idle. And there may not be any further
* flush call for the GPU queue, which would
* have de-idled the CSG.
*/
idle = confirm_cmd_buf_empty(queue);
}
}
if (idle) {
/* Take the suspended group out of the runnable_groups
* list of the context and move it to the
* idle_wait_groups list.
*/
if (sync_wait)
deschedule_idle_wait_group(scheduler, group);
else {
group->run_state =
KBASE_CSF_GROUP_SUSPENDED_ON_IDLE;
dev_dbg(kbdev->dev, "Group-%d suspended: idle",
group->handle);
}
} else {
group->run_state = KBASE_CSF_GROUP_SUSPENDED;
}
update_offslot_non_idle_cnt_on_grp_suspend(group);
update_kctx_heap_info_on_grp_suspend(group);
}
}
/* Clean up a CSG slot after it has been vacated, making it ready for the next
* CSG run. Returns whether there is a kctx address fault associated with the
* group for which the clean-up is done.
*/
static bool cleanup_csg_slot(struct kbase_queue_group *group)
{
struct kbase_context *kctx = group->kctx;
struct kbase_device *kbdev = kctx->kbdev;
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
struct kbase_csf_cmd_stream_group_info *ginfo;
s8 slot;
struct kbase_csf_csg_slot *csg_slot;
unsigned long flags;
u32 i;
bool as_fault = false;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
return as_fault;
slot = group->csg_nr;
csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
ginfo = &global_iface->groups[slot];
/* Now loop through all the bound CSs, and clean them via a stop */
for (i = 0; i < ginfo->stream_num; i++) {
struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[i];
if (group->bound_queues[i]) {
if (group->bound_queues[i]->enabled) {
kbase_csf_firmware_cs_input_mask(stream,
CS_REQ, CS_REQ_STATE_STOP,
CS_REQ_STATE_MASK);
}
unassign_user_doorbell_from_queue(kbdev,
group->bound_queues[i]);
}
}
unassign_user_doorbell_from_group(kbdev, group);
/* The CSG does not need cleanup other than dropping its AS */
spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT);
kbase_ctx_sched_release_ctx(kctx);
if (unlikely(group->faulted))
as_fault = true;
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
/* Now mark the slot as vacant */
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL;
clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
kbdev->csf.scheduler.csg_slots_idle_mask[0]);
group->csg_nr = KBASEP_CSG_NR_INVALID;
set_bit(slot, kbdev->csf.scheduler.csgs_events_enable_mask);
clear_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
csg_slot->trigger_jiffies = jiffies;
atomic_set(&csg_slot->state, CSG_SLOT_READY);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, slot);
dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n",
group->handle, slot);
KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev,
kbdev->gpu_props.props.raw_props.gpu_id, slot);
return as_fault;
}
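/**
 * update_csg_slot_priority() - Update the dynamic priority of a CSG resident
 * on a slot.
 *
 * @group: Pointer to the on-slot queue group.
 * @prio:  The new dynamic priority to program for the CSG slot.
 *
 * Unless the slot already has the requested priority, the new value is written
 * to the CSG endpoint configuration and an EP_CFG request is raised to the
 * firmware.
 */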
static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_csg_slot *csg_slot;
struct kbase_csf_cmd_stream_group_info *ginfo;
s8 slot;
u8 prev_prio;
u32 ep_cfg;
u32 csg_req;
unsigned long flags;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
return;
slot = group->csg_nr;
csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
ginfo = &kbdev->csf.global_iface.groups[slot];
/* CSGs remaining on-slot can be either idle or runnable.
* This also applies in protected mode.
*/
WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) ||
(group->run_state == KBASE_CSF_GROUP_IDLE)));
/* Update consumes a group from scanout */
update_offslot_non_idle_cnt_for_onslot_grp(group);
if (csg_slot->priority == prio)
return;
/* Read the csg_ep_cfg back for updating the priority field */
ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ);
prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg);
ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio);
kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg);
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
csg_req ^= CSG_REQ_EP_CFG_MASK;
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
CSG_REQ_EP_CFG_MASK);
kbase_csf_ring_csg_doorbell(kbdev, slot);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
csg_slot->priority = prio;
dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n",
group->handle, group->kctx->tgid, group->kctx->id, slot,
prev_prio, prio);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio);
set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update);
}
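/**
 * program_csg_slot() - Program a queue group onto a vacant CSG slot.
 *
 * @group: Pointer to the queue group to be programmed.
 * @slot:  The CSG slot to program the group onto.
 * @prio:  The dynamic priority to assign to the CSG slot.
 *
 * An address space is retained for the group's context, the bound queues are
 * programmed, the endpoint configuration and suspend buffers are set up, and
 * a START or RESUME request is raised to the firmware for the slot.
 */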
static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
u8 prio)
{
struct kbase_context *kctx = group->kctx;
struct kbase_device *kbdev = kctx->kbdev;
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
const u64 shader_core_mask =
kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER);
const u64 tiler_core_mask =
kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER);
const u64 compute_mask = shader_core_mask & group->compute_mask;
const u64 fragment_mask = shader_core_mask & group->fragment_mask;
const u64 tiler_mask = tiler_core_mask & group->tiler_mask;
const u8 num_cores = kbdev->gpu_props.num_cores;
const u8 compute_max = min(num_cores, group->compute_max);
const u8 fragment_max = min(num_cores, group->fragment_max);
const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max);
struct kbase_csf_cmd_stream_group_info *ginfo;
u32 ep_cfg = 0;
u32 csg_req;
u32 state;
int i;
unsigned long flags;
const u64 normal_suspend_buf =
group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT;
struct kbase_csf_csg_slot *csg_slot =
&kbdev->csf.scheduler.csg_slots[slot];
lockdep_assert_held(&kbdev->csf.scheduler.lock);
if (WARN_ON(slot < 0) &&
WARN_ON(slot >= global_iface->group_num))
return;
WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY);
ginfo = &global_iface->groups[slot];
/* Pick an available address space for this context */
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_ctx_sched_retain_ctx(kctx);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
if (kctx->as_nr == KBASEP_AS_NR_INVALID) {
dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
group->handle, kctx->tgid, kctx->id, slot);
return;
}
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap);
kbdev->csf.scheduler.csg_slots[slot].resident_group = group;
group->csg_nr = slot;
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
assign_user_doorbell_to_group(kbdev, group);
/* Now loop through all the bound & kicked CSs, and program them */
for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
struct kbase_queue *queue = group->bound_queues[i];
if (queue)
program_cs(kbdev, queue, false);
}
/* Endpoint programming for CSG */
kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO,
compute_mask & U32_MAX);
kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI,
compute_mask >> 32);
kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO,
fragment_mask & U32_MAX);
kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI,
fragment_mask >> 32);
kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER,
tiler_mask & U32_MAX);
/* Register group UID with firmware */
kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG,
group->group_uid);
ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max);
ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max);
ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max);
ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio);
kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg);
/* Program the address space number assigned to the context */
kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr);
kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO,
normal_suspend_buf & U32_MAX);
kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI,
normal_suspend_buf >> 32);
if (group->protected_suspend_buf.reg) {
const u64 protm_suspend_buf =
group->protected_suspend_buf.reg->start_pfn <<
PAGE_SHIFT;
kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO,
protm_suspend_buf & U32_MAX);
kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI,
protm_suspend_buf >> 32);
}
/* Enable all interrupts for now */
kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0));
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
csg_req ^= CSG_REQ_EP_CFG_MASK;
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
CSG_REQ_EP_CFG_MASK);
/* Set state to START/RESUME */
if (queue_group_suspended_locked(group)) {
state = CSG_REQ_STATE_RESUME;
} else {
WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE);
state = CSG_REQ_STATE_START;
}
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
state, CSG_REQ_STATE_MASK);
kbase_csf_ring_csg_doorbell(kbdev, slot);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
/* Update status before rings the door-bell, marking ready => run */
atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN);
csg_slot->trigger_jiffies = jiffies;
csg_slot->priority = prio;
/* Trace the programming of the CSG on the slot */
KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id,
group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0);
dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n",
group->handle, kctx->tgid, kctx->id, slot, prio);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START_REQ, group,
(((u64)ep_cfg) << 32) | ((((u32)kctx->as_nr) & 0xF) << 16) |
(state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
/* Update the heap reclaim manager */
update_kctx_heap_info_on_grp_on_slot(group);
/* Programming a slot consumes a group from scanout */
update_offslot_non_idle_cnt_for_onslot_grp(group);
}
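/**
 * remove_scheduled_group() - Remove a group from the list of groups prepared
 * for scheduling in the current tick/tock.
 *
 * @kbdev: Pointer to the GPU device.
 * @group: Pointer to the queue group to be removed.
 */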
static void remove_scheduled_group(struct kbase_device *kbdev,
struct kbase_queue_group *group)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->lock);
WARN_ON(group->prepared_seq_num ==
KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID);
WARN_ON(list_empty(&group->link_to_schedule));
list_del_init(&group->link_to_schedule);
scheduler->ngrp_to_schedule--;
group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
group->kctx->csf.sched.ngrp_to_schedule--;
}
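/**
 * sched_evict_group() - Evict a queue group from the scheduler.
 *
 * @group: Pointer to the queue group to be evicted.
 * @fault: true if the eviction is due to a fault, in which case the group is
 *         left in the KBASE_CSF_GROUP_FAULT_EVICTED state.
 * @update_non_idle_offslot_grps_cnt: true if the scheduler's count of non-idle
 *                                    off-slot groups may need to be
 *                                    decremented for this group.
 *
 * All the queues bound to the group are disabled and the group is removed from
 * either the idle_wait_groups list or the runnable list of its context.
 */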
static void sched_evict_group(struct kbase_queue_group *group, bool fault,
bool update_non_idle_offslot_grps_cnt)
{
struct kbase_context *kctx = group->kctx;
struct kbase_device *kbdev = kctx->kbdev;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
if (queue_group_scheduled_locked(group)) {
u32 i;
if (update_non_idle_offslot_grps_cnt &&
(group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
group->run_state == KBASE_CSF_GROUP_RUNNABLE)) {
int new_val = atomic_dec_return(
&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group,
new_val);
}
for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
if (group->bound_queues[i])
group->bound_queues[i]->enabled = false;
}
if (group->prepared_seq_num !=
KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID)
remove_scheduled_group(kbdev, group);
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
remove_group_from_idle_wait(group);
else {
remove_group_from_runnable(scheduler, group,
KBASE_CSF_GROUP_INACTIVE);
}
WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
if (fault)
group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group,
(((u64)scheduler->total_runnable_grps) << 32) |
((u32)group->run_state));
dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n",
group->handle, scheduler->total_runnable_grps);
/* Notify a group has been evicted */
wake_up_all(&kbdev->csf.event_wait);
}
update_kctx_heap_info_on_grp_evict(group);
}
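/**
 * term_group_sync() - Terminate the CSG slot of a group and wait for the
 * termination to complete.
 *
 * @group: Pointer to the queue group being terminated.
 *
 * Return: 0 on success, or -ETIMEDOUT if the firmware did not acknowledge the
 *         termination within the firmware timeout (in which case a GPU reset
 *         is initiated).
 */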
static int term_group_sync(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
int err = 0;
term_csg_slot(group);
remaining = wait_event_timeout(kbdev->csf.event_wait,
group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr),
remaining);
if (unlikely(!remaining)) {
enum dumpfault_error_type error_type = DF_CSG_TERMINATE_TIMEOUT;
dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d",
kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
group->handle, group->kctx->tgid,
group->kctx->id, group->csg_nr);
if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
error_type = DF_PING_REQUEST_TIMEOUT;
kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
err = -ETIMEDOUT;
}
return err;
}
void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
bool wait_for_termination = true;
bool on_slot;
kbase_reset_gpu_assert_failed_or_prevented(kbdev);
lockdep_assert_held(&group->kctx->csf.lock);
mutex_lock(&scheduler->lock);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state);
wait_for_dump_complete_on_group_deschedule(group);
if (!queue_group_scheduled_locked(group))
goto unlock;
on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
#ifdef KBASE_PM_RUNTIME
/* If the queue group is on slot and Scheduler is in SLEEPING state,
* then we need to wake up the Scheduler to exit the sleep state rather
* than waiting for the runtime suspend or power down of GPU.
* The group termination is usually triggered in the context of an application
* thread, and it has been seen that certain apps can destroy groups at
* random points and not necessarily when the App is exiting.
*/
if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
scheduler_wakeup(kbdev, true);
/* Wait for MCU firmware to start running */
if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
dev_warn(
kbdev->dev,
"[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
kbase_backend_get_cycle_cnt(kbdev),
group->handle, group->kctx->tgid,
group->kctx->id, group->csg_nr);
/* No point in waiting for CSG termination if MCU didn't
* become active.
*/
wait_for_termination = false;
}
}
#endif
if (!on_slot) {
sched_evict_group(group, false, true);
} else {
bool as_faulty;
if (likely(wait_for_termination))
term_group_sync(group);
else
term_csg_slot(group);
/* Treat the CSG as terminated */
as_faulty = cleanup_csg_slot(group);
/* remove from the scheduler list */
sched_evict_group(group, as_faulty, false);
}
WARN_ON(queue_group_scheduled_locked(group));
unlock:
mutex_unlock(&scheduler->lock);
}
/**
* scheduler_group_schedule() - Schedule a GPU command queue group on firmware
*
* @group: Pointer to the queue group to be scheduled.
*
* This function enables the scheduling of the GPU command queue group on the
* firmware.
*
* Return: 0 on success, or negative on failure.
*/
static int scheduler_group_schedule(struct kbase_queue_group *group)
{
struct kbase_context *kctx = group->kctx;
struct kbase_device *kbdev = kctx->kbdev;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&kctx->csf.lock);
lockdep_assert_held(&scheduler->lock);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state);
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
update_idle_suspended_group_state(group);
else if (queue_group_idle_locked(group)) {
WARN_ON(kctx->csf.sched.num_runnable_grps == 0);
WARN_ON(kbdev->csf.scheduler.total_runnable_grps == 0);
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE)
update_idle_suspended_group_state(group);
else {
struct kbase_queue_group *protm_grp;
unsigned long flags;
WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(
group));
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
/* A normal mode CSG could be idle onslot during
* protected mode. In this case clear the
* appropriate bit in csg_slots_idle_mask.
*/
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
protm_grp = scheduler->active_protm_grp;
if (protm_grp && protm_grp != group) {
clear_bit((unsigned int)group->csg_nr,
scheduler->csg_slots_idle_mask);
/* Request the update to confirm the condition inferred. */
group->reevaluate_idle_status = true;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
scheduler->csg_slots_idle_mask[0]);
}
spin_unlock_irqrestore(&scheduler->interrupt_lock,
flags);
/* If GPU is in protected mode then any doorbells rang
* would have no effect. Check if GPU is in protected
* mode and if this group has higher priority than the
* active protected mode group. If so prompt the FW
* to exit protected mode.
*/
if (protm_grp &&
group->scan_seq_num < protm_grp->scan_seq_num) {
/* Prompt the FW to exit protected mode */
scheduler_force_protm_exit(kbdev);
}
}
} els