// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
/*
* CSF GPU HWC backend firmware interface APIs.
*/
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <device/mali_kbase_device.h>
#include "mali_kbase_hwcnt_gpu.h"
#include "mali_kbase_hwcnt_types.h"
#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
#include "csf/mali_kbase_csf_firmware.h"
#include "mali_kbase_hwcnt_backend_csf_if_fw.h"
#include "mali_kbase_hwaccess_time.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <linux/log2.h>
#include "mali_kbase_ccswe.h"
/** The number of nanoseconds in a second. */
#define NSECS_IN_SEC 1000000000ull /* ns */
/* Ring buffer virtual address starts at 4GB */
#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)
/**
* struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface
* used to save the manual and
* auto HWC samples from
* firmware.
* @gpu_dump_base: Starting GPU base address of the ring buffer.
* @cpu_dump_base: Starting CPU address for the mapping.
* @buf_count: Buffer count in the ring buffer, MUST be power of 2.
* @as_nr: Address space number for the memory mapping.
* @phys: Physical memory allocation used by the mapping.
* @num_pages: Size of the mapping, in memory pages.
*/
struct kbase_hwcnt_backend_csf_if_fw_ring_buf {
u64 gpu_dump_base;
void *cpu_dump_base;
size_t buf_count;
u32 as_nr;
struct tagged_addr *phys;
size_t num_pages;
};
/**
* struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF
* interface, used to communicate
* with firmware.
* @kbdev: KBase device.
* @buf_bytes: The size in bytes for each buffer in the ring buffer.
* @clk_cnt: The number of clock domains in the system.
* The maximum is 64.
* @clk_enable_map: Bitmask of enabled clocks
* @rate_listener: Clock rate listener callback state.
* @ccswe_shader_cores: Shader cores cycle count software estimator.
*/
struct kbase_hwcnt_backend_csf_if_fw_ctx {
struct kbase_device *kbdev;
size_t buf_bytes;
u8 clk_cnt;
u64 clk_enable_map;
struct kbase_clk_rate_listener rate_listener;
struct kbase_ccswe ccswe_shader_cores;
};
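
/**
 * kbasep_hwcnt_backend_csf_if_fw_assert_lock_held() - Assert that the CSF
 *                                                     scheduler spinlock is
 *                                                     held.
 * @ctx: Non-NULL pointer to a CSF firmware interface context.
 */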
static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
WARN_ON(!ctx);
fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
kbdev = fw_ctx->kbdev;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
}
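
/**
 * kbasep_hwcnt_backend_csf_if_fw_lock() - Acquire the CSF scheduler spinlock.
 * @ctx:   Non-NULL pointer to a CSF firmware interface context.
 * @flags: Non-NULL pointer used to store the IRQ flags for the matching
 *         unlock call.
 */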
static void
kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
unsigned long *flags)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
WARN_ON(!ctx);
fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
kbdev = fw_ctx->kbdev;
kbase_csf_scheduler_spin_lock(kbdev, flags);
}
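
/**
 * kbasep_hwcnt_backend_csf_if_fw_unlock() - Release the CSF scheduler
 *                                           spinlock.
 * @ctx:   Non-NULL pointer to a CSF firmware interface context.
 * @flags: IRQ flags returned by the matching lock call.
 */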
static void kbasep_hwcnt_backend_csf_if_fw_unlock(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
WARN_ON(!ctx);
fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
kbdev = fw_ctx->kbdev;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
/**
 * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - Clock frequency change
 *                                                   callback.
 *
 * @rate_listener: Callback state.
 * @clk_index:     Index of the clock domain whose frequency changed.
 * @clk_rate_hz:   New clock frequency, in Hz.
*/
static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change(
struct kbase_clk_rate_listener *rate_listener, u32 clk_index,
u32 clk_rate_hz)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
container_of(rate_listener,
struct kbase_hwcnt_backend_csf_if_fw_ctx,
rate_listener);
u64 timestamp_ns;
if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
return;
timestamp_ns = ktime_get_raw_ns();
kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns,
clk_rate_hz);
}
/**
* kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking
*
* @fw_ctx: Non-NULL pointer to CSF firmware interface context.
 * @clk_enable_map: Enable map specifying the clock domains for which cycle
 *                  counting is enabled.
*/
static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, u64 clk_enable_map)
{
struct kbase_device *kbdev = fw_ctx->kbdev;
if (kbase_hwcnt_clk_enable_map_enabled(
clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
/* software estimation for non-top clock domains */
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
const struct kbase_clk_data *clk_data =
rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
u32 cur_freq;
unsigned long flags;
u64 timestamp_ns;
timestamp_ns = ktime_get_raw_ns();
spin_lock_irqsave(&rtm->lock, flags);
cur_freq = (u32)clk_data->clock_val;
kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores);
kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores,
timestamp_ns, cur_freq);
kbase_clk_rate_trace_manager_subscribe_no_lock(
rtm, &fw_ctx->rate_listener);
spin_unlock_irqrestore(&rtm->lock, flags);
}
fw_ctx->clk_enable_map = clk_enable_map;
}
/**
* kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking
*
* @fw_ctx: Non-NULL pointer to CSF firmware interface context.
*/
static void kbasep_hwcnt_backend_csf_if_fw_cc_disable(
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
struct kbase_device *kbdev = fw_ctx->kbdev;
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
u64 clk_enable_map = fw_ctx->clk_enable_map;
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map,
KBASE_CLOCK_DOMAIN_SHADER_CORES))
kbase_clk_rate_trace_manager_unsubscribe(
rtm, &fw_ctx->rate_listener);
}
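
/**
 * kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info() - Get performance counter
 *                                                    information from the
 *                                                    firmware interface.
 * @ctx:         Non-NULL pointer to a CSF firmware interface context.
 * @prfcnt_info: Non-NULL pointer to where the performance counter information
 *               is stored on return.
 */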
static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
u32 prfcnt_size;
u32 prfcnt_hw_size = 0;
u32 prfcnt_fw_size = 0;
u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
KBASE_HWCNT_VALUE_BYTES;
WARN_ON(!ctx);
WARN_ON(!prfcnt_info);
fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
kbdev = fw_ctx->kbdev;
prfcnt_size = kbdev->csf.global_iface.prfcnt_size;
prfcnt_hw_size = (prfcnt_size & 0xFF) << 8;
prfcnt_fw_size = (prfcnt_size >> 16) << 8;
fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;
prfcnt_info->dump_bytes = fw_ctx->buf_bytes;
prfcnt_info->prfcnt_block_size = prfcnt_block_size;
prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices;
prfcnt_info->core_mask =
kbdev->gpu_props.props.coherency_info.group[0].core_mask;
prfcnt_info->clk_cnt = fw_ctx->clk_cnt;
prfcnt_info->clearing_samples = true;
/* Block size must be multiple of counter size. */
WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_BYTES) !=
0);
/* Total size must be multiple of block size. */
WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) !=
0);
}
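
/**
 * kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc() - Allocate and map a ring
 *                                                   buffer for HWC samples.
 * @ctx:           Non-NULL pointer to a CSF firmware interface context.
 * @buf_count:     Number of sample buffers in the ring buffer, MUST be a
 *                 power of 2.
 * @cpu_dump_base: Non-NULL pointer to where the CPU mapping of the ring
 *                 buffer is stored on success.
 * @out_ring_buf:  Non-NULL pointer to where the created ring buffer object
 *                 is stored on success.
 *
 * Return: 0 on success, else error code.
 */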
static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count,
void **cpu_dump_base,
struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf)
{
struct kbase_device *kbdev;
struct tagged_addr *phys;
struct page **page_list;
void *cpu_addr;
int ret;
int i;
size_t num_pages;
u64 flags;
struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf;
pgprot_t cpu_map_prot = PAGE_KERNEL;
u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
WARN_ON(!ctx);
WARN_ON(!cpu_dump_base);
WARN_ON(!out_ring_buf);
kbdev = fw_ctx->kbdev;
/* The buffer count must be a power of 2 */
if (!is_power_of_2(buf_count))
return -EINVAL;
/* The GPU VA of the ring buffer must be 2KB aligned */
if (gpu_va_base & (2048 - 1))
return -EINVAL;
fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL);
if (!fw_ring_buf)
return -ENOMEM;
num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count);
phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
if (!phys)
goto phys_alloc_error;
page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL);
if (!page_list)
goto page_list_alloc_error;
/* Get physical pages for the ring buffer */
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false);
if (ret != num_pages)
goto phys_mem_pool_alloc_error;
/* Get the CPU virtual address */
for (i = 0; i < num_pages; i++)
page_list[i] = as_page(phys[i]);
cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot);
if (!cpu_addr)
goto vmap_error;
flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX |
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
/* Update MMU table */
ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
gpu_va_base >> PAGE_SHIFT, phys, num_pages,
flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW);
if (ret)
goto mmu_insert_failed;
kfree(page_list);
fw_ring_buf->gpu_dump_base = gpu_va_base;
fw_ring_buf->cpu_dump_base = cpu_addr;
fw_ring_buf->phys = phys;
fw_ring_buf->num_pages = num_pages;
fw_ring_buf->buf_count = buf_count;
fw_ring_buf->as_nr = MCU_AS_NR;
*cpu_dump_base = fw_ring_buf->cpu_dump_base;
*out_ring_buf =
(struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
return 0;
mmu_insert_failed:
vunmap(cpu_addr);
vmap_error:
kbase_mem_pool_free_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false, false);
phys_mem_pool_alloc_error:
kfree(page_list);
page_list_alloc_error:
kfree(phys);
phys_alloc_error:
kfree(fw_ring_buf);
return -ENOMEM;
}
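
/**
 * kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync() - Sync a range of ring
 *                                                  buffer pages between CPU
 *                                                  and GPU.
 * @ctx:             Non-NULL pointer to a CSF firmware interface context.
 * @ring_buf:        Non-NULL pointer to the ring buffer to sync.
 * @buf_index_first: Index of the first buffer in the range (inclusive).
 * @buf_index_last:  Index one past the last buffer in the range (exclusive).
 * @for_cpu:         True to sync for CPU access, false to sync for device
 *                   access.
 */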
static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
u32 buf_index_first, u32 buf_index_last, bool for_cpu)
{
struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
size_t i;
size_t pg_first;
size_t pg_last;
u64 start_address;
u64 stop_address;
u32 ring_buf_index_first;
u32 ring_buf_index_last;
WARN_ON(!ctx);
WARN_ON(!ring_buf);
/* The index arguments for this function form a half-open range:
 * [buf_index_first, buf_index_last). However, when masking back to the
 * available buffers we make the range inclusive at both ends, so that a
 * full flush does not degenerate into the empty range 0 -> 0.
 */
ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1);
ring_buf_index_last =
(buf_index_last - 1) & (fw_ring_buf->buf_count - 1);
/* The start address is the offset of the first buffer. */
start_address = fw_ctx->buf_bytes * ring_buf_index_first;
pg_first = start_address >> PAGE_SHIFT;
/* The stop address is the last byte in the final buffer. */
stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1;
pg_last = stop_address >> PAGE_SHIFT;
/* Check whether the buffer range wraps. */
if (start_address > stop_address) {
/* sync the first part to the end of ring buffer. */
for (i = pg_first; i < fw_ring_buf->num_pages; i++) {
struct page *pg = as_page(fw_ring_buf->phys[i]);
if (for_cpu) {
kbase_sync_single_for_cpu(fw_ctx->kbdev,
kbase_dma_addr(pg),
PAGE_SIZE,
DMA_BIDIRECTIONAL);
} else {
kbase_sync_single_for_device(fw_ctx->kbdev,
kbase_dma_addr(pg),
PAGE_SIZE,
DMA_BIDIRECTIONAL);
}
}
/* second part starts from page 0. */
pg_first = 0;
}
for (i = pg_first; i <= pg_last; i++) {
struct page *pg = as_page(fw_ring_buf->phys[i]);
if (for_cpu) {
kbase_sync_single_for_cpu(fw_ctx->kbdev,
kbase_dma_addr(pg), PAGE_SIZE,
DMA_BIDIRECTIONAL);
} else {
kbase_sync_single_for_device(fw_ctx->kbdev,
kbase_dma_addr(pg),
PAGE_SIZE,
DMA_BIDIRECTIONAL);
}
}
}
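
/**
 * kbasep_hwcnt_backend_csf_if_fw_timestamp_ns() - Get the current CPU
 *                                                 timestamp.
 * @ctx: Non-NULL pointer to a CSF firmware interface context.
 *
 * Return: Raw monotonic timestamp in nanoseconds.
 */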
static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
CSTD_UNUSED(ctx);
return ktime_get_raw_ns();
}
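
/**
 * kbasep_hwcnt_backend_csf_if_fw_ring_buf_free() - Unmap and free a ring
 *                                                  buffer.
 * @ctx:      Non-NULL pointer to a CSF firmware interface context.
 * @ring_buf: Pointer to the ring buffer to free, may be NULL.
 */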
static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
{
struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
if (!fw_ring_buf)
return;
if (fw_ring_buf->phys) {
u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;
WARN_ON(kbase_mmu_teardown_pages(
fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
gpu_va_base >> PAGE_SHIFT, fw_ring_buf->num_pages,
MCU_AS_NR));
vunmap(fw_ring_buf->cpu_dump_base);
kbase_mem_pool_free_pages(
&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
fw_ring_buf->num_pages, fw_ring_buf->phys, false,
false);
kfree(fw_ring_buf->phys);
kfree(fw_ring_buf);
}
}
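
/**
 * kbasep_hwcnt_backend_csf_if_fw_dump_enable() - Enable HWC dumping into a
 *                                                ring buffer.
 * @ctx:      Non-NULL pointer to a CSF firmware interface context.
 * @ring_buf: Non-NULL pointer to the ring buffer the firmware dumps into.
 * @enable:   Non-NULL pointer to the enable configuration, including the
 *            counter set and the enable bitmaps per block type.
 *
 * Requires the CSF scheduler spinlock to be held.
 */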
static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
struct kbase_hwcnt_backend_csf_if_enable *enable)
{
u32 prfcnt_config;
struct kbase_device *kbdev;
struct kbase_csf_global_iface *global_iface;
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
WARN_ON(!ctx);
WARN_ON(!ring_buf);
WARN_ON(!enable);
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
kbdev = fw_ctx->kbdev;
global_iface = &kbdev->csf.global_iface;
/* Build the PRFCNT configuration: ring buffer size and counter set */
prfcnt_config = fw_ring_buf->buf_count;
prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;
/* Configure the ring buffer base address */
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID,
fw_ring_buf->as_nr);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO,
fw_ring_buf->gpu_dump_base & U32_MAX);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI,
fw_ring_buf->gpu_dump_base >> 32);
/* Set extract position to 0 */
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0);
/* Configure the enable bitmap */
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN,
enable->fe_bm);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN,
enable->shader_bm);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN,
enable->mmu_l2_bm);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN,
enable->tiler_bm);
/* Configure the HWC set and buffer size */
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG,
prfcnt_config);
kbdev->csf.hwcnt.enable_pending = true;
/* Unmask the interrupts */
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);
/* Enable the HWC */
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
(1 << GLB_REQ_PRFCNT_ENABLE_SHIFT),
GLB_REQ_PRFCNT_ENABLE_MASK);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
prfcnt_config = kbase_csf_firmware_global_input_read(global_iface,
GLB_PRFCNT_CONFIG);
kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx,
enable->clk_enable_map);
}
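
/**
 * kbasep_hwcnt_backend_csf_if_fw_dump_disable() - Disable HWC dumping and
 *                                                 mask the related
 *                                                 interrupts.
 * @ctx: Non-NULL pointer to a CSF firmware interface context.
 *
 * Requires the CSF scheduler spinlock to be held.
 */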
static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
struct kbase_device *kbdev;
struct kbase_csf_global_iface *global_iface;
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
WARN_ON(!ctx);
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
kbdev = fw_ctx->kbdev;
global_iface = &kbdev->csf.global_iface;
/* Disable the HWC */
kbdev->csf.hwcnt.enable_pending = true;
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0,
GLB_REQ_PRFCNT_ENABLE_MASK);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
/* Mask the interrupts */
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK, 0,
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK, 0,
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK, 0,
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
/* In case we have a previous request in flight when the disable
* happens.
*/
kbdev->csf.hwcnt.request_pending = false;
kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
}
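
/**
 * kbasep_hwcnt_backend_csf_if_fw_dump_request() - Request a manual HWC sample
 *                                                 from the firmware.
 * @ctx: Non-NULL pointer to a CSF firmware interface context.
 *
 * Requires the CSF scheduler spinlock to be held.
 */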
static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
u32 glb_req;
struct kbase_device *kbdev;
struct kbase_csf_global_iface *global_iface;
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
WARN_ON(!ctx);
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
kbdev = fw_ctx->kbdev;
global_iface = &kbdev->csf.global_iface;
/* Trigger dumping */
kbdev->csf.hwcnt.request_pending = true;
glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK;
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req,
GLB_REQ_PRFCNT_SAMPLE_MASK);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
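
/**
 * kbasep_hwcnt_backend_csf_if_fw_get_indexes() - Read the ring buffer extract
 *                                                and insert indexes.
 * @ctx:           Non-NULL pointer to a CSF firmware interface context.
 * @extract_index: Non-NULL pointer to where the extract index is stored.
 * @insert_index:  Non-NULL pointer to where the insert index is stored.
 *
 * Requires the CSF scheduler spinlock to be held.
 */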
static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
u32 *insert_index)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
WARN_ON(!ctx);
WARN_ON(!extract_index);
WARN_ON(!insert_index);
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
*extract_index = kbase_csf_firmware_global_input_read(
&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT);
*insert_index = kbase_csf_firmware_global_output(
&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT);
}
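
/**
 * kbasep_hwcnt_backend_csf_if_fw_set_extract_index() - Update the ring buffer
 *                                                      extract index.
 * @ctx:         Non-NULL pointer to a CSF firmware interface context.
 * @extract_idx: New raw extract index, which releases the consumed buffers
 *               back to the ring buffer.
 *
 * Requires the CSF scheduler spinlock to be held.
 */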
static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
WARN_ON(!ctx);
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
/* Set the raw extract index to release the buffer back to the ring
* buffer.
*/
kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface,
GLB_PRFCNT_EXTRACT, extract_idx);
}
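
/**
 * kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count() - Get cycle counts for
 *                                                        the enabled clock
 *                                                        domains.
 * @ctx:            Non-NULL pointer to a CSF firmware interface context.
 * @cycle_counts:   Non-NULL array with one entry per clock domain, filled
 *                  with the current cycle count of each enabled domain.
 * @clk_enable_map: Bitmask of clock domains for which cycle counts are
 *                  requested.
 *
 * The top clock domain is read from the GPU; the shader cores domain is
 * estimated by the cycle count software estimator.
 *
 * Requires the CSF scheduler spinlock to be held.
 */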
static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
u64 clk_enable_map)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
u8 clk;
u64 timestamp_ns = ktime_get_raw_ns();
WARN_ON(!ctx);
WARN_ON(!cycle_counts);
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
for (clk = 0; clk < fw_ctx->clk_cnt; clk++) {
if (!(clk_enable_map & (1ull << clk)))
continue;
if (clk == KBASE_CLOCK_DOMAIN_TOP) {
/* Read cycle count for top clock domain. */
kbase_backend_get_gpu_time_norequest(
fw_ctx->kbdev, &cycle_counts[clk], NULL, NULL);
} else {
/* Estimate cycle count for non-top clock domain. */
cycle_counts[clk] = kbase_ccswe_cycle_at(
&fw_ctx->ccswe_shader_cores, timestamp_ns);
}
}
}
/**
 * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context.
*
* @fw_ctx: Pointer to context to destroy.
*/
static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
if (!fw_ctx)
return;
kfree(fw_ctx);
}
/**
 * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF firmware interface context.
 *
 * @kbdev:   Non-NULL pointer to the kbase device.
 * @out_ctx: Non-NULL pointer to where the created context is stored on success.
 *
 * Return: 0 on success, else error code.
*/
static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
struct kbase_device *kbdev,
struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
{
u8 clk;
int errcode = -ENOMEM;
struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;
WARN_ON(!kbdev);
WARN_ON(!out_ctx);
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
goto error;
ctx->kbdev = kbdev;
/* Determine the number of available clock domains. */
for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
if (kbdev->pm.clk_rtm.clks[clk] == NULL)
break;
}
ctx->clk_cnt = clk;
ctx->clk_enable_map = 0;
kbase_ccswe_init(&ctx->ccswe_shader_cores);
ctx->rate_listener.notify =
kbasep_hwcnt_backend_csf_if_fw_on_freq_change;
*out_ctx = ctx;
return 0;
error:
kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx);
return errcode;
}
void kbase_hwcnt_backend_csf_if_fw_destroy(
struct kbase_hwcnt_backend_csf_if *if_fw)
{
if (!if_fw)
return;
kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx);
memset(if_fw, 0, sizeof(*if_fw));
}
int kbase_hwcnt_backend_csf_if_fw_create(
struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw)
{
int errcode;
struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;
if (!kbdev || !if_fw)
return -EINVAL;
errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx);
if (errcode)
return errcode;
if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx;
if_fw->assert_lock_held =
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc;
if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync;
if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free;
if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns;
if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
if_fw->get_gpu_cycle_count =
kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes;
if_fw->set_extract_index =
kbasep_hwcnt_backend_csf_if_fw_set_extract_index;
return 0;
}