| // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
| /* |
| * |
| * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. |
| * |
| * This program is free software and is provided to you under the terms of the |
| * GNU General Public License version 2 as published by the Free Software |
| * Foundation, and any use by you of this program is subject to the terms |
| * of such GNU license. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, you can access it online at |
| * http://www.gnu.org/licenses/gpl-2.0.html. |
| * |
| */ |
| |
| /* |
| * CSF GPU HWC backend firmware interface APIs. |
| */ |
| |
| #include <mali_kbase.h> |
| #include <gpu/mali_kbase_gpu_regmap.h> |
| #include <device/mali_kbase_device.h> |
| #include "mali_kbase_hwcnt_gpu.h" |
| #include "mali_kbase_hwcnt_types.h" |
| #include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h> |
| |
| #include "csf/mali_kbase_csf_firmware.h" |
| #include "mali_kbase_hwcnt_backend_csf_if_fw.h" |
| #include "mali_kbase_hwaccess_time.h" |
| #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" |
| |
| #include <linux/log2.h> |
| #include "mali_kbase_ccswe.h" |
| |
| /** The number of nanoseconds in a second. */ |
| #define NSECS_IN_SEC 1000000000ull /* ns */ |
| |
| /* Ring buffer virtual address starts at 4GB */ |
| #define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32) |
| |
| /** |
| * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface |
| * used to save the manual and |
| * auto HWC samples from |
| * firmware. |
| * @gpu_dump_base: Starting GPU base address of the ring buffer. |
| * @cpu_dump_base: Starting CPU address for the mapping. |
| * @buf_count: Buffer count in the ring buffer, MUST be a power of 2. |
| * @as_nr: Address space number for the memory mapping. |
| * @phys: Physical memory allocation used by the mapping. |
| * @num_pages: Size of the mapping, in memory pages. |
| */ |
| struct kbase_hwcnt_backend_csf_if_fw_ring_buf { |
| u64 gpu_dump_base; |
| void *cpu_dump_base; |
| size_t buf_count; |
| u32 as_nr; |
| struct tagged_addr *phys; |
| size_t num_pages; |
| }; |
| |
| /** |
| * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF |
| * interface, used to communicate |
| * with firmware. |
| * @kbdev: KBase device. |
| * @buf_bytes: The size in bytes for each buffer in the ring buffer. |
| * @clk_cnt: The number of clock domains in the system. |
| * The maximum is 64. |
| * @clk_enable_map: Bitmask of the enabled clock domains. |
| * @rate_listener: Clock rate listener callback state. |
| * @ccswe_shader_cores: Shader cores cycle count software estimator. |
| */ |
| struct kbase_hwcnt_backend_csf_if_fw_ctx { |
| struct kbase_device *kbdev; |
| size_t buf_bytes; |
| u8 clk_cnt; |
| u64 clk_enable_map; |
| struct kbase_clk_rate_listener rate_listener; |
| struct kbase_ccswe ccswe_shader_cores; |
| }; |
| |
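| /** |
| * kbasep_hwcnt_backend_csf_if_fw_assert_lock_held() - Assert that the CSF |
| *                                                     scheduler spinlock is |
| *                                                     held. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| */ |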
| static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx) |
| { |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; |
| struct kbase_device *kbdev; |
| |
| WARN_ON(!ctx); |
| |
| fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| kbdev = fw_ctx->kbdev; |
| |
| kbase_csf_scheduler_spin_lock_assert_held(kbdev); |
| } |
| |
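| /** |
| * kbasep_hwcnt_backend_csf_if_fw_lock() - Acquire the CSF scheduler spinlock. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * @flags: Non-NULL pointer to where the previous interrupt state is stored. |
| */ |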
| static void |
| kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, |
| unsigned long *flags) |
| { |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; |
| struct kbase_device *kbdev; |
| |
| WARN_ON(!ctx); |
| |
| fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| kbdev = fw_ctx->kbdev; |
| |
| kbase_csf_scheduler_spin_lock(kbdev, flags); |
| } |
| |
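| /** |
| * kbasep_hwcnt_backend_csf_if_fw_unlock() - Release the CSF scheduler |
| *                                           spinlock. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * @flags: Previously stored interrupt state when the spinlock was acquired. |
| */ |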
| static void kbasep_hwcnt_backend_csf_if_fw_unlock( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags) |
| { |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; |
| struct kbase_device *kbdev; |
| |
| WARN_ON(!ctx); |
| |
| fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| kbdev = fw_ctx->kbdev; |
| |
| kbase_csf_scheduler_spin_lock_assert_held(kbdev); |
| kbase_csf_scheduler_spin_unlock(kbdev, flags); |
| } |
| |
| /** |
| * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - Frequency change callback. |
| * |
| * @rate_listener: Non-NULL pointer to the clock rate listener callback state. |
| * @clk_index: Index of the clock domain that changed frequency. |
| * @clk_rate_hz: New clock frequency, in Hz. |
| */ |
| static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change( |
| struct kbase_clk_rate_listener *rate_listener, u32 clk_index, |
| u32 clk_rate_hz) |
| { |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = |
| container_of(rate_listener, |
| struct kbase_hwcnt_backend_csf_if_fw_ctx, |
| rate_listener); |
| u64 timestamp_ns; |
| |
| if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) |
| return; |
| |
| timestamp_ns = ktime_get_raw_ns(); |
| kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, |
| clk_rate_hz); |
| } |
| |
| /** |
| * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking |
| * |
| * @fw_ctx: Non-NULL pointer to CSF firmware interface context. |
| * @clk_enable_map: Bitmask of the clock domains for which cycle counting |
| *                  should be enabled. |
| */ |
| static void kbasep_hwcnt_backend_csf_if_fw_cc_enable( |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, u64 clk_enable_map) |
| { |
| struct kbase_device *kbdev = fw_ctx->kbdev; |
| |
| if (kbase_hwcnt_clk_enable_map_enabled( |
| clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { |
| /* Use software estimation for the shader cores (non-top) clock domain. */ |
| struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; |
| const struct kbase_clk_data *clk_data = |
| rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; |
| u32 cur_freq; |
| unsigned long flags; |
| u64 timestamp_ns; |
| |
| timestamp_ns = ktime_get_raw_ns(); |
| |
| spin_lock_irqsave(&rtm->lock, flags); |
| |
| cur_freq = (u32)clk_data->clock_val; |
| kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores); |
| kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, |
| timestamp_ns, cur_freq); |
| |
| kbase_clk_rate_trace_manager_subscribe_no_lock( |
| rtm, &fw_ctx->rate_listener); |
| |
| spin_unlock_irqrestore(&rtm->lock, flags); |
| } |
| |
| fw_ctx->clk_enable_map = clk_enable_map; |
| } |
| |
| /** |
| * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking |
| * |
| * @fw_ctx: Non-NULL pointer to CSF firmware interface context. |
| */ |
| static void kbasep_hwcnt_backend_csf_if_fw_cc_disable( |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) |
| { |
| struct kbase_device *kbdev = fw_ctx->kbdev; |
| struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; |
| u64 clk_enable_map = fw_ctx->clk_enable_map; |
| |
| if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, |
| KBASE_CLOCK_DOMAIN_SHADER_CORES)) |
| kbase_clk_rate_trace_manager_unsubscribe( |
| rtm, &fw_ctx->rate_listener); |
| } |
| |
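| /** |
| * kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info() - Get performance counter |
| *                                                    information. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * @prfcnt_info: Non-NULL pointer to where the performance counter |
| *               information is stored on return. |
| */ |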
| static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx, |
| struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info) |
| { |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; |
| struct kbase_device *kbdev; |
| u32 prfcnt_size; |
| u32 prfcnt_hw_size = 0; |
| u32 prfcnt_fw_size = 0; |
| u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * |
| KBASE_HWCNT_VALUE_BYTES; |
| |
| WARN_ON(!ctx); |
| WARN_ON(!prfcnt_info); |
| |
| fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| kbdev = fw_ctx->kbdev; |
| prfcnt_size = kbdev->csf.global_iface.prfcnt_size; |
| prfcnt_hw_size = (prfcnt_size & 0xFF) << 8; |
| prfcnt_fw_size = (prfcnt_size >> 16) << 8; |
| fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; |
| |
| prfcnt_info->dump_bytes = fw_ctx->buf_bytes; |
| prfcnt_info->prfcnt_block_size = prfcnt_block_size; |
| prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices; |
| prfcnt_info->core_mask = |
| kbdev->gpu_props.props.coherency_info.group[0].core_mask; |
| |
| prfcnt_info->clk_cnt = fw_ctx->clk_cnt; |
| prfcnt_info->clearing_samples = true; |
| |
| /* Block size must be multiple of counter size. */ |
| WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_BYTES) != |
| 0); |
| /* Total size must be multiple of block size. */ |
| WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != |
| 0); |
| } |
| |
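| /** |
| * kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc() - Allocate and map the ring |
| *                                                   buffer used to store HWC |
| *                                                   samples from firmware. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * @buf_count: Number of sample buffers in the ring buffer, MUST be a power |
| *             of 2. |
| * @cpu_dump_base: Non-NULL pointer to where the CPU mapping of the ring |
| *                 buffer is stored on success. |
| * @out_ring_buf: Non-NULL pointer to where the created ring buffer object is |
| *                stored on success. |
| * |
| * Return: 0 on success, else error code. |
| */ |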
| static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, |
| void **cpu_dump_base, |
| struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf) |
| { |
| struct kbase_device *kbdev; |
| struct tagged_addr *phys; |
| struct page **page_list; |
| void *cpu_addr; |
| int ret; |
| int i; |
| size_t num_pages; |
| u64 flags; |
| struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf; |
| |
| pgprot_t cpu_map_prot = PAGE_KERNEL; |
| u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; |
| |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = |
| (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| |
| WARN_ON(!ctx); |
| WARN_ON(!cpu_dump_base); |
| WARN_ON(!out_ring_buf); |
| |
| kbdev = fw_ctx->kbdev; |
| |
| /* The buffer count must be a power of 2 */ |
| if (!is_power_of_2(buf_count)) |
| return -EINVAL; |
| |
| /* The GPU virtual base address of the ring buffer must be 2KB aligned. */ |
| if (gpu_va_base & (2048 - 1)) |
| return -EINVAL; |
| |
| fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL); |
| if (!fw_ring_buf) |
| return -ENOMEM; |
| |
| num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count); |
| phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); |
| if (!phys) |
| goto phys_alloc_error; |
| |
| page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); |
| if (!page_list) |
| goto page_list_alloc_error; |
| |
| /* Get physical page for the buffer */ |
| ret = kbase_mem_pool_alloc_pages( |
| &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, |
| phys, false); |
| if (ret != num_pages) |
| goto phys_mem_pool_alloc_error; |
| |
| /* Get the CPU virtual address */ |
| for (i = 0; i < num_pages; i++) |
| page_list[i] = as_page(phys[i]); |
| |
| cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); |
| if (!cpu_addr) |
| goto vmap_error; |
| |
| flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | |
| KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); |
| |
| /* Update MMU table */ |
| ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, |
| gpu_va_base >> PAGE_SHIFT, phys, num_pages, |
| flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW); |
| if (ret) |
| goto mmu_insert_failed; |
| |
| kfree(page_list); |
| |
| fw_ring_buf->gpu_dump_base = gpu_va_base; |
| fw_ring_buf->cpu_dump_base = cpu_addr; |
| fw_ring_buf->phys = phys; |
| fw_ring_buf->num_pages = num_pages; |
| fw_ring_buf->buf_count = buf_count; |
| fw_ring_buf->as_nr = MCU_AS_NR; |
| |
| *cpu_dump_base = fw_ring_buf->cpu_dump_base; |
| *out_ring_buf = |
| (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf; |
| |
| return 0; |
| |
| mmu_insert_failed: |
| vunmap(cpu_addr); |
| vmap_error: |
| kbase_mem_pool_free_pages( |
| &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, |
| phys, false, false); |
| phys_mem_pool_alloc_error: |
| kfree(page_list); |
| page_list_alloc_error: |
| kfree(phys); |
| phys_alloc_error: |
| kfree(fw_ring_buf); |
| return -ENOMEM; |
| } |
| |
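| /** |
| * kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync() - Sync a range of ring |
| *                                                  buffer pages between the |
| *                                                  CPU and the GPU. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * @ring_buf: Non-NULL pointer to the ring buffer to sync. |
| * @buf_index_first: Index of the first buffer in the range (inclusive). |
| * @buf_index_last: Index just past the last buffer in the range (exclusive). |
| * @for_cpu: True to sync for CPU access, false to sync for device access. |
| */ |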
| static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx, |
| struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, |
| u32 buf_index_first, u32 buf_index_last, bool for_cpu) |
| { |
| struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = |
| (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = |
| (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| size_t i; |
| size_t pg_first; |
| size_t pg_last; |
| u64 start_address; |
| u64 stop_address; |
| u32 ring_buf_index_first; |
| u32 ring_buf_index_last; |
| |
| WARN_ON(!ctx); |
| WARN_ON(!ring_buf); |
| |
| /* The index arguments form a half-open range: buf_index_first is |
| * inclusive, buf_index_last is exclusive. |
| * However, when masking back to the available buffers we make the range |
| * inclusive at both ends, so a full flush does not collapse to 0 -> 0. |
| */ |
| ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1); |
| ring_buf_index_last = |
| (buf_index_last - 1) & (fw_ring_buf->buf_count - 1); |
| |
| /* The start address is the offset of the first buffer. */ |
| start_address = fw_ctx->buf_bytes * ring_buf_index_first; |
| pg_first = start_address >> PAGE_SHIFT; |
| |
| /* The stop address is the last byte in the final buffer. */ |
| stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1; |
| pg_last = stop_address >> PAGE_SHIFT; |
| |
| /* Check whether the buffer range wraps. */ |
| if (start_address > stop_address) { |
| /* Sync the first part, up to the end of the ring buffer. */ |
| for (i = pg_first; i < fw_ring_buf->num_pages; i++) { |
| struct page *pg = as_page(fw_ring_buf->phys[i]); |
| |
| if (for_cpu) { |
| kbase_sync_single_for_cpu(fw_ctx->kbdev, |
| kbase_dma_addr(pg), |
| PAGE_SIZE, |
| DMA_BIDIRECTIONAL); |
| } else { |
| kbase_sync_single_for_device(fw_ctx->kbdev, |
| kbase_dma_addr(pg), |
| PAGE_SIZE, |
| DMA_BIDIRECTIONAL); |
| } |
| } |
| |
| /* The second part starts from page 0. */ |
| pg_first = 0; |
| } |
| |
| for (i = pg_first; i <= pg_last; i++) { |
| struct page *pg = as_page(fw_ring_buf->phys[i]); |
| |
| if (for_cpu) { |
| kbase_sync_single_for_cpu(fw_ctx->kbdev, |
| kbase_dma_addr(pg), PAGE_SIZE, |
| DMA_BIDIRECTIONAL); |
| } else { |
| kbase_sync_single_for_device(fw_ctx->kbdev, |
| kbase_dma_addr(pg), |
| PAGE_SIZE, |
| DMA_BIDIRECTIONAL); |
| } |
| } |
| } |
| |
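| /** |
| * kbasep_hwcnt_backend_csf_if_fw_timestamp_ns() - Get the current backend |
| *                                                 timestamp. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * |
| * Return: Raw monotonic timestamp, in nanoseconds. |
| */ |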
| static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx) |
| { |
| CSTD_UNUSED(ctx); |
| return ktime_get_raw_ns(); |
| } |
| |
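| /** |
| * kbasep_hwcnt_backend_csf_if_fw_ring_buf_free() - Unmap and free a ring |
| *                                                  buffer created by the |
| *                                                  ring_buf_alloc callback. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * @ring_buf: Ring buffer to be freed, may be NULL. |
| */ |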
| static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx, |
| struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf) |
| { |
| struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = |
| (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = |
| (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| |
| if (!fw_ring_buf) |
| return; |
| |
| if (fw_ring_buf->phys) { |
| u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; |
| |
| WARN_ON(kbase_mmu_teardown_pages( |
| fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, |
| gpu_va_base >> PAGE_SHIFT, fw_ring_buf->num_pages, |
| MCU_AS_NR)); |
| |
| vunmap(fw_ring_buf->cpu_dump_base); |
| |
| kbase_mem_pool_free_pages( |
| &fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], |
| fw_ring_buf->num_pages, fw_ring_buf->phys, false, |
| false); |
| |
| kfree(fw_ring_buf->phys); |
| |
| kfree(fw_ring_buf); |
| } |
| } |
| |
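| /** |
| * kbasep_hwcnt_backend_csf_if_fw_dump_enable() - Configure and enable HWC |
| *                                                dumping in firmware. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * @ring_buf: Non-NULL pointer to the ring buffer used to store the samples. |
| * @enable: Non-NULL pointer to the HWC enable configuration. |
| * |
| * Requires the CSF scheduler spinlock to be held. |
| */ |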
| static void kbasep_hwcnt_backend_csf_if_fw_dump_enable( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx, |
| struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, |
| struct kbase_hwcnt_backend_csf_if_enable *enable) |
| { |
| u32 prfcnt_config; |
| struct kbase_device *kbdev; |
| struct kbase_csf_global_iface *global_iface; |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = |
| (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = |
| (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; |
| |
| WARN_ON(!ctx); |
| WARN_ON(!ring_buf); |
| WARN_ON(!enable); |
| kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); |
| |
| kbdev = fw_ctx->kbdev; |
| global_iface = &kbdev->csf.global_iface; |
| |
| /* Configure */ |
| prfcnt_config = fw_ring_buf->buf_count; |
| prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; |
| |
| /* Configure the ring buffer base address */ |
| kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, |
| fw_ring_buf->as_nr); |
| kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO, |
| fw_ring_buf->gpu_dump_base & U32_MAX); |
| kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI, |
| fw_ring_buf->gpu_dump_base >> 32); |
| |
| /* Set extract position to 0 */ |
| kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0); |
| |
| /* Configure the enable bitmap */ |
| kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, |
| enable->fe_bm); |
| kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, |
| enable->shader_bm); |
| kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, |
| enable->mmu_l2_bm); |
| kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, |
| enable->tiler_bm); |
| |
| /* Configure the HWC set and buffer size */ |
| kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, |
| prfcnt_config); |
| |
| kbdev->csf.hwcnt.enable_pending = true; |
| |
| /* Unmask the interrupts */ |
| kbase_csf_firmware_global_input_mask( |
| global_iface, GLB_ACK_IRQ_MASK, |
| GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK, |
| GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); |
| kbase_csf_firmware_global_input_mask( |
| global_iface, GLB_ACK_IRQ_MASK, |
| GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK, |
| GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); |
| kbase_csf_firmware_global_input_mask( |
| global_iface, GLB_ACK_IRQ_MASK, |
| GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK, |
| GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); |
| kbase_csf_firmware_global_input_mask( |
| global_iface, GLB_ACK_IRQ_MASK, |
| GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK, |
| GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK); |
| |
| /* Enable the HWC */ |
| kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, |
| (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT), |
| GLB_REQ_PRFCNT_ENABLE_MASK); |
| kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); |
| |
| prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, |
| GLB_PRFCNT_CONFIG); |
| |
| kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, |
| enable->clk_enable_map); |
| } |
| |
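| /** |
| * kbasep_hwcnt_backend_csf_if_fw_dump_disable() - Disable HWC dumping in |
| *                                                 firmware and mask the |
| *                                                 related interrupts. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * |
| * Requires the CSF scheduler spinlock to be held. |
| */ |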
| static void kbasep_hwcnt_backend_csf_if_fw_dump_disable( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx) |
| { |
| struct kbase_device *kbdev; |
| struct kbase_csf_global_iface *global_iface; |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = |
| (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| |
| WARN_ON(!ctx); |
| kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); |
| |
| kbdev = fw_ctx->kbdev; |
| global_iface = &kbdev->csf.global_iface; |
| |
| /* Disable the HWC */ |
| kbdev->csf.hwcnt.enable_pending = true; |
| kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, |
| GLB_REQ_PRFCNT_ENABLE_MASK); |
| kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); |
| |
| /* Mask the interrupts */ |
| kbase_csf_firmware_global_input_mask( |
| global_iface, GLB_ACK_IRQ_MASK, 0, |
| GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); |
| kbase_csf_firmware_global_input_mask( |
| global_iface, GLB_ACK_IRQ_MASK, 0, |
| GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); |
| kbase_csf_firmware_global_input_mask( |
| global_iface, GLB_ACK_IRQ_MASK, 0, |
| GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); |
| |
| /* Clear any sample request that was still in flight when the disable |
| * happened. |
| */ |
| kbdev->csf.hwcnt.request_pending = false; |
| |
| kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx); |
| } |
| |
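| /** |
| * kbasep_hwcnt_backend_csf_if_fw_dump_request() - Request a manual HWC sample |
| *                                                 from firmware. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * |
| * Requires the CSF scheduler spinlock to be held. |
| */ |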
| static void kbasep_hwcnt_backend_csf_if_fw_dump_request( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx) |
| { |
| u32 glb_req; |
| struct kbase_device *kbdev; |
| struct kbase_csf_global_iface *global_iface; |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = |
| (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| |
| WARN_ON(!ctx); |
| kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); |
| |
| kbdev = fw_ctx->kbdev; |
| global_iface = &kbdev->csf.global_iface; |
| |
| /* Trigger dumping */ |
| kbdev->csf.hwcnt.request_pending = true; |
| glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); |
| glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK; |
| kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, |
| GLB_REQ_PRFCNT_SAMPLE_MASK); |
| kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); |
| } |
| |
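| /** |
| * kbasep_hwcnt_backend_csf_if_fw_get_indexes() - Get the current extract and |
| *                                                insert indexes of the ring |
| *                                                buffer. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * @extract_index: Non-NULL pointer to where the extract index is stored. |
| * @insert_index: Non-NULL pointer to where the insert index is stored. |
| * |
| * Requires the CSF scheduler spinlock to be held. |
| */ |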
| static void kbasep_hwcnt_backend_csf_if_fw_get_indexes( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, |
| u32 *insert_index) |
| { |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = |
| (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| |
| WARN_ON(!ctx); |
| WARN_ON(!extract_index); |
| WARN_ON(!insert_index); |
| kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); |
| |
| *extract_index = kbase_csf_firmware_global_input_read( |
| &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT); |
| *insert_index = kbase_csf_firmware_global_output( |
| &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT); |
| } |
| |
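| /** |
| * kbasep_hwcnt_backend_csf_if_fw_set_extract_index() - Update the raw extract |
| *                                                      index to release |
| *                                                      consumed buffers back |
| *                                                      to the ring buffer. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * @extract_idx: New raw extract index value. |
| * |
| * Requires the CSF scheduler spinlock to be held. |
| */ |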
| static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx) |
| { |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = |
| (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| |
| WARN_ON(!ctx); |
| kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); |
| |
| /* Set the raw extract index to release the buffer back to the ring |
| * buffer. |
| */ |
| kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, |
| GLB_PRFCNT_EXTRACT, extract_idx); |
| } |
| |
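| /** |
| * kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count() - Get the current GPU |
| *                                                        cycle count for each |
| *                                                        enabled clock domain. |
| * |
| * @ctx: Non-NULL pointer to a CSF firmware interface context. |
| * @cycle_counts: Non-NULL array with one entry per clock domain, updated for |
| *                each domain enabled in @clk_enable_map. |
| * @clk_enable_map: Bitmask of the clock domains to read. |
| * |
| * The top clock domain is read from the GPU; the shader cores domain is |
| * estimated by the cycle count software estimator. Requires the CSF |
| * scheduler spinlock to be held. |
| */ |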
| static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count( |
| struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts, |
| u64 clk_enable_map) |
| { |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = |
| (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; |
| u8 clk; |
| u64 timestamp_ns = ktime_get_raw_ns(); |
| |
| WARN_ON(!ctx); |
| WARN_ON(!cycle_counts); |
| kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); |
| |
| for (clk = 0; clk < fw_ctx->clk_cnt; clk++) { |
| if (!(clk_enable_map & (1ull << clk))) |
| continue; |
| |
| if (clk == KBASE_CLOCK_DOMAIN_TOP) { |
| /* Read cycle count for top clock domain. */ |
| kbase_backend_get_gpu_time_norequest( |
| fw_ctx->kbdev, &cycle_counts[clk], NULL, NULL); |
| } else { |
| /* Estimate cycle count for non-top clock domain. */ |
| cycle_counts[clk] = kbase_ccswe_cycle_at( |
| &fw_ctx->ccswe_shader_cores, timestamp_ns); |
| } |
| } |
| } |
| |
| /** |
| * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context. |
| * |
| * @fw_ctx: Pointer to the context to destroy, may be NULL. |
| */ |
| static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy( |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) |
| { |
| if (!fw_ctx) |
| return; |
| |
| kfree(fw_ctx); |
| } |
| |
| /** |
| * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF Firmware context. |
| * |
| * @kbdev: Non-NULL pointer to the kbase device. |
| * @out_ctx: Non-NULL pointer to where the created context is stored on success. |
| * |
| * Return: 0 on success, else error code. |
| */ |
| static int kbasep_hwcnt_backend_csf_if_fw_ctx_create( |
| struct kbase_device *kbdev, |
| struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx) |
| { |
| u8 clk; |
| int errcode = -ENOMEM; |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; |
| |
| WARN_ON(!kbdev); |
| WARN_ON(!out_ctx); |
| |
| ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); |
| if (!ctx) |
| goto error; |
| |
| ctx->kbdev = kbdev; |
| |
| /* Determine the number of available clock domains. */ |
| for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { |
| if (kbdev->pm.clk_rtm.clks[clk] == NULL) |
| break; |
| } |
| ctx->clk_cnt = clk; |
| |
| ctx->clk_enable_map = 0; |
| kbase_ccswe_init(&ctx->ccswe_shader_cores); |
| ctx->rate_listener.notify = |
| kbasep_hwcnt_backend_csf_if_fw_on_freq_change; |
| |
| *out_ctx = ctx; |
| |
| return 0; |
| error: |
| kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx); |
| return errcode; |
| } |
| |
| void kbase_hwcnt_backend_csf_if_fw_destroy( |
| struct kbase_hwcnt_backend_csf_if *if_fw) |
| { |
| if (!if_fw) |
| return; |
| |
| kbasep_hwcnt_backend_csf_if_fw_ctx_destroy( |
| (struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx); |
| memset(if_fw, 0, sizeof(*if_fw)); |
| } |
| |
| int kbase_hwcnt_backend_csf_if_fw_create( |
| struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw) |
| { |
| int errcode; |
| struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; |
| |
| if (!kbdev || !if_fw) |
| return -EINVAL; |
| |
| errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx); |
| if (errcode) |
| return errcode; |
| |
| if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx; |
| if_fw->assert_lock_held = |
| kbasep_hwcnt_backend_csf_if_fw_assert_lock_held; |
| if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock; |
| if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock; |
| if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info; |
| if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc; |
| if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync; |
| if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free; |
| if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns; |
| if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable; |
| if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable; |
| if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request; |
| if_fw->get_gpu_cycle_count = |
| kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count; |
| if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes; |
| if_fw->set_extract_index = |
| kbasep_hwcnt_backend_csf_if_fw_set_extract_index; |
| |
| return 0; |
| } |