blob: 3f06a10f7fed4e15d2718045fe3a7bab3dbdb8b5 [file] [log] [blame]
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* GPU backend instrumentation APIs.
*/
#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_hwaccess_instr.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_instr_internal.h>
/**
* kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
* hardware
*
* @kbdev: Kbase device
*/
static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
{
unsigned long flags;
unsigned long pm_flags;
u32 irq_mask;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_REQUEST_CLEAN);
/* Enable interrupt */
spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask | CLEAN_CACHES_COMPLETED, NULL);
spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
/* clean&invalidate the caches so we're sure the mmu tables for the dump
* buffer is valid */
KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_CLEAN_INV_CACHES, NULL);
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
struct kbase_context *kctx,
struct kbase_uk_hwcnt_setup *setup)
{
unsigned long flags, pm_flags;
int err = -EINVAL;
u32 irq_mask;
int ret;
u64 shader_cores_needed;
u32 prfcnt_config;
shader_cores_needed = kbase_pm_get_present_cores(kbdev,
KBASE_PM_CORE_SHADER);
/* alignment failure */
if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
goto out_err;
/* Override core availability policy to ensure all cores are available
*/
kbase_pm_ca_instr_enable(kbdev);
/* Request the cores early on synchronously - we'll release them on any
* errors (e.g. instrumentation already active) */
kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
/* Instrumentation is already enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
goto out_unrequest_cores;
}
/* Enable interrupt */
spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
PRFCNT_SAMPLE_COMPLETED, NULL);
spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
/* In use, this context is the owner */
kbdev->hwcnt.kctx = kctx;
/* Remember the dump address so we can reprogram it later */
kbdev->hwcnt.addr = setup->dump_buffer;
/* Request the clean */
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
kbdev->hwcnt.backend.triggered = 0;
/* Clean&invalidate the caches so we're sure the mmu tables for the dump
* buffer is valid */
ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
&kbdev->hwcnt.backend.cache_clean_work);
KBASE_DEBUG_ASSERT(ret);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
/* Wait for cacheclean to complete */
wait_event(kbdev->hwcnt.backend.wait,
kbdev->hwcnt.backend.triggered != 0);
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_IDLE);
kbase_pm_request_l2_caches(kbdev);
/* Configure */
prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
{
u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
if (arch_v6)
prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
}
#endif
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
setup->dump_buffer & 0xFFFFFFFF, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
setup->dump_buffer >> 32, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
setup->jm_bm, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
setup->shader_bm, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
setup->mmu_l2_bm, kctx);
/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
* HW counter dump. */
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
kctx);
else
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
setup->tiler_bm, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
*/
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
setup->tiler_bm, kctx);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
err = 0;
dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
return err;
out_unrequest_cores:
kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
out_err:
return err;
}
int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
{
unsigned long flags, pm_flags;
int err = -EINVAL;
u32 irq_mask;
struct kbase_device *kbdev = kctx->kbdev;
while (1) {
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
/* Instrumentation is not enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
goto out;
}
if (kbdev->hwcnt.kctx != kctx) {
/* Instrumentation has been setup for another context */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
goto out;
}
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
break;
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
/* Ongoing dump/setup - wait for its completion */
wait_event(kbdev->hwcnt.backend.wait,
kbdev->hwcnt.backend.triggered != 0);
}
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
kbdev->hwcnt.backend.triggered = 0;
/* Disable interrupt */
spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
/* Disable the counters */
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
kbdev->hwcnt.kctx = NULL;
kbdev->hwcnt.addr = 0ULL;
kbase_pm_ca_instr_disable(kbdev);
kbase_pm_unrequest_cores(kbdev, true,
kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
kbase_pm_release_l2_caches(kbdev);
dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
kctx);
err = 0;
out:
return err;
}
int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
{
unsigned long flags;
int err = -EINVAL;
struct kbase_device *kbdev = kctx->kbdev;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.kctx != kctx) {
/* The instrumentation has been setup for another context */
goto unlock;
}
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
/* HW counters are disabled or another dump is ongoing, or we're
* resetting */
goto unlock;
}
kbdev->hwcnt.backend.triggered = 0;
/* Mark that we're dumping - the PF handler can signal that we faulted
*/
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
/* Reconfigure the dump address */
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
kbdev->hwcnt.addr >> 32, NULL);
/* Start dumping */
KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
kbdev->hwcnt.addr, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_PRFCNT_SAMPLE, kctx);
dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
err = 0;
unlock:
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
bool * const success)
{
unsigned long flags;
bool complete = false;
struct kbase_device *kbdev = kctx->kbdev;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
*success = true;
complete = true;
} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
*success = false;
complete = true;
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
}
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
return complete;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
void kbasep_cache_clean_worker(struct work_struct *data)
{
struct kbase_device *kbdev;
unsigned long flags;
kbdev = container_of(data, struct kbase_device,
hwcnt.backend.cache_clean_work);
mutex_lock(&kbdev->cacheclean_lock);
kbasep_instr_hwcnt_cacheclean(kbdev);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
/* Wait for our condition, and any reset to complete */
while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
wait_event(kbdev->hwcnt.backend.cache_clean_wait,
kbdev->hwcnt.backend.state !=
KBASE_INSTR_STATE_CLEANING);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
}
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_CLEANED);
/* All finished and idle */
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
mutex_unlock(&kbdev->cacheclean_lock);
}
void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
{
unsigned long flags;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
int ret;
/* Always clean and invalidate the cache after a successful dump
*/
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
&kbdev->hwcnt.backend.cache_clean_work);
KBASE_DEBUG_ASSERT(ret);
}
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}
void kbase_clean_caches_done(struct kbase_device *kbdev)
{
u32 irq_mask;
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
unsigned long flags;
unsigned long pm_flags;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
/* Disable interrupt */
spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
NULL);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
/* Wakeup... */
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
/* Only wake if we weren't resetting */
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
}
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}
}
int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
{
struct kbase_device *kbdev = kctx->kbdev;
unsigned long flags;
int err;
/* Wait for dump & cacheclean to complete */
wait_event(kbdev->hwcnt.backend.wait,
kbdev->hwcnt.backend.triggered != 0);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
err = -EINVAL;
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
} else {
/* Dump done */
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_IDLE);
err = 0;
}
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
return err;
}
int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
{
unsigned long flags;
int err = -EINVAL;
struct kbase_device *kbdev = kctx->kbdev;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
/* Check it's the context previously set up and we're not already
* dumping */
if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
KBASE_INSTR_STATE_IDLE)
goto out;
/* Clear the counters */
KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_PRFCNT_CLEAR, kctx);
err = 0;
out:
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
int kbase_instr_backend_init(struct kbase_device *kbdev)
{
int ret = 0;
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
init_waitqueue_head(&kbdev->hwcnt.backend.wait);
init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
kbasep_cache_clean_worker);
kbdev->hwcnt.backend.triggered = 0;
kbdev->hwcnt.backend.cache_clean_wq =
alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
ret = -EINVAL;
return ret;
}
void kbase_instr_backend_term(struct kbase_device *kbdev)
{
destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
}