| // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
| /* |
| * |
| * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. |
| * |
| * This program is free software and is provided to you under the terms of the |
| * GNU General Public License version 2 as published by the Free Software |
| * Foundation, and any use by you of this program is subject to the terms |
| * of such GNU license. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, you can access it online at |
| * http://www.gnu.org/licenses/gpl-2.0.html. |
| * |
| */ |
| |
| /* NOTES: |
 * - A default GPU can be compiled in during the build by defining
| * CONFIG_MALI_NO_MALI_DEFAULT_GPU. SCons sets this, which means that |
| * insmod mali_kbase.ko with no arguments after a build with "scons |
| * gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be |
| * overridden by passing the 'no_mali_gpu' argument to insmod. |
| * |
| * - if CONFIG_MALI_ERROR_INJECT is defined the error injection system is |
| * activated. |
| */ |
| |
| /* Implementation of failure injection system: |
| * |
| * Error conditions are generated by gpu_generate_error(). |
 * Depending on the configuration, gpu_generate_error() either generates a
 * random HW error condition (if CONFIG_MALI_ERROR_INJECT_RANDOM is defined)
 * or checks error_track_list for an error configuration to apply to the
 * current job chain (if CONFIG_MALI_ERROR_INJECT_RANDOM is not defined).
 * Each error condition triggers a specific "state" in a particular set of
 * registers, as per the Midgard Architecture Specifications document.
| * |
 * According to the Midgard Architecture Specifications document, the
 * following registers are always affected by error conditions:
| * |
| * JOB Exception: |
| * JOB_IRQ_RAWSTAT |
| * JOB<n> STATUS AREA |
| * |
| * MMU Exception: |
| * MMU_IRQ_RAWSTAT |
| * AS<n>_FAULTSTATUS |
| * AS<n>_FAULTADDRESS |
| * |
| * GPU Exception: |
| * GPU_IRQ_RAWSTAT |
| * GPU_FAULTSTATUS |
| * GPU_FAULTADDRESS |
| * |
 * For further clarification on the model behaviour upon specific error
 * conditions, the user may refer to the Midgard Architecture Specification
 * document.
| */ |
| #include <mali_kbase.h> |
| #include <gpu/mali_kbase_gpu_regmap.h> |
| #include <backend/gpu/mali_kbase_model_dummy.h> |
| #include <mali_kbase_mem_linux.h> |
| |
| #if MALI_USE_CSF |
| #include <csf/mali_kbase_csf_firmware.h> |
| |
| /* Index of the last value register for each type of core, with the 1st value |
| * register being at index 0. |
| */ |
| #define IPA_CTL_MAX_VAL_CNT_IDX (KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS - 1) |
| |
/* Array for storing the value of the SELECT register for each type of core */
| static u64 ipa_ctl_select_config[KBASE_IPA_CORE_TYPE_NUM]; |
| static bool ipa_control_timer_enabled; |
| #endif |
| |
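/* Extract the low/high 32-bit half of a 64-bit value. Note that HI_MASK
 * keeps the upper 32 bits in place rather than shifting them down.
 */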
| #define LO_MASK(M) ((M) & 0xFFFFFFFF) |
| #define HI_MASK(M) ((M) & 0xFFFFFFFF00000000) |
| |
| static u32 get_implementation_register(u32 reg) |
| { |
| switch (reg) { |
| case GPU_CONTROL_REG(SHADER_PRESENT_LO): |
| return LO_MASK(DUMMY_IMPLEMENTATION_SHADER_PRESENT); |
| case GPU_CONTROL_REG(TILER_PRESENT_LO): |
| return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT); |
| case GPU_CONTROL_REG(L2_PRESENT_LO): |
| return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT); |
| case GPU_CONTROL_REG(STACK_PRESENT_LO): |
| return LO_MASK(DUMMY_IMPLEMENTATION_STACK_PRESENT); |
| |
| case GPU_CONTROL_REG(SHADER_PRESENT_HI): |
| case GPU_CONTROL_REG(TILER_PRESENT_HI): |
| case GPU_CONTROL_REG(L2_PRESENT_HI): |
| case GPU_CONTROL_REG(STACK_PRESENT_HI): |
| /* *** FALLTHROUGH *** */ |
| default: |
| return 0; |
| } |
| } |
| |
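/* Model state for the performance counters. There is a single file-scope
 * instance, protected by access_lock.
 */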
| struct { |
| spinlock_t access_lock; |
| #if !MALI_USE_CSF |
| unsigned long prfcnt_base; |
| #endif /* !MALI_USE_CSF */ |
| u32 *prfcnt_base_cpu; |
| |
| u32 time; |
| |
| struct gpu_model_prfcnt_en prfcnt_en; |
| |
| u64 l2_present; |
| u64 shader_present; |
| |
| #if !MALI_USE_CSF |
| u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; |
| #else |
| u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; |
| #endif /* !MALI_USE_CSF */ |
| u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; |
| u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * |
| KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; |
| u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES * |
| KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; |
| |
| } performance_counters = { |
| .l2_present = DUMMY_IMPLEMENTATION_L2_PRESENT, |
| .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, |
| }; |
| |
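/* Per-slot state tracked by the model: the active and queued jobs and the
 * state of the slot's IRQ line.
 */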
| struct job_slot { |
| int job_active; |
| int job_queued; |
| int job_complete_irq_asserted; |
| int job_irq_mask; |
| int job_disabled; |
| }; |
| |
| /** |
| * struct control_reg_values_t - control register values specific to the GPU being 'emulated' |
| * @name: GPU name |
| * @gpu_id: GPU ID to report |
| * @as_present: Bitmap of address spaces present |
| * @thread_max_threads: Maximum number of threads per core |
| * @thread_max_workgroup_size: Maximum number of threads per workgroup |
| * @thread_max_barrier_size: Maximum number of threads per barrier |
| * @thread_features: Thread features, NOT INCLUDING the 2 |
| * most-significant bits, which are always set to |
| * IMPLEMENTATION_MODEL. |
| * @core_features: Core features |
| * @tiler_features: Tiler features |
| * @mmu_features: MMU features |
| * @gpu_features_lo: GPU features (low) |
| * @gpu_features_hi: GPU features (high) |
| */ |
| struct control_reg_values_t { |
| const char *name; |
| u32 gpu_id; |
| u32 as_present; |
| u32 thread_max_threads; |
| u32 thread_max_workgroup_size; |
| u32 thread_max_barrier_size; |
| u32 thread_features; |
| u32 core_features; |
| u32 tiler_features; |
| u32 mmu_features; |
| u32 gpu_features_lo; |
| u32 gpu_features_hi; |
| }; |
| |
| struct dummy_model_t { |
| int reset_completed; |
| int reset_completed_mask; |
| #if !MALI_USE_CSF |
| int prfcnt_sample_completed; |
| #endif /* !MALI_USE_CSF */ |
| int power_changed_mask; /* 2bits: _ALL,_SINGLE */ |
| int power_changed; /* 1bit */ |
| bool clean_caches_completed; |
| bool clean_caches_completed_irq_enabled; |
| int power_on; /* 6bits: SHADER[4],TILER,L2 */ |
| u32 stack_power_on_lo; |
| u32 coherency_enable; |
| unsigned int job_irq_js_state; |
| struct job_slot slots[NUM_SLOTS]; |
| const struct control_reg_values_t *control_reg_values; |
| u32 l2_config; |
| void *data; |
| }; |
| |
| void gpu_device_set_data(void *model, void *data) |
| { |
| struct dummy_model_t *dummy = (struct dummy_model_t *)model; |
| |
| dummy->data = data; |
| } |
| |
| void *gpu_device_get_data(void *model) |
| { |
| struct dummy_model_t *dummy = (struct dummy_model_t *)model; |
| |
| return dummy->data; |
| } |
| |
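/* Assert the job-complete IRQ line for job slot 's' of model 'm' */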
| #define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1 |
| |
/* SCons should pass in a default GPU, but other ways of building (e.g.
 * in-tree) won't, so define a fallback here just in case.
 */
| #ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU |
| #define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx" |
| #endif |
| |
| static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; |
| module_param(no_mali_gpu, charp, 0000); |
| MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); |
| |
| /* Construct a value for the THREAD_FEATURES register, *except* the two most |
| * significant bits, which are set to IMPLEMENTATION_MODEL in |
| * midgard_model_read_reg(). |
| */ |
| #if MALI_USE_CSF |
| #define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ |
| ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24)) |
| #else |
| #define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ |
| ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24)) |
| #endif |
| |
| /* Array associating GPU names with control register values. The first |
| * one is used in the case of no match. |
| */ |
| static const struct control_reg_values_t all_control_reg_values[] = { |
| { |
| .name = "tMIx", |
| .gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tHEx", |
| .gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tSIx", |
| .gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x300, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), |
| .tiler_features = 0x209, |
| .mmu_features = 0x2821, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tDVx", |
| .gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x300, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), |
| .tiler_features = 0x209, |
| .mmu_features = 0x2821, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tNOx", |
| .gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tGOx_r0p0", |
| .gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tGOx_r1p0", |
| .gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), |
| .core_features = 0x2, |
| .tiler_features = 0x209, |
| .mmu_features = 0x2823, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tTRx", |
| .gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tNAx", |
| .gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tBEx", |
| .gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tBAx", |
| .gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tDUx", |
| .gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tODx", |
| .gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tGRx", |
| .gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), |
| .core_features = 0x0, /* core_1e16fma2tex */ |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tVAx", |
| .gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x180, |
| .thread_max_workgroup_size = 0x180, |
| .thread_max_barrier_size = 0x180, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), |
| .core_features = 0x0, /* core_1e16fma2tex */ |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0, |
| .gpu_features_hi = 0, |
| }, |
| { |
| .name = "tTUx", |
| .gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0), |
| .as_present = 0xFF, |
| .thread_max_threads = 0x800, |
| .thread_max_workgroup_size = 0x400, |
| .thread_max_barrier_size = 0x400, |
| .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0), |
| .core_features = 0x0, /* core_1e32fma2tex */ |
| .tiler_features = 0x809, |
| .mmu_features = 0x2830, |
| .gpu_features_lo = 0xf, |
| .gpu_features_hi = 0, |
| }, |
| }; |
| |
| struct error_status_t hw_error_status; |
| |
| #if MALI_USE_CSF |
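/**
 * gpu_model_get_prfcnt_value() - Read one half of an IPA counter value
 * @core_type: Type of core (CSHW, MEMSYS, TILER or SHADER)
 * @cnt_idx: Index of the counter within the block
 * @is_low_word: True for the low 32 bits of the value, false for the high
 *
 * The counter value is accumulated across all present cores of the given
 * type, using the event selected for @cnt_idx in that core type's SELECT
 * register.
 *
 * Return: The requested 32-bit half of the accumulated counter value.
 */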
| static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, |
| u32 cnt_idx, bool is_low_word) |
| { |
| u64 *counters_data; |
| u32 core_count = 0; |
| u32 event_index; |
| u64 value = 0; |
| u32 core; |
| unsigned long flags; |
| |
| if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM)) |
| return 0; |
| |
| if (WARN_ON(cnt_idx >= KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS)) |
| return 0; |
| |
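	/* Each counter's event selector occupies one byte of the 64-bit
	 * SELECT register for its core type.
	 */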
| event_index = |
| (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; |
| |
| /* Currently only primary counter blocks are supported */ |
| if (WARN_ON(event_index >= 64)) |
| return 0; |
| |
	/* The actual events start from index 4 onwards. The spec also says
	 * that the PRFCNT_EN, TIMESTAMP_LO and TIMESTAMP_HI pseudo-counters
	 * do not make sense for IPA counters; if selected, the value
	 * returned for them will be zero.
	 */
| if (WARN_ON(event_index <= 3)) |
| return 0; |
| |
| event_index -= 4; |
| |
| spin_lock_irqsave(&performance_counters.access_lock, flags); |
| |
| switch (core_type) { |
| case KBASE_IPA_CORE_TYPE_CSHW: |
| core_count = 1; |
| counters_data = performance_counters.cshw_counters; |
| break; |
| case KBASE_IPA_CORE_TYPE_MEMSYS: |
| core_count = hweight64(performance_counters.l2_present); |
| counters_data = performance_counters.l2_counters; |
| break; |
| case KBASE_IPA_CORE_TYPE_TILER: |
| core_count = 1; |
| counters_data = performance_counters.tiler_counters; |
| break; |
| case KBASE_IPA_CORE_TYPE_SHADER: |
| core_count = hweight64(performance_counters.shader_present); |
| counters_data = performance_counters.shader_counters; |
| break; |
| default: |
| WARN(1, "Invalid core_type %d\n", core_type); |
| break; |
| } |
| |
| for (core = 0; core < core_count; core++) { |
| value += counters_data[event_index]; |
| event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; |
| } |
| |
| spin_unlock_irqrestore(&performance_counters.access_lock, flags); |
| |
| if (is_low_word) |
| return (value & U32_MAX); |
| else |
| return (value >> 32); |
| } |
| #endif /* MALI_USE_CSF */ |
| |
| /** |
| * gpu_model_clear_prfcnt_values_nolock - Clear performance counter values |
| * |
| * Sets all performance counter values to zero. The performance counter access |
| * lock must be held when calling this function. |
| */ |
| static void gpu_model_clear_prfcnt_values_nolock(void) |
| { |
| lockdep_assert_held(&performance_counters.access_lock); |
| #if !MALI_USE_CSF |
| memset(performance_counters.jm_counters, 0, sizeof(performance_counters.jm_counters)); |
| #else |
| memset(performance_counters.cshw_counters, 0, sizeof(performance_counters.cshw_counters)); |
| #endif /* !MALI_USE_CSF */ |
| memset(performance_counters.tiler_counters, 0, sizeof(performance_counters.tiler_counters)); |
| memset(performance_counters.l2_counters, 0, sizeof(performance_counters.l2_counters)); |
| memset(performance_counters.shader_counters, 0, |
| sizeof(performance_counters.shader_counters)); |
| } |
| |
| #if MALI_USE_CSF |
| void gpu_model_clear_prfcnt_values(void) |
| { |
| unsigned long flags; |
| |
| spin_lock_irqsave(&performance_counters.access_lock, flags); |
| gpu_model_clear_prfcnt_values_nolock(); |
| spin_unlock_irqrestore(&performance_counters.access_lock, flags); |
| } |
| KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values); |
| #endif /* MALI_USE_CSF */ |
| |
| /** |
| * gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer |
| * |
| * @values: Array of values to be written out |
| * @out_index: Index into performance counter buffer |
| * @block_count: Number of blocks to dump |
| * @prfcnt_enable_mask: Counter enable mask |
| * @blocks_present: Available blocks bit mask |
| * |
| * The performance counter access lock must be held before calling this |
| * function. |
| */ |
| static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_count, |
| u32 prfcnt_enable_mask, u64 blocks_present) |
| { |
| u32 block_idx, counter; |
| u32 counter_value = 0; |
| u32 *prfcnt_base; |
| u32 index = 0; |
| |
| lockdep_assert_held(&performance_counters.access_lock); |
| |
| prfcnt_base = performance_counters.prfcnt_base_cpu; |
| |
| for (block_idx = 0; block_idx < block_count; block_idx++) { |
| /* only dump values if core is present */ |
		if (!(blocks_present & (1ULL << block_idx))) {
| #if MALI_USE_CSF |
			/* for CSF, dump a zeroed-out block */
| memset(&prfcnt_base[*out_index], 0, |
| KBASE_DUMMY_MODEL_BLOCK_SIZE); |
| *out_index += KBASE_DUMMY_MODEL_VALUES_PER_BLOCK; |
| #endif /* MALI_USE_CSF */ |
| continue; |
| } |
| |
| /* write the header */ |
| prfcnt_base[*out_index] = performance_counters.time++; |
		prfcnt_base[*out_index + 2] = prfcnt_enable_mask;
| *out_index += KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; |
| |
| /* write the counters */ |
| for (counter = 0; |
| counter < KBASE_DUMMY_MODEL_COUNTER_PER_CORE; |
| counter++) { |
			/* HW counter values retrieved through the
			 * PRFCNT_SAMPLE request are only 32 bits wide.
			 */
| counter_value = (u32)values[index++]; |
| if (KBASE_DUMMY_MODEL_COUNTER_ENABLED( |
| prfcnt_enable_mask, (counter + |
| KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))) { |
| prfcnt_base[*out_index + counter] = |
| counter_value; |
| } |
| } |
| *out_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE; |
| } |
| } |
| |
| static void gpu_model_dump_nolock(void) |
| { |
| u32 index = 0; |
| |
| lockdep_assert_held(&performance_counters.access_lock); |
| |
| #if !MALI_USE_CSF |
| gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, 1, |
| performance_counters.prfcnt_en.fe, 0x1); |
| #else |
| gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, 1, |
| performance_counters.prfcnt_en.fe, 0x1); |
| #endif /* !MALI_USE_CSF */ |
| gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters, |
| &index, 1, |
| performance_counters.prfcnt_en.tiler, |
| DUMMY_IMPLEMENTATION_TILER_PRESENT); |
| gpu_model_dump_prfcnt_blocks(performance_counters.l2_counters, &index, |
| KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, |
| performance_counters.prfcnt_en.l2, |
| performance_counters.l2_present); |
| gpu_model_dump_prfcnt_blocks(performance_counters.shader_counters, |
| &index, KBASE_DUMMY_MODEL_MAX_SHADER_CORES, |
| performance_counters.prfcnt_en.shader, |
| performance_counters.shader_present); |
| |
| /* Counter values are cleared after each dump */ |
| gpu_model_clear_prfcnt_values_nolock(); |
| |
| /* simulate a 'long' time between samples */ |
| performance_counters.time += 10; |
| } |
| |
| #if !MALI_USE_CSF |
| static void midgard_model_dump_prfcnt(void) |
| { |
| unsigned long flags; |
| |
| spin_lock_irqsave(&performance_counters.access_lock, flags); |
| gpu_model_dump_nolock(); |
| spin_unlock_irqrestore(&performance_counters.access_lock, flags); |
| } |
| #else |
| void gpu_model_prfcnt_dump_request(u32 *sample_buf, struct gpu_model_prfcnt_en enable_maps) |
| { |
| unsigned long flags; |
| |
| if (WARN_ON(!sample_buf)) |
| return; |
| |
| spin_lock_irqsave(&performance_counters.access_lock, flags); |
| performance_counters.prfcnt_base_cpu = sample_buf; |
| performance_counters.prfcnt_en = enable_maps; |
| gpu_model_dump_nolock(); |
| spin_unlock_irqrestore(&performance_counters.access_lock, flags); |
| } |
| |
| void gpu_model_glb_request_job_irq(void *model) |
| { |
| unsigned long flags; |
| |
| spin_lock_irqsave(&hw_error_status.access_lock, flags); |
| hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF; |
| spin_unlock_irqrestore(&hw_error_status.access_lock, flags); |
| gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ); |
| } |
| #endif /* !MALI_USE_CSF */ |
| |
| static void init_register_statuses(struct dummy_model_t *dummy) |
| { |
| int i; |
| |
| hw_error_status.errors_mask = 0; |
| hw_error_status.gpu_error_irq = 0; |
| hw_error_status.gpu_fault_status = 0; |
| hw_error_status.job_irq_rawstat = 0; |
| hw_error_status.job_irq_status = 0; |
| hw_error_status.mmu_irq_rawstat = 0; |
| hw_error_status.mmu_irq_mask = 0; |
| |
| for (i = 0; i < NUM_SLOTS; i++) { |
| hw_error_status.js_status[i] = 0; |
| hw_error_status.job_irq_rawstat |= |
| (dummy->slots[i].job_complete_irq_asserted) << i; |
| hw_error_status.job_irq_status |= |
| (dummy->slots[i].job_complete_irq_asserted) << i; |
| } |
| for (i = 0; i < NUM_MMU_AS; i++) { |
| hw_error_status.as_command[i] = 0; |
| hw_error_status.as_faultstatus[i] = 0; |
| hw_error_status.mmu_irq_mask |= 1 << i; |
| } |
| |
| performance_counters.time = 0; |
| } |
| |
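/**
 * update_register_statuses() - Update the model registers for a job
 * @dummy: Model instance
 * @job_slot: Index of the job slot that has just completed
 *
 * Applies any error condition set in hw_error_status.errors_mask to the
 * relevant JOB, MMU and GPU status registers and raw IRQ status, then
 * clears the error mask.
 */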
| static void update_register_statuses(struct dummy_model_t *dummy, int job_slot) |
| { |
| lockdep_assert_held(&hw_error_status.access_lock); |
| |
| if (hw_error_status.errors_mask & IS_A_JOB_ERROR) { |
| if (job_slot == hw_error_status.current_job_slot) { |
| #if !MALI_USE_CSF |
| if (hw_error_status.js_status[job_slot] == 0) { |
| /* status reg is clean; it can be written */ |
| |
| switch (hw_error_status.errors_mask & |
| IS_A_JOB_ERROR) { |
| case KBASE_JOB_INTERRUPTED: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_INTERRUPTED; |
| break; |
| |
| case KBASE_JOB_STOPPED: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_STOPPED; |
| break; |
| |
| case KBASE_JOB_TERMINATED: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_TERMINATED; |
| break; |
| |
| case KBASE_JOB_CONFIG_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_CONFIG_FAULT; |
| break; |
| |
| case KBASE_JOB_POWER_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_POWER_FAULT; |
| break; |
| |
| case KBASE_JOB_READ_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_READ_FAULT; |
| break; |
| |
| case KBASE_JOB_WRITE_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_WRITE_FAULT; |
| break; |
| |
| case KBASE_JOB_AFFINITY_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_AFFINITY_FAULT; |
| break; |
| |
| case KBASE_JOB_BUS_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_BUS_FAULT; |
| break; |
| |
| case KBASE_INSTR_INVALID_PC: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_INSTR_INVALID_PC; |
| break; |
| |
| case KBASE_INSTR_INVALID_ENC: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_INSTR_INVALID_ENC; |
| break; |
| |
| case KBASE_INSTR_TYPE_MISMATCH: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_INSTR_TYPE_MISMATCH; |
| break; |
| |
| case KBASE_INSTR_OPERAND_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_INSTR_OPERAND_FAULT; |
| break; |
| |
| case KBASE_INSTR_TLS_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_INSTR_TLS_FAULT; |
| break; |
| |
| case KBASE_INSTR_BARRIER_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_INSTR_BARRIER_FAULT; |
| break; |
| |
| case KBASE_INSTR_ALIGN_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_INSTR_ALIGN_FAULT; |
| break; |
| |
| case KBASE_DATA_INVALID_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_DATA_INVALID_FAULT; |
| break; |
| |
| case KBASE_TILE_RANGE_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_TILE_RANGE_FAULT; |
| break; |
| |
| case KBASE_ADDR_RANGE_FAULT: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_ADDRESS_RANGE_FAULT; |
| break; |
| |
| case KBASE_OUT_OF_MEMORY: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_OUT_OF_MEMORY; |
| break; |
| |
| case KBASE_UNKNOWN: |
| hw_error_status.js_status[job_slot] = |
| JS_STATUS_UNKNOWN; |
| break; |
| |
| default: |
| model_error_log(KBASE_CORE, |
| "\nAtom Chain 0x%llx: Invalid Error Mask!", |
| hw_error_status.current_jc); |
| break; |
| } |
| } |
| #endif /* !MALI_USE_CSF */ |
| |
| /* we set JOB_FAIL_<n> */ |
| hw_error_status.job_irq_rawstat |= |
| (dummy->slots[job_slot].job_complete_irq_asserted) << |
| (job_slot + 16); |
| hw_error_status.job_irq_status |= |
| (((dummy->slots[job_slot].job_complete_irq_asserted) << |
| (job_slot)) & |
| (dummy->slots[job_slot].job_irq_mask << |
| job_slot)) << 16; |
| } else { |
| hw_error_status.job_irq_rawstat |= |
| (dummy->slots[job_slot].job_complete_irq_asserted) << |
| job_slot; |
| hw_error_status.job_irq_status |= |
| ((dummy->slots[job_slot].job_complete_irq_asserted) << |
| (job_slot)) & |
| (dummy->slots[job_slot].job_irq_mask << |
| job_slot); |
| } |
| } else { |
| hw_error_status.job_irq_rawstat |= |
| (dummy->slots[job_slot].job_complete_irq_asserted) << |
| job_slot; |
| hw_error_status.job_irq_status |= |
| ((dummy->slots[job_slot].job_complete_irq_asserted) << |
| (job_slot)) & |
| (dummy->slots[job_slot].job_irq_mask << job_slot); |
| } /* end of job register statuses */ |
| |
| if (hw_error_status.errors_mask & IS_A_MMU_ERROR) { |
| int i; |
| |
| for (i = 0; i < NUM_MMU_AS; i++) { |
| if (i == hw_error_status.faulty_mmu_as) { |
| if (hw_error_status.as_faultstatus[i] == 0) { |
| u32 status = |
| hw_error_status.as_faultstatus[i]; |
| /* status reg is clean; it can be |
| * written |
| */ |
| switch (hw_error_status.errors_mask & |
| IS_A_MMU_ERROR) { |
| case KBASE_TRANSLATION_FAULT: |
| /* 0xCm means TRANSLATION FAULT |
| * (m is mmu_table_level) |
| */ |
| status = |
| ((1 << 7) | (1 << 6) | |
| hw_error_status.mmu_table_level |
| ); |
| break; |
| |
| case KBASE_PERMISSION_FAULT: |
| /*0xC8 means PERMISSION FAULT */ |
| status = ((1 << 7) | (1 << 6) | |
| (1 << 3)); |
| break; |
| |
| case KBASE_TRANSTAB_BUS_FAULT: |
						/* 0xDm means TRANSLATION TABLE
						 * BUS FAULT (m is
						 * mmu_table_level)
						 */
| status = ((1 << 7) | (1 << 6) | |
| (1 << 4) | |
| hw_error_status.mmu_table_level |
| ); |
| break; |
| |
| case KBASE_ACCESS_FLAG: |
| /* 0xD8 means ACCESS FLAG */ |
| status = ((1 << 7) | (1 << 6) | |
| (1 << 4) | (1 << 3)); |
| break; |
| |
| default: |
| model_error_log(KBASE_CORE, |
| "\nAtom Chain 0x%llx: Invalid Error Mask!", |
| hw_error_status.current_jc); |
| break; |
| } |
| hw_error_status.as_faultstatus[i] = |
| status; |
| } |
| |
| if (hw_error_status.errors_mask & |
| KBASE_TRANSTAB_BUS_FAULT) |
| hw_error_status.mmu_irq_rawstat |= |
| 1 << (16 + i); /* bus error */ |
| else |
| hw_error_status.mmu_irq_rawstat |= |
| 1 << i; /* page fault */ |
| } |
| } |
	} /* end of MMU register statuses */
| if (hw_error_status.errors_mask & IS_A_GPU_ERROR) { |
| if (hw_error_status.gpu_fault_status) { |
| /* not the first GPU error reported */ |
| hw_error_status.gpu_error_irq |= (1 << 7); |
| } else { |
| hw_error_status.gpu_error_irq |= 1; |
| switch (hw_error_status.errors_mask & IS_A_GPU_ERROR) { |
| case KBASE_DELAYED_BUS_FAULT: |
| hw_error_status.gpu_fault_status = (1 << 7); |
| break; |
| |
| case KBASE_SHAREABILITY_FAULT: |
| hw_error_status.gpu_fault_status = (1 << 7) | |
| (1 << 3); |
| break; |
| |
| default: |
| model_error_log(KBASE_CORE, |
| "\nAtom Chain 0x%llx: Invalid Error Mask!", |
| hw_error_status.current_jc); |
| break; |
| } |
| } |
| } |
	hw_error_status.errors_mask = 0; /* clear the error mask */
| } |
| |
| #if !MALI_USE_CSF |
| static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask) |
| { |
| int i; |
| |
| lockdep_assert_held(&hw_error_status.access_lock); |
| pr_debug("%s", "Updating the JS_ACTIVE register"); |
| |
| for (i = 0; i < NUM_SLOTS; i++) { |
| int slot_active = dummy->slots[i].job_active; |
| int next_busy = dummy->slots[i].job_queued; |
| |
| if ((mask & (1 << i)) || (mask & (1 << (i + 16)))) { |
| /* clear the bits we're updating */ |
| dummy->job_irq_js_state &= ~((1 << (16 + i)) | |
| (1 << i)); |
| if (hw_error_status.js_status[i]) { |
| dummy->job_irq_js_state |= next_busy << |
| (i + 16); |
| if (mask & (1 << (i + 16))) { |
| /* clear job slot status */ |
| hw_error_status.js_status[i] = 0; |
| /* continue execution of jobchain */ |
| dummy->slots[i].job_active = |
| dummy->slots[i].job_queued; |
| } |
| } else { |
| /* set bits if needed */ |
| dummy->job_irq_js_state |= ((slot_active << i) | |
| (next_busy << (i + 16))); |
| } |
| } |
| } |
| pr_debug("The new snapshot is 0x%08X\n", dummy->job_irq_js_state); |
| } |
| #endif /* !MALI_USE_CSF */ |
| |
| /** |
| * find_control_reg_values() - Look up constant control register values. |
| * @gpu: GPU name |
| * |
| * Look up the GPU name to find the correct set of control register values for |
| * that GPU. If not found, warn and use the first values in the array. |
| * |
| * Return: Pointer to control register values for that GPU. |
| */ |
| static const struct control_reg_values_t *find_control_reg_values(const char *gpu) |
| { |
| size_t i; |
| const struct control_reg_values_t *ret = NULL; |
| |
| for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) { |
| const struct control_reg_values_t * const fcrv = &all_control_reg_values[i]; |
| |
| if (!strcmp(fcrv->name, gpu)) { |
| ret = fcrv; |
| pr_debug("Found control register values for %s\n", gpu); |
| break; |
| } |
| } |
| |
| if (!ret) { |
| ret = &all_control_reg_values[0]; |
| pr_warn("Couldn't find control register values for GPU %s; using default %s\n", |
| gpu, ret->name); |
| } |
| |
| return ret; |
| } |
| |
| void *midgard_model_create(const void *config) |
| { |
| struct dummy_model_t *dummy = NULL; |
| |
| spin_lock_init(&hw_error_status.access_lock); |
| spin_lock_init(&performance_counters.access_lock); |
| |
| dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); |
| |
| if (dummy) { |
| dummy->job_irq_js_state = 0; |
| init_register_statuses(dummy); |
| dummy->control_reg_values = find_control_reg_values(no_mali_gpu); |
| } |
| return dummy; |
| } |
| |
| void midgard_model_destroy(void *h) |
| { |
| kfree((void *)h); |
| } |
| |
| static void midgard_model_get_outputs(void *h) |
| { |
| struct dummy_model_t *dummy = (struct dummy_model_t *)h; |
| |
| lockdep_assert_held(&hw_error_status.access_lock); |
| |
| if (hw_error_status.job_irq_status) |
| gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ); |
| |
| if ((dummy->power_changed && dummy->power_changed_mask) || |
| (dummy->reset_completed & dummy->reset_completed_mask) || |
| hw_error_status.gpu_error_irq || |
| #if !MALI_USE_CSF |
| dummy->prfcnt_sample_completed || |
| #endif |
| (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled)) |
| gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ); |
| |
| if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask) |
| gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ); |
| } |
| |
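/**
 * midgard_model_update() - Progress the job on each job slot
 * @h: Model handle
 *
 * Asserts the completion IRQ for each active job, applies any injected
 * errors, and promotes queued jobs to active once previously raised IRQs
 * have been cleared.
 */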
| static void midgard_model_update(void *h) |
| { |
| struct dummy_model_t *dummy = (struct dummy_model_t *)h; |
| int i; |
| |
| lockdep_assert_held(&hw_error_status.access_lock); |
| |
| for (i = 0; i < NUM_SLOTS; i++) { |
| if (!dummy->slots[i].job_active) |
| continue; |
| |
| if (dummy->slots[i].job_disabled) { |
| update_register_statuses(dummy, i); |
| continue; |
| } |
| |
		/* If there are any pending interrupts that have not been
		 * cleared, we cannot run a new job in the NEXT registers, as
		 * we would overwrite the register status of the job in the
		 * HEAD registers, which has not yet been read.
		 */
| if ((hw_error_status.job_irq_rawstat & (1 << (i + 16))) || |
| (hw_error_status.job_irq_rawstat & (1 << i))) { |
| continue; |
| } |
| |
		/* this job is done; assert the IRQ lines */
		signal_int(dummy, i);
#ifdef CONFIG_MALI_ERROR_INJECT
		midgard_set_error(i);
#endif /* CONFIG_MALI_ERROR_INJECT */
		update_register_statuses(dummy, i);
		/* if this job slot returned failures, we cannot use it */
		if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) {
			dummy->slots[i].job_active = 0;
			continue;
		}
		/* process the next job */
		dummy->slots[i].job_active = dummy->slots[i].job_queued;
		dummy->slots[i].job_queued = 0;
| if (dummy->slots[i].job_active) { |
| if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) |
| model_error_log(KBASE_CORE, |
| "\natom %lld running a job on a dirty slot", |
| hw_error_status.current_jc); |
| } |
| } |
| } |
| |
| static void invalidate_active_jobs(struct dummy_model_t *dummy) |
| { |
| int i; |
| |
| lockdep_assert_held(&hw_error_status.access_lock); |
| |
| for (i = 0; i < NUM_SLOTS; i++) { |
| if (dummy->slots[i].job_active) { |
| hw_error_status.job_irq_rawstat |= (1 << (16 + i)); |
| |
| hw_error_status.js_status[i] = 0x7f; /*UNKNOWN*/ |
| } |
| } |
| } |
| |
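/**
 * midgard_model_write_reg() - Handle a register write to the dummy model
 * @h: Model handle
 * @addr: Register address
 * @value: Value to be written
 *
 * Return: Always 1.
 */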
| u8 midgard_model_write_reg(void *h, u32 addr, u32 value) |
| { |
| unsigned long flags; |
| struct dummy_model_t *dummy = (struct dummy_model_t *)h; |
| |
| spin_lock_irqsave(&hw_error_status.access_lock, flags); |
| |
| #if !MALI_USE_CSF |
| if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && |
| (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { |
| int slot_idx = (addr >> 7) & 0xf; |
| |
| KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); |
| if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) { |
| hw_error_status.current_jc &= |
| ~((u64) (0xFFFFFFFF)); |
| hw_error_status.current_jc |= (u64) value; |
| } |
| if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_HI)) { |
| hw_error_status.current_jc &= (u64) 0xFFFFFFFF; |
| hw_error_status.current_jc |= |
| ((u64) value) << 32; |
| } |
| if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && |
| value == 1) { |
| pr_debug("%s", "start detected"); |
| KBASE_DEBUG_ASSERT(!dummy->slots[slot_idx].job_active || |
| !dummy->slots[slot_idx].job_queued); |
| if ((dummy->slots[slot_idx].job_active) || |
| (hw_error_status.job_irq_rawstat & |
| (1 << (slot_idx + 16)))) { |
| pr_debug("~~~~~~~~~~~ Start: job slot is already active or there are IRQ pending ~~~~~~~~~" |
| ); |
| dummy->slots[slot_idx].job_queued = 1; |
| } else { |
| dummy->slots[slot_idx].job_active = 1; |
| } |
| } |
| |
| if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value == |
| 0) |
| dummy->slots[slot_idx].job_queued = 0; |
| |
| if ((addr == JOB_SLOT_REG(slot_idx, JS_COMMAND)) && |
| (value == JS_COMMAND_SOFT_STOP || |
| value == JS_COMMAND_HARD_STOP)) { |
| /*dummy->slots[slot_idx].job_active = 0; */ |
| hw_error_status.current_job_slot = slot_idx; |
| if (value == JS_COMMAND_SOFT_STOP) { |
| hw_error_status.errors_mask = KBASE_JOB_STOPPED; |
			} else { /* value == JS_COMMAND_HARD_STOP */
| |
| if (dummy->slots[slot_idx].job_disabled != 0) { |
| pr_debug("enabling slot after HARD_STOP" |
| ); |
| dummy->slots[slot_idx].job_disabled = 0; |
| } |
| hw_error_status.errors_mask = |
| KBASE_JOB_TERMINATED; |
| } |
| } |
| } else if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) { |
| int i; |
| |
| for (i = 0; i < NUM_SLOTS; i++) { |
| if (value & ((1 << i) | (1 << (i + 16)))) |
| dummy->slots[i].job_complete_irq_asserted = 0; |
| /* hw_error_status.js_status[i] is cleared in |
| * update_job_irq_js_state |
| */ |
| } |
| pr_debug("%s", "job irq cleared"); |
| update_job_irq_js_state(dummy, value); |
		/* remove error condition for JOB */
| hw_error_status.job_irq_rawstat &= ~(value); |
| hw_error_status.job_irq_status &= ~(value); |
| } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { |
| int i; |
| |
| for (i = 0; i < NUM_SLOTS; i++) |
| dummy->slots[i].job_irq_mask = (value >> i) & 0x01; |
| pr_debug("job irq mask to value %x", value); |
| } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { |
| #else /* !MALI_USE_CSF */ |
| if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) { |
| pr_debug("%s", "job irq cleared"); |
| |
| hw_error_status.job_irq_rawstat &= ~(value); |
| hw_error_status.job_irq_status &= ~(value); |
| } else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { |
| /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ |
| } else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { |
| #endif /* !MALI_USE_CSF */ |
| pr_debug("GPU_IRQ_MASK set to 0x%x", value); |
| dummy->reset_completed_mask = (value >> 8) & 0x01; |
| dummy->power_changed_mask = (value >> 9) & 0x03; |
| dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u; |
| } else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) { |
| dummy->coherency_enable = value; |
| } else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) { |
| if (value & (1 << 8)) { |
| pr_debug("%s", "gpu RESET_COMPLETED irq cleared"); |
| dummy->reset_completed = 0; |
| } |
| if (value & (3 << 9)) |
| dummy->power_changed = 0; |
| |
| if (value & (1 << 17)) |
| dummy->clean_caches_completed = false; |
| #if !MALI_USE_CSF |
| if (value & PRFCNT_SAMPLE_COMPLETED) |
| dummy->prfcnt_sample_completed = 0; |
| #endif /* !MALI_USE_CSF */ |
		/* update error status */
| hw_error_status.gpu_error_irq &= ~(value); |
| } else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) { |
| switch (value) { |
| case GPU_COMMAND_SOFT_RESET: |
| case GPU_COMMAND_HARD_RESET: |
| pr_debug("gpu reset (%d) requested", value); |
| /* no more fault status */ |
| hw_error_status.gpu_fault_status = 0; |
| /* completed reset instantly */ |
| dummy->reset_completed = 1; |
| break; |
| #if MALI_USE_CSF |
| case GPU_COMMAND_CACHE_CLN_INV_L2: |
| case GPU_COMMAND_CACHE_CLN_INV_L2_LSC: |
| case GPU_COMMAND_CACHE_CLN_INV_FULL: |
| #else |
| case GPU_COMMAND_CLEAN_CACHES: |
| case GPU_COMMAND_CLEAN_INV_CACHES: |
| #endif |
| pr_debug("clean caches requested"); |
| dummy->clean_caches_completed = true; |
| break; |
| #if !MALI_USE_CSF |
| case GPU_COMMAND_PRFCNT_SAMPLE: |
| midgard_model_dump_prfcnt(); |
			dummy->prfcnt_sample_completed = 1;
			break;
| #endif /* !MALI_USE_CSF */ |
| default: |
| break; |
| } |
| } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { |
| dummy->l2_config = value; |
| } |
| #if MALI_USE_CSF |
| else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) && |
| addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET + |
| (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) { |
| if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET)) |
| hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF; |
| } else if (addr == IPA_CONTROL_REG(COMMAND)) { |
| pr_debug("Received IPA_CONTROL command"); |
| } else if (addr == IPA_CONTROL_REG(TIMER)) { |
| ipa_control_timer_enabled = value ? true : false; |
| } else if ((addr >= IPA_CONTROL_REG(SELECT_CSHW_LO)) && |
| (addr <= IPA_CONTROL_REG(SELECT_SHADER_HI))) { |
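		/* Each core type has one 64-bit SELECT register, exposed as
		 * a consecutive LO/HI pair of 32-bit words.
		 */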
| enum kbase_ipa_core_type core_type = (enum kbase_ipa_core_type)( |
| (addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) >> 3); |
| bool is_low_word = |
| !((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) & 7); |
| |
| if (is_low_word) { |
| ipa_ctl_select_config[core_type] &= ~(u64)U32_MAX; |
| ipa_ctl_select_config[core_type] |= value; |
| } else { |
| ipa_ctl_select_config[core_type] &= U32_MAX; |
| ipa_ctl_select_config[core_type] |= ((u64)value << 32); |
| } |
| } |
| #endif |
| else if (addr == MMU_REG(MMU_IRQ_MASK)) { |
| hw_error_status.mmu_irq_mask = value; |
| } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) { |
| hw_error_status.mmu_irq_rawstat &= (~value); |
| } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && |
| (addr <= MMU_AS_REG(15, AS_STATUS))) { |
| int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) |
| >> 6; |
| |
| switch (addr & 0x3F) { |
| case AS_COMMAND: |
| switch (value) { |
| case AS_COMMAND_NOP: |
| hw_error_status.as_command[mem_addr_space] = |
| value; |
| break; |
| |
| case AS_COMMAND_UPDATE: |
| hw_error_status.as_command[mem_addr_space] = |
| value; |
| if ((hw_error_status.as_faultstatus[ |
| mem_addr_space]) |
| && ((hw_error_status.as_transtab[ |
| mem_addr_space] & 0x3) != 0)) { |
| model_error_log(KBASE_CORE, |
| "\n ERROR: AS_COMMAND issued UPDATE on error condition before AS_TRANSTAB been set to unmapped\n" |
| ); |
| } else if ((hw_error_status.as_faultstatus[ |
| mem_addr_space]) |
| && ((hw_error_status.as_transtab[ |
| mem_addr_space] & 0x3) == 0)) { |
| |
					/* invalidate all active jobs */
| invalidate_active_jobs(dummy); |
| /* error handled */ |
| hw_error_status.as_faultstatus[ |
| mem_addr_space] = 0; |
| } |
| break; |
| |
| case AS_COMMAND_LOCK: |
| case AS_COMMAND_UNLOCK: |
| hw_error_status.as_command[mem_addr_space] = |
| value; |
| break; |
| |
| case AS_COMMAND_FLUSH_PT: |
| case AS_COMMAND_FLUSH_MEM: |
| if (hw_error_status.as_command[mem_addr_space] |
| != AS_COMMAND_LOCK) |
| model_error_log(KBASE_CORE, |
| "\n ERROR: AS_COMMAND issued FLUSH without LOCKING before\n" |
| ); |
| else /* error handled if any */ |
| hw_error_status.as_faultstatus[ |
| mem_addr_space] = 0; |
| hw_error_status.as_command[mem_addr_space] = |
| value; |
| break; |
| |
| default: |
| model_error_log(KBASE_CORE, |
| "\n WARNING: UNRECOGNIZED AS_COMMAND 0x%x\n", |
| value); |
| break; |
| } |
| break; |
| |
| case AS_TRANSTAB_LO: |
| hw_error_status.as_transtab[mem_addr_space] &= |
| ~((u64) (0xffffffff)); |
| hw_error_status.as_transtab[mem_addr_space] |= |
| (u64) value; |
| break; |
| |
| case AS_TRANSTAB_HI: |
| hw_error_status.as_transtab[mem_addr_space] &= |
| (u64) 0xffffffff; |
| hw_error_status.as_transtab[mem_addr_space] |= |
| ((u64) value) << 32; |
| break; |
| |
| case AS_LOCKADDR_LO: |
| case AS_LOCKADDR_HI: |
| case AS_MEMATTR_LO: |
| case AS_MEMATTR_HI: |
| case AS_TRANSCFG_LO: |
| case AS_TRANSCFG_HI: |
| /* Writes ignored */ |
| break; |
| |
| default: |
| model_error_log(KBASE_CORE, |
| "Dummy model register access: Writing unsupported MMU #%d register 0x%x value 0x%x\n", |
| mem_addr_space, addr, value); |
| break; |
| } |
| } else { |
| switch (addr) { |
| #if !MALI_USE_CSF |
| case PRFCNT_BASE_LO: |
| performance_counters.prfcnt_base = |
| HI_MASK(performance_counters.prfcnt_base) | value; |
| performance_counters.prfcnt_base_cpu = |
| (u32 *)(uintptr_t)performance_counters.prfcnt_base; |
| break; |
| case PRFCNT_BASE_HI: |
| performance_counters.prfcnt_base = |
| LO_MASK(performance_counters.prfcnt_base) | (((u64)value) << 32); |
| performance_counters.prfcnt_base_cpu = |
| (u32 *)(uintptr_t)performance_counters.prfcnt_base; |
| break; |
| case PRFCNT_JM_EN: |
| performance_counters.prfcnt_en.fe = value; |
| break; |
| case PRFCNT_SHADER_EN: |
| performance_counters.prfcnt_en.shader = value; |
| break; |
| case PRFCNT_TILER_EN: |
| performance_counters.prfcnt_en.tiler = value; |
| break; |
| case PRFCNT_MMU_L2_EN: |
| performance_counters.prfcnt_en.l2 = value; |
| break; |
| #endif /* !MALI_USE_CSF */ |
| case TILER_PWRON_LO: |
| dummy->power_on |= (value & 1) << 1; |
| /* Also ensure L2 is powered on */ |
| dummy->power_on |= value & 1; |
| dummy->power_changed = 1; |
| break; |
| case SHADER_PWRON_LO: |
| dummy->power_on |= (value & 0xF) << 2; |
| dummy->power_changed = 1; |
| break; |
| case L2_PWRON_LO: |
| dummy->power_on |= value & 1; |
| dummy->power_changed = 1; |
| break; |
| case STACK_PWRON_LO: |
| dummy->stack_power_on_lo |= value; |
| dummy->power_changed = 1; |
| break; |
| case TILER_PWROFF_LO: |
| dummy->power_on &= ~((value & 1) << 1); |
| dummy->power_changed = 1; |
| break; |
| case SHADER_PWROFF_LO: |
| dummy->power_on &= ~((value & 0xF) << 2); |
| dummy->power_changed = 1; |
| break; |
| case L2_PWROFF_LO: |
| dummy->power_on &= ~(value & 1); |
| /* Also ensure tiler is powered off */ |
| dummy->power_on &= ~((value & 1) << 1); |
| dummy->power_changed = 1; |
| break; |
| case STACK_PWROFF_LO: |
| dummy->stack_power_on_lo &= ~value; |
| dummy->power_changed = 1; |
| break; |
| |
| case TILER_PWROFF_HI: |
| case SHADER_PWROFF_HI: |
| case L2_PWROFF_HI: |
| case PWR_KEY: |
| case PWR_OVERRIDE0: |
| #if !MALI_USE_CSF |
| case JM_CONFIG: |
| case PRFCNT_CONFIG: |
| #else /* !MALI_USE_CSF */ |
| case CSF_CONFIG: |
| #endif /* !MALI_USE_CSF */ |
| case SHADER_CONFIG: |
| case TILER_CONFIG: |
| case L2_MMU_CONFIG: |
| /* Writes ignored */ |
| break; |
| default: |
| model_error_log(KBASE_CORE, |
| "Dummy model register access: Writing unsupported register 0x%x value 0x%x\n", |
| addr, value); |
| break; |
| } |
| } |
| |
| midgard_model_update(dummy); |
| midgard_model_get_outputs(dummy); |
| spin_unlock_irqrestore(&hw_error_status.access_lock, flags); |
| |
| return 1; |
| } |
| |
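/**
 * midgard_model_read_reg() - Handle a register read from the dummy model
 * @h: Model handle
 * @addr: Register address
 * @value: Pointer through which the register value is returned
 *
 * Return: Always 1.
 */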
| u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) |
| { |
| unsigned long flags; |
| struct dummy_model_t *dummy = (struct dummy_model_t *)h; |
| |
| spin_lock_irqsave(&hw_error_status.access_lock, flags); |
| |
| *value = 0; /* 0 by default */ |
| #if !MALI_USE_CSF |
| if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) { |
| pr_debug("%s", "JS_ACTIVE being read"); |
| |
| *value = dummy->job_irq_js_state; |
| } else if (addr == GPU_CONTROL_REG(GPU_ID)) { |
| #else /* !MALI_USE_CSF */ |
| if (addr == GPU_CONTROL_REG(GPU_ID)) { |
| #endif /* !MALI_USE_CSF */ |
| |
| *value = dummy->control_reg_values->gpu_id; |
| } else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) { |
| *value = hw_error_status.job_irq_rawstat; |
| pr_debug("%s", "JS_IRQ_RAWSTAT being read"); |
| } else if (addr == JOB_CONTROL_REG(JOB_IRQ_STATUS)) { |
| *value = hw_error_status.job_irq_status; |
| pr_debug("JS_IRQ_STATUS being read %x", *value); |
| } |
| #if !MALI_USE_CSF |
| else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) { |
| int i; |
| |
| *value = 0; |
| for (i = 0; i < NUM_SLOTS; i++) |
| *value |= dummy->slots[i].job_irq_mask << i; |
| pr_debug("JS_IRQ_MASK being read %x", *value); |
| } |
| #else /* !MALI_USE_CSF */ |
| else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) |
| ; /* ignore JOB_IRQ_MASK as it is handled by CSFFW */ |
| #endif /* !MALI_USE_CSF */ |
| else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { |
| *value = (dummy->reset_completed_mask << 8) | |
| ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) | |
| (dummy->power_changed_mask << 9) | (1 << 7) | 1; |
| pr_debug("GPU_IRQ_MASK read %x", *value); |
| } else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) { |
| *value = (dummy->power_changed << 9) | (dummy->power_changed << 10) | |
| (dummy->reset_completed << 8) | |
| #if !MALI_USE_CSF |
| (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | |
| #endif /* !MALI_USE_CSF */ |
| ((dummy->clean_caches_completed ? 1u : 0u) << 17) | |
| hw_error_status.gpu_error_irq; |
| pr_debug("GPU_IRQ_RAWSTAT read %x", *value); |
| } else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) { |
| *value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) | |
| ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) | |
| ((dummy->reset_completed & dummy->reset_completed_mask) << 8) | |
| #if !MALI_USE_CSF |
| (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | |
| #endif /* !MALI_USE_CSF */ |
| (((dummy->clean_caches_completed && |
| dummy->clean_caches_completed_irq_enabled) ? |
| 1u : |
| 0u) |
| << 17) | |
| hw_error_status.gpu_error_irq; |
| pr_debug("GPU_IRQ_STAT read %x", *value); |
| } else if (addr == GPU_CONTROL_REG(GPU_STATUS)) { |
| *value = 0; |
| #if !MALI_USE_CSF |
| } else if (addr == GPU_CONTROL_REG(LATEST_FLUSH)) { |
| *value = 0; |
| #endif |
| } else if (addr == GPU_CONTROL_REG(GPU_FAULTSTATUS)) { |
| *value = hw_error_status.gpu_fault_status; |
| } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { |
| *value = dummy->l2_config; |
| } else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) && |
| (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) { |
| switch (addr) { |
| case GPU_CONTROL_REG(SHADER_PRESENT_LO): |
| case GPU_CONTROL_REG(SHADER_PRESENT_HI): |
| case GPU_CONTROL_REG(TILER_PRESENT_LO): |
| case GPU_CONTROL_REG(TILER_PRESENT_HI): |
| case GPU_CONTROL_REG(L2_PRESENT_LO): |
| case GPU_CONTROL_REG(L2_PRESENT_HI): |
| case GPU_CONTROL_REG(STACK_PRESENT_LO): |
| case GPU_CONTROL_REG(STACK_PRESENT_HI): |
| *value = get_implementation_register(addr); |
| break; |
| case GPU_CONTROL_REG(SHADER_READY_LO): |
| *value = (dummy->power_on >> 0x02) & |
| get_implementation_register( |
| GPU_CONTROL_REG(SHADER_PRESENT_LO)); |
| break; |
| case GPU_CONTROL_REG(TILER_READY_LO): |
| *value = (dummy->power_on >> 0x01) & |
| get_implementation_register( |
| GPU_CONTROL_REG(TILER_PRESENT_LO)); |
| break; |
| case GPU_CONTROL_REG(L2_READY_LO): |
| *value = dummy->power_on & |
| get_implementation_register( |
| GPU_CONTROL_REG(L2_PRESENT_LO)); |
| break; |
| case GPU_CONTROL_REG(STACK_READY_LO): |
| *value = dummy->stack_power_on_lo & |
| get_implementation_register( |
| GPU_CONTROL_REG(STACK_PRESENT_LO)); |
| break; |
| |
| case GPU_CONTROL_REG(SHADER_READY_HI): |
| case GPU_CONTROL_REG(TILER_READY_HI): |
| case GPU_CONTROL_REG(L2_READY_HI): |
| case GPU_CONTROL_REG(STACK_READY_HI): |
| *value = 0; |
| break; |
| |
| case GPU_CONTROL_REG(SHADER_PWRTRANS_LO): |
| case GPU_CONTROL_REG(SHADER_PWRTRANS_HI): |
| case GPU_CONTROL_REG(TILER_PWRTRANS_LO): |
| case GPU_CONTROL_REG(TILER_PWRTRANS_HI): |
| case GPU_CONTROL_REG(L2_PWRTRANS_LO): |
| case GPU_CONTROL_REG(L2_PWRTRANS_HI): |
| case GPU_CONTROL_REG(STACK_PWRTRANS_LO): |
| case GPU_CONTROL_REG(STACK_PWRTRANS_HI): |
| *value = 0; |
| break; |
| |
| case GPU_CONTROL_REG(SHADER_PWRACTIVE_LO): |
| case GPU_CONTROL_REG(SHADER_PWRACTIVE_HI): |
| case GPU_CONTROL_REG(TILER_PWRACTIVE_LO): |
| case GPU_CONTROL_REG(TILER_PWRACTIVE_HI): |
| case GPU_CONTROL_REG(L2_PWRACTIVE_LO): |
| case GPU_CONTROL_REG(L2_PWRACTIVE_HI): |
| *value = 0; |
| break; |
| |
| #if !MALI_USE_CSF |
| case GPU_CONTROL_REG(JM_CONFIG): |
| #else /* !MALI_USE_CSF */ |
| case GPU_CONTROL_REG(CSF_CONFIG): |
| #endif /* !MALI_USE_CSF */ |
| |
| case GPU_CONTROL_REG(SHADER_CONFIG): |
| case GPU_CONTROL_REG(TILER_CONFIG): |
| case GPU_CONTROL_REG(L2_MMU_CONFIG): |
| *value = 0; |
| break; |
| |
| case GPU_CONTROL_REG(COHERENCY_FEATURES): |
| *value = BIT(0) | BIT(1); /* ace_lite and ace, respectively. */ |
| break; |
| case GPU_CONTROL_REG(COHERENCY_ENABLE): |
| *value = dummy->coherency_enable; |
| break; |
| |
| case GPU_CONTROL_REG(THREAD_TLS_ALLOC): |
| *value = 0; |
| break; |
| |
| default: |
| model_error_log(KBASE_CORE, |
| "Dummy model register access: Reading unknown control reg 0x%x\n", |
| addr); |
| break; |
| } |
| #if !MALI_USE_CSF |
| } else if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && |
| (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { |
| int slot_idx = (addr >> 7) & 0xf; |
| int sub_reg = addr & 0x7F; |
| |
| KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); |
| switch (sub_reg) { |
| case JS_HEAD_NEXT_LO: |
| *value = (u32) ((hw_error_status.current_jc) & |
| 0xFFFFFFFF); |
| break; |
| case JS_HEAD_NEXT_HI: |
| *value = (u32) (hw_error_status.current_jc >> 32); |
| break; |
| case JS_STATUS: |
| if (hw_error_status.js_status[slot_idx]) |
| *value = hw_error_status.js_status[slot_idx]; |
| else /* 0x08 means active, 0x00 idle */ |
| *value = (dummy->slots[slot_idx].job_active) |
| << 3; |
| break; |
| case JS_COMMAND_NEXT: |
| *value = dummy->slots[slot_idx].job_queued; |
| break; |
| |
		/* The dummy model does not implement these registers, so
		 * avoid printing error messages
		 */
| case JS_HEAD_HI: |
| case JS_HEAD_LO: |
| case JS_TAIL_HI: |
| case JS_TAIL_LO: |
| case JS_FLUSH_ID_NEXT: |
| break; |
| |
| default: |
| model_error_log(KBASE_CORE, |
| "Dummy model register access: unknown job slot reg 0x%02X being read\n", |
| sub_reg); |
| break; |
| } |
| #endif /* !MALI_USE_CSF */ |
| } else if (addr == GPU_CONTROL_REG(AS_PRESENT)) { |
| *value = dummy->control_reg_values->as_present; |
| #if !MALI_USE_CSF |
| } else if (addr == GPU_CONTROL_REG(JS_PRESENT)) { |
| *value = 0x7; |
| #endif /* !MALI_USE_CSF */ |
| } else if (addr >= GPU_CONTROL_REG(TEXTURE_FEATURES_0) && |
| addr <= GPU_CONTROL_REG(TEXTURE_FEATURES_3)) { |
| switch (addr) { |
| case GPU_CONTROL_REG(TEXTURE_FEATURES_0): |
| *value = 0xfffff; |
| break; |
| |
| case GPU_CONTROL_REG(TEXTURE_FEATURES_1): |
| *value = 0xffff; |
| break; |
| |
| case GPU_CONTROL_REG(TEXTURE_FEATURES_2): |
| *value = 0x9f81ffff; |
| break; |
| |
| case GPU_CONTROL_REG(TEXTURE_FEATURES_3): |
| *value = 0; |
| break; |
| } |
| #if !MALI_USE_CSF |
| } else if (addr >= GPU_CONTROL_REG(JS0_FEATURES) && |
| addr <= GPU_CONTROL_REG(JS15_FEATURES)) { |
| switch (addr) { |
| case GPU_CONTROL_REG(JS0_FEATURES): |
| *value = 0x20e; |
| break; |
| |
| case GPU_CONTROL_REG(JS1_FEATURES): |
| *value = 0x1fe; |
| break; |
| |
| case GPU_CONTROL_REG(JS2_FEATURES): |
| *value = 0x7e; |
| break; |
| |
| default: |
| *value = 0; |
| break; |
| } |
| #endif /* !MALI_USE_CSF */ |
| } else if (addr >= GPU_CONTROL_REG(L2_FEATURES) |
| && addr <= GPU_CONTROL_REG(MMU_FEATURES)) { |
| switch (addr) { |
| case GPU_CONTROL_REG(L2_FEATURES): |
| *value = 0x6100206; |
| break; |
| |
| case GPU_CONTROL_REG(CORE_FEATURES): |
| *value = dummy->control_reg_values->core_features; |
| break; |
| |
| case GPU_CONTROL_REG(TILER_FEATURES): |
| *value = dummy->control_reg_values->tiler_features; |
| break; |
| |
| case GPU_CONTROL_REG(MEM_FEATURES): |
| /* Bit 0: Core group is coherent */ |
| *value = 0x01; |
| /* Bits 11:8: L2 slice count - 1 */ |
| *value |= (hweight64(DUMMY_IMPLEMENTATION_L2_PRESENT) - 1) << 8; |
| break; |
| |
| case GPU_CONTROL_REG(MMU_FEATURES): |
| *value = dummy->control_reg_values->mmu_features; |
| break; |
| } |
| } else if (addr >= GPU_CONTROL_REG(THREAD_MAX_THREADS) |
| && addr <= GPU_CONTROL_REG(THREAD_FEATURES)) { |
| switch (addr) { |
| case GPU_CONTROL_REG(THREAD_FEATURES): |
| *value = dummy->control_reg_values->thread_features |
| | (IMPLEMENTATION_MODEL << 30); |
| break; |
| case GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE): |
| *value = dummy->control_reg_values->thread_max_barrier_size; |
| break; |
| case GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE): |
| *value = dummy->control_reg_values->thread_max_workgroup_size; |
| break; |
| case GPU_CONTROL_REG(THREAD_MAX_THREADS): |
| *value = dummy->control_reg_values->thread_max_threads; |
| break; |
| } |
| } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO) |
| && addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) { |
| *value = 0; |
| } else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO) |
| && addr <= MMU_AS_REG(15, AS_STATUS)) { |
| int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) |
| >> 6; |
| |
| switch (addr & 0x3F) { |
| case AS_TRANSTAB_LO: |
| *value = (u32) |
| (hw_error_status.as_transtab[mem_addr_space] & |
| 0xffffffff); |
| break; |
| |
| case AS_TRANSTAB_HI: |
| *value = (u32) |
| (hw_error_status.as_transtab[mem_addr_space] >> |
| 32); |
| break; |
| |
| case AS_STATUS: |
| *value = 0; |
| break; |
| |
| case AS_FAULTSTATUS: |
| if (mem_addr_space == hw_error_status.faulty_mmu_as) |
| *value = hw_error_status.as_faultstatus[ |
| hw_error_status.faulty_mmu_as]; |
| else |
| *value = 0; |
| break; |
| |
| case AS_LOCKADDR_LO: |
| case AS_LOCKADDR_HI: |
| case AS_MEMATTR_LO: |
| case AS_MEMATTR_HI: |
| case AS_TRANSCFG_LO: |
| case AS_TRANSCFG_HI: |
			/* Reads ignored; the model does not track these registers */
| *value = 0; |
| break; |
| |
| default: |
| model_error_log(KBASE_CORE, |
| "Dummy model register access: Reading unsupported MMU #%d register 0x%x. Returning 0\n", |
| mem_addr_space, addr); |
| *value = 0; |
| break; |
| } |
| } else if (addr == MMU_REG(MMU_IRQ_MASK)) { |
| *value = hw_error_status.mmu_irq_mask; |
| } else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) { |
| *value = hw_error_status.mmu_irq_rawstat; |
| } else if (addr == MMU_REG(MMU_IRQ_STATUS)) { |
| *value = hw_error_status.mmu_irq_mask & |
| hw_error_status.mmu_irq_rawstat; |
| } |
| #if MALI_USE_CSF |
| else if (addr == IPA_CONTROL_REG(STATUS)) { |
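		/* Bit 31 of the STATUS register reflects whether the IPA
		 * control timer is enabled.
		 */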
		*value = ((u32)ipa_control_timer_enabled << 31);
| } else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) && |
| (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI( |
| IPA_CTL_MAX_VAL_CNT_IDX)))) { |
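		/* Counter values are 64 bits wide, exposed as consecutive
		 * LO/HI 32-bit register pairs: bits above bit 2 of the offset
		 * from the first VALUE register give the counter index, and
		 * bit 2 selects the LO (clear) or HI (set) word. The same
		 * decoding applies to the MEMSYS, TILER and SHADER blocks
		 * below.
		 */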
| u32 counter_index = |
| (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3; |
| bool is_low_word = |
| !((addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) & 7); |
| |
| *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW, |
| counter_index, is_low_word); |
| } else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) && |
| (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI( |
| IPA_CTL_MAX_VAL_CNT_IDX)))) { |
| u32 counter_index = |
| (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3; |
| bool is_low_word = |
| !((addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) & 7); |
| |
| *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS, |
| counter_index, is_low_word); |
| } else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) && |
| (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI( |
| IPA_CTL_MAX_VAL_CNT_IDX)))) { |
| u32 counter_index = |
| (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3; |
| bool is_low_word = |
| !((addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) & 7); |
| |
| *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER, |
| counter_index, is_low_word); |
| } else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) && |
| (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI( |
| IPA_CTL_MAX_VAL_CNT_IDX)))) { |
| u32 counter_index = |
| (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3; |
| bool is_low_word = |
| !((addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) & 7); |
| |
| *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, |
| counter_index, is_low_word); |
| } else if (addr == USER_REG(LATEST_FLUSH)) { |
| *value = 0; |
| } |
| #endif |
| else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { |
| *value = dummy->control_reg_values->gpu_features_lo; |
| } else if (addr == GPU_CONTROL_REG(GPU_FEATURES_HI)) { |
| *value = dummy->control_reg_values->gpu_features_hi; |
| } else { |
| model_error_log(KBASE_CORE, |
| "Dummy model register access: Reading unsupported register 0x%x. Returning 0\n", |
| addr); |
| *value = 0; |
| } |
| |
| spin_unlock_irqrestore(&hw_error_status.access_lock, flags); |
| CSTD_UNUSED(dummy); |
| |
| return 1; |
| } |
| |
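/**
 * set_user_sample_core_type() - Copy counter values for one core type from a
 *                               user-supplied sample buffer.
 * @counters: Destination array of counter values for this core type.
 * @usr_data_start: Start of the (already copied in) user sample buffer.
 * @usr_data_offset: Byte offset into the buffer at which this core type's
 *                   values begin.
 * @usr_data_size: Total size of the buffer in bytes.
 * @core_count: Number of cores of this type.
 *
 * User samples carry one u32 per counter; each value is widened to u64 when
 * stored. If the buffer is too small to hold this core type's block, an error
 * is logged and @counters is left unchanged.
 *
 * Return: Byte offset just past this core type's block, suitable for passing
 *         as @usr_data_offset to the next call.
 */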
| static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset, |
| u32 usr_data_size, u32 core_count) |
| { |
| u32 sample_size; |
| u32 *usr_data = NULL; |
| |
| lockdep_assert_held(&performance_counters.access_lock); |
| |
| sample_size = |
| core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32); |
| |
| if ((usr_data_size >= usr_data_offset) && |
| (sample_size <= usr_data_size - usr_data_offset)) |
| usr_data = usr_data_start + (usr_data_offset / sizeof(u32)); |
| |
	if (!usr_data) {
		model_error_log(KBASE_CORE, "Unable to set counter sample 1");
	} else {
		u32 loop_cnt = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
		u32 i;

		for (i = 0; i < loop_cnt; i++)
			counters[i] = usr_data[i];
	}
| |
| return usr_data_offset + sample_size; |
| } |
| |
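/**
 * set_kernel_sample_core_type() - Copy counter values for one core type from
 *                                 an in-kernel sample buffer.
 * @counters: Destination array of counter values for this core type.
 * @usr_data_start: Start of the in-kernel sample buffer.
 * @usr_data_offset: Byte offset into the buffer at which this core type's
 *                   values begin.
 * @usr_data_size: Total size of the buffer in bytes.
 * @core_count: Number of cores of this type.
 *
 * Unlike the user variant, the values are already u64 and are copied
 * verbatim. If the buffer is too small to hold this core type's block, an
 * error is logged and @counters is left unchanged.
 *
 * Return: Byte offset just past this core type's block, suitable for passing
 *         as @usr_data_offset to the next call.
 */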
| static u32 set_kernel_sample_core_type(u64 *counters, |
| u64 *usr_data_start, u32 usr_data_offset, |
| u32 usr_data_size, u32 core_count) |
| { |
| u32 sample_size; |
| u64 *usr_data = NULL; |
| |
| lockdep_assert_held(&performance_counters.access_lock); |
| |
| sample_size = |
| core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64); |
| |
| if ((usr_data_size >= usr_data_offset) && |
| (sample_size <= usr_data_size - usr_data_offset)) |
| usr_data = usr_data_start + (usr_data_offset / sizeof(u64)); |
| |
| if (!usr_data) |
| model_error_log(KBASE_CORE, "Unable to set kernel counter sample 1"); |
| else |
| memcpy(counters, usr_data, sample_size); |
| |
| return usr_data_offset + sample_size; |
| } |
| |
/* Counter values injected through the ioctl are 32 bits wide */
| int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size) |
| { |
| unsigned long flags; |
| u32 *user_data; |
| u32 offset = 0; |
| |
| if (data == NULL || size == 0 || size > KBASE_DUMMY_MODEL_COUNTER_TOTAL * sizeof(u32)) |
| return -EINVAL; |
| |
	/* copy_from_user() may sleep, so it cannot be called while holding a
	 * spinlock. Allocate a temporary buffer for the user data and copy
	 * into it before taking the lock.
	 */
| user_data = kmalloc(size, GFP_KERNEL); |
| if (!user_data) |
| return -ENOMEM; |
| |
| if (copy_from_user(user_data, data, size)) { |
| model_error_log(KBASE_CORE, "Unable to copy prfcnt data from userspace"); |
| kfree(user_data); |
| return -EINVAL; |
| } |
| |
| spin_lock_irqsave(&performance_counters.access_lock, flags); |
| #if !MALI_USE_CSF |
| offset = set_user_sample_core_type(performance_counters.jm_counters, user_data, offset, |
| size, 1); |
| #else |
| offset = set_user_sample_core_type(performance_counters.cshw_counters, user_data, offset, |
| size, 1); |
| #endif /* !MALI_USE_CSF */ |
| offset = set_user_sample_core_type(performance_counters.tiler_counters, user_data, offset, |
| size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); |
| offset = set_user_sample_core_type(performance_counters.l2_counters, user_data, offset, |
| size, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS); |
| offset = set_user_sample_core_type(performance_counters.shader_counters, user_data, offset, |
| size, KBASE_DUMMY_MODEL_MAX_SHADER_CORES); |
| spin_unlock_irqrestore(&performance_counters.access_lock, flags); |
| |
| kfree(user_data); |
| return 0; |
| } |
| |
/* Counter values injected through KUTF are 64 bits wide */
| void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size) |
| { |
| unsigned long flags; |
| u32 offset = 0; |
| |
| spin_lock_irqsave(&performance_counters.access_lock, flags); |
| #if !MALI_USE_CSF |
| offset = set_kernel_sample_core_type(performance_counters.jm_counters, data, offset, size, |
| 1); |
| #else |
| offset = set_kernel_sample_core_type(performance_counters.cshw_counters, data, offset, size, |
| 1); |
| #endif /* !MALI_USE_CSF */ |
| offset = set_kernel_sample_core_type(performance_counters.tiler_counters, data, offset, |
| size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT)); |
| offset = set_kernel_sample_core_type(performance_counters.l2_counters, data, offset, size, |
| hweight64(performance_counters.l2_present)); |
| offset = set_kernel_sample_core_type(performance_counters.shader_counters, data, offset, |
| size, hweight64(performance_counters.shader_present)); |
| spin_unlock_irqrestore(&performance_counters.access_lock, flags); |
| } |
| KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample); |
| |
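/**
 * gpu_model_get_dummy_prfcnt_cores() - Report the core masks used by the
 *                                      dummy model for performance counters.
 * @kbdev: Device pointer (currently unused).
 * @l2_present: Output for the L2 slice present mask; may be NULL.
 * @shader_present: Output for the shader core present mask; may be NULL.
 */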
| void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev, |
| u64 *l2_present, u64 *shader_present) |
| { |
| if (shader_present) |
| *shader_present = performance_counters.shader_present; |
| if (l2_present) |
| *l2_present = performance_counters.l2_present; |
| } |
| KBASE_EXPORT_TEST_API(gpu_model_get_dummy_prfcnt_cores); |
| |
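/**
 * gpu_model_set_dummy_prfcnt_cores() - Set the core masks used by the dummy
 *                                      model for performance counters.
 * @kbdev: Device whose GPU properties are updated to match the new masks.
 * @l2_present: New L2 slice present mask; must be non-zero and contain no
 *              more than KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS set bits.
 * @shader_present: New shader core present mask; must be non-zero and contain
 *                  no more than KBASE_DUMMY_MODEL_MAX_SHADER_CORES set bits.
 *
 * Illustrative (hypothetical) test usage, configuring one L2 slice and four
 * shader cores before injecting a kernel counter sample:
 *
 *	gpu_model_set_dummy_prfcnt_cores(kbdev, 0x1, 0xF);
 *	gpu_model_set_dummy_prfcnt_kernel_sample(sample, sample_size);
 */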
| void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev, |
| u64 l2_present, u64 shader_present) |
| { |
	if (WARN_ON(!l2_present || !shader_present ||
		    hweight64(l2_present) > KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS ||
		    hweight64(shader_present) > KBASE_DUMMY_MODEL_MAX_SHADER_CORES))
| return; |
| |
| performance_counters.l2_present = l2_present; |
| performance_counters.shader_present = shader_present; |
| |
| /* Update the GPU properties used by vinstr to calculate the counter |
| * dump buffer size. |
| */ |
| kbdev->gpu_props.props.l2_props.num_l2_slices = hweight64(l2_present); |
| kbdev->gpu_props.props.coherency_info.group[0].core_mask = shader_present; |
| kbdev->gpu_props.curr_config.l2_slices = hweight64(l2_present); |
| kbdev->gpu_props.curr_config.shader_present = shader_present; |
| } |
| KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores); |
| |
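/**
 * gpu_model_control() - Execute a control command against the dummy model.
 * @model: Model pointer, as passed to the other model entry points.
 * @params: Command and value to apply.
 *
 * KBASE_MC_DISABLE_JOBS is the only command currently handled: it applies
 * @params->value to the job_disabled flag of every slot, then refreshes the
 * model state and outputs under the error-status lock.
 *
 * Return: 0 on success, -EINVAL for an unrecognised command.
 */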
| int gpu_model_control(void *model, |
| struct kbase_model_control_params *params) |
| { |
| struct dummy_model_t *dummy = (struct dummy_model_t *)model; |
| int i; |
| unsigned long flags; |
| |
| if (params->command == KBASE_MC_DISABLE_JOBS) { |
| for (i = 0; i < NUM_SLOTS; i++) |
| dummy->slots[i].job_disabled = params->value; |
| } else { |
| return -EINVAL; |
| } |
| |
| spin_lock_irqsave(&hw_error_status.access_lock, flags); |
| midgard_model_update(dummy); |
| midgard_model_get_outputs(dummy); |
| spin_unlock_irqrestore(&hw_error_status.access_lock, flags); |
| |
| return 0; |
| } |