From 8afe30e44e0d7bb4221c6f2d18d74005b29637f7 Mon Sep 17 00:00:00 2001
From: Piotr Roszkowski <piotr.roszkowski@arm.com>
Date: Tue, 02 Apr 2024 10:09:03 +0100
Subject: [PATCH] GPUCORE-41945 Failed page migration can lead to use after free

During the page migration process the last step, the GPU MMU cache
invalidate, can fail, and the error is returned without rolling back
the page migration.

The GPU cache invalidate can fail, but when it does the GPU is reset.
The expected outcome of the cache invalidate is to stop the GPU MMU
from using the outdated page table and make it load the new one from
RAM. Both the GPU cache invalidate and a GPU reset have the same
effect: the GPU MMU reloads the page table from RAM. Simply ignoring
the GPU cache flush error is therefore enough to prevent the issue
described in the ticket.

This patch changes the GPU cache flush error handling and, before
returning from page migration, waits for GPU Reset Done if a reset
occurred, to make sure the GPU is operational again.
TI2: 1149975 (PLAN-11965r1062 DDK Precommit)
TI2: 1149974 (PLAN-12467r798 TGT CS Nightly)
Change-Id: I7845f7118ff72ae9b97c5be3c80fdecaa3c52015
---
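Note (not part of the commit message): the error-handling change can be
summarised with a small standalone sketch. It only illustrates the idea
behind the patch and is not kbase code; every name in it
(do_unlock_and_invalidate, gpu_reset_pending, wait_for_gpu_reset_done,
finish_page_migration) is hypothetical.

/*
 * Standalone sketch (hypothetical names, not kbase code) of the new
 * behaviour: a failed MMU unlock / cache invalidate no longer aborts
 * the migration, because the failure also triggers a GPU reset and the
 * reset makes the GPU MMU reload its page tables from RAM anyway.
 */
#include <stdbool.h>
#include <stdio.h>

static bool gpu_reset_pending;

/* Pretend MMU region unlock + GPU cache invalidate; may fail. */
static int do_unlock_and_invalidate(bool simulate_failure)
{
	if (simulate_failure) {
		/* The driver would schedule a GPU reset in response. */
		gpu_reset_pending = true;
		return -1;
	}
	return 0;
}

/* Pretend wait for the "GPU Reset Done" notification. */
static void wait_for_gpu_reset_done(void)
{
	gpu_reset_pending = false;
}

static void finish_page_migration(bool simulate_failure)
{
	int err = do_unlock_and_invalidate(simulate_failure);

	/*
	 * Old behaviour: err < 0 -> undo the mappings and return the
	 * error, which the ticket reports can lead to a use after free.
	 * New behaviour: ignore err; either the invalidate worked, or
	 * the GPU reset it triggered reloads the page tables anyway.
	 */
	(void)err;

	/* Wait for GPU Reset Done before returning, so the GPU is
	 * known to be operational again. */
	if (gpu_reset_pending)
		wait_for_gpu_reset_done();

	printf("migration completed (invalidate %s)\n",
	       err ? "failed, recovered by GPU reset" : "succeeded");
}

int main(void)
{
	finish_page_migration(false);
	finish_page_migration(true);
	return 0;
}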
diff --git a/product/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c b/product/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c
index cc120c8..c6f96a81 100644
--- a/product/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c
+++ b/product/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c
@@ -3968,24 +3968,30 @@
/* Unlock MMU region.
*
- * Notice that GPUs which don't issue flush commands via GPU control
- * still need an additional GPU cache flush here, this time only
- * for the page table, because the function call above to sync PGDs
- * won't have any effect on them.
+ * For GPUs without FLUSH_PA_RANGE support, the GPU caches were completely
+ * cleaned and invalidated after locking the virtual address range affected
+ * by the migration. As long as the lock is in place, GPU access to the
+ * locked range would remain blocked. So there is no need to clean and
+ * invalidate the GPU caches again after copying the contents of the
+ * old page and updating the page table entry to point to the new page.
+ *
+ * For GPUs with FLUSH_PA_RANGE support, the contents of the old page would
+ * have been evicted from the GPU caches after locking the virtual address
+ * range. The page table entry contents also would have been invalidated
+ * from the GPU's L2 cache by kbase_mmu_sync_pgd() after the page table
+ * update.
+ *
+ * If kbase_mmu_hw_do_unlock_no_addr() fails, a GPU reset will be triggered,
+ * which would remove the MMU lock, so there is no need to roll back the page
+ * migration and the failure can be ignored.
*/
spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) {
int as_nr = mmut->kctx->as_nr;
struct kbase_as *as = &kbdev->as[as_nr];
+ int local_ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param);
- if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
- ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param);
- } else {
- ret = kbase_gpu_cache_flush_and_busy_wait(kbdev,
- GPU_COMMAND_CACHE_CLN_INV_L2);
- if (!ret)
- ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param);
- }
+ CSTD_UNUSED(local_ret);
}
/* Release the transition prevention in L2 by ending the transaction */
@@ -3994,12 +4000,6 @@
/* Releasing locks before checking the migration transaction error state */
mutex_unlock(&kbdev->mmu_hw_mutex);
- /* Checking the final migration transaction error state */
- if (ret < 0) {
- dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__);
- goto undo_mappings;
- }
-
/* Undertaking metadata transfer, while we are holding the mmu_lock */
spin_lock(&page_md->migrate_lock);
if (level == MIDGARD_MMU_BOTTOMLEVEL) {