From 409c23d5af5fe4ef64d3d8f3de77bf851313ef23 Mon Sep 17 00:00:00 2001
From: Akash Goel <akash.goel@arm.com>
Date: Tue, 16 Apr 2024 09:26:41 +0100
Subject: [PATCH] GPUCORE-42157: Fix for race between shrinker and map

The MMU insert-pages operations have been changed to re-walk the MMU
page table every time a new PGD is allocated, because allocating a PGD
may require releasing and re-acquiring the MMU lock, which allows the
page table to be modified concurrently in the meantime. In particular,
memory shrinker callbacks can be invoked while a memory mapping
operation is in progress, leading to a race between the shrinker and
the map operation. A rough sketch of the resulting retry pattern is
included after the '---' separator below.

TI2: 1168570 (PLAN-11965r1094 DDK Precommit)
TI2: 1168571 (PLAN-12467r825 TGT CS Nightly)
Change-Id: I9881b2ba342e59487fe93dcf2d00f09af4d69ed8
---
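For illustration, the retry pattern introduced by this patch can be
sketched as a small stand-alone program. This is only a rough sketch
under simplified assumptions, not the driver code: struct toy_mmut,
alloc_pgd() and insert_pages() are invented stand-ins for
kbase_mmu_table, mmu_insert_alloc_pgds() and the insert-pages paths,
the pthread mutex stands in for mmu_lock, and the levels, pool size
and addresses are made up. What it demonstrates matches the diff
below: PGDs already allocated on an earlier pass are kept, and the
page-table walk is redone from the top whenever the pool had to grow,
because growing the pool drops the lock.

/*
 * Rough illustrative sketch (not driver code) of the re-walk pattern:
 * if allocating a missing PGD forced the memory pool to grow, the lock
 * was dropped, so the page-table walk is redone from the top.
 *
 * Build (assumption, not from the patch): cc sketch.c -lpthread
 */
#include <pthread.h>
#include <stdbool.h>

#define TOP_LEVEL    0
#define BOTTOM_LEVEL 3
#define INVALID_PGD  0ULL

struct toy_mmut {
        pthread_mutex_t lock;                     /* stands in for mmu_lock */
        unsigned long long pgd[BOTTOM_LEVEL + 1]; /* valid PGD per level, 0 = none */
};

/* Toy allocator: occasionally has to drop the lock to "grow the pool". */
static unsigned long long alloc_pgd(struct toy_mmut *t, bool *pool_grown)
{
        static int free_pages;                    /* toy memory pool, starts empty */

        if (free_pages == 0) {
                pthread_mutex_unlock(&t->lock);   /* others may now modify the table */
                free_pages = 4;                   /* "kbase_mem_pool_grow()" */
                pthread_mutex_lock(&t->lock);
                *pool_grown = true;
        }
        free_pages--;
        return 0x1000;                            /* fake physical address */
}

static int insert_pages(struct toy_mmut *t)
{
        unsigned long long new_pgds[BOTTOM_LEVEL + 1];
        int insert_level, l;
        bool pool_grown;

        for (l = TOP_LEVEL + 1; l <= BOTTOM_LEVEL; l++)
                new_pgds[l] = INVALID_PGD;

        pthread_mutex_lock(&t->lock);
repeat_page_table_walk:
        /* Walk from the top for the lowest level that already has a valid
         * PGD; a result cached from a previous pass may be stale by now.
         */
        for (insert_level = TOP_LEVEL; insert_level < BOTTOM_LEVEL; insert_level++)
                if (t->pgd[insert_level + 1] == INVALID_PGD)
                        break;

        /* Allocate only the still-missing levels; PGDs allocated on an
         * earlier pass are kept rather than freed and re-allocated.
         */
        pool_grown = false;
        for (l = insert_level + 1; l <= BOTTOM_LEVEL; l++)
                if (new_pgds[l] == INVALID_PGD)
                        new_pgds[l] = alloc_pgd(t, &pool_grown);

        if (pool_grown)
                goto repeat_page_table_walk;      /* table may have changed */

        /* ... hook new_pgds into the table and insert the pages ... */
        pthread_mutex_unlock(&t->lock);
        return 0;
}

int main(void)
{
        struct toy_mmut t = { .lock = PTHREAD_MUTEX_INITIALIZER };

        return insert_pages(&t);
}

The design point this models: instead of trusting a walk result
computed before the lock was dropped, the patch sets pool_grown
whenever kbase_mem_pool_grow() had to run and jumps back to
repeat_page_table_walk; PGDs already allocated are kept (tracked via
KBASE_INVALID_PHYSICAL_ADDRESS) so the retry neither leaks nor
re-allocates them, and failure cleanup is consolidated in the single
fail_unlock_free_pgds path.
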
diff --git a/product/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c b/product/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c
index 5f23408..74f8852 100644
--- a/product/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c
+++ b/product/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c
@@ -2322,6 +2322,8 @@
* @level_high: The higher bound for the levels for which the PGD allocs are required
* @new_pgds: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the
* newly allocated PGD addresses to.
+ * @pool_grown: True if the memory pool had to grow to allocate pages for the
+ * new PGDs, or false otherwise
*
* Numerically, level_low < level_high, not to be confused with top level and
* bottom level concepts for MMU PGDs. They are only used as low and high bounds
@@ -2332,19 +2334,22 @@
* * -ENOMEM - allocation failed for a PGD.
*/
static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- phys_addr_t *new_pgds, int level_low, int level_high)
+ phys_addr_t *new_pgds, int level_low, int level_high,
+ bool *pool_grown)
{
int err = 0;
int i;
lockdep_assert_held(&mmut->mmu_lock);
+ *pool_grown = false;
for (i = level_low; i <= level_high; i++) {
+ if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS)
+ continue;
do {
new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut);
if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS)
break;
-
mutex_unlock(&mmut->mmu_lock);
err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
(size_t)level_high, NULL);
@@ -2352,17 +2357,9 @@
if (err) {
dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d",
__func__, err);
-
- /* Free all PGDs allocated in previous successful iterations
- * from (i-1) to level_low
- */
- for (i = (i - 1); i >= level_low; i--) {
- if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS)
- kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]);
- }
-
return err;
}
+ *pool_grown = true;
} while (1);
}
@@ -2392,6 +2389,8 @@
if (WARN_ON(kctx == NULL))
return -EINVAL;
+ lockdep_assert_held(&kctx->reg_lock);
+
/* 64-bit address range is the max */
KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
@@ -2427,6 +2426,7 @@
struct page *p;
register unsigned int num_of_valid_entries;
bool newly_created_pgd = false;
+ bool pool_grown;
if (count > remain)
count = remain;
@@ -2434,6 +2434,10 @@
cur_level = MIDGARD_MMU_BOTTOMLEVEL;
insert_level = cur_level;
+ for (l = MIDGARD_MMU_TOPLEVEL + 1; l <= cur_level; l++)
+ new_pgds[l] = KBASE_INVALID_PHYSICAL_ADDRESS;
+
+repeat_page_table_walk:
/*
* Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
* suboptimal. We don't have to re-parse the whole tree
@@ -2448,7 +2452,7 @@
if (err) {
dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
__func__, err);
- goto fail_unlock;
+ goto fail_unlock_free_pgds;
}
/* No valid pgd at cur_level */
@@ -2457,9 +2461,12 @@
* down to the lowest valid pgd at insert_level
*/
err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
- cur_level);
+ cur_level, &pool_grown);
if (err)
- goto fail_unlock;
+ goto fail_unlock_free_pgds;
+
+ if (pool_grown)
+ goto repeat_page_table_walk;
newly_created_pgd = true;
@@ -2545,9 +2552,9 @@
fail_unlock_free_pgds:
/* Free the pgds allocated by us from insert_level+1 to bottom level */
for (l = cur_level; l > insert_level; l--)
- kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+ if (new_pgds[l] != KBASE_INVALID_PHYSICAL_ADDRESS)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
-fail_unlock:
if (insert_vpfn != (start_vpfn * GPU_PAGES_PER_CPU_PAGE)) {
/* Invalidate the pages we have partially completed */
mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn * GPU_PAGES_PER_CPU_PAGE,
@@ -2698,6 +2705,9 @@
int l, cur_level, insert_level;
struct tagged_addr *start_phys = phys;
+ if (mmut->kctx)
+ lockdep_assert_held(&mmut->kctx->reg_lock);
+
/* Note that 0 is a valid start_vpfn */
/* 64-bit address range is the max */
KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
@@ -2720,6 +2730,7 @@
register unsigned int num_of_valid_entries;
bool newly_created_pgd = false;
enum kbase_mmu_op_type flush_op;
+ bool pool_grown;
if (count > remain)
count = remain;
@@ -2739,6 +2750,10 @@
insert_level = cur_level;
+ for (l = MIDGARD_MMU_TOPLEVEL + 1; l <= cur_level; l++)
+ new_pgds[l] = KBASE_INVALID_PHYSICAL_ADDRESS;
+
+repeat_page_table_walk:
/*
* Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
* suboptimal. We don't have to re-parse the whole tree
@@ -2753,7 +2768,7 @@
if (err) {
dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
__func__, err);
- goto fail_unlock;
+ goto fail_unlock_free_pgds;
}
/* No valid pgd at cur_level */
@@ -2762,9 +2777,12 @@
* down to the lowest valid pgd at insert_level
*/
err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
- cur_level);
+ cur_level, &pool_grown);
if (err)
- goto fail_unlock;
+ goto fail_unlock_free_pgds;
+
+ if (pool_grown)
+ goto repeat_page_table_walk;
newly_created_pgd = true;
@@ -2878,9 +2896,9 @@
fail_unlock_free_pgds:
/* Free the pgds allocated by us from insert_level+1 to bottom level */
for (l = cur_level; l > insert_level; l--)
- kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+ if (new_pgds[l] != KBASE_INVALID_PHYSICAL_ADDRESS)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
-fail_unlock:
if (insert_vpfn != (start_vpfn * GPU_PAGES_PER_CPU_PAGE)) {
/* Invalidate the pages we have partially completed */
mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn * GPU_PAGES_PER_CPU_PAGE,