// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
/*
* drivers/amlogic/memory_ext/aml_cma.c
*
* Copyright (C) 2017 Amlogic, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#include <linux/stddef.h>
#include <linux/compiler.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/rmap.h>
#include <linux/kthread.h>
#include <linux/sched/rt.h>
#include <linux/completion.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/migrate.h>
#include <linux/cpu.h>
#include <linux/page-isolation.h>
#include <linux/spinlock_types.h>
#include <linux/amlogic/aml_cma.h>
#include <linux/sched/signal.h>
#include <linux/hugetlb.h>
#include <linux/cma.h>
#include <linux/proc_fs.h>
#include <linux/platform_device.h>
#include <linux/sched/clock.h>
#include <linux/oom.h>
#include <linux/of.h>
#include <linux/shrinker.h>
#include <linux/vmalloc.h>
#include <asm/system_misc.h>
#include <asm/pgtable.h>
#include <linux/page_pinner.h>
#include <trace/events/page_isolation.h>
#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_ANDROID_VENDOR_HOOKS)
#include <trace/hooks/iommu.h>
#endif
/* from mm/ path */
#include <internal.h>
#ifdef CONFIG_AMLOGIC_PAGE_TRACE
#include <linux/amlogic/page_trace.h>
#endif /* CONFIG_AMLOGIC_PAGE_TRACE */
#ifdef CONFIG_AMLOGIC_USER_FAULT
#include <linux/amlogic/user_fault.h>
#endif
#define MAX_DEBUG_LEVEL 5
struct work_cma {
struct list_head list;
unsigned long pfn;
unsigned long count;
struct task_struct *host;
int ret;
};
struct cma_pcp {
struct list_head list;
struct completion start;
struct completion end;
struct task_struct *task;
spinlock_t list_lock; /* protect job list */
int cpu;
};
static DEFINE_MUTEX(cma_mutex);
static atomic_long_t nr_cma_allocated;
static bool can_boost;
static DEFINE_PER_CPU(struct cma_pcp, cma_pcp_thread);
static struct proc_dir_entry *dentry;
static int cma_alloc_trace;
int cma_debug_level;
static int allow_cma_tasks;
static unsigned long cma_isolated;
static atomic_t cma_allocate;
#ifdef CONFIG_AMLOGIC_CMA_DIS
unsigned long ion_cma_allocated;
#endif
/*
 * We insert a non-mapped vm area into the vmalloc space and dynamically
 * adjust its size according to nr_cma_allocated, only so that all
 * driver-allocated CMA memory is counted in the KernelUsed item of the
 * Android "dumpsys meminfo" command.
 */
static int cma_alloc_ref(void)
{
return atomic_read(&cma_allocate);
}
static void get_cma_alloc_ref(void)
{
atomic_inc(&cma_allocate);
}
static void put_cma_alloc_ref(void)
{
atomic_dec(&cma_allocate);
}
unsigned long get_cma_allocated(void)
{
return atomic_long_read(&nr_cma_allocated);
}
EXPORT_SYMBOL(get_cma_allocated);
static bool cma_first_wm_low __read_mostly;
static int __init early_cma_first_wm_low_param(char *buf)
{
if (!buf)
return -EINVAL;
if (strcmp(buf, "off") == 0)
cma_first_wm_low = false;
else if (strcmp(buf, "on") == 0)
cma_first_wm_low = true;
pr_info("cma_first_wm_low %sabled\n", cma_first_wm_low ? "en" : "dis");
return 0;
}
early_param("cma_first_wm_low", early_cma_first_wm_low_param);
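/*
 * check_cma_isolated - subtract the pages currently isolated by an on-going
 * CMA allocation (cma_isolated) from the caller's isolated-page count,
 * clamping the result at zero, so reclaim does not treat CMA isolation as
 * ordinary isolated pages. Warns once if the remaining count still exceeds
 * half of (active + inactive).
 */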
void check_cma_isolated(unsigned long *isolate,
unsigned long active, unsigned long inactive)
{
long tmp;
unsigned long raw = *isolate;
tmp = *isolate - cma_isolated;
if (tmp < 0)
*isolate = 0;
else
*isolate = tmp;
WARN_ONCE(*isolate > (inactive + active) / 2,
"isolated:%ld, cma:%ld, inactive:%ld, active:%ld\n",
raw, cma_isolated, inactive, active);
}
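/*
 * can_use_cma - tell whether the current allocation may take pages from CMA
 * pageblocks: not before cma_first_wm_low is set (see check_water_mark()),
 * not for gfp masks in cma_forbidden_mask(), not while a CMA allocation is
 * in flight, and not for tasks with a positive nice value.
 */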
bool can_use_cma(gfp_t gfp_flags)
{
if (unlikely(!cma_first_wm_low))
return false;
if (cma_forbidden_mask(gfp_flags))
return false;
if (cma_alloc_ref())
return false;
if (task_nice(current) > 0)
return false;
return true;
}
EXPORT_SYMBOL(can_use_cma);
void update_gfp_flags(gfp_t *gfp)
{
	/*
	 * There are 2 bit flags in gfp:
	 * __GFP_CMA:    get CMA pages from the buddy system
	 * __GFP_NO_CMA: do not get CMA pages from the buddy system
	 *
	 * The kernel only lets ZRAM/anonymous pages use CMA, but the CMA
	 * pool is usually very large; if the file cache cannot use CMA
	 * (movable) pages the system may hang. So we use CMA for all
	 * movable pages and only filter out, via __GFP_NO_CMA, the special
	 * cases that could make a CMA allocation fail.
	 */
if (can_use_cma(*gfp))
*gfp |= __GFP_CMA;
else
*gfp &= ~__GFP_CMA;
}
#define ACTIVE_MIGRATE 3
#define INACTIVE_MIGRATE (ACTIVE_MIGRATE * 4)
static int filecache_need_migrate(struct page *page)
{
if (PageActive(page) && page_mapcount(page) >= ACTIVE_MIGRATE)
return 1;
if (!PageActive(page) && page_mapcount(page) >= INACTIVE_MIGRATE)
return 1;
if (PageUnevictable(page))
return 0;
return 0;
}
void cma_keep_high_active(struct page *page, struct list_head *high,
struct list_head *clean)
{
if (filecache_need_migrate(page)) {
		/*
		 * Leave pages with a high map count to be migrated instead
		 * of reclaimed. This helps avoid a file-cache jolt when a
		 * large amount of CMA is being reclaimed.
		 */
list_move(&page->lru, high);
} else {
ClearPageActive(page);
list_move(&page->lru, clean);
}
}
bool cma_page(struct page *page)
{
int migrate_type = 0;
if (!page)
return false;
migrate_type = get_pageblock_migratetype(page);
if (is_migrate_cma(migrate_type) ||
is_migrate_isolate(migrate_type)) {
return true;
}
return false;
}
EXPORT_SYMBOL(cma_page);
#ifdef CONFIG_AMLOGIC_PAGE_TRACE
static void update_cma_page_trace(struct page *page, unsigned long cnt)
{
long i;
unsigned long fun;
if (!page)
return;
fun = find_back_trace();
if (cma_alloc_trace)
pr_info("c a p:%lx, c:%ld, f:%ps\n",
page_to_pfn(page), cnt, (void *)fun);
for (i = 0; i < cnt; i++) {
set_page_trace(page, 0, __GFP_NO_CMA, (void *)fun);
page++;
}
}
#endif /* CONFIG_AMLOGIC_PAGE_TRACE */
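/*
 * aml_cma_alloc_pre_hook - run before a CMA allocation. Takes the allocation
 * reference (so can_use_cma() returns false while the allocation runs),
 * saves the caller's nice value and a start timestamp in *dummy / *tick, and
 * temporarily boosts the task priority for requests of half a pageblock or
 * more.
 */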
void aml_cma_alloc_pre_hook(int *dummy, int count, unsigned long *tick)
{
get_cma_alloc_ref();
	/* temporarily raise the task priority when allocating many pages */
*dummy = task_nice(current);
*tick = sched_clock();
if (count >= (pageblock_nr_pages / 2))
set_user_nice(current, -18);
#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_ANDROID_VENDOR_HOOKS) && defined(CONFIG_ARM64)
trace_android_vh_iommu_iovad_free_iova((struct iova_domain *)mte_sync_tags,
0, (size_t)&init_mm);
#endif
}
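/*
 * aml_cma_alloc_post_hook - run after a CMA allocation. Drops the allocation
 * reference, adds successfully allocated pages to nr_cma_allocated, restores
 * the caller's nice value and, with CONFIG_AMLOGIC_PAGE_TRACE, records the
 * allocation back-trace for the new pages.
 */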
void aml_cma_alloc_post_hook(int *dummy, int count, struct page *page,
unsigned long tick, int ret)
{
put_cma_alloc_ref();
if (page)
atomic_long_add(count, &nr_cma_allocated);
if (count >= (pageblock_nr_pages / 2))
set_user_nice(current, *dummy);
cma_debug(0, NULL, "return page:%lx, tick:%16ld, ret:%d\n",
page ? page_to_pfn(page) : 0, (unsigned long)sched_clock() - tick, ret);
#ifdef CONFIG_AMLOGIC_PAGE_TRACE
update_cma_page_trace(page, count);
#endif /* CONFIG_AMLOGIC_PAGE_TRACE */
}
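/*
 * check_water_mark - latch cma_first_wm_low, i.e. the point from which
 * movable allocations may use CMA first. It is set right after boot when
 * total RAM is below 1GB or the CMA pool exceeds ~30% of total RAM, or later
 * once free pages drop to the given watermark.
 */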
void check_water_mark(long free_pages, unsigned long mark)
{
/* already set */
if (likely(cma_first_wm_low))
return;
	/* Use CMA first right after system boot, ignoring the watermark, when:
	 * 1) total RAM is smaller than 1GB, or
	 * 2) CMA pages make up more than 30% of totalram (total - reserved)
	 */
if ((totalcma_pages * 10 > 3 * totalram_pages() ||
totalram_pages() < 262144) && system_state == SYSTEM_RUNNING) {
cma_first_wm_low = true;
pr_info("Now can use cma1, free:%ld, wm:%ld, cma:%ld, total:%ld\n",
free_pages, mark, totalcma_pages, totalram_pages());
}
if (free_pages <= mark && free_pages > 0) {
		/* free pages have reached the watermark, start using CMA */
cma_first_wm_low = true;
pr_info("Now can use cma, free:%ld, wm:%ld\n",
free_pages, mark);
}
}
#ifdef CONFIG_ARM64
static int clear_cma_pagemap(unsigned long pfn, unsigned long count)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
unsigned long addr, end;
struct mm_struct *mm;
addr = (unsigned long)pfn_to_kaddr(pfn);
end = addr + count * PAGE_SIZE;
mm = &init_mm;
for (; addr < end; addr += PMD_SIZE) {
pgd = pgd_offset(mm, addr);
if (pgd_none(*pgd) || pgd_bad(*pgd))
break;
p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d) || p4d_bad(*p4d))
break;
pud = pud_offset(p4d, addr);
if (pud_none(*pud) || pud_bad(*pud))
break;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
break;
pr_debug("%s, addr:%lx, pgd:%p %llx, pmd:%p %llx\n",
__func__, addr, pgd,
pgd_val(*pgd), pmd, pmd_val(*pmd));
pmd_clear(pmd);
}
return 0;
}
#endif
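/*
 * setup_cma_full_pagemap - make sure a CMA pool is mapped with page-granular
 * PTEs in the kernel linear map. On arm64 the PMD entries covering the pool
 * are cleared and the range is remapped with remap_pfn_range(), so that
 * cma_mmu_op() can later set or clear individual PTEs. On 32-bit ARM there
 * is nothing to do.
 */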
int setup_cma_full_pagemap(unsigned long pfn, unsigned long count)
{
#ifdef CONFIG_ARM
	/*
	 * ARM already creates level-3 MMU mappings for lowmem CMA, and
	 * highmem CMA has no kernel mapping at all, so there is nothing
	 * to do on 32-bit ARM.
	 */
return 0;
#elif defined(CONFIG_ARM64)
struct vm_area_struct vma = {};
unsigned long addr, size;
int ret;
clear_cma_pagemap(pfn, count);
addr = (unsigned long)pfn_to_kaddr(pfn);
size = count * PAGE_SIZE;
vma.vm_mm = &init_mm;
vma.vm_start = addr;
vma.vm_end = addr + size;
vma.vm_page_prot = PAGE_KERNEL;
ret = remap_pfn_range(&vma, addr, pfn,
size, vma.vm_page_prot);
if (ret < 0)
pr_info("%s, remap pte failed:%d, cma:%lx\n",
__func__, ret, pfn);
return 0;
#else
#error "unsupported architecture"
#endif
}
EXPORT_SYMBOL(setup_cma_full_pagemap);
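/*
 * cma_mmu_op - set up (@set != 0) or clear kernel linear-map PTEs for @count
 * pages starting at @page. Only valid for lowmem CMA pages of a pool that
 * has been prepared with setup_cma_full_pagemap().
 */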
int cma_mmu_op(struct page *page, int count, bool set)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
unsigned long addr, end;
struct mm_struct *mm;
	if (!page || PageHighMem(page))
		return -EINVAL;
	/* TODO: the owner must make sure setup_cma_full_pagemap() has been
	 * called for this CMA pool before calling this function
	 */
	if (!cma_page(page)) {
		pr_debug("%s, page:%lx is not cma or has no cleared map\n",
			 __func__, page_to_pfn(page));
		return -EINVAL;
	}
addr = (unsigned long)page_address(page);
end = addr + count * PAGE_SIZE;
mm = &init_mm;
for (; addr < end; addr += PAGE_SIZE) {
pgd = pgd_offset(mm, addr);
if (pgd_none(*pgd) || pgd_bad(*pgd))
break;
p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d) || p4d_bad(*p4d))
break;
pud = pud_offset(p4d, addr);
if (pud_none(*pud) || pud_bad(*pud))
break;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
break;
pte = pte_offset_map(pmd, addr);
if (set)
set_pte_at(mm, addr, pte, mk_pte(page, PAGE_KERNEL));
else
pte_clear(mm, addr, pte);
pte_unmap(pte);
#ifdef CONFIG_ARM
pr_debug("%s, add:%lx, pgd:%p %x, pmd:%p %x, pte:%p %x\n",
__func__, addr, pgd, (int)pgd_val(*pgd),
pmd, (int)pmd_val(*pmd), pte, (int)pte_val(*pte));
#elif defined(CONFIG_ARM64)
pr_debug("%s, add:%lx, pgd:%p %llx, pmd:%p %llx, pte:%p %llx\n",
__func__, addr, pgd, pgd_val(*pgd),
pmd, pmd_val(*pmd), pte, pte_val(*pte));
#endif
page++;
}
return 0;
}
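/*
 * check_page_to_cma - flag a compaction run as forbidden to use CMA target
 * pages (FORBID_TO_CMA_BIT) as soon as one migration candidate is a KSM page
 * or belongs to a mapping whose gfp mask is in cma_forbidden_mask().
 */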
void check_page_to_cma(struct compact_control *cc, struct page *page)
{
struct address_space *mapping;
	/* no need to check again once the bit is set */
if (test_bit(FORBID_TO_CMA_BIT, &cc->total_migrate_scanned))
return;
mapping = page_mapping(page);
if ((unsigned long)mapping & PAGE_MAPPING_ANON)
mapping = NULL;
if (PageKsm(page) && !PageSlab(page))
__set_bit(FORBID_TO_CMA_BIT, &cc->total_migrate_scanned);
if (mapping && cma_forbidden_mask(mapping_gfp_mask(mapping)))
__set_bit(FORBID_TO_CMA_BIT, &cc->total_migrate_scanned);
}
static int can_migrate_to_cma(struct page *page)
{
struct address_space *mapping;
mapping = page_mapping(page);
if ((unsigned long)mapping & PAGE_MAPPING_ANON)
mapping = NULL;
if (PageKsm(page) && !PageSlab(page))
return 0;
if (mapping && cma_forbidden_mask(mapping_gfp_mask(mapping)))
return 0;
return 1;
}
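/*
 * get_compact_page - pick a free target page for compaction. Pages that must
 * not live in CMA (KSM pages or mappings with a CMA-forbidden gfp mask) only
 * get a target from the non-CMA part of the free list; everything else takes
 * the first free page.
 */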
struct page *get_compact_page(struct page *migratepage,
struct compact_control *cc)
{
int can_to_cma, find = 0;
struct page *page, *next;
can_to_cma = can_migrate_to_cma(migratepage);
if (!can_to_cma) {
list_for_each_entry_safe(page, next, &cc->freepages, lru) {
if (!cma_page(page)) {
list_del(&page->lru);
cc->nr_freepages--;
find = 1;
break;
}
}
if (!find)
return NULL;
} else {
page = list_entry(cc->freepages.next, struct page, lru);
list_del(&page->lru);
cc->nr_freepages--;
}
return page;
}
/* cma alloc/free interface */
static unsigned long pfn_max_align_down(unsigned long pfn)
{
return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES,
pageblock_nr_pages) - 1);
}
#if CONFIG_AMLOGIC_KERNEL_VERSION < 14515
static unsigned long pfn_max_align_up(unsigned long pfn)
{
return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES,
pageblock_nr_pages));
}
#endif
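/*
 * get_migrate_page - target-page allocator passed to migrate_pages(). It
 * follows the generic migration-target logic (hugetlb, THP, highmem zones)
 * but forces __GFP_NO_CMA so pages migrated out of a CMA range can never be
 * reallocated from CMA.
 */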
static struct page *get_migrate_page(struct page *page, unsigned long private)
{
struct migration_target_control *mtc;
gfp_t gfp_mask;
unsigned int order = 0;
struct page *new_page = NULL;
int nid;
int zidx;
mtc = (struct migration_target_control *)private;
gfp_mask = mtc->gfp_mask | __GFP_NO_CMA;
nid = mtc->nid;
if (nid == NUMA_NO_NODE)
nid = page_to_nid(page);
	/*
	 * TODO: allocate the destination hugepage from the nearest neighbor
	 * node, in accordance with the memory policy of the user process if
	 * possible. For now, as a simple work-around, we use the next node.
	 */
if (PageHuge(page)) {
struct hstate *h = page_hstate(compound_head(page));
gfp_mask |= htlb_modify_alloc_mask(h, gfp_mask);
new_page = alloc_huge_page_nodemask(h,
page_to_nid(page),
0, gfp_mask);
#ifdef CONFIG_AMLOGIC_PAGE_TRACE
#ifdef CONFIG_HUGETLB_PAGE
replace_page_trace(new_page, page);
#endif
#endif
return new_page;
}
if (PageTransHuge(page)) {
/*
* clear __GFP_RECLAIM to make the migration callback
* consistent with regular THP allocations.
*/
gfp_mask &= ~__GFP_RECLAIM;
gfp_mask |= GFP_TRANSHUGE;
order = HPAGE_PMD_ORDER;
}
zidx = zone_idx(page_zone(page));
if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE)
gfp_mask |= __GFP_HIGHMEM;
new_page = __alloc_pages(gfp_mask, order, nid, mtc->nmask);
if (new_page && PageTransHuge(new_page))
prep_transhuge_page(new_page);
#ifdef CONFIG_AMLOGIC_PAGE_TRACE
replace_page_trace(new_page, page);
#endif
return new_page;
}
#if defined(CONFIG_DYNAMIC_DEBUG) || \
(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
/* Usage: See admin-guide/dynamic-debug-howto.rst */
static void alloc_contig_dump_pages(struct list_head *page_list)
{
DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, "migrate failure");
if (DYNAMIC_DEBUG_BRANCH(descriptor)) {
struct page *page;
dump_stack();
list_for_each_entry(page, page_list, lru)
dump_page(page, "migration failure");
}
}
#else
static inline void alloc_contig_dump_pages(struct list_head *page_list)
{
}
#endif
/* [start, end) must belong to a single zone. */
static int aml_alloc_contig_migrate_range(struct compact_control *cc,
unsigned long start,
unsigned long end, bool boost,
struct task_struct *host)
{
/* This function is based on compact_zone() from compaction.c. */
unsigned long nr_reclaimed;
unsigned long pfn = start;
unsigned int tries = 0;
int ret = 0;
struct migration_target_control mtc = {
.nid = zone_to_nid(cc->zone),
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
};
lru_cache_disable();
while (pfn < end || !list_empty(&cc->migratepages)) {
if (fatal_signal_pending(host)) {
ret = -EINTR;
break;
}
if (list_empty(&cc->migratepages)) {
cc->nr_migratepages = 0;
ret = isolate_migratepages_range(cc, pfn, end);
if (ret && ret != -EAGAIN) {
cma_debug(1, NULL, " iso migrate page fail, ret:%d\n",
ret);
break;
}
pfn = cc->migrate_pfn;
tries = 0;
} else if (++tries == 5) {
ret = -EBUSY;
break;
}
nr_reclaimed = reclaim_clean_pages_from_list(cc->zone,
&cc->migratepages);
cc->nr_migratepages -= nr_reclaimed;
ret = migrate_pages(&cc->migratepages, get_migrate_page,
NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL);
/*
* On -ENOMEM, migrate_pages() bails out right away. It is pointless
* to retry again over this error, so do the same here.
*/
if (ret == -ENOMEM)
break;
}
lru_cache_enable();
if (ret < 0) {
if (ret == -EBUSY) {
struct page *page;
alloc_contig_dump_pages(&cc->migratepages);
list_for_each_entry(page, &cc->migratepages, lru) {
/* The page will be freed by putback_movable_pages soon */
if (page_count(page) == 1)
continue;
page_pinner_failure_detect(page);
}
}
putback_movable_pages(&cc->migratepages);
return ret;
}
return 0;
}
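/*
 * cma_boost_work_func - body of the per-cpu CMA worker threads. Each thread
 * waits on its 'start' completion, dequeues one struct work_cma job, runs
 * aml_alloc_contig_migrate_range() on [pfn, pfn + count), stores the result
 * in job->ret and signals the 'end' completion.
 */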
static int cma_boost_work_func(void *cma_data)
{
struct cma_pcp *c_work;
struct work_cma *job;
unsigned long pfn, end;
int ret = -1;
int this_cpu;
struct compact_control cc = {
.nr_migratepages = 0,
.order = -1,
.mode = MIGRATE_SYNC,
.ignore_skip_hint = true,
.no_set_skip_hint = true,
.gfp_mask = GFP_KERNEL,
.alloc_contig = true,
};
c_work = (struct cma_pcp *)cma_data;
for (;;) {
ret = wait_for_completion_interruptible(&c_work->start);
if (ret < 0) {
pr_err("%s wait for task %d is %d\n",
__func__, c_work->cpu, ret);
continue;
}
this_cpu = get_cpu();
put_cpu();
if (this_cpu != c_work->cpu) {
pr_err("%s, cpu %d is not work cpu:%d\n",
__func__, this_cpu, c_work->cpu);
}
spin_lock(&c_work->list_lock);
if (list_empty(&c_work->list)) {
			/* no job to do? */
pr_err("%s,%d, list empty\n", __func__, __LINE__);
spin_unlock(&c_work->list_lock);
goto next;
}
job = list_first_entry(&c_work->list, struct work_cma, list);
list_del(&job->list);
spin_unlock(&c_work->list_lock);
INIT_LIST_HEAD(&cc.migratepages);
lru_add_drain();
pfn = job->pfn;
cc.zone = page_zone(pfn_to_page(pfn));
end = pfn + job->count;
ret = aml_alloc_contig_migrate_range(&cc, pfn, end,
1, job->host);
job->ret = ret;
if (ret)
cma_debug(1, NULL, "failed, ret:%d\n", ret);
next:
complete(&c_work->end);
if (kthread_should_stop()) {
pr_err("%s task exit\n", __func__);
break;
}
}
return 0;
}
static int __init init_cma_boost_task(void)
{
int cpu;
struct task_struct *task;
struct cma_pcp *work;
char task_name[20] = {};
for_each_possible_cpu(cpu) {
memset(task_name, 0, sizeof(task_name));
sprintf(task_name, "cma_task%d", cpu);
work = &per_cpu(cma_pcp_thread, cpu);
init_completion(&work->start);
init_completion(&work->end);
INIT_LIST_HEAD(&work->list);
spin_lock_init(&work->list_lock);
work->cpu = cpu;
task = kthread_create(cma_boost_work_func, work, task_name);
if (!IS_ERR(task)) {
kthread_bind(task, cpu);
set_user_nice(task, -17);
work->task = task;
pr_debug("create cma task%p, for cpu %d\n", task, cpu);
wake_up_process(task);
} else {
can_boost = 0;
pr_err("create task for cpu %d fail:%p\n", cpu, task);
return -1;
}
}
can_boost = 1;
return 0;
}
module_init(init_cma_boost_task);
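/*
 * cma_alloc_contig_boost - spread the page migration of a large CMA
 * allocation over the per-cpu worker threads. The range is split into
 * roughly equal chunks (the last worker takes the remainder), one job is
 * queued per online cpu up to 'allow_cma_tasks' (or online cpus - 1), and
 * the caller waits for every queued job before merging the results.
 */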
int cma_alloc_contig_boost(unsigned long start_pfn, unsigned long count)
{
struct cpumask has_work;
int cpu, cpus, i = 0, ret = 0, ebusy = 0, einv = 0;
atomic_t ok;
unsigned long delta;
unsigned long cnt;
unsigned long flags;
struct cma_pcp *work;
struct work_cma job[NR_CPUS] = {};
cpumask_clear(&has_work);
if (allow_cma_tasks)
cpus = allow_cma_tasks;
else
cpus = num_online_cpus() - 1;
cnt = count;
delta = count / cpus;
atomic_set(&ok, 0);
local_irq_save(flags);
for_each_online_cpu(cpu) {
work = &per_cpu(cma_pcp_thread, cpu);
spin_lock(&work->list_lock);
INIT_LIST_HEAD(&job[cpu].list);
job[cpu].pfn = start_pfn + i * delta;
job[cpu].count = delta;
job[cpu].ret = -1;
job[cpu].host = current;
if (i == cpus - 1)
job[cpu].count = count - i * delta;
cpumask_set_cpu(cpu, &has_work);
list_add(&job[cpu].list, &work->list);
spin_unlock(&work->list_lock);
complete(&work->start);
i++;
if (i == cpus) {
cma_debug(1, NULL, "sched work to %d cpu\n", i);
break;
}
}
local_irq_restore(flags);
for_each_cpu(cpu, &has_work) {
work = &per_cpu(cma_pcp_thread, cpu);
wait_for_completion(&work->end);
if (job[cpu].ret) {
if (job[cpu].ret != -EBUSY)
einv++;
else
ebusy++;
}
}
if (einv)
ret = -EINVAL;
else if (ebusy)
ret = -EBUSY;
else
ret = 0;
if (ret < 0 && ret != -EBUSY) {
pr_err("%s, failed, ret:%d, ok:%d\n",
__func__, ret, atomic_read(&ok));
}
return ret;
}
static unsigned long __aml_check_pageblock_isolate(unsigned long pfn,
						   unsigned long end_pfn,
						   int flags)
{
struct page *page;
while (pfn < end_pfn) {
page = pfn_to_page(pfn);
if (PageBuddy(page)) {
/*
* If the page is on a free list, it has to be on
* the correct MIGRATE_ISOLATE freelist. There is no
* simple way to verify that as VM_BUG_ON(), though.
*/
pfn += 1 << buddy_order(page);
} else if ((flags & MEMORY_OFFLINE) && PageHWPoison(page)) {
/* A HWPoisoned page cannot be also PageBuddy */
pfn++;
} else if ((flags & MEMORY_OFFLINE) && PageOffline(page) &&
!page_count(page)) {
/*
* The responsible driver agreed to skip PageOffline()
* pages when offlining memory by dropping its
* reference in MEM_GOING_OFFLINE.
*/
pfn++;
} else {
cma_debug(1, page, " isolate failed\n");
break;
}
}
return pfn;
}
static inline struct page *
check_page_valid(unsigned long pfn, unsigned long nr_pages)
{
int i;
for (i = 0; i < nr_pages; i++) {
struct page *page;
page = pfn_to_online_page(pfn + i);
if (!page)
continue;
return page;
}
return NULL;
}
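/*
 * aml_check_pages_isolated - verify that [start_pfn, end_pfn) is fully
 * isolated: every pageblock has the MIGRATE_ISOLATE type and every page in
 * the range is free (or HWPoison/offline when allowed by @isol_flags).
 * Returns 0 on success, -EBUSY otherwise; the failing page is reported to
 * page_pinner.
 */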
int aml_check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
int isol_flags)
{
unsigned long pfn, flags;
struct page *page;
struct zone *zone;
int ret;
/*
* Note: pageblock_nr_pages != MAX_ORDER. Then, chunks of free pages
* are not aligned to pageblock_nr_pages.
* Then we just check migratetype first.
*/
for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
page = check_page_valid(pfn, pageblock_nr_pages);
if (page && !is_migrate_isolate_page(page))
break;
}
page = check_page_valid(start_pfn, end_pfn - start_pfn);
if (pfn < end_pfn || !page) {
ret = -EBUSY;
goto out;
}
/* Check all pages are free or marked as ISOLATED */
zone = page_zone(page);
spin_lock_irqsave(&zone->lock, flags);
pfn = __aml_check_pageblock_isolate(start_pfn, end_pfn, isol_flags);
spin_unlock_irqrestore(&zone->lock, flags);
ret = pfn < end_pfn ? -EBUSY : 0;
out:
trace_test_pages_isolated(start_pfn, end_pfn, pfn);
if (pfn < end_pfn)
page_pinner_failure_detect(pfn_to_page(pfn));
return ret;
}
static unsigned long cur_alloc_start;
static unsigned long cur_alloc_end;
int in_cma_allocating(struct page *page)
{
unsigned long pfn;
if (!page)
return 0;
pfn = page_to_pfn(page);
if (pfn >= cur_alloc_start && pfn <= cur_alloc_end)
return 1;
return 0;
}
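/*
 * aml_cma_alloc_range - Amlogic replacement for alloc_contig_range(). The
 * pageblock-aligned range is isolated, in-use pages are migrated away (with
 * the per-cpu boost threads for large requests), isolation is re-checked
 * with up to 10 retries, the free pages are pulled from the free lists and
 * any aligned head/tail excess is given back with aml_cma_free().
 */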
int aml_cma_alloc_range(unsigned long start, unsigned long end,
unsigned int migrate_type, gfp_t gfp_mask)
{
unsigned long outer_start, outer_end;
int ret = 0, order;
int try_times = 0;
int boost_ok = 0;
#if CONFIG_AMLOGIC_KERNEL_VERSION >= 14515
unsigned long failed_pfn;
#endif
struct compact_control cc = {
.nr_migratepages = 0,
.order = -1,
.zone = page_zone(pfn_to_page(start)),
.mode = MIGRATE_SYNC,
.ignore_skip_hint = true,
.no_set_skip_hint = true,
.gfp_mask = current_gfp_context(gfp_mask),
.alloc_contig = true,
};
INIT_LIST_HEAD(&cc.migratepages);
mutex_lock(&cma_mutex);
cma_debug(0, NULL, " range [%lx-%lx]\n", start, end);
#if CONFIG_AMLOGIC_KERNEL_VERSION >= 14515
ret = start_isolate_page_range(pfn_max_align_down(start),
pfn_max_align_up(end), migrate_type,
0, &failed_pfn);
#else
ret = start_isolate_page_range(pfn_max_align_down(start),
pfn_max_align_up(end), migrate_type, 0);
#endif
if (ret < 0) {
cma_debug(1, NULL, "ret:%d\n", ret);
return ret;
}
cur_alloc_start = start;
cur_alloc_end = end;
cma_isolated += (pfn_max_align_up(end) - pfn_max_align_down(start));
try_again:
lru_add_drain();
drain_all_pages(cc.zone);
	/*
	 * use more CPUs for the migration when the allocation is large
	 */
cpus_read_lock();
if ((num_online_cpus() > 1) && can_boost &&
((end - start) >= pageblock_nr_pages / 2)) {
ret = cma_alloc_contig_boost(start, end - start);
boost_ok = !ret ? 1 : 0;
} else {
ret = aml_alloc_contig_migrate_range(&cc, start,
end, 0, current);
}
cpus_read_unlock();
if (ret && ret != -EBUSY) {
cma_debug(1, NULL, "ret:%d\n", ret);
goto done;
}
ret = 0;
order = 0;
outer_start = start;
while (!PageBuddy(pfn_to_page(outer_start))) {
if (++order >= MAX_ORDER) {
outer_start = start;
break;
}
outer_start &= ~0UL << order;
}
if (outer_start != start) {
order = buddy_order(pfn_to_page(outer_start));
/*
* outer_start page could be small order buddy page and
* it doesn't include start page. Adjust outer_start
* in this case to report failed page properly
* on tracepoint in test_pages_isolated()
*/
if (outer_start + (1UL << order) <= start)
outer_start = start;
}
/* Make sure the range is really isolated. */
if (aml_check_pages_isolated(outer_start, end, false)) {
cma_debug(1, NULL, "check page isolate(%lx, %lx) failed\n",
outer_start, end);
try_times++;
if (try_times < 10)
goto try_again;
ret = -EBUSY;
goto done;
}
/* Grab isolated pages from freelists. */
outer_end = isolate_freepages_range(&cc, outer_start, end);
if (!outer_end) {
ret = -EBUSY;
cma_debug(1, NULL, "iso free range(%lx, %lx) failed\n",
outer_start, end);
goto done;
}
/* Free head and tail (if any) */
if (start != outer_start)
aml_cma_free(outer_start, start - outer_start, 0);
if (end != outer_end)
aml_cma_free(end, outer_end - end, 0);
done:
undo_isolate_page_range(pfn_max_align_down(start),
pfn_max_align_up(end), migrate_type);
cma_isolated -= (pfn_max_align_up(end) - pfn_max_align_down(start));
cur_alloc_start = 0;
cur_alloc_end = 0;
mutex_unlock(&cma_mutex);
return ret;
}
static int __aml_cma_free_check(struct page *page, int order, unsigned int *cnt)
{
int i;
int ref = 0;
	/*
	 * Drop the reference of every tail page. The head page is skipped
	 * here because its reference is dropped by __free_pages().
	 */
for (i = 1; i < (1 << order); i++) {
if (!put_page_testzero(page + i))
ref++;
}
if (ref) {
pr_info("%s, %d pages are still in use\n", __func__, ref);
*cnt += ref;
return -1;
}
return 0;
}
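/*
 * aml_cma_get_page_order - order of the largest buddy block that @pfn is
 * aligned to, i.e. the index of its lowest set bit, capped at MAX_ORDER - 1.
 */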
static int aml_cma_get_page_order(unsigned long pfn)
{
int i, mask = 1;
for (i = 0; i < (MAX_ORDER - 1); i++) {
if (pfn & (mask << i))
break;
}
return i;
}
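/*
 * aml_cma_free - give a CMA range back to the buddy allocator. The range is
 * freed in the largest blocks allowed by both the pfn alignment and the
 * remaining page count; tail-page references are dropped and checked before
 * each block goes to __free_pages(). When @update is set, nr_cma_allocated
 * is decreased by the freed amount.
 */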
void aml_cma_free(unsigned long pfn, unsigned int nr_pages, int update)
{
unsigned int count = 0;
struct page *page;
int free_order, start_order = 0;
int batch;
unsigned int orig_nr_pages = nr_pages;
while (nr_pages) {
page = pfn_to_page(pfn);
free_order = aml_cma_get_page_order(pfn);
if (nr_pages >= (1 << free_order)) {
start_order = free_order;
} else {
			/* remaining pages are not enough for this order */
start_order = 0;
while (nr_pages >= (1 << start_order))
start_order++;
start_order--;
}
batch = (1 << start_order);
if (__aml_cma_free_check(page, start_order, &count))
break;
__free_pages(page, start_order);
pr_debug("pages:%4d, free:%2d, start:%2d, batch:%4d, pfn:%lx\n",
nr_pages, free_order,
start_order, batch, pfn);
nr_pages -= batch;
pfn += batch;
}
WARN(count != 0, "%d pages are still in use!\n", count);
if (update) {
#ifdef CONFIG_AMLOGIC_PAGE_TRACE
if (cma_alloc_trace)
pr_info("c f p:%lx, c:%d, f:%ps\n",
pfn, count, (void *)find_back_trace());
#endif /* CONFIG_AMLOGIC_PAGE_TRACE */
atomic_long_sub(orig_nr_pages, &nr_cma_allocated);
}
}
static bool cma_vma_show(struct page *page, struct vm_area_struct *vma,
unsigned long addr, void *arg)
{
#ifdef CONFIG_AMLOGIC_USER_FAULT
struct mm_struct *mm = vma->vm_mm;
show_vma(mm, addr);
#endif
return false; /* keep loop */
}
void rmap_walk_vma(struct page *page)
{
struct rmap_walk_control rwc = {
.rmap_one = cma_vma_show,
};
pr_info("%s, show map for page:%lx,f:%lx, m:%px, p:%d\n",
__func__, page_to_pfn(page), page->flags,
page->mapping, page_count(page));
if (!page_mapping(page))
return;
rmap_walk(page, &rwc);
}
void show_page(struct page *page)
{
unsigned long trace = 0;
unsigned long map_flag = -1UL;
if (!page)
return;
#ifdef CONFIG_AMLOGIC_PAGE_TRACE
trace = get_page_trace(page);
#endif
if (page->mapping && !((unsigned long)page->mapping & 0x3))
map_flag = page->mapping->flags;
pr_info("page:%lx, map:%lx, mf:%lx, pf:%lx, m:%d, c:%d, o:%lx, pt:%lx, f:%ps\n",
page_to_pfn(page), (unsigned long)page->mapping, map_flag,
page->flags & 0xffffffff,
page_mapcount(page), page_count(page), page->private, page->index,
(void *)trace);
if (cma_debug_level > 4 && !irqs_disabled())
rmap_walk_vma(page);
}
static int cma_debug_show(struct seq_file *m, void *arg)
{
seq_printf(m, "level=%d, alloc trace:%d, allow task:%d\n",
cma_debug_level, cma_alloc_trace, allow_cma_tasks);
seq_printf(m, "driver used:%lu isolated:%d total:%lu\n",
get_cma_allocated(), 0, totalcma_pages);
return 0;
}
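/*
 * cma_debug_write - parse writes to /proc/cma_debug:
 *   "cma_task=<n>"  limit how many per-cpu boost threads are used
 *   "cma_prio=<n>"  renice the boost threads
 *   "cma_trace=<n>" enable (non-zero) or disable allocation tracing
 *   "<n>"           set cma_debug_level (0..MAX_DEBUG_LEVEL)
 * e.g. 'echo cma_task=2 > /proc/cma_debug' limits migration to two worker
 * threads.
 */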
static ssize_t cma_debug_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
int arg = 0;
int ok = 0;
int cpu;
struct cma_pcp *work;
char *buf;
	/* allocate one extra zeroed byte so the copied string is NUL-terminated */
	buf = kzalloc(count + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	if (copy_from_user(buf, buffer, count))
		goto exit;
if (!strncmp(buf, "cma_task=", 9)) { /* option for 'cma_task=' */
		if (sscanf(buf, "cma_task=%d", &arg) != 1)
goto exit;
if (arg <= num_online_cpus() && arg >= 1) {
ok = 1;
allow_cma_tasks = arg;
pr_info("set allow_cma_tasks to %d\n", allow_cma_tasks);
}
goto exit;
}
if (!strncmp(buf, "cma_prio=", 9)) { /* option for 'cma_prio=' */
		if (sscanf(buf, "cma_prio=%d", &arg) != 1)
goto exit;
		if (arg >= MIN_NICE && arg <= MAX_NICE) {
for_each_possible_cpu(cpu) {
work = &per_cpu(cma_pcp_thread, cpu);
set_user_nice(work->task, arg);
}
ok = 1;
pr_info("renice cma task to %d\n", arg);
}
goto exit;
}
	if (!strncmp(buf, "cma_trace=", 10)) { /* option for 'cma_trace=' */
		if (sscanf(buf, "cma_trace=%d", &arg) != 1)
goto exit;
cma_alloc_trace = arg ? 1 : 0;
goto exit;
}
if (kstrtoint(buf, 10, &arg))
goto exit;
if (arg > MAX_DEBUG_LEVEL)
goto exit;
ok = 1;
cma_debug_level = arg;
exit:
kfree(buf);
if (ok)
return count;
else
return -EINVAL;
}
static int cma_debug_open(struct inode *inode, struct file *file)
{
return single_open(file, cma_debug_show, NULL);
}
static const struct proc_ops cma_dbg_file_ops = {
.proc_open = cma_debug_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_write = cma_debug_write,
.proc_release = single_release,
};
static int __init aml_cma_init(void)
{
atomic_set(&cma_allocate, 0);
atomic_long_set(&nr_cma_allocated, 0);
dentry = proc_create("cma_debug", 0644, NULL, &cma_dbg_file_ops);
if (IS_ERR_OR_NULL(dentry)) {
		pr_err("%s, create proc entry failed\n", __func__);
return -1;
}
return 0;
}
arch_initcall(aml_cma_init);