blob: 459a29861c410390eb6f44b6311a5721a0ae13b8 [file] [log] [blame]
/* drivers/misc/lowmemorykiller.c
*
* The lowmemorykiller driver lets user-space specify a set of memory thresholds
* where processes with a range of oom_score_adj values will get killed. Specify
* the minimum oom_score_adj values in
* /sys/module/lowmemorykiller/parameters/adj and the number of free pages in
* /sys/module/lowmemorykiller/parameters/minfree. Both files take a comma
* separated list of numbers in ascending order.
*
* For example, write "0,8" to /sys/module/lowmemorykiller/parameters/adj and
* "1024,4096" to /sys/module/lowmemorykiller/parameters/minfree to kill
* processes with an oom_score_adj value of 8 or higher when the free memory
* drops below 4096 pages and kill processes with an oom_score_adj value of 0 or
* higher when the free memory drops below 1024 pages.
*
* The driver considers memory used for caches to be free, but if a large
* percentage of the cached memory is locked this can be very inaccurate
* and processes may not get killed until the normal oom killer is triggered.
*
* Copyright (C) 2007-2008 Google, Inc.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cma.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/oom.h>
#include <linux/sched/signal.h>
#include <linux/swap.h>
#include <linux/rcupdate.h>
#include <linux/profile.h>
#include <linux/notifier.h>
#define PAGE_TO_KB (PAGE_SIZE / 1024)
/*
 * Verbosity threshold for lowmem_print(): a message is emitted when this
 * value is >= the level passed to the macro. Exposed as the "debug_level"
 * module parameter.
 */
static u32 lowmem_debug_level = 1;
/*
 * oom_score_adj thresholds, one per entry of lowmem_minfree[]. Entry i is the
 * minimum oom_score_adj a process must have to be a kill candidate once the
 * memory level drops below lowmem_minfree[i]. The array has room for six
 * entries; only the first four are given defaults here.
 */
static short lowmem_adj[6] = {
0,
1,
6,
12,
};
/* Number of entries of lowmem_adj[] in use; updated when the "adj" parameter is written. */
static int lowmem_adj_size = 4;
/*
 * Memory thresholds, in pages, paired index-by-index with lowmem_adj[].
 * The MB figures in the trailing comments correspond to a 4 kB page size.
 */
static int lowmem_minfree[6] = {
3 * 512, /* 6MB */
2 * 1024, /* 8MB */
4 * 1024, /* 16MB */
16 * 1024, /* 64MB */
};
/* Number of entries of lowmem_minfree[] in use; updated when the "minfree" parameter is written. */
static int lowmem_minfree_size = 4;
/*
 * Jiffies deadline set by lowmem_scan() to "now + HZ" each time it kills a
 * task. While the deadline has not passed and a task flagged as lmk-waiting
 * is still found, lowmem_scan() returns without choosing another victim.
 */
static unsigned long lowmem_deathpending_timeout;
/*
 * Snapshot of one process's memory usage, captured by record_task_stats()
 * and printed by print_task_stats(). All sizes are stored in kB.
 */
struct process_stats_info {
/* Process id; 0 marks an unused slot (the table is zeroed before each scan). */
pid_t pid;
/* Copy of the task's command name. */
char comm[TASK_COMM_LEN];
/* Resident set size (get_mm_rss()), in kB. */
unsigned long rss;
/* MM_SHMEMPAGES counter, in kB. */
unsigned long shared;
/* MM_FILEPAGES counter, in kB. */
unsigned long file;
/* MM_ANONPAGES counter, in kB. */
unsigned long anon;
};
/* Maximum number of processes kept in the "top memory users" table. */
#define PROCESS_STATS_COUNT (25)
/*
 * Table of the largest processes seen during the current lowmem_scan() pass,
 * kept ordered by descending rss by record_task_stats().
 */
static struct process_stats_info process_stats[PROCESS_STATS_COUNT];
/*
 * Log a message through pr_info() only when lowmem_debug_level is at least
 * @level. The remaining arguments are a printf-style format and its values.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define lowmem_print(level, x...) \
do { \
if (lowmem_debug_level >= (level)) \
pr_info(x); \
} while (0)
// Lock is acquired before this function is called
static void record_task_stats(struct task_struct *task) {
int i = 0;
unsigned long rss = get_mm_rss(task->mm) * (long)(PAGE_SIZE / 1024);
for (i = 0; i < PROCESS_STATS_COUNT; i++) {
if(rss > process_stats[i].rss) {
//shift down entries (or just replace last entry)
if (i < (PROCESS_STATS_COUNT - 1)) {
memmove(&process_stats[i+1], &process_stats[i],
sizeof(process_stats[0]) * (PROCESS_STATS_COUNT - 1 - i));
}
process_stats[i].pid = task->pid;
strncpy(process_stats[i].comm, task->comm, TASK_COMM_LEN);
process_stats[i].rss = rss;
process_stats[i].shared = get_mm_counter(task->mm, MM_SHMEMPAGES) * (long)(PAGE_SIZE / 1024);
process_stats[i].file = get_mm_counter(task->mm, MM_FILEPAGES) * (long)(PAGE_SIZE / 1024);
process_stats[i].anon = get_mm_counter(task->mm, MM_ANONPAGES) * (long)(PAGE_SIZE / 1024);
return;
}
}
}
static void print_task_stats(void) {
int i = 0;
printk("-----------------------------");
printk("Lowmemorykiller is about to kill a process, current top processes: ");
for (i = 0; i < PROCESS_STATS_COUNT; i++) {
if (process_stats[i].pid != 0) {
printk("%s (%d) Rss: %lu kB (Shared: %lu kB File: %lu kB Anon: %lu kB)",
process_stats[i].comm, process_stats[i].pid,
process_stats[i].rss, process_stats[i].shared,
process_stats[i].file, process_stats[i].anon);
}
}
printk("-----------------------------");
}
/*
 * Dump system-wide memory statistics to the kernel log.
 *
 * First calls show_mem() for the detailed per-node / per-zone report, then
 * prints a /proc/meminfo-like summary. Every summary value is reported in kB:
 * page counters are scaled by PAGE_TO_KB, while NR_KERNEL_STACK_KB is printed
 * as-is because that counter is already kept in kB.
 */
static void print_system_memory_stats(void)
{
	struct sysinfo info;

	printk("Detailed system memory information follows. First system totals (in pages), ");
	printk("then totals for each node (currenty there is only one node on all plaforms), ");
	printk("then totals for each zone in the node.");
	show_mem(0, NULL);
	printk("-----------------------------");
	printk("Summary of system memory information: ");

	/* Fill in RAM totals, then the swap totals, of the same sysinfo. */
	si_meminfo(&info);
	si_swapinfo(&info);

	printk("MemTotal: %lu kB", PAGE_TO_KB * info.totalram);
	printk("MemFree: %lu kB", PAGE_TO_KB * info.freeram);
	/*
	 * PAGE_TO_KB is unsigned long, so the product is unsigned long too;
	 * print it with %lu to match the argument type.
	 */
	printk("MemAvailable: %lu kB", PAGE_TO_KB * si_mem_available());
	printk(" ");
	printk("Anon: %lu kB", PAGE_TO_KB * (global_node_page_state(NR_INACTIVE_ANON) + global_node_page_state(NR_ACTIVE_ANON)));
	printk("File: %lu kB", PAGE_TO_KB * (global_node_page_state(NR_INACTIVE_FILE) + global_node_page_state(NR_ACTIVE_FILE)));
	printk("Slab: %lu kB", PAGE_TO_KB * (global_node_page_state(NR_SLAB_RECLAIMABLE) + global_node_page_state(NR_SLAB_UNRECLAIMABLE)));
	printk("KernelStack: %lu kB", global_zone_page_state(NR_KERNEL_STACK_KB));
	printk("PageTables: %lu kB", PAGE_TO_KB * global_zone_page_state(NR_PAGETABLE));
	printk(" ");
	printk("Buffers: %lu kB", PAGE_TO_KB * info.bufferram);
	printk("Shared %lu kB", PAGE_TO_KB * global_node_page_state(NR_SHMEM));
	/*
	 * Each label is paired with its own counter: the file line reports
	 * NR_ISOLATED_FILE and the anon line reports NR_ISOLATED_ANON.
	 */
	printk("Isolated File: %lu kB", PAGE_TO_KB * global_node_page_state(NR_ISOLATED_FILE));
	printk("Isolated Anon: %lu kB", PAGE_TO_KB * global_node_page_state(NR_ISOLATED_ANON));
	printk("Unevictable: %lu kB", PAGE_TO_KB * global_node_page_state(NR_UNEVICTABLE));
	printk("Mlocked: %lu kB", PAGE_TO_KB * global_zone_page_state(NR_MLOCK));
	printk("Dirty: %lu kB", PAGE_TO_KB * global_node_page_state(NR_FILE_DIRTY));
	printk("Writeback: %lu kB", PAGE_TO_KB * global_node_page_state(NR_WRITEBACK));
	printk("SwapTotal: %lu kB", PAGE_TO_KB * info.totalswap);
	printk("SwapFree: %lu kB", PAGE_TO_KB * info.freeswap);
	printk("Anon Mapped: %lu kB", PAGE_TO_KB * global_node_page_state(NR_ANON_MAPPED));
	printk("File Mapped: %lu kB", PAGE_TO_KB * global_node_page_state(NR_FILE_MAPPED));
#ifdef CONFIG_CMA
	printk("CmaTotal: %lu kB", PAGE_TO_KB * totalcma_pages);
	printk("CmaFree: %lu kB", PAGE_TO_KB * global_zone_page_state(NR_FREE_CMA_PAGES));
#endif
	printk("-----------------------------");
}
/*
 * Shrinker count callback.
 *
 * Returns the sum of the active and inactive, anonymous and file, node page
 * counters. Neither @s nor @sc is consulted.
 */
static unsigned long lowmem_count(struct shrinker *s,
				  struct shrink_control *sc)
{
	unsigned long lru_pages;

	lru_pages = global_node_page_state(NR_ACTIVE_ANON);
	lru_pages += global_node_page_state(NR_ACTIVE_FILE);
	lru_pages += global_node_page_state(NR_INACTIVE_ANON);
	lru_pages += global_node_page_state(NR_INACTIVE_FILE);

	return lru_pages;
}
/*
 * lowmem_scan - shrinker scan callback: pick one process and kill it.
 * @s:  the shrinker being run (not used in the body).
 * @sc: shrink control; nr_to_scan and gfp_mask are only used in log messages.
 *
 * Steps:
 *  1. Compare the current memory levels against lowmem_minfree[] to find the
 *     minimum oom_score_adj (from lowmem_adj[]) that is eligible to be killed.
 *     If no threshold is crossed, return 0.
 *  2. Walk every process. Each user process with an mm is recorded in the
 *     process_stats table; those with oom_score_adj >= the minimum compete to
 *     be the victim. The winner is the one with the highest oom_score_adj,
 *     ties broken by the larger rss.
 *  3. If a victim was found, dump the top-process table and the system memory
 *     summary, send SIGKILL, and arm lowmem_deathpending_timeout.
 *
 * Return: the victim's rss in pages, or 0 when nothing was killed.
 *
 * NOTE(review): process_stats is a file-scope table that is cleared and
 * filled here with no lock visible in this file; if this callback can run
 * concurrently on several CPUs the printed table may be mixed - confirm.
 */
static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
{
struct task_struct *tsk;
struct task_struct *selected = NULL;
unsigned long rem = 0;
int tasksize;
int i;
/* One past the maximum serves as the "no threshold crossed" sentinel. */
short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
int minfree = 0;
int selected_tasksize = 0;
short selected_oom_score_adj;
int array_size = ARRAY_SIZE(lowmem_adj);
/*
 * The free-pages-minus-reserve calculation is compiled out; the value of
 * si_mem_available() is used as the "free" figure instead.
 */
#if 0
int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages;
#else
int other_free = si_mem_available();
#endif
/* File pages excluding shmem and swap cache. */
int other_file = global_node_page_state(NR_FILE_PAGES) -
global_node_page_state(NR_SHMEM) -
total_swapcache_pages();
/* Only consider as many thresholds as both parameter arrays provide. */
if (lowmem_adj_size < array_size)
array_size = lowmem_adj_size;
if (lowmem_minfree_size < array_size)
array_size = lowmem_minfree_size;
/*
 * Take the first threshold for which BOTH figures are below minfree.
 * minfree keeps the last value examined and is reported in the kill message.
 */
for (i = 0; i < array_size; i++) {
minfree = lowmem_minfree[i];
if (other_free < minfree && other_file < minfree) {
min_score_adj = lowmem_adj[i];
break;
}
}
lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n",
sc->nr_to_scan, sc->gfp_mask, other_free,
other_file, min_score_adj);
/* Sentinel still in place: memory is above every threshold, nothing to do. */
if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
lowmem_print(5, "lowmem_scan %lu, %x, return 0\n",
sc->nr_to_scan, sc->gfp_mask);
return 0;
}
selected_oom_score_adj = min_score_adj;
// We are probably killing a task, start recording and printing top memory users
memset(process_stats, 0, sizeof(process_stats));
/* The RCU read section covers the process walk and the later use of 'selected'. */
rcu_read_lock();
for_each_process(tsk) {
struct task_struct *p;
short oom_score_adj;
/* Kernel threads are never candidates. */
if (tsk->flags & PF_KTHREAD)
continue;
/* On success p is returned locked; every path below calls task_unlock(p). */
p = find_lock_task_mm(tsk);
if (!p)
continue;
/*
 * A previously killed task is still flagged as waiting and its grace
 * period (set to jiffies + HZ at kill time) has not expired: give it time
 * to exit instead of killing another process. process_stats may be left
 * partially filled here; it is cleared again on the next pass.
 */
if (task_lmk_waiting(p) &&
time_before_eq(jiffies, lowmem_deathpending_timeout)) {
task_unlock(p);
rcu_read_unlock();
return 0;
}
/* Recorded for every process, including ones below min_score_adj. */
record_task_stats(p);
oom_score_adj = p->signal->oom_score_adj;
if (oom_score_adj < min_score_adj) {
task_unlock(p);
continue;
}
/* rss in pages; read while p is still locked. */
tasksize = get_mm_rss(p->mm);
task_unlock(p);
if (tasksize <= 0)
continue;
/* Keep the current pick unless p has a higher adj, or the same adj and a larger rss. */
if (selected) {
if (oom_score_adj < selected_oom_score_adj)
continue;
if (oom_score_adj == selected_oom_score_adj &&
tasksize <= selected_tasksize)
continue;
}
selected = p;
selected_tasksize = tasksize;
selected_oom_score_adj = oom_score_adj;
lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n",
p->comm, p->pid, oom_score_adj, tasksize);
}
if (selected) {
/* These two dumps are printed regardless of lowmem_debug_level. */
print_task_stats();
print_system_memory_stats();
task_lock(selected);
send_sig(SIGKILL, selected, 0);
/* Flag the victim so later scans back off while it is exiting. */
if (selected->mm)
task_set_lmk_waiting(selected);
task_unlock(selected);
/*
 * NOTE(review): the last line of this message says "above reserved", but
 * other_free now comes from si_mem_available() - wording may be stale.
 */
lowmem_print(1, "Killing '%s' (%d), adj %hd,\n"
" to free %ldkB on behalf of '%s' (%d) because\n"
" cache %ldkB is below limit %ldkB for oom_score_adj %hd\n"
" Free memory is %ldkB above reserved\n",
selected->comm, selected->pid,
selected_oom_score_adj,
selected_tasksize * (long)(PAGE_SIZE / 1024),
current->comm, current->pid,
other_file * (long)(PAGE_SIZE / 1024),
minfree * (long)(PAGE_SIZE / 1024),
min_score_adj,
other_free * (long)(PAGE_SIZE / 1024));
/* Give the victim one second (HZ jiffies) before another kill is allowed. */
lowmem_deathpending_timeout = jiffies + HZ;
rem += selected_tasksize;
}
lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n",
sc->nr_to_scan, sc->gfp_mask, rem);
rcu_read_unlock();
return rem;
}
/*
 * Shrinker descriptor wiring lowmem_count() and lowmem_scan() into the
 * shrinker framework. The seeks field defaults to DEFAULT_SEEKS * 16 and is
 * also exposed for tuning as the "cost" module parameter.
 */
static struct shrinker lowmem_shrinker = {
.scan_objects = lowmem_scan,
.count_objects = lowmem_count,
.seeks = DEFAULT_SEEKS * 16
};
/*
 * Register the low-memory-killer shrinker at device-initcall time.
 *
 * Return: 0 on success, otherwise the error code from register_shrinker().
 * The result is propagated (and logged) instead of being dropped, so a
 * failed registration no longer looks like a successful init.
 */
static int __init lowmem_init(void)
{
	int ret = register_shrinker(&lowmem_shrinker);

	if (ret)
		pr_err("failed to register shrinker: %d\n", ret);

	return ret;
}
device_initcall(lowmem_init);
/*
 * not really modular, but the easiest way to keep compat with existing
 * bootargs behaviour is to continue using module_param here.
 *
 * Parameters (all mode 0644):
 *   cost        - lowmem_shrinker.seeks
 *   adj         - lowmem_adj[], element count stored in lowmem_adj_size
 *   minfree     - lowmem_minfree[], element count stored in lowmem_minfree_size
 *   debug_level - lowmem_debug_level, the lowmem_print() verbosity threshold
 *
 * NOTE(review): minfree is declared with parameter type uint while
 * lowmem_minfree[] is an int array - confirm this mismatch is intended.
 */
module_param_named(cost, lowmem_shrinker.seeks, int, 0644);
module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size, 0644);
module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size,
0644);
module_param_named(debug_level, lowmem_debug_level, uint, 0644);