| /* drivers/misc/lowmemorykiller.c |
| * |
| * The lowmemorykiller driver lets user-space specify a set of memory thresholds |
| * where processes with a range of oom_score_adj values will get killed. Specify |
| * the minimum oom_score_adj values in |
| * /sys/module/lowmemorykiller/parameters/adj and the number of free pages in |
| * /sys/module/lowmemorykiller/parameters/minfree. Both files take a comma |
| * separated list of numbers in ascending order. |
| * |
| * For example, write "0,8" to /sys/module/lowmemorykiller/parameters/adj and |
| * "1024,4096" to /sys/module/lowmemorykiller/parameters/minfree to kill |
| * processes with an oom_score_adj value of 8 or higher when the free memory |
| * drops below 4096 pages and kill processes with an oom_score_adj value of 0 or |
| * higher when the free memory drops below 1024 pages. |
| * |
| * The driver considers memory used for caches to be free, but if a large |
| * percentage of the cached memory is locked this can be very inaccurate |
| * and processes may not get killed until the normal oom killer is triggered. |
| * |
| * Copyright (C) 2007-2008 Google, Inc. |
| * |
| * This software is licensed under the terms of the GNU General Public |
| * License version 2, as published by the Free Software Foundation, and |
| * may be copied, distributed, and modified under those terms. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| */ |
| |
| #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| |
| #include <linux/cma.h> |
| #include <linux/init.h> |
| #include <linux/moduleparam.h> |
| #include <linux/kernel.h> |
| #include <linux/mm.h> |
| #include <linux/oom.h> |
| #include <linux/sched/signal.h> |
| #include <linux/swap.h> |
| #include <linux/rcupdate.h> |
| #include <linux/profile.h> |
| #include <linux/notifier.h> |
| |
| #define PAGE_TO_KB (PAGE_SIZE / 1024) |
| |
| static u32 lowmem_debug_level = 1; |
| static short lowmem_adj[6] = { |
| 0, |
| 1, |
| 6, |
| 12, |
| }; |
| |
| static int lowmem_adj_size = 4; |
| static int lowmem_minfree[6] = { |
| 3 * 512, /* 6MB */ |
| 2 * 1024, /* 8MB */ |
| 4 * 1024, /* 16MB */ |
| 16 * 1024, /* 64MB */ |
| }; |
| |
| static int lowmem_minfree_size = 4; |
| |
| static unsigned long lowmem_deathpending_timeout; |
| |
| struct process_stats_info { |
| pid_t pid; |
| char comm[TASK_COMM_LEN]; |
| unsigned long rss; |
| unsigned long shared; |
| unsigned long file; |
| unsigned long anon; |
| }; |
| |
| #define PROCESS_STATS_COUNT (25) |
| static struct process_stats_info process_stats[PROCESS_STATS_COUNT]; |
| |
/*
 * lowmem_print() - pr_info() gated on the current debug level.
 * @level: the message is printed only when lowmem_debug_level >= @level
 * @fmt:   printk-style format string, followed by its arguments
 */
#define lowmem_print(level, fmt, ...)				\
	do {							\
		if (lowmem_debug_level >= (level))		\
			pr_info(fmt, ##__VA_ARGS__);		\
	} while (0)
| |
| // Lock is acquired before this function is called |
| static void record_task_stats(struct task_struct *task) { |
| int i = 0; |
| unsigned long rss = get_mm_rss(task->mm) * (long)(PAGE_SIZE / 1024); |
| |
| for (i = 0; i < PROCESS_STATS_COUNT; i++) { |
| if(rss > process_stats[i].rss) { |
| //shift down entries (or just replace last entry) |
| if (i < (PROCESS_STATS_COUNT - 1)) { |
| memmove(&process_stats[i+1], &process_stats[i], |
| sizeof(process_stats[0]) * (PROCESS_STATS_COUNT - 1 - i)); |
| } |
| |
| process_stats[i].pid = task->pid; |
| strncpy(process_stats[i].comm, task->comm, TASK_COMM_LEN); |
| process_stats[i].rss = rss; |
| process_stats[i].shared = get_mm_counter(task->mm, MM_SHMEMPAGES) * (long)(PAGE_SIZE / 1024); |
| process_stats[i].file = get_mm_counter(task->mm, MM_FILEPAGES) * (long)(PAGE_SIZE / 1024); |
| process_stats[i].anon = get_mm_counter(task->mm, MM_ANONPAGES) * (long)(PAGE_SIZE / 1024); |
| return; |
| } |
| } |
| } |
| |
| static void print_task_stats(void) { |
| int i = 0; |
| printk("-----------------------------"); |
| printk("Lowmemorykiller is about to kill a process, current top processes: "); |
| for (i = 0; i < PROCESS_STATS_COUNT; i++) { |
| if (process_stats[i].pid != 0) { |
| printk("%s (%d) Rss: %lu kB (Shared: %lu kB File: %lu kB Anon: %lu kB)", |
| process_stats[i].comm, process_stats[i].pid, |
| process_stats[i].rss, process_stats[i].shared, |
| process_stats[i].file, process_stats[i].anon); |
| } |
| } |
| printk("-----------------------------"); |
| } |
| |
| static void print_system_memory_stats(void) { |
| struct sysinfo info; |
| printk("Detailed system memory information follows. First system totals (in pages), "); |
| printk("then totals for each node (currenty there is only one node on all plaforms), "); |
| printk("then totals for each zone in the node."); |
| show_mem(0, NULL); |
| printk("-----------------------------"); |
| printk("Summary of system memory information: "); |
| |
| si_meminfo(&info); |
| si_swapinfo(&info); |
| |
| printk("MemTotal: %lu kB", PAGE_TO_KB * info.totalram); |
| printk("MemFree: %lu kB", PAGE_TO_KB * info.freeram); |
| printk("MemAvailable: %ld kB", PAGE_TO_KB * si_mem_available()); |
| printk(" "); |
| printk("Anon: %lu kB", PAGE_TO_KB * (global_node_page_state(NR_INACTIVE_ANON) + global_node_page_state(NR_ACTIVE_ANON))); |
| printk("File: %lu kB", PAGE_TO_KB * (global_node_page_state(NR_INACTIVE_FILE) + global_node_page_state(NR_ACTIVE_FILE))); |
| printk("Slab: %lu kB", PAGE_TO_KB * (global_node_page_state(NR_SLAB_RECLAIMABLE) + global_node_page_state(NR_SLAB_UNRECLAIMABLE))); |
| printk("KernelStack: %lu kB", global_zone_page_state(NR_KERNEL_STACK_KB)); |
| printk("PageTables: %lu kB", PAGE_TO_KB * global_zone_page_state(NR_PAGETABLE)); |
| printk(" "); |
| printk("Buffers: %lu kB", PAGE_TO_KB * info.bufferram); |
| printk("Shared %lu kB", PAGE_TO_KB * global_node_page_state(NR_SHMEM)); |
| printk("Isolated File: %lu kB", PAGE_TO_KB * global_node_page_state(NR_ISOLATED_ANON)); |
| printk("Isolated Anon: %lu kB", PAGE_TO_KB * global_node_page_state(NR_ISOLATED_FILE)); |
| printk("Unevictable: %lu kB", PAGE_TO_KB * global_node_page_state(NR_UNEVICTABLE)); |
| printk("Mlocked: %lu kB", PAGE_TO_KB * global_zone_page_state(NR_MLOCK)); |
| printk("Dirty: %lu kB", PAGE_TO_KB * global_node_page_state(NR_FILE_DIRTY)); |
| printk("Writeback: %lu kB", PAGE_TO_KB * global_node_page_state(NR_WRITEBACK)); |
| printk("SwapTotal: %lu kB", PAGE_TO_KB * info.totalswap); |
| printk("SwapFree: %lu kB", PAGE_TO_KB * info.freeswap); |
| printk("Anon Mapped: %lu kB", PAGE_TO_KB * global_node_page_state(NR_ANON_MAPPED)); |
| printk("File Mapped: %lu kB", PAGE_TO_KB * global_node_page_state(NR_FILE_MAPPED)); |
| #ifdef CONFIG_CMA |
| printk("CmaTotal: %lu kB", PAGE_TO_KB * totalcma_pages); |
| printk("CmaFree: %lu kB", PAGE_TO_KB * global_zone_page_state(NR_FREE_CMA_PAGES)); |
| #endif |
| printk("-----------------------------"); |
| } |
| |
| static unsigned long lowmem_count(struct shrinker *s, |
| struct shrink_control *sc) |
| { |
| return global_node_page_state(NR_ACTIVE_ANON) + |
| global_node_page_state(NR_ACTIVE_FILE) + |
| global_node_page_state(NR_INACTIVE_ANON) + |
| global_node_page_state(NR_INACTIVE_FILE); |
| } |
| |
| static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) |
| { |
| struct task_struct *tsk; |
| struct task_struct *selected = NULL; |
| unsigned long rem = 0; |
| int tasksize; |
| int i; |
| short min_score_adj = OOM_SCORE_ADJ_MAX + 1; |
| int minfree = 0; |
| int selected_tasksize = 0; |
| short selected_oom_score_adj; |
| int array_size = ARRAY_SIZE(lowmem_adj); |
| #if 0 |
| int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; |
| #else |
| int other_free = si_mem_available(); |
| #endif |
| int other_file = global_node_page_state(NR_FILE_PAGES) - |
| global_node_page_state(NR_SHMEM) - |
| total_swapcache_pages(); |
| |
| if (lowmem_adj_size < array_size) |
| array_size = lowmem_adj_size; |
| if (lowmem_minfree_size < array_size) |
| array_size = lowmem_minfree_size; |
| for (i = 0; i < array_size; i++) { |
| minfree = lowmem_minfree[i]; |
| if (other_free < minfree && other_file < minfree) { |
| min_score_adj = lowmem_adj[i]; |
| break; |
| } |
| } |
| |
| lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n", |
| sc->nr_to_scan, sc->gfp_mask, other_free, |
| other_file, min_score_adj); |
| |
| if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) { |
| lowmem_print(5, "lowmem_scan %lu, %x, return 0\n", |
| sc->nr_to_scan, sc->gfp_mask); |
| return 0; |
| } |
| |
| selected_oom_score_adj = min_score_adj; |
| |
| // We are probably killing a task, start recording and printing top memory users |
| memset(process_stats, 0, sizeof(process_stats)); |
| |
| rcu_read_lock(); |
| for_each_process(tsk) { |
| struct task_struct *p; |
| short oom_score_adj; |
| |
| if (tsk->flags & PF_KTHREAD) |
| continue; |
| |
| p = find_lock_task_mm(tsk); |
| if (!p) |
| continue; |
| |
| if (task_lmk_waiting(p) && |
| time_before_eq(jiffies, lowmem_deathpending_timeout)) { |
| task_unlock(p); |
| rcu_read_unlock(); |
| return 0; |
| } |
| |
| record_task_stats(p); |
| |
| oom_score_adj = p->signal->oom_score_adj; |
| if (oom_score_adj < min_score_adj) { |
| task_unlock(p); |
| continue; |
| } |
| tasksize = get_mm_rss(p->mm); |
| task_unlock(p); |
| if (tasksize <= 0) |
| continue; |
| if (selected) { |
| if (oom_score_adj < selected_oom_score_adj) |
| continue; |
| if (oom_score_adj == selected_oom_score_adj && |
| tasksize <= selected_tasksize) |
| continue; |
| } |
| selected = p; |
| selected_tasksize = tasksize; |
| selected_oom_score_adj = oom_score_adj; |
| lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n", |
| p->comm, p->pid, oom_score_adj, tasksize); |
| } |
| if (selected) { |
| print_task_stats(); |
| print_system_memory_stats(); |
| task_lock(selected); |
| send_sig(SIGKILL, selected, 0); |
| if (selected->mm) |
| task_set_lmk_waiting(selected); |
| task_unlock(selected); |
| lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" |
| " to free %ldkB on behalf of '%s' (%d) because\n" |
| " cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" |
| " Free memory is %ldkB above reserved\n", |
| selected->comm, selected->pid, |
| selected_oom_score_adj, |
| selected_tasksize * (long)(PAGE_SIZE / 1024), |
| current->comm, current->pid, |
| other_file * (long)(PAGE_SIZE / 1024), |
| minfree * (long)(PAGE_SIZE / 1024), |
| min_score_adj, |
| other_free * (long)(PAGE_SIZE / 1024)); |
| lowmem_deathpending_timeout = jiffies + HZ; |
| rem += selected_tasksize; |
| } |
| |
| lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n", |
| sc->nr_to_scan, sc->gfp_mask, rem); |
| rcu_read_unlock(); |
| return rem; |
| } |
| |
| static struct shrinker lowmem_shrinker = { |
| .scan_objects = lowmem_scan, |
| .count_objects = lowmem_count, |
| .seeks = DEFAULT_SEEKS * 16 |
| }; |
| |
| static int __init lowmem_init(void) |
| { |
| register_shrinker(&lowmem_shrinker); |
| return 0; |
| } |
| device_initcall(lowmem_init); |
| |
| /* |
| * not really modular, but the easiest way to keep compat with existing |
| * bootargs behaviour is to continue using module_param here. |
| */ |
| module_param_named(cost, lowmem_shrinker.seeks, int, 0644); |
| module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size, 0644); |
| module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size, |
| 0644); |
| module_param_named(debug_level, lowmem_debug_level, uint, 0644); |
| |