1 /* drivers/misc/lowmemorykiller.c
2 *
3 * The lowmemorykiller driver lets user-space specify a set of memory thresholds
4 * where processes with a range of oom_score_adj values will get killed. Specify
5 * the minimum oom_score_adj values in
6 * /sys/module/lowmemorykiller/parameters/adj and the number of free pages in
7 * /sys/module/lowmemorykiller/parameters/minfree. Both files take a comma
8 * separated list of numbers in ascending order.
9 *
 * For example, write "0,8" to /sys/module/lowmemorykiller/parameters/adj and
 * "1024,4096" to /sys/module/lowmemorykiller/parameters/minfree to kill
 * processes with an oom_score_adj value of 8 or higher when the free memory
 * drops below 4096 pages and kill processes with an oom_score_adj value of 0 or
 * higher when the free memory drops below 1024 pages.
15 *
16 * The driver considers memory used for caches to be free, but if a large
17 * percentage of the cached memory is locked this can be very inaccurate
18 * and processes may not get killed until the normal oom killer is triggered.
19 *
20 * Copyright (C) 2007-2008 Google, Inc.
21 *
22 * This software is licensed under the terms of the GNU General Public
23 * License version 2, as published by the Free Software Foundation, and
24 * may be copied, distributed, and modified under those terms.
25 *
26 * This program is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 */
32
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35 #include <linux/init.h>
36 #include <linux/moduleparam.h>
37 #include <linux/kernel.h>
38 #include <linux/mm.h>
39 #include <linux/oom.h>
40 #include <linux/sched.h>
41 #include <linux/swap.h>
42 #include <linux/rcupdate.h>
43 #include <linux/profile.h>
44 #include <linux/notifier.h>
45
/* Messages passed to lowmem_print() with a level above this are dropped. */
static uint32_t lowmem_debug_level = 1;

/*
 * oom_score_adj cut-offs, one per pressure level. Room for six entries;
 * lowmem_adj_size records how many are in use (four by default).
 */
static short lowmem_adj[6] = { 0, 1, 6, 12 };
static int lowmem_adj_size = 4;

/*
 * Free-page thresholds, in pages, paired by index with lowmem_adj[].
 * The MB figures below assume 4KiB pages.
 */
static int lowmem_minfree[6] = {
	3 * 512,	/* 6MB */
	2 * 1024,	/* 8MB */
	4 * 1024,	/* 16MB */
	16 * 1024,	/* 64MB */
};
static int lowmem_minfree_size = 4;

/*
 * Jiffies value set to "now + HZ" after each kill; while it has not passed,
 * lowmem_scan() backs off if it finds a task already marked TIF_MEMDIE.
 */
static unsigned long lowmem_deathpending_timeout;
63
/*
 * Log through pr_info() only when lowmem_debug_level is at least @level;
 * otherwise the message is dropped.
 */
#define lowmem_print(level, fmt, ...)				\
	do {							\
		if (lowmem_debug_level >= (level))		\
			pr_info(fmt, ##__VA_ARGS__);		\
	} while (0)
69
lowmem_count(struct shrinker * s,struct shrink_control * sc)70 static unsigned long lowmem_count(struct shrinker *s,
71 struct shrink_control *sc)
72 {
73 return global_page_state(NR_ACTIVE_ANON) +
74 global_page_state(NR_ACTIVE_FILE) +
75 global_page_state(NR_INACTIVE_ANON) +
76 global_page_state(NR_INACTIVE_FILE);
77 }
78
lowmem_scan(struct shrinker * s,struct shrink_control * sc)79 static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
80 {
81 struct task_struct *tsk;
82 struct task_struct *selected = NULL;
83 unsigned long rem = 0;
84 int tasksize;
85 int i;
86 short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
87 int selected_tasksize = 0;
88 short selected_oom_score_adj;
89 int array_size = ARRAY_SIZE(lowmem_adj);
90 int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages;
91 int other_file = global_page_state(NR_FILE_PAGES) -
92 global_page_state(NR_SHMEM) -
93 total_swapcache_pages();
94
95 if (lowmem_adj_size < array_size)
96 array_size = lowmem_adj_size;
97 if (lowmem_minfree_size < array_size)
98 array_size = lowmem_minfree_size;
99 for (i = 0; i < array_size; i++) {
100 if (other_free < lowmem_minfree[i] &&
101 other_file < lowmem_minfree[i]) {
102 min_score_adj = lowmem_adj[i];
103 break;
104 }
105 }
106
107 lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n",
108 sc->nr_to_scan, sc->gfp_mask, other_free,
109 other_file, min_score_adj);
110
111 if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
112 lowmem_print(5, "lowmem_scan %lu, %x, return 0\n",
113 sc->nr_to_scan, sc->gfp_mask);
114 return 0;
115 }
116
117 selected_oom_score_adj = min_score_adj;
118
119 rcu_read_lock();
120 for_each_process(tsk) {
121 struct task_struct *p;
122 short oom_score_adj;
123
124 if (tsk->flags & PF_KTHREAD)
125 continue;
126
127 p = find_lock_task_mm(tsk);
128 if (!p)
129 continue;
130
131 if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
132 time_before_eq(jiffies, lowmem_deathpending_timeout)) {
133 task_unlock(p);
134 rcu_read_unlock();
135 return 0;
136 }
137 oom_score_adj = p->signal->oom_score_adj;
138 if (oom_score_adj < min_score_adj) {
139 task_unlock(p);
140 continue;
141 }
142 tasksize = get_mm_rss(p->mm);
143 task_unlock(p);
144 if (tasksize <= 0)
145 continue;
146 if (selected) {
147 if (oom_score_adj < selected_oom_score_adj)
148 continue;
149 if (oom_score_adj == selected_oom_score_adj &&
150 tasksize <= selected_tasksize)
151 continue;
152 }
153 selected = p;
154 selected_tasksize = tasksize;
155 selected_oom_score_adj = oom_score_adj;
156 lowmem_print(2, "select %d (%s), adj %hd, size %d, to kill\n",
157 p->pid, p->comm, oom_score_adj, tasksize);
158 }
159 if (selected) {
160 task_lock(selected);
161 send_sig(SIGKILL, selected, 0);
162 /*
163 * FIXME: lowmemorykiller shouldn't abuse global OOM killer
164 * infrastructure. There is no real reason why the selected
165 * task should have access to the memory reserves.
166 */
167 if (selected->mm)
168 mark_oom_victim(selected);
169 task_unlock(selected);
170 lowmem_print(1, "send sigkill to %d (%s), adj %hd, size %d\n",
171 selected->pid, selected->comm,
172 selected_oom_score_adj, selected_tasksize);
173 lowmem_deathpending_timeout = jiffies + HZ;
174 rem += selected_tasksize;
175 }
176
177 lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n",
178 sc->nr_to_scan, sc->gfp_mask, rem);
179 rcu_read_unlock();
180 return rem;
181 }
182
183 static struct shrinker lowmem_shrinker = {
184 .scan_objects = lowmem_scan,
185 .count_objects = lowmem_count,
186 .seeks = DEFAULT_SEEKS * 16
187 };
188
lowmem_init(void)189 static int __init lowmem_init(void)
190 {
191 register_shrinker(&lowmem_shrinker);
192 return 0;
193 }
194 device_initcall(lowmem_init);
195
196 /*
197 * not really modular, but the easiest way to keep compat with existing
198 * bootargs behaviour is to continue using module_param here.
199 */
200 module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR);
201 module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size,
202 S_IRUGO | S_IWUSR);
203 module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size,
204 S_IRUGO | S_IWUSR);
205 module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR);
206
207