1/*
2 * sysctl.c: General linux system control interface
3 *
4 * Begun 24 March 1995, Stephen Tweedie
5 * Added /proc support, Dec 1995
6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9 * Dynamic registration fixes, Stephen Tweedie.
10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12 *  Horn.
13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16 *  Wendling.
17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
18 *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19 */
20
21#include <linux/module.h>
22#include <linux/aio.h>
23#include <linux/mm.h>
24#include <linux/swap.h>
25#include <linux/slab.h>
26#include <linux/sysctl.h>
27#include <linux/bitmap.h>
28#include <linux/signal.h>
29#include <linux/printk.h>
30#include <linux/proc_fs.h>
31#include <linux/security.h>
32#include <linux/ctype.h>
33#include <linux/kmemcheck.h>
34#include <linux/kmemleak.h>
35#include <linux/fs.h>
36#include <linux/init.h>
37#include <linux/kernel.h>
38#include <linux/kobject.h>
39#include <linux/net.h>
40#include <linux/sysrq.h>
41#include <linux/highuid.h>
42#include <linux/writeback.h>
43#include <linux/ratelimit.h>
44#include <linux/compaction.h>
45#include <linux/hugetlb.h>
46#include <linux/initrd.h>
47#include <linux/key.h>
48#include <linux/times.h>
49#include <linux/limits.h>
50#include <linux/dcache.h>
51#include <linux/dnotify.h>
52#include <linux/syscalls.h>
53#include <linux/vmstat.h>
54#include <linux/nfs_fs.h>
55#include <linux/acpi.h>
56#include <linux/reboot.h>
57#include <linux/ftrace.h>
58#include <linux/perf_event.h>
59#include <linux/kprobes.h>
60#include <linux/pipe_fs_i.h>
61#include <linux/oom.h>
62#include <linux/kmod.h>
63#include <linux/capability.h>
64#include <linux/binfmts.h>
65#include <linux/sched/sysctl.h>
66#include <linux/kexec.h>
67
68#include <asm/uaccess.h>
69#include <asm/processor.h>
70
71#ifdef CONFIG_X86
72#include <asm/nmi.h>
73#include <asm/stacktrace.h>
74#include <asm/io.h>
75#endif
76#ifdef CONFIG_SPARC
77#include <asm/setup.h>
78#endif
79#ifdef CONFIG_BSD_PROCESS_ACCT
80#include <linux/acct.h>
81#endif
82#ifdef CONFIG_RT_MUTEXES
83#include <linux/rtmutex.h>
84#endif
85#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
86#include <linux/lockdep.h>
87#endif
88#ifdef CONFIG_CHR_DEV_SG
89#include <scsi/sg.h>
90#endif
91
92#ifdef CONFIG_LOCKUP_DETECTOR
93#include <linux/nmi.h>
94#endif
95
96#if defined(CONFIG_SYSCTL)
97
/*
 * Variables defined elsewhere in the kernel that are not declared in any
 * header file.  They are declared here so the tables in this file can
 * refer to them.
 */
extern int suid_dumpable;
#ifdef CONFIG_COREDUMP
extern int core_uses_pid;
extern char core_pattern[];
extern unsigned int core_pipe_limit;
#endif /* CONFIG_COREDUMP */
extern int pid_max;
extern int pid_max_min;
extern int pid_max_max;
extern int percpu_pagelist_fraction;
extern int compat_log;
extern int latencytop_enabled;
extern int sysctl_nr_open_min;
extern int sysctl_nr_open_max;
#ifndef CONFIG_MMU
extern int sysctl_nr_trim_pages;
#endif /* !CONFIG_MMU */
114
115/* Constants used for minimum and  maximum */
116#ifdef CONFIG_LOCKUP_DETECTOR
117static int sixty = 60;
118#endif
119
120static int __maybe_unused neg_one = -1;
121
122static int zero;
123static int __maybe_unused one = 1;
124static int __maybe_unused two = 2;
125static int __maybe_unused four = 4;
126static unsigned long one_ul = 1;
127static int one_hundred = 100;
128#ifdef CONFIG_PRINTK
129static int ten_thousand = 10000;
130#endif
131
132/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
133static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
134
135/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
136static int maxolduid = 65535;
137static int minolduid;
138
139static int ngroups_max = NGROUPS_MAX;
140static const int cap_last_cap = CAP_LAST_CAP;
141
142/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
143#ifdef CONFIG_DETECT_HUNG_TASK
144static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
145#endif
146
147#ifdef CONFIG_INOTIFY_USER
148#include <linux/inotify.h>
149#endif
150#ifdef CONFIG_SPARC
151#endif
152
/*
 * Architecture- or configuration-specific variables that kern_table
 * exposes; each declaration is compiled only under the same condition
 * as the table entry that uses it.
 */
#ifdef __hppa__
extern int pwrsw_enabled;
#endif /* __hppa__ */

#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
extern int unaligned_enabled;
#endif /* CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW */

#ifdef CONFIG_IA64
extern int unaligned_dump_stack;
#endif /* CONFIG_IA64 */

#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
extern int no_unaligned_warning;
#endif /* CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN */
168
169#ifdef CONFIG_PROC_SYSCTL
170
171#define SYSCTL_WRITES_LEGACY	-1
172#define SYSCTL_WRITES_WARN	 0
173#define SYSCTL_WRITES_STRICT	 1
174
175static int sysctl_writes_strict = SYSCTL_WRITES_WARN;
176
177static int proc_do_cad_pid(struct ctl_table *table, int write,
178		  void __user *buffer, size_t *lenp, loff_t *ppos);
179static int proc_taint(struct ctl_table *table, int write,
180			       void __user *buffer, size_t *lenp, loff_t *ppos);
181#endif
182
183#ifdef CONFIG_PRINTK
184static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
185				void __user *buffer, size_t *lenp, loff_t *ppos);
186#endif
187
188static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
189		void __user *buffer, size_t *lenp, loff_t *ppos);
190#ifdef CONFIG_COREDUMP
191static int proc_dostring_coredump(struct ctl_table *table, int write,
192		void __user *buffer, size_t *lenp, loff_t *ppos);
193#endif
194
195#ifdef CONFIG_MAGIC_SYSRQ
196/* Note: sysrq code uses it's own private copy */
197static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
198
199static int sysrq_sysctl_handler(struct ctl_table *table, int write,
200				void __user *buffer, size_t *lenp,
201				loff_t *ppos)
202{
203	int error;
204
205	error = proc_dointvec(table, write, buffer, lenp, ppos);
206	if (error)
207		return error;
208
209	if (write)
210		sysrq_toggle_support(__sysrq_enabled);
211
212	return 0;
213}
214
215#endif
216
217static struct ctl_table kern_table[];
218static struct ctl_table vm_table[];
219static struct ctl_table fs_table[];
220static struct ctl_table debug_table[];
221static struct ctl_table dev_table[];
222extern struct ctl_table random_table[];
223#ifdef CONFIG_EPOLL
224extern struct ctl_table epoll_table[];
225#endif
226
227#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
228int sysctl_legacy_va_layout;
229#endif
230
231/* The default sysctl tables: */
232
233static struct ctl_table sysctl_base_table[] = {
234	{
235		.procname	= "kernel",
236		.mode		= 0555,
237		.child		= kern_table,
238	},
239	{
240		.procname	= "vm",
241		.mode		= 0555,
242		.child		= vm_table,
243	},
244	{
245		.procname	= "fs",
246		.mode		= 0555,
247		.child		= fs_table,
248	},
249	{
250		.procname	= "debug",
251		.mode		= 0555,
252		.child		= debug_table,
253	},
254	{
255		.procname	= "dev",
256		.mode		= 0555,
257		.child		= dev_table,
258	},
259	{ }
260};
261
/*
 * Bounds for the CONFIG_SCHED_DEBUG scheduler entries in kern_table,
 * passed by address through .extra1/.extra2.
 */
#ifdef CONFIG_SCHED_DEBUG
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
#ifdef CONFIG_SMP
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END - 1;
#endif /* CONFIG_SMP */
#endif /* CONFIG_SCHED_DEBUG */

/* Bounds for the external-fragmentation threshold. */
#ifdef CONFIG_COMPACTION
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif /* CONFIG_COMPACTION */
277
278static struct ctl_table kern_table[] = {
279	{
280		.procname	= "sched_child_runs_first",
281		.data		= &sysctl_sched_child_runs_first,
282		.maxlen		= sizeof(unsigned int),
283		.mode		= 0644,
284		.proc_handler	= proc_dointvec,
285	},
286#ifdef CONFIG_SCHED_DEBUG
287	{
288		.procname	= "sched_min_granularity_ns",
289		.data		= &sysctl_sched_min_granularity,
290		.maxlen		= sizeof(unsigned int),
291		.mode		= 0644,
292		.proc_handler	= sched_proc_update_handler,
293		.extra1		= &min_sched_granularity_ns,
294		.extra2		= &max_sched_granularity_ns,
295	},
296	{
297		.procname	= "sched_latency_ns",
298		.data		= &sysctl_sched_latency,
299		.maxlen		= sizeof(unsigned int),
300		.mode		= 0644,
301		.proc_handler	= sched_proc_update_handler,
302		.extra1		= &min_sched_granularity_ns,
303		.extra2		= &max_sched_granularity_ns,
304	},
305	{
306		.procname	= "sched_wakeup_granularity_ns",
307		.data		= &sysctl_sched_wakeup_granularity,
308		.maxlen		= sizeof(unsigned int),
309		.mode		= 0644,
310		.proc_handler	= sched_proc_update_handler,
311		.extra1		= &min_wakeup_granularity_ns,
312		.extra2		= &max_wakeup_granularity_ns,
313	},
314#ifdef CONFIG_SMP
315	{
316		.procname	= "sched_tunable_scaling",
317		.data		= &sysctl_sched_tunable_scaling,
318		.maxlen		= sizeof(enum sched_tunable_scaling),
319		.mode		= 0644,
320		.proc_handler	= sched_proc_update_handler,
321		.extra1		= &min_sched_tunable_scaling,
322		.extra2		= &max_sched_tunable_scaling,
323	},
324	{
325		.procname	= "sched_migration_cost_ns",
326		.data		= &sysctl_sched_migration_cost,
327		.maxlen		= sizeof(unsigned int),
328		.mode		= 0644,
329		.proc_handler	= proc_dointvec,
330	},
331	{
332		.procname	= "sched_nr_migrate",
333		.data		= &sysctl_sched_nr_migrate,
334		.maxlen		= sizeof(unsigned int),
335		.mode		= 0644,
336		.proc_handler	= proc_dointvec,
337	},
338	{
339		.procname	= "sched_time_avg_ms",
340		.data		= &sysctl_sched_time_avg,
341		.maxlen		= sizeof(unsigned int),
342		.mode		= 0644,
343		.proc_handler	= proc_dointvec,
344	},
345	{
346		.procname	= "sched_shares_window_ns",
347		.data		= &sysctl_sched_shares_window,
348		.maxlen		= sizeof(unsigned int),
349		.mode		= 0644,
350		.proc_handler	= proc_dointvec,
351	},
352	{
353		.procname	= "timer_migration",
354		.data		= &sysctl_timer_migration,
355		.maxlen		= sizeof(unsigned int),
356		.mode		= 0644,
357		.proc_handler	= proc_dointvec_minmax,
358		.extra1		= &zero,
359		.extra2		= &one,
360	},
361#endif /* CONFIG_SMP */
362#ifdef CONFIG_NUMA_BALANCING
363	{
364		.procname	= "numa_balancing_scan_delay_ms",
365		.data		= &sysctl_numa_balancing_scan_delay,
366		.maxlen		= sizeof(unsigned int),
367		.mode		= 0644,
368		.proc_handler	= proc_dointvec,
369	},
370	{
371		.procname	= "numa_balancing_scan_period_min_ms",
372		.data		= &sysctl_numa_balancing_scan_period_min,
373		.maxlen		= sizeof(unsigned int),
374		.mode		= 0644,
375		.proc_handler	= proc_dointvec,
376	},
377	{
378		.procname	= "numa_balancing_scan_period_max_ms",
379		.data		= &sysctl_numa_balancing_scan_period_max,
380		.maxlen		= sizeof(unsigned int),
381		.mode		= 0644,
382		.proc_handler	= proc_dointvec,
383	},
384	{
385		.procname	= "numa_balancing_scan_size_mb",
386		.data		= &sysctl_numa_balancing_scan_size,
387		.maxlen		= sizeof(unsigned int),
388		.mode		= 0644,
389		.proc_handler	= proc_dointvec_minmax,
390		.extra1		= &one,
391	},
392	{
393		.procname	= "numa_balancing",
394		.data		= NULL, /* filled in by handler */
395		.maxlen		= sizeof(unsigned int),
396		.mode		= 0644,
397		.proc_handler	= sysctl_numa_balancing,
398		.extra1		= &zero,
399		.extra2		= &one,
400	},
401#endif /* CONFIG_NUMA_BALANCING */
402#endif /* CONFIG_SCHED_DEBUG */
403	{
404		.procname	= "sched_rt_period_us",
405		.data		= &sysctl_sched_rt_period,
406		.maxlen		= sizeof(unsigned int),
407		.mode		= 0644,
408		.proc_handler	= sched_rt_handler,
409	},
410	{
411		.procname	= "sched_rt_runtime_us",
412		.data		= &sysctl_sched_rt_runtime,
413		.maxlen		= sizeof(int),
414		.mode		= 0644,
415		.proc_handler	= sched_rt_handler,
416	},
417	{
418		.procname	= "sched_rr_timeslice_ms",
419		.data		= &sched_rr_timeslice,
420		.maxlen		= sizeof(int),
421		.mode		= 0644,
422		.proc_handler	= sched_rr_handler,
423	},
424#ifdef CONFIG_SCHED_AUTOGROUP
425	{
426		.procname	= "sched_autogroup_enabled",
427		.data		= &sysctl_sched_autogroup_enabled,
428		.maxlen		= sizeof(unsigned int),
429		.mode		= 0644,
430		.proc_handler	= proc_dointvec_minmax,
431		.extra1		= &zero,
432		.extra2		= &one,
433	},
434#endif
435#ifdef CONFIG_CFS_BANDWIDTH
436	{
437		.procname	= "sched_cfs_bandwidth_slice_us",
438		.data		= &sysctl_sched_cfs_bandwidth_slice,
439		.maxlen		= sizeof(unsigned int),
440		.mode		= 0644,
441		.proc_handler	= proc_dointvec_minmax,
442		.extra1		= &one,
443	},
444#endif
445#ifdef CONFIG_PROVE_LOCKING
446	{
447		.procname	= "prove_locking",
448		.data		= &prove_locking,
449		.maxlen		= sizeof(int),
450		.mode		= 0644,
451		.proc_handler	= proc_dointvec,
452	},
453#endif
454#ifdef CONFIG_LOCK_STAT
455	{
456		.procname	= "lock_stat",
457		.data		= &lock_stat,
458		.maxlen		= sizeof(int),
459		.mode		= 0644,
460		.proc_handler	= proc_dointvec,
461	},
462#endif
463	{
464		.procname	= "panic",
465		.data		= &panic_timeout,
466		.maxlen		= sizeof(int),
467		.mode		= 0644,
468		.proc_handler	= proc_dointvec,
469	},
470#ifdef CONFIG_COREDUMP
471	{
472		.procname	= "core_uses_pid",
473		.data		= &core_uses_pid,
474		.maxlen		= sizeof(int),
475		.mode		= 0644,
476		.proc_handler	= proc_dointvec,
477	},
478	{
479		.procname	= "core_pattern",
480		.data		= core_pattern,
481		.maxlen		= CORENAME_MAX_SIZE,
482		.mode		= 0644,
483		.proc_handler	= proc_dostring_coredump,
484	},
485	{
486		.procname	= "core_pipe_limit",
487		.data		= &core_pipe_limit,
488		.maxlen		= sizeof(unsigned int),
489		.mode		= 0644,
490		.proc_handler	= proc_dointvec,
491	},
492#endif
493#ifdef CONFIG_PROC_SYSCTL
494	{
495		.procname	= "tainted",
496		.maxlen 	= sizeof(long),
497		.mode		= 0644,
498		.proc_handler	= proc_taint,
499	},
500	{
501		.procname	= "sysctl_writes_strict",
502		.data		= &sysctl_writes_strict,
503		.maxlen		= sizeof(int),
504		.mode		= 0644,
505		.proc_handler	= proc_dointvec_minmax,
506		.extra1		= &neg_one,
507		.extra2		= &one,
508	},
509#endif
510#ifdef CONFIG_LATENCYTOP
511	{
512		.procname	= "latencytop",
513		.data		= &latencytop_enabled,
514		.maxlen		= sizeof(int),
515		.mode		= 0644,
516		.proc_handler	= proc_dointvec,
517	},
518#endif
519#ifdef CONFIG_BLK_DEV_INITRD
520	{
521		.procname	= "real-root-dev",
522		.data		= &real_root_dev,
523		.maxlen		= sizeof(int),
524		.mode		= 0644,
525		.proc_handler	= proc_dointvec,
526	},
527#endif
528	{
529		.procname	= "print-fatal-signals",
530		.data		= &print_fatal_signals,
531		.maxlen		= sizeof(int),
532		.mode		= 0644,
533		.proc_handler	= proc_dointvec,
534	},
535#ifdef CONFIG_SPARC
536	{
537		.procname	= "reboot-cmd",
538		.data		= reboot_command,
539		.maxlen		= 256,
540		.mode		= 0644,
541		.proc_handler	= proc_dostring,
542	},
543	{
544		.procname	= "stop-a",
545		.data		= &stop_a_enabled,
546		.maxlen		= sizeof (int),
547		.mode		= 0644,
548		.proc_handler	= proc_dointvec,
549	},
550	{
551		.procname	= "scons-poweroff",
552		.data		= &scons_pwroff,
553		.maxlen		= sizeof (int),
554		.mode		= 0644,
555		.proc_handler	= proc_dointvec,
556	},
557#endif
558#ifdef CONFIG_SPARC64
559	{
560		.procname	= "tsb-ratio",
561		.data		= &sysctl_tsb_ratio,
562		.maxlen		= sizeof (int),
563		.mode		= 0644,
564		.proc_handler	= proc_dointvec,
565	},
566#endif
567#ifdef __hppa__
568	{
569		.procname	= "soft-power",
570		.data		= &pwrsw_enabled,
571		.maxlen		= sizeof (int),
572	 	.mode		= 0644,
573		.proc_handler	= proc_dointvec,
574	},
575#endif
576#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
577	{
578		.procname	= "unaligned-trap",
579		.data		= &unaligned_enabled,
580		.maxlen		= sizeof (int),
581		.mode		= 0644,
582		.proc_handler	= proc_dointvec,
583	},
584#endif
585	{
586		.procname	= "ctrl-alt-del",
587		.data		= &C_A_D,
588		.maxlen		= sizeof(int),
589		.mode		= 0644,
590		.proc_handler	= proc_dointvec,
591	},
592#ifdef CONFIG_FUNCTION_TRACER
593	{
594		.procname	= "ftrace_enabled",
595		.data		= &ftrace_enabled,
596		.maxlen		= sizeof(int),
597		.mode		= 0644,
598		.proc_handler	= ftrace_enable_sysctl,
599	},
600#endif
601#ifdef CONFIG_STACK_TRACER
602	{
603		.procname	= "stack_tracer_enabled",
604		.data		= &stack_tracer_enabled,
605		.maxlen		= sizeof(int),
606		.mode		= 0644,
607		.proc_handler	= stack_trace_sysctl,
608	},
609#endif
610#ifdef CONFIG_TRACING
611	{
612		.procname	= "ftrace_dump_on_oops",
613		.data		= &ftrace_dump_on_oops,
614		.maxlen		= sizeof(int),
615		.mode		= 0644,
616		.proc_handler	= proc_dointvec,
617	},
618	{
619		.procname	= "traceoff_on_warning",
620		.data		= &__disable_trace_on_warning,
621		.maxlen		= sizeof(__disable_trace_on_warning),
622		.mode		= 0644,
623		.proc_handler	= proc_dointvec,
624	},
625	{
626		.procname	= "tracepoint_printk",
627		.data		= &tracepoint_printk,
628		.maxlen		= sizeof(tracepoint_printk),
629		.mode		= 0644,
630		.proc_handler	= proc_dointvec,
631	},
632#endif
633#ifdef CONFIG_KEXEC
634	{
635		.procname	= "kexec_load_disabled",
636		.data		= &kexec_load_disabled,
637		.maxlen		= sizeof(int),
638		.mode		= 0644,
639		/* only handle a transition from default "0" to "1" */
640		.proc_handler	= proc_dointvec_minmax,
641		.extra1		= &one,
642		.extra2		= &one,
643	},
644#endif
645#ifdef CONFIG_MODULES
646	{
647		.procname	= "modprobe",
648		.data		= &modprobe_path,
649		.maxlen		= KMOD_PATH_LEN,
650		.mode		= 0644,
651		.proc_handler	= proc_dostring,
652	},
653	{
654		.procname	= "modules_disabled",
655		.data		= &modules_disabled,
656		.maxlen		= sizeof(int),
657		.mode		= 0644,
658		/* only handle a transition from default "0" to "1" */
659		.proc_handler	= proc_dointvec_minmax,
660		.extra1		= &one,
661		.extra2		= &one,
662	},
663#endif
664#ifdef CONFIG_UEVENT_HELPER
665	{
666		.procname	= "hotplug",
667		.data		= &uevent_helper,
668		.maxlen		= UEVENT_HELPER_PATH_LEN,
669		.mode		= 0644,
670		.proc_handler	= proc_dostring,
671	},
672#endif
673#ifdef CONFIG_CHR_DEV_SG
674	{
675		.procname	= "sg-big-buff",
676		.data		= &sg_big_buff,
677		.maxlen		= sizeof (int),
678		.mode		= 0444,
679		.proc_handler	= proc_dointvec,
680	},
681#endif
682#ifdef CONFIG_BSD_PROCESS_ACCT
683	{
684		.procname	= "acct",
685		.data		= &acct_parm,
686		.maxlen		= 3*sizeof(int),
687		.mode		= 0644,
688		.proc_handler	= proc_dointvec,
689	},
690#endif
691#ifdef CONFIG_MAGIC_SYSRQ
692	{
693		.procname	= "sysrq",
694		.data		= &__sysrq_enabled,
695		.maxlen		= sizeof (int),
696		.mode		= 0644,
697		.proc_handler	= sysrq_sysctl_handler,
698	},
699#endif
700#ifdef CONFIG_PROC_SYSCTL
701	{
702		.procname	= "cad_pid",
703		.data		= NULL,
704		.maxlen		= sizeof (int),
705		.mode		= 0600,
706		.proc_handler	= proc_do_cad_pid,
707	},
708#endif
709	{
710		.procname	= "threads-max",
711		.data		= NULL,
712		.maxlen		= sizeof(int),
713		.mode		= 0644,
714		.proc_handler	= sysctl_max_threads,
715	},
716	{
717		.procname	= "random",
718		.mode		= 0555,
719		.child		= random_table,
720	},
721	{
722		.procname	= "usermodehelper",
723		.mode		= 0555,
724		.child		= usermodehelper_table,
725	},
726	{
727		.procname	= "overflowuid",
728		.data		= &overflowuid,
729		.maxlen		= sizeof(int),
730		.mode		= 0644,
731		.proc_handler	= proc_dointvec_minmax,
732		.extra1		= &minolduid,
733		.extra2		= &maxolduid,
734	},
735	{
736		.procname	= "overflowgid",
737		.data		= &overflowgid,
738		.maxlen		= sizeof(int),
739		.mode		= 0644,
740		.proc_handler	= proc_dointvec_minmax,
741		.extra1		= &minolduid,
742		.extra2		= &maxolduid,
743	},
744#ifdef CONFIG_S390
745#ifdef CONFIG_MATHEMU
746	{
747		.procname	= "ieee_emulation_warnings",
748		.data		= &sysctl_ieee_emulation_warnings,
749		.maxlen		= sizeof(int),
750		.mode		= 0644,
751		.proc_handler	= proc_dointvec,
752	},
753#endif
754	{
755		.procname	= "userprocess_debug",
756		.data		= &show_unhandled_signals,
757		.maxlen		= sizeof(int),
758		.mode		= 0644,
759		.proc_handler	= proc_dointvec,
760	},
761#endif
762	{
763		.procname	= "pid_max",
764		.data		= &pid_max,
765		.maxlen		= sizeof (int),
766		.mode		= 0644,
767		.proc_handler	= proc_dointvec_minmax,
768		.extra1		= &pid_max_min,
769		.extra2		= &pid_max_max,
770	},
771	{
772		.procname	= "panic_on_oops",
773		.data		= &panic_on_oops,
774		.maxlen		= sizeof(int),
775		.mode		= 0644,
776		.proc_handler	= proc_dointvec,
777	},
778#if defined CONFIG_PRINTK
779	{
780		.procname	= "printk",
781		.data		= &console_loglevel,
782		.maxlen		= 4*sizeof(int),
783		.mode		= 0644,
784		.proc_handler	= proc_dointvec,
785	},
786	{
787		.procname	= "printk_ratelimit",
788		.data		= &printk_ratelimit_state.interval,
789		.maxlen		= sizeof(int),
790		.mode		= 0644,
791		.proc_handler	= proc_dointvec_jiffies,
792	},
793	{
794		.procname	= "printk_ratelimit_burst",
795		.data		= &printk_ratelimit_state.burst,
796		.maxlen		= sizeof(int),
797		.mode		= 0644,
798		.proc_handler	= proc_dointvec,
799	},
800	{
801		.procname	= "printk_delay",
802		.data		= &printk_delay_msec,
803		.maxlen		= sizeof(int),
804		.mode		= 0644,
805		.proc_handler	= proc_dointvec_minmax,
806		.extra1		= &zero,
807		.extra2		= &ten_thousand,
808	},
809	{
810		.procname	= "dmesg_restrict",
811		.data		= &dmesg_restrict,
812		.maxlen		= sizeof(int),
813		.mode		= 0644,
814		.proc_handler	= proc_dointvec_minmax_sysadmin,
815		.extra1		= &zero,
816		.extra2		= &one,
817	},
818	{
819		.procname	= "kptr_restrict",
820		.data		= &kptr_restrict,
821		.maxlen		= sizeof(int),
822		.mode		= 0644,
823		.proc_handler	= proc_dointvec_minmax_sysadmin,
824		.extra1		= &zero,
825		.extra2		= &two,
826	},
827#endif
828	{
829		.procname	= "ngroups_max",
830		.data		= &ngroups_max,
831		.maxlen		= sizeof (int),
832		.mode		= 0444,
833		.proc_handler	= proc_dointvec,
834	},
835	{
836		.procname	= "cap_last_cap",
837		.data		= (void *)&cap_last_cap,
838		.maxlen		= sizeof(int),
839		.mode		= 0444,
840		.proc_handler	= proc_dointvec,
841	},
842#if defined(CONFIG_LOCKUP_DETECTOR)
843	{
844		.procname       = "watchdog",
845		.data           = &watchdog_user_enabled,
846		.maxlen         = sizeof (int),
847		.mode           = 0644,
848		.proc_handler   = proc_watchdog,
849		.extra1		= &zero,
850		.extra2		= &one,
851	},
852	{
853		.procname	= "watchdog_thresh",
854		.data		= &watchdog_thresh,
855		.maxlen		= sizeof(int),
856		.mode		= 0644,
857		.proc_handler	= proc_watchdog_thresh,
858		.extra1		= &zero,
859		.extra2		= &sixty,
860	},
861	{
862		.procname       = "nmi_watchdog",
863		.data           = &nmi_watchdog_enabled,
864		.maxlen         = sizeof (int),
865		.mode           = 0644,
866		.proc_handler   = proc_nmi_watchdog,
867		.extra1		= &zero,
868#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
869		.extra2		= &one,
870#else
871		.extra2		= &zero,
872#endif
873	},
874	{
875		.procname       = "soft_watchdog",
876		.data           = &soft_watchdog_enabled,
877		.maxlen         = sizeof (int),
878		.mode           = 0644,
879		.proc_handler   = proc_soft_watchdog,
880		.extra1		= &zero,
881		.extra2		= &one,
882	},
883	{
884		.procname	= "softlockup_panic",
885		.data		= &softlockup_panic,
886		.maxlen		= sizeof(int),
887		.mode		= 0644,
888		.proc_handler	= proc_dointvec_minmax,
889		.extra1		= &zero,
890		.extra2		= &one,
891	},
892#ifdef CONFIG_SMP
893	{
894		.procname	= "softlockup_all_cpu_backtrace",
895		.data		= &sysctl_softlockup_all_cpu_backtrace,
896		.maxlen		= sizeof(int),
897		.mode		= 0644,
898		.proc_handler	= proc_dointvec_minmax,
899		.extra1		= &zero,
900		.extra2		= &one,
901	},
902#endif /* CONFIG_SMP */
903#endif
904#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
905	{
906		.procname       = "unknown_nmi_panic",
907		.data           = &unknown_nmi_panic,
908		.maxlen         = sizeof (int),
909		.mode           = 0644,
910		.proc_handler   = proc_dointvec,
911	},
912#endif
913#if defined(CONFIG_X86)
914	{
915		.procname	= "panic_on_unrecovered_nmi",
916		.data		= &panic_on_unrecovered_nmi,
917		.maxlen		= sizeof(int),
918		.mode		= 0644,
919		.proc_handler	= proc_dointvec,
920	},
921	{
922		.procname	= "panic_on_io_nmi",
923		.data		= &panic_on_io_nmi,
924		.maxlen		= sizeof(int),
925		.mode		= 0644,
926		.proc_handler	= proc_dointvec,
927	},
928#ifdef CONFIG_DEBUG_STACKOVERFLOW
929	{
930		.procname	= "panic_on_stackoverflow",
931		.data		= &sysctl_panic_on_stackoverflow,
932		.maxlen		= sizeof(int),
933		.mode		= 0644,
934		.proc_handler	= proc_dointvec,
935	},
936#endif
937	{
938		.procname	= "bootloader_type",
939		.data		= &bootloader_type,
940		.maxlen		= sizeof (int),
941		.mode		= 0444,
942		.proc_handler	= proc_dointvec,
943	},
944	{
945		.procname	= "bootloader_version",
946		.data		= &bootloader_version,
947		.maxlen		= sizeof (int),
948		.mode		= 0444,
949		.proc_handler	= proc_dointvec,
950	},
951	{
952		.procname	= "kstack_depth_to_print",
953		.data		= &kstack_depth_to_print,
954		.maxlen		= sizeof(int),
955		.mode		= 0644,
956		.proc_handler	= proc_dointvec,
957	},
958	{
959		.procname	= "io_delay_type",
960		.data		= &io_delay_type,
961		.maxlen		= sizeof(int),
962		.mode		= 0644,
963		.proc_handler	= proc_dointvec,
964	},
965#endif
966#if defined(CONFIG_MMU)
967	{
968		.procname	= "randomize_va_space",
969		.data		= &randomize_va_space,
970		.maxlen		= sizeof(int),
971		.mode		= 0644,
972		.proc_handler	= proc_dointvec,
973	},
974#endif
975#if defined(CONFIG_S390) && defined(CONFIG_SMP)
976	{
977		.procname	= "spin_retry",
978		.data		= &spin_retry,
979		.maxlen		= sizeof (int),
980		.mode		= 0644,
981		.proc_handler	= proc_dointvec,
982	},
983#endif
984#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
985	{
986		.procname	= "acpi_video_flags",
987		.data		= &acpi_realmode_flags,
988		.maxlen		= sizeof (unsigned long),
989		.mode		= 0644,
990		.proc_handler	= proc_doulongvec_minmax,
991	},
992#endif
993#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
994	{
995		.procname	= "ignore-unaligned-usertrap",
996		.data		= &no_unaligned_warning,
997		.maxlen		= sizeof (int),
998	 	.mode		= 0644,
999		.proc_handler	= proc_dointvec,
1000	},
1001#endif
1002#ifdef CONFIG_IA64
1003	{
1004		.procname	= "unaligned-dump-stack",
1005		.data		= &unaligned_dump_stack,
1006		.maxlen		= sizeof (int),
1007		.mode		= 0644,
1008		.proc_handler	= proc_dointvec,
1009	},
1010#endif
1011#ifdef CONFIG_DETECT_HUNG_TASK
1012	{
1013		.procname	= "hung_task_panic",
1014		.data		= &sysctl_hung_task_panic,
1015		.maxlen		= sizeof(int),
1016		.mode		= 0644,
1017		.proc_handler	= proc_dointvec_minmax,
1018		.extra1		= &zero,
1019		.extra2		= &one,
1020	},
1021	{
1022		.procname	= "hung_task_check_count",
1023		.data		= &sysctl_hung_task_check_count,
1024		.maxlen		= sizeof(int),
1025		.mode		= 0644,
1026		.proc_handler	= proc_dointvec_minmax,
1027		.extra1		= &zero,
1028	},
1029	{
1030		.procname	= "hung_task_timeout_secs",
1031		.data		= &sysctl_hung_task_timeout_secs,
1032		.maxlen		= sizeof(unsigned long),
1033		.mode		= 0644,
1034		.proc_handler	= proc_dohung_task_timeout_secs,
1035		.extra2		= &hung_task_timeout_max,
1036	},
1037	{
1038		.procname	= "hung_task_warnings",
1039		.data		= &sysctl_hung_task_warnings,
1040		.maxlen		= sizeof(int),
1041		.mode		= 0644,
1042		.proc_handler	= proc_dointvec_minmax,
1043		.extra1		= &neg_one,
1044	},
1045#endif
1046#ifdef CONFIG_COMPAT
1047	{
1048		.procname	= "compat-log",
1049		.data		= &compat_log,
1050		.maxlen		= sizeof (int),
1051	 	.mode		= 0644,
1052		.proc_handler	= proc_dointvec,
1053	},
1054#endif
1055#ifdef CONFIG_RT_MUTEXES
1056	{
1057		.procname	= "max_lock_depth",
1058		.data		= &max_lock_depth,
1059		.maxlen		= sizeof(int),
1060		.mode		= 0644,
1061		.proc_handler	= proc_dointvec,
1062	},
1063#endif
1064	{
1065		.procname	= "poweroff_cmd",
1066		.data		= &poweroff_cmd,
1067		.maxlen		= POWEROFF_CMD_PATH_LEN,
1068		.mode		= 0644,
1069		.proc_handler	= proc_dostring,
1070	},
1071#ifdef CONFIG_KEYS
1072	{
1073		.procname	= "keys",
1074		.mode		= 0555,
1075		.child		= key_sysctls,
1076	},
1077#endif
1078#ifdef CONFIG_PERF_EVENTS
1079	/*
1080	 * User-space scripts rely on the existence of this file
1081	 * as a feature check for perf_events being enabled.
1082	 *
1083	 * So it's an ABI, do not remove!
1084	 */
1085	{
1086		.procname	= "perf_event_paranoid",
1087		.data		= &sysctl_perf_event_paranoid,
1088		.maxlen		= sizeof(sysctl_perf_event_paranoid),
1089		.mode		= 0644,
1090		.proc_handler	= proc_dointvec,
1091	},
1092	{
1093		.procname	= "perf_event_mlock_kb",
1094		.data		= &sysctl_perf_event_mlock,
1095		.maxlen		= sizeof(sysctl_perf_event_mlock),
1096		.mode		= 0644,
1097		.proc_handler	= proc_dointvec,
1098	},
1099	{
1100		.procname	= "perf_event_max_sample_rate",
1101		.data		= &sysctl_perf_event_sample_rate,
1102		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
1103		.mode		= 0644,
1104		.proc_handler	= perf_proc_update_handler,
1105		.extra1		= &one,
1106	},
1107	{
1108		.procname	= "perf_cpu_time_max_percent",
1109		.data		= &sysctl_perf_cpu_time_max_percent,
1110		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
1111		.mode		= 0644,
1112		.proc_handler	= perf_cpu_time_max_percent_handler,
1113		.extra1		= &zero,
1114		.extra2		= &one_hundred,
1115	},
1116#endif
1117#ifdef CONFIG_KMEMCHECK
1118	{
1119		.procname	= "kmemcheck",
1120		.data		= &kmemcheck_enabled,
1121		.maxlen		= sizeof(int),
1122		.mode		= 0644,
1123		.proc_handler	= proc_dointvec,
1124	},
1125#endif
1126	{
1127		.procname	= "panic_on_warn",
1128		.data		= &panic_on_warn,
1129		.maxlen		= sizeof(int),
1130		.mode		= 0644,
1131		.proc_handler	= proc_dointvec_minmax,
1132		.extra1		= &zero,
1133		.extra2		= &one,
1134	},
1135	{ }
1136};
1137
1138static struct ctl_table vm_table[] = {
1139	{
1140		.procname	= "overcommit_memory",
1141		.data		= &sysctl_overcommit_memory,
1142		.maxlen		= sizeof(sysctl_overcommit_memory),
1143		.mode		= 0644,
1144		.proc_handler	= proc_dointvec_minmax,
1145		.extra1		= &zero,
1146		.extra2		= &two,
1147	},
1148	{
1149		.procname	= "panic_on_oom",
1150		.data		= &sysctl_panic_on_oom,
1151		.maxlen		= sizeof(sysctl_panic_on_oom),
1152		.mode		= 0644,
1153		.proc_handler	= proc_dointvec_minmax,
1154		.extra1		= &zero,
1155		.extra2		= &two,
1156	},
1157	{
1158		.procname	= "oom_kill_allocating_task",
1159		.data		= &sysctl_oom_kill_allocating_task,
1160		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
1161		.mode		= 0644,
1162		.proc_handler	= proc_dointvec,
1163	},
1164	{
1165		.procname	= "oom_dump_tasks",
1166		.data		= &sysctl_oom_dump_tasks,
1167		.maxlen		= sizeof(sysctl_oom_dump_tasks),
1168		.mode		= 0644,
1169		.proc_handler	= proc_dointvec,
1170	},
1171	{
1172		.procname	= "overcommit_ratio",
1173		.data		= &sysctl_overcommit_ratio,
1174		.maxlen		= sizeof(sysctl_overcommit_ratio),
1175		.mode		= 0644,
1176		.proc_handler	= overcommit_ratio_handler,
1177	},
1178	{
1179		.procname	= "overcommit_kbytes",
1180		.data		= &sysctl_overcommit_kbytes,
1181		.maxlen		= sizeof(sysctl_overcommit_kbytes),
1182		.mode		= 0644,
1183		.proc_handler	= overcommit_kbytes_handler,
1184	},
1185	{
1186		.procname	= "page-cluster",
1187		.data		= &page_cluster,
1188		.maxlen		= sizeof(int),
1189		.mode		= 0644,
1190		.proc_handler	= proc_dointvec_minmax,
1191		.extra1		= &zero,
1192	},
1193	{
1194		.procname	= "dirty_background_ratio",
1195		.data		= &dirty_background_ratio,
1196		.maxlen		= sizeof(dirty_background_ratio),
1197		.mode		= 0644,
1198		.proc_handler	= dirty_background_ratio_handler,
1199		.extra1		= &zero,
1200		.extra2		= &one_hundred,
1201	},
1202	{
1203		.procname	= "dirty_background_bytes",
1204		.data		= &dirty_background_bytes,
1205		.maxlen		= sizeof(dirty_background_bytes),
1206		.mode		= 0644,
1207		.proc_handler	= dirty_background_bytes_handler,
1208		.extra1		= &one_ul,
1209	},
1210	{
1211		.procname	= "dirty_ratio",
1212		.data		= &vm_dirty_ratio,
1213		.maxlen		= sizeof(vm_dirty_ratio),
1214		.mode		= 0644,
1215		.proc_handler	= dirty_ratio_handler,
1216		.extra1		= &zero,
1217		.extra2		= &one_hundred,
1218	},
1219	{
1220		.procname	= "dirty_bytes",
1221		.data		= &vm_dirty_bytes,
1222		.maxlen		= sizeof(vm_dirty_bytes),
1223		.mode		= 0644,
1224		.proc_handler	= dirty_bytes_handler,
1225		.extra1		= &dirty_bytes_min,
1226	},
1227	{
1228		.procname	= "dirty_writeback_centisecs",
1229		.data		= &dirty_writeback_interval,
1230		.maxlen		= sizeof(dirty_writeback_interval),
1231		.mode		= 0644,
1232		.proc_handler	= dirty_writeback_centisecs_handler,
1233	},
1234	{
1235		.procname	= "dirty_expire_centisecs",
1236		.data		= &dirty_expire_interval,
1237		.maxlen		= sizeof(dirty_expire_interval),
1238		.mode		= 0644,
1239		.proc_handler	= proc_dointvec_minmax,
1240		.extra1		= &zero,
1241	},
1242	{
1243		.procname	= "dirtytime_expire_seconds",
1244		.data		= &dirtytime_expire_interval,
1245		.maxlen		= sizeof(dirty_expire_interval),
1246		.mode		= 0644,
1247		.proc_handler	= dirtytime_interval_handler,
1248		.extra1		= &zero,
1249	},
1250	{
1251		.procname       = "nr_pdflush_threads",
1252		.mode           = 0444 /* read-only */,
1253		.proc_handler   = pdflush_proc_obsolete,
1254	},
1255	{
1256		.procname	= "swappiness",
1257		.data		= &vm_swappiness,
1258		.maxlen		= sizeof(vm_swappiness),
1259		.mode		= 0644,
1260		.proc_handler	= proc_dointvec_minmax,
1261		.extra1		= &zero,
1262		.extra2		= &one_hundred,
1263	},
1264#ifdef CONFIG_HUGETLB_PAGE
1265	{
1266		.procname	= "nr_hugepages",
1267		.data		= NULL,
1268		.maxlen		= sizeof(unsigned long),
1269		.mode		= 0644,
1270		.proc_handler	= hugetlb_sysctl_handler,
1271	},
1272#ifdef CONFIG_NUMA
1273	{
1274		.procname       = "nr_hugepages_mempolicy",
1275		.data           = NULL,
1276		.maxlen         = sizeof(unsigned long),
1277		.mode           = 0644,
1278		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1279	},
1280#endif
1281	 {
1282		.procname	= "hugetlb_shm_group",
1283		.data		= &sysctl_hugetlb_shm_group,
1284		.maxlen		= sizeof(gid_t),
1285		.mode		= 0644,
1286		.proc_handler	= proc_dointvec,
1287	 },
1288	 {
1289		.procname	= "hugepages_treat_as_movable",
1290		.data		= &hugepages_treat_as_movable,
1291		.maxlen		= sizeof(int),
1292		.mode		= 0644,
1293		.proc_handler	= proc_dointvec,
1294	},
1295	{
1296		.procname	= "nr_overcommit_hugepages",
1297		.data		= NULL,
1298		.maxlen		= sizeof(unsigned long),
1299		.mode		= 0644,
1300		.proc_handler	= hugetlb_overcommit_handler,
1301	},
1302#endif
1303	{
1304		.procname	= "lowmem_reserve_ratio",
1305		.data		= &sysctl_lowmem_reserve_ratio,
1306		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
1307		.mode		= 0644,
1308		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
1309	},
1310	{
1311		.procname	= "drop_caches",
1312		.data		= &sysctl_drop_caches,
1313		.maxlen		= sizeof(int),
1314		.mode		= 0644,
1315		.proc_handler	= drop_caches_sysctl_handler,
1316		.extra1		= &one,
1317		.extra2		= &four,
1318	},
1319#ifdef CONFIG_COMPACTION
1320	{
1321		.procname	= "compact_memory",
1322		.data		= &sysctl_compact_memory,
1323		.maxlen		= sizeof(int),
1324		.mode		= 0200,
1325		.proc_handler	= sysctl_compaction_handler,
1326	},
1327	{
1328		.procname	= "extfrag_threshold",
1329		.data		= &sysctl_extfrag_threshold,
1330		.maxlen		= sizeof(int),
1331		.mode		= 0644,
1332		.proc_handler	= sysctl_extfrag_handler,
1333		.extra1		= &min_extfrag_threshold,
1334		.extra2		= &max_extfrag_threshold,
1335	},
1336	{
1337		.procname	= "compact_unevictable_allowed",
1338		.data		= &sysctl_compact_unevictable_allowed,
1339		.maxlen		= sizeof(int),
1340		.mode		= 0644,
1341		.proc_handler	= proc_dointvec,
1342		.extra1		= &zero,
1343		.extra2		= &one,
1344	},
1345
1346#endif /* CONFIG_COMPACTION */
1347	{
1348		.procname	= "min_free_kbytes",
1349		.data		= &min_free_kbytes,
1350		.maxlen		= sizeof(min_free_kbytes),
1351		.mode		= 0644,
1352		.proc_handler	= min_free_kbytes_sysctl_handler,
1353		.extra1		= &zero,
1354	},
1355	{
1356		.procname	= "percpu_pagelist_fraction",
1357		.data		= &percpu_pagelist_fraction,
1358		.maxlen		= sizeof(percpu_pagelist_fraction),
1359		.mode		= 0644,
1360		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
1361		.extra1		= &zero,
1362	},
1363#ifdef CONFIG_MMU
1364	{
1365		.procname	= "max_map_count",
1366		.data		= &sysctl_max_map_count,
1367		.maxlen		= sizeof(sysctl_max_map_count),
1368		.mode		= 0644,
1369		.proc_handler	= proc_dointvec_minmax,
1370		.extra1		= &zero,
1371	},
1372#else
1373	{
1374		.procname	= "nr_trim_pages",
1375		.data		= &sysctl_nr_trim_pages,
1376		.maxlen		= sizeof(sysctl_nr_trim_pages),
1377		.mode		= 0644,
1378		.proc_handler	= proc_dointvec_minmax,
1379		.extra1		= &zero,
1380	},
1381#endif
1382	{
1383		.procname	= "laptop_mode",
1384		.data		= &laptop_mode,
1385		.maxlen		= sizeof(laptop_mode),
1386		.mode		= 0644,
1387		.proc_handler	= proc_dointvec_jiffies,
1388	},
1389	{
1390		.procname	= "block_dump",
1391		.data		= &block_dump,
1392		.maxlen		= sizeof(block_dump),
1393		.mode		= 0644,
1394		.proc_handler	= proc_dointvec,
1395		.extra1		= &zero,
1396	},
1397	{
1398		.procname	= "vfs_cache_pressure",
1399		.data		= &sysctl_vfs_cache_pressure,
1400		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
1401		.mode		= 0644,
1402		.proc_handler	= proc_dointvec,
1403		.extra1		= &zero,
1404	},
1405#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1406	{
1407		.procname	= "legacy_va_layout",
1408		.data		= &sysctl_legacy_va_layout,
1409		.maxlen		= sizeof(sysctl_legacy_va_layout),
1410		.mode		= 0644,
1411		.proc_handler	= proc_dointvec,
1412		.extra1		= &zero,
1413	},
1414#endif
1415#ifdef CONFIG_NUMA
1416	{
1417		.procname	= "zone_reclaim_mode",
1418		.data		= &zone_reclaim_mode,
1419		.maxlen		= sizeof(zone_reclaim_mode),
1420		.mode		= 0644,
1421		.proc_handler	= proc_dointvec,
1422		.extra1		= &zero,
1423	},
1424	{
1425		.procname	= "min_unmapped_ratio",
1426		.data		= &sysctl_min_unmapped_ratio,
1427		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1428		.mode		= 0644,
1429		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
1430		.extra1		= &zero,
1431		.extra2		= &one_hundred,
1432	},
1433	{
1434		.procname	= "min_slab_ratio",
1435		.data		= &sysctl_min_slab_ratio,
1436		.maxlen		= sizeof(sysctl_min_slab_ratio),
1437		.mode		= 0644,
1438		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
1439		.extra1		= &zero,
1440		.extra2		= &one_hundred,
1441	},
1442#endif
1443#ifdef CONFIG_SMP
1444	{
1445		.procname	= "stat_interval",
1446		.data		= &sysctl_stat_interval,
1447		.maxlen		= sizeof(sysctl_stat_interval),
1448		.mode		= 0644,
1449		.proc_handler	= proc_dointvec_jiffies,
1450	},
1451#endif
1452#ifdef CONFIG_MMU
1453	{
1454		.procname	= "mmap_min_addr",
1455		.data		= &dac_mmap_min_addr,
1456		.maxlen		= sizeof(unsigned long),
1457		.mode		= 0644,
1458		.proc_handler	= mmap_min_addr_handler,
1459	},
1460#endif
1461#ifdef CONFIG_NUMA
1462	{
1463		.procname	= "numa_zonelist_order",
1464		.data		= &numa_zonelist_order,
1465		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1466		.mode		= 0644,
1467		.proc_handler	= numa_zonelist_order_handler,
1468	},
1469#endif
1470#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1471   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1472	{
1473		.procname	= "vdso_enabled",
1474#ifdef CONFIG_X86_32
1475		.data		= &vdso32_enabled,
1476		.maxlen		= sizeof(vdso32_enabled),
1477#else
1478		.data		= &vdso_enabled,
1479		.maxlen		= sizeof(vdso_enabled),
1480#endif
1481		.mode		= 0644,
1482		.proc_handler	= proc_dointvec,
1483		.extra1		= &zero,
1484	},
1485#endif
1486#ifdef CONFIG_HIGHMEM
1487	{
1488		.procname	= "highmem_is_dirtyable",
1489		.data		= &vm_highmem_is_dirtyable,
1490		.maxlen		= sizeof(vm_highmem_is_dirtyable),
1491		.mode		= 0644,
1492		.proc_handler	= proc_dointvec_minmax,
1493		.extra1		= &zero,
1494		.extra2		= &one,
1495	},
1496#endif
1497#ifdef CONFIG_MEMORY_FAILURE
1498	{
1499		.procname	= "memory_failure_early_kill",
1500		.data		= &sysctl_memory_failure_early_kill,
1501		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
1502		.mode		= 0644,
1503		.proc_handler	= proc_dointvec_minmax,
1504		.extra1		= &zero,
1505		.extra2		= &one,
1506	},
1507	{
1508		.procname	= "memory_failure_recovery",
1509		.data		= &sysctl_memory_failure_recovery,
1510		.maxlen		= sizeof(sysctl_memory_failure_recovery),
1511		.mode		= 0644,
1512		.proc_handler	= proc_dointvec_minmax,
1513		.extra1		= &zero,
1514		.extra2		= &one,
1515	},
1516#endif
1517	{
1518		.procname	= "user_reserve_kbytes",
1519		.data		= &sysctl_user_reserve_kbytes,
1520		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
1521		.mode		= 0644,
1522		.proc_handler	= proc_doulongvec_minmax,
1523	},
1524	{
1525		.procname	= "admin_reserve_kbytes",
1526		.data		= &sysctl_admin_reserve_kbytes,
1527		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
1528		.mode		= 0644,
1529		.proc_handler	= proc_doulongvec_minmax,
1530	},
1531	{ }
1532};
1533
/*
 * fs_table: sysctl entries for filesystem-related tunables and statistics.
 *
 * Each entry names the file (.procname), the kernel object backing it
 * (.data) together with its size in bytes (.maxlen), the permission bits
 * (.mode) and the handler servicing reads and writes (.proc_handler).
 * For entries handled by proc_dointvec_minmax, .extra1/.extra2 point at
 * the lowest/highest value accepted.  Entries with .child describe a
 * nested table instead of a value.  The array ends with an empty entry.
 *
 * NOTE(review): the directory this table is published under is decided by
 * whoever references fs_table, which is outside this definition.
 */
static struct ctl_table fs_table[] = {
	/* Read-only view of the first two longs of inodes_stat. */
	{
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		.maxlen		= 2*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	/* Same object as inode-nr, but exposing seven longs. */
	{
		.procname	= "inode-state",
		.data		= &inodes_stat,
		.maxlen		= 7*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	/* Read-only dump of the whole files_stat object. */
	{
		.procname	= "file-nr",
		.data		= &files_stat,
		.maxlen		= sizeof(files_stat),
		.mode		= 0444,
		.proc_handler	= proc_nr_files,
	},
	/* Writable: only the max_files member of files_stat. */
	{
		.procname	= "file-max",
		.data		= &files_stat.max_files,
		.maxlen		= sizeof(files_stat.max_files),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	/* Bounded by sysctl_nr_open_min .. sysctl_nr_open_max. */
	{
		.procname	= "nr_open",
		.data		= &sysctl_nr_open,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &sysctl_nr_open_min,
		.extra2		= &sysctl_nr_open_max,
	},
	/* Read-only: six longs starting at dentry_stat. */
	{
		.procname	= "dentry-state",
		.data		= &dentry_stat,
		.maxlen		= 6*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_dentry,
	},
	/* Bounded by minolduid .. maxolduid. */
	{
		.procname	= "overflowuid",
		.data		= &fs_overflowuid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
	/* Uses the same minolduid .. maxolduid bounds as overflowuid. */
	{
		.procname	= "overflowgid",
		.data		= &fs_overflowgid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
#ifdef CONFIG_FILE_LOCKING
	{
		.procname	= "leases-enable",
		.data		= &leases_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_DNOTIFY
	{
		.procname	= "dir-notify-enable",
		.data		= &dir_notify_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	/*
	 * Everything from here up to the matching #endif (lease-break-time,
	 * aio-*, inotify, epoll) is only built when CONFIG_MMU is set, in
	 * addition to each entry's own config option.
	 */
#ifdef CONFIG_MMU
#ifdef CONFIG_FILE_LOCKING
	{
		.procname	= "lease-break-time",
		.data		= &lease_break_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_AIO
	/* Read-only counter. */
	{
		.procname	= "aio-nr",
		.data		= &aio_nr,
		.maxlen		= sizeof(aio_nr),
		.mode		= 0444,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "aio-max-nr",
		.data		= &aio_max_nr,
		.maxlen		= sizeof(aio_max_nr),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
#endif /* CONFIG_AIO */
#ifdef CONFIG_INOTIFY_USER
	/* Directory entry: contents come from inotify_table. */
	{
		.procname	= "inotify",
		.mode		= 0555,
		.child		= inotify_table,
	},
#endif
#ifdef CONFIG_EPOLL
	/* Directory entry: contents come from epoll_table. */
	{
		.procname	= "epoll",
		.mode		= 0555,
		.child		= epoll_table,
	},
#endif
	/* End of the CONFIG_MMU-only entries. */
#endif
	/* Owner-only access (0600); accepts 0 or 1. */
	{
		.procname	= "protected_symlinks",
		.data		= &sysctl_protected_symlinks,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
	/* Owner-only access (0600); accepts 0 or 1. */
	{
		.procname	= "protected_hardlinks",
		.data		= &sysctl_protected_hardlinks,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
	/* Accepts 0..2; the handler also re-checks core dump safety. */
	{
		.procname	= "suid_dumpable",
		.data		= &suid_dumpable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax_coredump,
		.extra1		= &zero,
		.extra2		= &two,
	},
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
	/* Directory entry whose child table is sysctl_mount_point. */
	{
		.procname	= "binfmt_misc",
		.mode		= 0555,
		.child		= sysctl_mount_point,
	},
#endif
	/* Lower bound supplied via pipe_min_size; no upper bound given here. */
	{
		.procname	= "pipe-max-size",
		.data		= &pipe_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &pipe_proc_fn,
		.extra1		= &pipe_min_size,
	},
	{ }	/* terminating empty entry */
};
1700
/*
 * debug_table: sysctl entries for debugging knobs.
 *
 * Both entries are conditional on a config option, so with neither option
 * enabled the table holds only the terminating empty entry.
 */
static struct ctl_table debug_table[] = {
#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
	/* Plain int backed by show_unhandled_signals. */
	{
		.procname	= "exception-trace",
		.data		= &show_unhandled_signals,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#endif
#if defined(CONFIG_OPTPROBES)
	/* Dedicated handler; .extra1/.extra2 supply the bounds 0 and 1. */
	{
		.procname	= "kprobes-optimization",
		.data		= &sysctl_kprobes_optimization,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_kprobes_optimization_handler,
		.extra1		= &zero,
		.extra2		= &one,
	},
#endif
	{ }	/* terminating empty entry */
};
1724
/*
 * dev_table: holds no entries of its own here, only the terminating
 * empty entry.
 */
static struct ctl_table dev_table[] = {
	{ }
};
1728
1729int __init sysctl_init(void)
1730{
1731	struct ctl_table_header *hdr;
1732
1733	hdr = register_sysctl_table(sysctl_base_table);
1734	kmemleak_not_leak(hdr);
1735	return 0;
1736}
1737
1738#endif /* CONFIG_SYSCTL */
1739
1740/*
1741 * /proc/sys support
1742 */
1743
1744#ifdef CONFIG_PROC_SYSCTL
1745
/*
 * _proc_do_string - copy a string between a kernel buffer and user space
 * @data: kernel buffer holding the NUL-terminated string
 * @maxlen: size of @data in bytes
 * @write: non-zero to copy from @buffer into @data, zero to copy out
 * @buffer: user buffer
 * @lenp: in: size of @buffer; out (read path): number of bytes produced
 * @ppos: file position, advanced by this call
 *
 * Returns 0 on success or -EFAULT if a user access fails.  When @data is
 * NULL, @maxlen is 0 or the request is empty, *@lenp is set to 0 and 0 is
 * returned.
 */
static int _proc_do_string(char *data, int maxlen, int write,
			   char __user *buffer,
			   size_t *lenp, loff_t *ppos)
{
	size_t len;
	char __user *p;
	char c;

	if (!data || !maxlen || !*lenp) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
			/*
			 * Strict mode: the write continues at offset *ppos,
			 * but only if that offset lies within the current
			 * string (clamped to the buffer).  A position past
			 * the end is ignored: 0 is returned and *lenp is
			 * left untouched.
			 */
			len = strlen(data);
			if (len > maxlen - 1)
				len = maxlen - 1;

			if (*ppos > len)
				return 0;
			len = *ppos;
		} else {
			/* Start writing from beginning of buffer. */
			len = 0;
		}

		/*
		 * The position advances by the full request size, no matter
		 * how many bytes end up being stored below.
		 */
		*ppos += *lenp;
		p = buffer;
		/*
		 * Copy byte by byte until the user data runs out, the kernel
		 * buffer is full (one byte is kept for the terminator), or a
		 * NUL/newline is seen.
		 */
		while ((p - buffer) < *lenp && len < maxlen - 1) {
			if (get_user(c, p++))
				return -EFAULT;
			if (c == 0 || c == '\n')
				break;
			data[len++] = c;
		}
		data[len] = 0;
	} else {
		len = strlen(data);
		if (len > maxlen)
			len = maxlen;

		/* Reading past the end of the string yields zero bytes. */
		if (*ppos > len) {
			*lenp = 0;
			return 0;
		}

		data += *ppos;
		len  -= *ppos;

		if (len > *lenp)
			len = *lenp;
		if (len)
			if (copy_to_user(buffer, data, len))
				return -EFAULT;
		/* Append a newline when the user buffer still has room. */
		if (len < *lenp) {
			if (put_user('\n', buffer + len))
				return -EFAULT;
			len++;
		}
		*lenp = len;
		*ppos += len;
	}
	return 0;
}
1812
1813static void warn_sysctl_write(struct ctl_table *table)
1814{
1815	pr_warn_once("%s wrote to %s when file position was not 0!\n"
1816		"This will not be supported in the future. To silence this\n"
1817		"warning, set kernel.sysctl_writes_strict = -1\n",
1818		current->comm, table->procname);
1819}
1820
1821/**
1822 * proc_dostring - read a string sysctl
1823 * @table: the sysctl table
1824 * @write: %TRUE if this is a write to the sysctl file
1825 * @buffer: the user buffer
1826 * @lenp: the size of the user buffer
1827 * @ppos: file position
1828 *
1829 * Reads/writes a string from/to the user buffer. If the kernel
1830 * buffer provided is not large enough to hold the string, the
1831 * string is truncated. The copied string is %NULL-terminated.
1832 * If the string is being read by the user process, it is copied
1833 * and a newline '\n' is added. It is truncated if the buffer is
1834 * not large enough.
1835 *
1836 * Returns 0 on success.
1837 */
1838int proc_dostring(struct ctl_table *table, int write,
1839		  void __user *buffer, size_t *lenp, loff_t *ppos)
1840{
1841	if (write && *ppos && sysctl_writes_strict == SYSCTL_WRITES_WARN)
1842		warn_sysctl_write(table);
1843
1844	return _proc_do_string((char *)(table->data), table->maxlen, write,
1845			       (char __user *)buffer, lenp, ppos);
1846}
1847
1848static size_t proc_skip_spaces(char **buf)
1849{
1850	size_t ret;
1851	char *tmp = skip_spaces(*buf);
1852	ret = tmp - *buf;
1853	*buf = tmp;
1854	return ret;
1855}
1856
/*
 * Step *buf over a run of the character @v, consuming at most *size
 * characters; *size is reduced by the number of characters skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size != 0 && **buf == v; ++*buf)
		--*size;
}
1866
1867#define TMPBUFLEN 22
1868/**
1869 * proc_get_long - reads an ASCII formatted integer from a user buffer
1870 *
1871 * @buf: a kernel buffer
1872 * @size: size of the kernel buffer
1873 * @val: this is where the number will be stored
1874 * @neg: set to %TRUE if number is negative
1875 * @perm_tr: a vector which contains the allowed trailers
1876 * @perm_tr_len: size of the perm_tr vector
1877 * @tr: pointer to store the trailer character
1878 *
1879 * In case of success %0 is returned and @buf and @size are updated with
1880 * the amount of bytes read. If @tr is non-NULL and a trailing
1881 * character exists (size is non-zero after returning from this
1882 * function), @tr is updated with the trailing character.
1883 */
1884static int proc_get_long(char **buf, size_t *size,
1885			  unsigned long *val, bool *neg,
1886			  const char *perm_tr, unsigned perm_tr_len, char *tr)
1887{
1888	int len;
1889	char *p, tmp[TMPBUFLEN];
1890
1891	if (!*size)
1892		return -EINVAL;
1893
1894	len = *size;
1895	if (len > TMPBUFLEN - 1)
1896		len = TMPBUFLEN - 1;
1897
1898	memcpy(tmp, *buf, len);
1899
1900	tmp[len] = 0;
1901	p = tmp;
1902	if (*p == '-' && *size > 1) {
1903		*neg = true;
1904		p++;
1905	} else
1906		*neg = false;
1907	if (!isdigit(*p))
1908		return -EINVAL;
1909
1910	*val = simple_strtoul(p, &p, 0);
1911
1912	len = p - tmp;
1913
1914	/* We don't know if the next char is whitespace thus we may accept
1915	 * invalid integers (e.g. 1234...a) or two integers instead of one
1916	 * (e.g. 123...1). So lets not allow such large numbers. */
1917	if (len == TMPBUFLEN - 1)
1918		return -EINVAL;
1919
1920	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
1921		return -EINVAL;
1922
1923	if (tr && (len < *size))
1924		*tr = *p;
1925
1926	*buf += len;
1927	*size -= len;
1928
1929	return 0;
1930}
1931
1932/**
1933 * proc_put_long - converts an integer to a decimal ASCII formatted string
1934 *
1935 * @buf: the user buffer
1936 * @size: the size of the user buffer
1937 * @val: the integer to be converted
1938 * @neg: sign of the number, %TRUE for negative
1939 *
1940 * In case of success %0 is returned and @buf and @size are updated with
1941 * the amount of bytes written.
1942 */
1943static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
1944			  bool neg)
1945{
1946	int len;
1947	char tmp[TMPBUFLEN], *p = tmp;
1948
1949	sprintf(p, "%s%lu", neg ? "-" : "", val);
1950	len = strlen(tmp);
1951	if (len > *size)
1952		len = *size;
1953	if (copy_to_user(*buf, tmp, len))
1954		return -EFAULT;
1955	*size -= len;
1956	*buf += len;
1957	return 0;
1958}
1959#undef TMPBUFLEN
1960
1961static int proc_put_char(void __user **buf, size_t *size, char c)
1962{
1963	if (*size) {
1964		char __user **buffer = (char __user **)buf;
1965		if (put_user(c, *buffer))
1966			return -EFAULT;
1967		(*size)--, (*buffer)++;
1968		*buf = *buffer;
1969	}
1970	return 0;
1971}
1972
/*
 * do_proc_dointvec_conv - default converter between an int and the
 * sign/magnitude pair used by the vector parser
 * @negp: sign flag (input on write, output on read)
 * @lvalp: magnitude (input on write, output on read)
 * @valp: the int being written to or read from
 * @write: non-zero to convert (*negp, *lvalp) into *valp, zero for the
 *         opposite direction
 * @data: unused
 *
 * Returns 0 on success, or -EINVAL on a write whose value does not fit
 * in an int.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			/* INT_MIN has a magnitude one larger than INT_MAX. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			*valp = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			*valp = *lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/*
			 * Negate after widening to unsigned: "-val" would
			 * overflow for INT_MIN and then sign-extend into a
			 * bogus magnitude on 64-bit.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1999
/* Characters accepted directly after a number when parsing a vector write. */
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };

/*
 * __do_proc_dointvec - common read/write engine for int-vector sysctls
 * @tbl_data: first element of the int array being exposed
 * @table: the sysctl entry; ->maxlen gives the array size in bytes
 * @write: non-zero for a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: in: size of @buffer; out: number of bytes consumed/produced
 * @ppos: file position
 * @conv: converter between an array element and the sign/magnitude pair
 *        used by the parser; do_proc_dointvec_conv is used when NULL
 * @data: opaque argument handed to @conv
 *
 * Reads emit the values separated by tabs and followed by a newline.
 * Writes parse whitespace-separated numbers, at most one per array
 * element, from at most PAGE_SIZE - 1 bytes of input.
 *
 * Returns 0 on success, -ENOMEM if no temporary page is available,
 * -EFAULT on a failed user access, or -EINVAL for unparsable or rejected
 * input (including a write that contains no number at all).
 */
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
		  int write, void __user *buffer,
		  size_t *lenp, loff_t *ppos,
		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	int *i, vleft, first = 1, err = 0;
	unsigned long page = 0;
	size_t left;
	char *kbuf;

	/* Nothing to do; a read at a non-zero position returns no data. */
	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	i = (int *) tbl_data;
	vleft = table->maxlen / sizeof(*i);
	left = *lenp;

	if (!conv)
		conv = do_proc_dointvec_conv;

	if (write) {
		if (*ppos) {
			switch (sysctl_writes_strict) {
			case SYSCTL_WRITES_STRICT:
				/*
				 * Strict mode: skip parsing entirely; the
				 * "out" path still advances *ppos by *lenp
				 * and returns 0.
				 */
				goto out;
			case SYSCTL_WRITES_WARN:
				warn_sysctl_write(table);
				break;
			default:
				break;
			}
		}

		/* Copy the input into a NUL-terminated temporary page. */
		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		page = __get_free_page(GFP_TEMPORARY);
		kbuf = (char *) page;
		if (!kbuf)
			return -ENOMEM;
		if (copy_from_user(kbuf, buffer, left)) {
			err = -EFAULT;
			goto free;
		}
		kbuf[left] = 0;
	}

	/* One iteration per array element while input/output space remains. */
	for (; left && vleft--; i++, first=0) {
		unsigned long lval;
		bool neg;

		if (write) {
			left -= proc_skip_spaces(&kbuf);

			/* Input exhausted: fewer values than elements is OK. */
			if (!left)
				break;
			err = proc_get_long(&kbuf, &left, &lval, &neg,
					     proc_wspace_sep,
					     sizeof(proc_wspace_sep), NULL);
			if (err)
				break;
			/* Any converter failure is reported as -EINVAL. */
			if (conv(&neg, &lval, i, 1, data)) {
				err = -EINVAL;
				break;
			}
		} else {
			if (conv(&neg, &lval, i, 0, data)) {
				err = -EINVAL;
				break;
			}
			if (!first)
				err = proc_put_char(&buffer, &left, '\t');
			if (err)
				break;
			err = proc_put_long(&buffer, &left, lval, neg);
			if (err)
				break;
		}
	}

	/* Terminate read output with a newline if anything was emitted. */
	if (!write && !first && left && !err)
		err = proc_put_char(&buffer, &left, '\n');
	/* Trailing whitespace after the last value counts as consumed. */
	if (write && !err && left)
		left -= proc_skip_spaces(&kbuf);
free:
	if (write) {
		free_page(page);
		/* "first" still set means not a single value was stored. */
		if (first)
			return err ? : -EINVAL;
	}
	/* Report only the bytes actually consumed or produced. */
	*lenp -= left;
out:
	*ppos += *lenp;
	return err;
}
2100
2101static int do_proc_dointvec(struct ctl_table *table, int write,
2102		  void __user *buffer, size_t *lenp, loff_t *ppos,
2103		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2104			      int write, void *data),
2105		  void *data)
2106{
2107	return __do_proc_dointvec(table->data, table, write,
2108			buffer, lenp, ppos, conv, data);
2109}
2110
2111/**
2112 * proc_dointvec - read a vector of integers
2113 * @table: the sysctl table
2114 * @write: %TRUE if this is a write to the sysctl file
2115 * @buffer: the user buffer
2116 * @lenp: the size of the user buffer
2117 * @ppos: file position
2118 *
2119 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2120 * values from/to the user buffer, treated as an ASCII string.
2121 *
2122 * Returns 0 on success.
2123 */
2124int proc_dointvec(struct ctl_table *table, int write,
2125		     void __user *buffer, size_t *lenp, loff_t *ppos)
2126{
2127    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2128		    	    NULL,NULL);
2129}
2130
2131/*
2132 * Taint values can only be increased
2133 * This means we can safely use a temporary.
2134 */
2135static int proc_taint(struct ctl_table *table, int write,
2136			       void __user *buffer, size_t *lenp, loff_t *ppos)
2137{
2138	struct ctl_table t;
2139	unsigned long tmptaint = get_taint();
2140	int err;
2141
2142	if (write && !capable(CAP_SYS_ADMIN))
2143		return -EPERM;
2144
2145	t = *table;
2146	t.data = &tmptaint;
2147	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2148	if (err < 0)
2149		return err;
2150
2151	if (write) {
2152		/*
2153		 * Poor man's atomic or. Not worth adding a primitive
2154		 * to everyone's atomic.h for this
2155		 */
2156		int i;
2157		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2158			if ((tmptaint >> i) & 1)
2159				add_taint(i, LOCKDEP_STILL_OK);
2160		}
2161	}
2162
2163	return err;
2164}
2165
#ifdef CONFIG_PRINTK
/*
 * Same as proc_dointvec_minmax(), except that writes are refused with
 * -EPERM unless the caller has CAP_SYS_ADMIN.  Reads are not restricted.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	bool permitted = !write || capable(CAP_SYS_ADMIN);

	if (!permitted)
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
#endif
2176
/*
 * Range limits handed to do_proc_dointvec_minmax_conv().  Either pointer
 * may be NULL, in which case that side of the range is not checked.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * do_proc_dointvec_minmax_conv - int converter with range enforcement
 * @negp: sign flag (input on write, output on read)
 * @lvalp: magnitude (input on write, output on read)
 * @valp: the int being written to or read from
 * @write: non-zero to convert (*negp, *lvalp) into *valp
 * @data: pointer to a struct do_proc_dointvec_minmax_conv_param
 *
 * On a write the value must first fit in an int and then lie within the
 * configured range; *valp is only modified when both checks pass.
 * Returns 0 on success or -EINVAL if the value is rejected.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		unsigned long mag = *lvalp;
		int val;

		/*
		 * Reject magnitudes that do not fit in an int before
		 * narrowing; otherwise a wrapped value (e.g. -4294967295
		 * becoming 1) could sneak past the range check below.
		 */
		if (*negp) {
			if (mag > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -mag;
		} else {
			if (mag > (unsigned long) INT_MAX)
				return -EINVAL;
			val = mag;
		}
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/* Negate as unsigned: "-val" overflows for INT_MIN. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2205
2206/**
2207 * proc_dointvec_minmax - read a vector of integers with min/max values
2208 * @table: the sysctl table
2209 * @write: %TRUE if this is a write to the sysctl file
2210 * @buffer: the user buffer
2211 * @lenp: the size of the user buffer
2212 * @ppos: file position
2213 *
2214 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2215 * values from/to the user buffer, treated as an ASCII string.
2216 *
2217 * This routine will ensure the values are within the range specified by
2218 * table->extra1 (min) and table->extra2 (max).
2219 *
2220 * Returns 0 on success.
2221 */
2222int proc_dointvec_minmax(struct ctl_table *table, int write,
2223		  void __user *buffer, size_t *lenp, loff_t *ppos)
2224{
2225	struct do_proc_dointvec_minmax_conv_param param = {
2226		.min = (int *) table->extra1,
2227		.max = (int *) table->extra2,
2228	};
2229	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2230				do_proc_dointvec_minmax_conv, &param);
2231}
2232
/*
 * Log a warning when suid_dumpable is SUID_DUMP_ROOT while core_pattern
 * starts with neither '/' nor '|'.  Compiles to nothing without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;
	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING "Unsafe core_pattern used with "\
		"suid_dumpable=2. Pipe handler or fully qualified "\
		"core dump path required.\n");
#endif
}
2244
2245static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2246		void __user *buffer, size_t *lenp, loff_t *ppos)
2247{
2248	int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2249	if (!error)
2250		validate_coredump_safety();
2251	return error;
2252}
2253
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, on success, by a re-check of the core dump
 * configuration.
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int error;

	error = proc_dostring(table, write, buffer, lenp, ppos);
	if (error)
		return error;

	validate_coredump_safety();
	return 0;
}
#endif
2264
2265static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2266				     void __user *buffer,
2267				     size_t *lenp, loff_t *ppos,
2268				     unsigned long convmul,
2269				     unsigned long convdiv)
2270{
2271	unsigned long *i, *min, *max;
2272	int vleft, first = 1, err = 0;
2273	unsigned long page = 0;
2274	size_t left;
2275	char *kbuf;
2276
2277	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2278		*lenp = 0;
2279		return 0;
2280	}
2281
2282	i = (unsigned long *) data;
2283	min = (unsigned long *) table->extra1;
2284	max = (unsigned long *) table->extra2;
2285	vleft = table->maxlen / sizeof(unsigned long);
2286	left = *lenp;
2287
2288	if (write) {
2289		if (*ppos) {
2290			switch (sysctl_writes_strict) {
2291			case SYSCTL_WRITES_STRICT:
2292				goto out;
2293			case SYSCTL_WRITES_WARN:
2294				warn_sysctl_write(table);
2295				break;
2296			default:
2297				break;
2298			}
2299		}
2300
2301		if (left > PAGE_SIZE - 1)
2302			left = PAGE_SIZE - 1;
2303		page = __get_free_page(GFP_TEMPORARY);
2304		kbuf = (char *) page;
2305		if (!kbuf)
2306			return -ENOMEM;
2307		if (copy_from_user(kbuf, buffer, left)) {
2308			err = -EFAULT;
2309			goto free;
2310		}
2311		kbuf[left] = 0;
2312	}
2313
2314	for (; left && vleft--; i++, first = 0) {
2315		unsigned long val;
2316
2317		if (write) {
2318			bool neg;
2319
2320			left -= proc_skip_spaces(&kbuf);
2321
2322			err = proc_get_long(&kbuf, &left, &val, &neg,
2323					     proc_wspace_sep,
2324					     sizeof(proc_wspace_sep), NULL);
2325			if (err)
2326				break;
2327			if (neg)
2328				continue;
2329			if ((min && val < *min) || (max && val > *max))
2330				continue;
2331			*i = val;
2332		} else {
2333			val = convdiv * (*i) / convmul;
2334			if (!first) {
2335				err = proc_put_char(&buffer, &left, '\t');
2336				if (err)
2337					break;
2338			}
2339			err = proc_put_long(&buffer, &left, val, false);
2340			if (err)
2341				break;
2342		}
2343	}
2344
2345	if (!write && !first && left && !err)
2346		err = proc_put_char(&buffer, &left, '\n');
2347	if (write && !err)
2348		left -= proc_skip_spaces(&kbuf);
2349free:
2350	if (write) {
2351		free_page(page);
2352		if (first)
2353			return err ? : -EINVAL;
2354	}
2355	*lenp -= left;
2356out:
2357	*ppos += *lenp;
2358	return err;
2359}
2360
2361static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2362				     void __user *buffer,
2363				     size_t *lenp, loff_t *ppos,
2364				     unsigned long convmul,
2365				     unsigned long convdiv)
2366{
2367	return __do_proc_doulongvec_minmax(table->data, table, write,
2368			buffer, lenp, ppos, convmul, convdiv);
2369}
2370
2371/**
2372 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2373 * @table: the sysctl table
2374 * @write: %TRUE if this is a write to the sysctl file
2375 * @buffer: the user buffer
2376 * @lenp: the size of the user buffer
2377 * @ppos: file position
2378 *
2379 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2380 * values from/to the user buffer, treated as an ASCII string.
2381 *
2382 * This routine will ensure the values are within the range specified by
2383 * table->extra1 (min) and table->extra2 (max).
2384 *
2385 * Returns 0 on success.
2386 */
2387int proc_doulongvec_minmax(struct ctl_table *table, int write,
2388			   void __user *buffer, size_t *lenp, loff_t *ppos)
2389{
2390    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2391}
2392
2393/**
2394 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2395 * @table: the sysctl table
2396 * @write: %TRUE if this is a write to the sysctl file
2397 * @buffer: the user buffer
2398 * @lenp: the size of the user buffer
2399 * @ppos: file position
2400 *
2401 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2402 * values from/to the user buffer, treated as an ASCII string. The values
2403 * are treated as milliseconds, and converted to jiffies when they are stored.
2404 *
2405 * This routine will ensure the values are within the range specified by
2406 * table->extra1 (min) and table->extra2 (max).
2407 *
2408 * Returns 0 on success.
2409 */
2410int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2411				      void __user *buffer,
2412				      size_t *lenp, loff_t *ppos)
2413{
2414    return do_proc_doulongvec_minmax(table, write, buffer,
2415				     lenp, ppos, HZ, 1000l);
2416}
2417
2418
2419static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2420					 int *valp,
2421					 int write, void *data)
2422{
2423	if (write) {
2424		if (*lvalp > LONG_MAX / HZ)
2425			return 1;
2426		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2427	} else {
2428		int val = *valp;
2429		unsigned long lval;
2430		if (val < 0) {
2431			*negp = true;
2432			lval = (unsigned long)-val;
2433		} else {
2434			*negp = false;
2435			lval = (unsigned long)val;
2436		}
2437		*lvalp = lval / HZ;
2438	}
2439	return 0;
2440}
2441
2442static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2443						int *valp,
2444						int write, void *data)
2445{
2446	if (write) {
2447		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2448			return 1;
2449		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2450	} else {
2451		int val = *valp;
2452		unsigned long lval;
2453		if (val < 0) {
2454			*negp = true;
2455			lval = (unsigned long)-val;
2456		} else {
2457			*negp = false;
2458			lval = (unsigned long)val;
2459		}
2460		*lvalp = jiffies_to_clock_t(lval);
2461	}
2462	return 0;
2463}
2464
/*
 * Conversion callback used by proc_dointvec_ms_jiffies(): translates
 * between the milliseconds exchanged with user space and the jiffies count
 * kept in the kernel variable.
 *
 * @negp:  sign of the value (input when writing, output when reading)
 * @lvalp: magnitude in milliseconds (input when writing, output when
 *         reading)
 * @valp:  the kernel variable, in jiffies
 * @write: non-zero to convert *negp / *lvalp into *valp, zero for the reverse
 * @data:  unused
 *
 * Returns 0 on success, or 1 when the converted value exceeds INT_MAX.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		/* The destination is an int; refuse anything that won't fit. */
		if (jif > INT_MAX)
			return 1;
		*valp = (int)jif;
	} else {
		int val = *valp;
		unsigned long lval;

		if (val < 0) {
			*negp = true;
			/* Negate as unsigned: -val overflows for INT_MIN. */
			lval = -(unsigned long)val;
		} else {
			*negp = false;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2489
2490/**
2491 * proc_dointvec_jiffies - read a vector of integers as seconds
2492 * @table: the sysctl table
2493 * @write: %TRUE if this is a write to the sysctl file
2494 * @buffer: the user buffer
2495 * @lenp: the size of the user buffer
2496 * @ppos: file position
2497 *
2498 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2499 * values from/to the user buffer, treated as an ASCII string.
2500 * The values read are assumed to be in seconds, and are converted into
2501 * jiffies.
2502 *
2503 * Returns 0 on success.
2504 */
2505int proc_dointvec_jiffies(struct ctl_table *table, int write,
2506			  void __user *buffer, size_t *lenp, loff_t *ppos)
2507{
2508    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2509		    	    do_proc_dointvec_jiffies_conv,NULL);
2510}
2511
2512/**
2513 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2514 * @table: the sysctl table
2515 * @write: %TRUE if this is a write to the sysctl file
2516 * @buffer: the user buffer
2517 * @lenp: the size of the user buffer
2518 * @ppos: pointer to the file position
2519 *
2520 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2521 * values from/to the user buffer, treated as an ASCII string.
2522 * The values read are assumed to be in 1/USER_HZ seconds, and
2523 * are converted into jiffies.
2524 *
2525 * Returns 0 on success.
2526 */
2527int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2528				 void __user *buffer, size_t *lenp, loff_t *ppos)
2529{
2530    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2531		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2532}
2533
2534/**
2535 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2536 * @table: the sysctl table
2537 * @write: %TRUE if this is a write to the sysctl file
2538 * @buffer: the user buffer
2539 * @lenp: the size of the user buffer
2540 * @ppos: file position
2541 * @ppos: the current position in the file
2542 *
2543 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2544 * values from/to the user buffer, treated as an ASCII string.
2545 * The values read are assumed to be in 1/1000 seconds, and
2546 * are converted into jiffies.
2547 *
2548 * Returns 0 on success.
2549 */
2550int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2551			     void __user *buffer, size_t *lenp, loff_t *ppos)
2552{
2553	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2554				do_proc_dointvec_ms_jiffies_conv, NULL);
2555}
2556
2557static int proc_do_cad_pid(struct ctl_table *table, int write,
2558			   void __user *buffer, size_t *lenp, loff_t *ppos)
2559{
2560	struct pid *new_pid;
2561	pid_t tmp;
2562	int r;
2563
2564	tmp = pid_vnr(cad_pid);
2565
2566	r = __do_proc_dointvec(&tmp, table, write, buffer,
2567			       lenp, ppos, NULL, NULL);
2568	if (r || !write)
2569		return r;
2570
2571	new_pid = find_get_pid(tmp);
2572	if (!new_pid)
2573		return -ESRCH;
2574
2575	put_pid(xchg(&cad_pid, new_pid));
2576	return 0;
2577}
2578
2579/**
2580 * proc_do_large_bitmap - read/write from/to a large bitmap
2581 * @table: the sysctl table
2582 * @write: %TRUE if this is a write to the sysctl file
2583 * @buffer: the user buffer
2584 * @lenp: the size of the user buffer
2585 * @ppos: file position
2586 *
2587 * The bitmap is stored at table->data and the bitmap length (in bits)
2588 * in table->maxlen.
2589 *
2590 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
2591 * large bitmaps may be represented in a compact manner. Writing into
2592 * the file will clear the bitmap then update it with the given input.
2593 *
2594 * Returns 0 on success.
2595 */
2596int proc_do_large_bitmap(struct ctl_table *table, int write,
2597			 void __user *buffer, size_t *lenp, loff_t *ppos)
2598{
2599	int err = 0;
2600	bool first = 1;
2601	size_t left = *lenp;
2602	unsigned long bitmap_len = table->maxlen;
2603	unsigned long *bitmap = *(unsigned long **) table->data;
2604	unsigned long *tmp_bitmap = NULL;
2605	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2606
2607	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
2608		*lenp = 0;
2609		return 0;
2610	}
2611
2612	if (write) {
2613		unsigned long page = 0;
2614		char *kbuf;
2615
2616		if (left > PAGE_SIZE - 1)
2617			left = PAGE_SIZE - 1;
2618
2619		page = __get_free_page(GFP_TEMPORARY);
2620		kbuf = (char *) page;
2621		if (!kbuf)
2622			return -ENOMEM;
2623		if (copy_from_user(kbuf, buffer, left)) {
2624			free_page(page);
2625			return -EFAULT;
2626                }
2627		kbuf[left] = 0;
2628
2629		tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2630				     GFP_KERNEL);
2631		if (!tmp_bitmap) {
2632			free_page(page);
2633			return -ENOMEM;
2634		}
2635		proc_skip_char(&kbuf, &left, '\n');
2636		while (!err && left) {
2637			unsigned long val_a, val_b;
2638			bool neg;
2639
2640			err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2641					     sizeof(tr_a), &c);
2642			if (err)
2643				break;
2644			if (val_a >= bitmap_len || neg) {
2645				err = -EINVAL;
2646				break;
2647			}
2648
2649			val_b = val_a;
2650			if (left) {
2651				kbuf++;
2652				left--;
2653			}
2654
2655			if (c == '-') {
2656				err = proc_get_long(&kbuf, &left, &val_b,
2657						     &neg, tr_b, sizeof(tr_b),
2658						     &c);
2659				if (err)
2660					break;
2661				if (val_b >= bitmap_len || neg ||
2662				    val_a > val_b) {
2663					err = -EINVAL;
2664					break;
2665				}
2666				if (left) {
2667					kbuf++;
2668					left--;
2669				}
2670			}
2671
2672			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
2673			first = 0;
2674			proc_skip_char(&kbuf, &left, '\n');
2675		}
2676		free_page(page);
2677	} else {
2678		unsigned long bit_a, bit_b = 0;
2679
2680		while (left) {
2681			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2682			if (bit_a >= bitmap_len)
2683				break;
2684			bit_b = find_next_zero_bit(bitmap, bitmap_len,
2685						   bit_a + 1) - 1;
2686
2687			if (!first) {
2688				err = proc_put_char(&buffer, &left, ',');
2689				if (err)
2690					break;
2691			}
2692			err = proc_put_long(&buffer, &left, bit_a, false);
2693			if (err)
2694				break;
2695			if (bit_a != bit_b) {
2696				err = proc_put_char(&buffer, &left, '-');
2697				if (err)
2698					break;
2699				err = proc_put_long(&buffer, &left, bit_b, false);
2700				if (err)
2701					break;
2702			}
2703
2704			first = 0; bit_b++;
2705		}
2706		if (!err)
2707			err = proc_put_char(&buffer, &left, '\n');
2708	}
2709
2710	if (!err) {
2711		if (write) {
2712			if (*ppos)
2713				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2714			else
2715				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
2716		}
2717		kfree(tmp_bitmap);
2718		*lenp -= left;
2719		*ppos += *lenp;
2720		return 0;
2721	} else {
2722		kfree(tmp_bitmap);
2723		return err;
2724	}
2725}
2726
2727#else /* CONFIG_PROC_SYSCTL */
2728
2729int proc_dostring(struct ctl_table *table, int write,
2730		  void __user *buffer, size_t *lenp, loff_t *ppos)
2731{
2732	return -ENOSYS;
2733}
2734
2735int proc_dointvec(struct ctl_table *table, int write,
2736		  void __user *buffer, size_t *lenp, loff_t *ppos)
2737{
2738	return -ENOSYS;
2739}
2740
2741int proc_dointvec_minmax(struct ctl_table *table, int write,
2742		    void __user *buffer, size_t *lenp, loff_t *ppos)
2743{
2744	return -ENOSYS;
2745}
2746
2747int proc_dointvec_jiffies(struct ctl_table *table, int write,
2748		    void __user *buffer, size_t *lenp, loff_t *ppos)
2749{
2750	return -ENOSYS;
2751}
2752
2753int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2754		    void __user *buffer, size_t *lenp, loff_t *ppos)
2755{
2756	return -ENOSYS;
2757}
2758
2759int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2760			     void __user *buffer, size_t *lenp, loff_t *ppos)
2761{
2762	return -ENOSYS;
2763}
2764
2765int proc_doulongvec_minmax(struct ctl_table *table, int write,
2766		    void __user *buffer, size_t *lenp, loff_t *ppos)
2767{
2768	return -ENOSYS;
2769}
2770
2771int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2772				      void __user *buffer,
2773				      size_t *lenp, loff_t *ppos)
2774{
2775    return -ENOSYS;
2776}
2777
2778
2779#endif /* CONFIG_PROC_SYSCTL */
2780
/*
 * The handlers above are defined once in each branch of the
 * CONFIG_PROC_SYSCTL conditional, so they are exported here in one place
 * (alphabetical order) instead of after every definition.
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
2793