/*
 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
 *
 */
#include <linux/stacktrace.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/init.h>

#include <asm/setup.h>

#include "trace.h"

#define STACK_TRACE_ENTRIES 500

#ifdef CC_USING_FENTRY
# define fentry		1
#else
# define fentry		0
#endif

static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
	 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
static unsigned stack_dump_index[STACK_TRACE_ENTRIES];

/*
 * Reserve one entry for the passed in ip. This will allow
 * us to remove most or all of the stack size overhead
 * added by the stack tracer itself.
 */
static struct stack_trace max_stack_trace = {
	.max_entries		= STACK_TRACE_ENTRIES - 1,
	.entries		= &stack_dump_trace[1],
};

static unsigned long max_stack_size;
static arch_spinlock_t max_stack_lock =
	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static DEFINE_PER_CPU(int, trace_active);
static DEFINE_MUTEX(stack_sysctl_mutex);

int stack_tracer_enabled;
static int last_stack_tracer_enabled;

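/*
 * print_max_stack() dumps the recorded worst-case stack to the
 * console. It is only called from check_stack() when the end of
 * the task's stack has been overwritten.
 */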
static inline void print_max_stack(void)
{
	long i;
	int size;

	pr_emerg("        Depth    Size   Location    (%d entries)\n"
			   "        -----    ----   --------\n",
			   max_stack_trace.nr_entries - 1);

	for (i = 0; i < max_stack_trace.nr_entries; i++) {
		if (stack_dump_trace[i] == ULONG_MAX)
			break;
		if (i+1 == max_stack_trace.nr_entries ||
				stack_dump_trace[i+1] == ULONG_MAX)
			size = stack_dump_index[i];
		else
			size = stack_dump_index[i] - stack_dump_index[i+1];

		pr_emerg("%3ld) %8d   %5d   %pS\n", i, stack_dump_index[i],
				size, (void *)stack_dump_trace[i]);
	}
}

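/*
 * check_stack() compares the current stack usage against the recorded
 * maximum. On a new max it takes max_stack_lock, saves a fresh stack
 * trace and walks the stack words to work out how deep each function
 * in the trace was when it was called; the results are what the
 * stack_trace and stack_max_size files report.
 */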
static inline void
check_stack(unsigned long ip, unsigned long *stack)
{
	unsigned long this_size, flags;
	unsigned long *p, *top, *start;
	static int tracer_frame;
	int frame_size = ACCESS_ONCE(tracer_frame);
	int i;

	this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
	this_size = THREAD_SIZE - this_size;
	/* Remove the frame of the tracer */
	this_size -= frame_size;

	if (this_size <= max_stack_size)
		return;

	/* we do not handle interrupt stacks yet */
	if (!object_is_on_stack(stack))
		return;

	local_irq_save(flags);
	arch_spin_lock(&max_stack_lock);

	/* In case another CPU set the tracer_frame on us */
	if (unlikely(!frame_size))
		this_size -= tracer_frame;

	/* a race could have already updated it */
	if (this_size <= max_stack_size)
		goto out;

	max_stack_size = this_size;

	max_stack_trace.nr_entries = 0;

	if (using_ftrace_ops_list_func())
		max_stack_trace.skip = 4;
	else
		max_stack_trace.skip = 3;

	save_stack_trace(&max_stack_trace);

	/*
	 * Add the passed in ip from the function tracer.
	 * Searching for this on the stack will skip over
	 * most of the overhead from the stack tracer itself.
	 */
	stack_dump_trace[0] = ip;
	max_stack_trace.nr_entries++;

	/*
	 * Now find where in the stack these are.
	 */
	i = 0;
	start = stack;
	top = (unsigned long *)
		(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);

	/*
	 * Loop through all the entries. Some of the entries may for
	 * some reason be missing from the stack, so we have to account
	 * for that. If they are all there, this loop will only happen
	 * once. This code only runs on a new max, so it is far from a
	 * fast path.
	 */
	while (i < max_stack_trace.nr_entries) {
		int found = 0;

		stack_dump_index[i] = this_size;
		p = start;

		for (; p < top && i < max_stack_trace.nr_entries; p++) {
			if (*p == stack_dump_trace[i]) {
				this_size = stack_dump_index[i++] =
					(top - p) * sizeof(unsigned long);
				found = 1;
				/* Start the search from here */
				start = p + 1;
				/*
				 * We do not want to show the overhead
				 * of the stack tracer stack in the
				 * max stack. If we haven't figured
				 * out what that is, then figure it out
				 * now.
				 */
				if (unlikely(!tracer_frame) && i == 1) {
					tracer_frame = (p - stack) *
						sizeof(unsigned long);
					max_stack_size -= tracer_frame;
				}
			}
		}

		if (!found)
			i++;
	}

	if (task_stack_end_corrupted(current)) {
		print_max_stack();
		BUG();
	}

 out:
	arch_spin_unlock(&max_stack_lock);
	local_irq_restore(flags);
}

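/*
 * stack_trace_call() is the ftrace callback: it runs on every traced
 * function. The per-cpu trace_active counter prevents recursion
 * before the adjusted return address is handed to check_stack().
 */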
static void
stack_trace_call(unsigned long ip, unsigned long parent_ip,
		 struct ftrace_ops *op, struct pt_regs *pt_regs)
{
	unsigned long stack;
	int cpu;

	preempt_disable_notrace();

	cpu = raw_smp_processor_id();
	/* no atomic needed, only this CPU modifies this per-cpu variable */
	if (per_cpu(trace_active, cpu)++ != 0)
		goto out;

	/*
	 * When fentry is used, the traced function does not get
	 * its stack frame set up, and we lose the parent.
	 * The ip is pretty useless because the function tracer
	 * was called before that function set up its stack frame.
	 * In this case, we use the parent ip.
	 *
	 * By adding the return address of either the parent ip
	 * or the current ip we can disregard most of the stack usage
	 * caused by the stack tracer itself.
	 *
	 * The function tracer always reports the address of where the
	 * mcount call was, but the stack will hold the return address.
	 */
	if (fentry)
		ip = parent_ip;
	else
		ip += MCOUNT_INSN_SIZE;

	check_stack(ip, &stack);

 out:
	per_cpu(trace_active, cpu)--;
	/* prevent recursion in schedule */
	preempt_enable_notrace();
}

static struct ftrace_ops trace_ops __read_mostly =
{
	.func = stack_trace_call,
	.flags = FTRACE_OPS_FL_RECURSION_SAFE,
};

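/* "stack_max_size" read handler: report the recorded maximum as text. */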
static ssize_t
stack_max_size_read(struct file *filp, char __user *ubuf,
		    size_t count, loff_t *ppos)
{
	unsigned long *ptr = filp->private_data;
	char buf[64];
	int r;

	r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
	if (r > sizeof(buf))
		r = sizeof(buf);
	return simple_read_from_buffer(ubuf, count, ppos, buf, r);
}

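/*
 * "stack_max_size" write handler: let user space reset (or seed) the
 * recorded maximum while holding max_stack_lock.
 */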
static ssize_t
stack_max_size_write(struct file *filp, const char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	long *ptr = filp->private_data;
	unsigned long val, flags;
	int ret;
	int cpu;

	ret = kstrtoul_from_user(ubuf, count, 10, &val);
	if (ret)
		return ret;

	local_irq_save(flags);

	/*
	 * If we trace inside arch_spin_lock() or after it (NMI),
	 * we would deadlock on max_stack_lock, so we also need to
	 * increase the percpu trace_active here.
	 */
	cpu = smp_processor_id();
	per_cpu(trace_active, cpu)++;

	arch_spin_lock(&max_stack_lock);
	*ptr = val;
	arch_spin_unlock(&max_stack_lock);

	per_cpu(trace_active, cpu)--;
	local_irq_restore(flags);

	return count;
}

static const struct file_operations stack_max_size_fops = {
	.open		= tracing_open_generic,
	.read		= stack_max_size_read,
	.write		= stack_max_size_write,
	.llseek		= default_llseek,
};

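/*
 * seq_file iterator for the "stack_trace" file. t_start() bumps the
 * per-cpu trace_active counter and takes max_stack_lock so that the
 * recorded trace cannot change while it is being printed; t_stop()
 * undoes both.
 */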
static void *
__next(struct seq_file *m, loff_t *pos)
{
	long n = *pos - 1;

	if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
		return NULL;

	m->private = (void *)n;
	return &m->private;
}

static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return __next(m, pos);
}

static void *t_start(struct seq_file *m, loff_t *pos)
{
	int cpu;

	local_irq_disable();

	cpu = smp_processor_id();
	per_cpu(trace_active, cpu)++;

	arch_spin_lock(&max_stack_lock);

	if (*pos == 0)
		return SEQ_START_TOKEN;

	return __next(m, pos);
}

static void t_stop(struct seq_file *m, void *p)
{
	int cpu;

	arch_spin_unlock(&max_stack_lock);

	cpu = smp_processor_id();
	per_cpu(trace_active, cpu)--;

	local_irq_enable();
}

static void trace_lookup_stack(struct seq_file *m, long i)
{
	unsigned long addr = stack_dump_trace[i];

	seq_printf(m, "%pS\n", (void *)addr);
}

static void print_disabled(struct seq_file *m)
{
	seq_puts(m, "#\n"
		 "#  Stack tracer disabled\n"
		 "#\n"
		 "# To enable the stack tracer, either add 'stacktrace' to the\n"
		 "# kernel command line\n"
		 "# or 'echo 1 > /proc/sys/kernel/stack_tracer_enabled'\n"
		 "#\n");
}

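/*
 * t_show() prints one line of "stack_trace" output: the entry index,
 * the stack depth at that entry, the size used by the function at
 * that depth, and the symbol it resolves to.
 */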
static int t_show(struct seq_file *m, void *v)
{
	long i;
	int size;

	if (v == SEQ_START_TOKEN) {
		seq_printf(m, "        Depth    Size   Location"
			   "    (%d entries)\n"
			   "        -----    ----   --------\n",
			   max_stack_trace.nr_entries - 1);

		if (!stack_tracer_enabled && !max_stack_size)
			print_disabled(m);

		return 0;
	}

	i = *(long *)v;

	if (i >= max_stack_trace.nr_entries ||
	    stack_dump_trace[i] == ULONG_MAX)
		return 0;

	if (i+1 == max_stack_trace.nr_entries ||
	    stack_dump_trace[i+1] == ULONG_MAX)
		size = stack_dump_index[i];
	else
		size = stack_dump_index[i] - stack_dump_index[i+1];

	seq_printf(m, "%3ld) %8d   %5d   ", i, stack_dump_index[i], size);

	trace_lookup_stack(m, i);

	return 0;
}

static const struct seq_operations stack_trace_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
};

static int stack_trace_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &stack_trace_seq_ops);
}

static const struct file_operations stack_trace_fops = {
	.open		= stack_trace_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

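/*
 * "stack_trace_filter" reuses the generic ftrace regex filtering on
 * trace_ops, so the stack check only runs for the listed functions.
 */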
static int
stack_trace_filter_open(struct inode *inode, struct file *file)
{
	return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
				 inode, file);
}

static const struct file_operations stack_trace_filter_fops = {
	.open = stack_trace_filter_open,
	.read = seq_read,
	.write = ftrace_filter_write,
	.llseek = tracing_lseek,
	.release = ftrace_regex_release,
};

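/*
 * Handler for /proc/sys/kernel/stack_tracer_enabled: register or
 * unregister the ftrace callback when the value actually changes.
 */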
int
stack_trace_sysctl(struct ctl_table *table, int write,
		   void __user *buffer, size_t *lenp,
		   loff_t *ppos)
{
	int ret;

	mutex_lock(&stack_sysctl_mutex);

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (ret || !write ||
	    (last_stack_tracer_enabled == !!stack_tracer_enabled))
		goto out;

	last_stack_tracer_enabled = !!stack_tracer_enabled;

	if (stack_tracer_enabled)
		register_ftrace_function(&trace_ops);
	else
		unregister_ftrace_function(&trace_ops);

 out:
	mutex_unlock(&stack_sysctl_mutex);
	return ret;
}

static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;

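/*
 * Handle the "stacktrace" boot parameter; "stacktrace_filter=" is
 * passed here as well (__setup matches on the "stacktrace" prefix)
 * and its value is stashed for stack_trace_init().
 */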
static __init int enable_stacktrace(char *str)
{
	if (strncmp(str, "_filter=", 8) == 0)
		strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);

	stack_tracer_enabled = 1;
	last_stack_tracer_enabled = 1;
	return 1;
}
__setup("stacktrace", enable_stacktrace);

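/*
 * Create the stack_max_size, stack_trace and stack_trace_filter files
 * in the tracing directory, apply any boot-time filter, and register
 * the ftrace callback if the tracer was enabled on the command line.
 */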
static __init int stack_trace_init(void)
{
	struct dentry *d_tracer;

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	trace_create_file("stack_max_size", 0644, d_tracer,
			&max_stack_size, &stack_max_size_fops);

	trace_create_file("stack_trace", 0444, d_tracer,
			NULL, &stack_trace_fops);

	trace_create_file("stack_trace_filter", 0444, d_tracer,
			NULL, &stack_trace_filter_fops);

	if (stack_trace_filter_buf[0])
		ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);

	if (stack_tracer_enabled)
		register_ftrace_function(&trace_ops);

	return 0;
}

device_initcall(stack_trace_init);