1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 *    Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 *  Copyright (C) 2004-2006 Ingo Molnar
12 *  Copyright (C) 2004 Nadia Yvette Chambers
13 */
14#include <linux/ring_buffer.h>
15#include <generated/utsrelease.h>
16#include <linux/stacktrace.h>
17#include <linux/writeback.h>
18#include <linux/kallsyms.h>
19#include <linux/seq_file.h>
20#include <linux/notifier.h>
21#include <linux/irqflags.h>
22#include <linux/debugfs.h>
23#include <linux/tracefs.h>
24#include <linux/pagemap.h>
25#include <linux/hardirq.h>
26#include <linux/linkage.h>
27#include <linux/uaccess.h>
28#include <linux/kprobes.h>
29#include <linux/ftrace.h>
30#include <linux/module.h>
31#include <linux/percpu.h>
32#include <linux/splice.h>
33#include <linux/kdebug.h>
34#include <linux/string.h>
35#include <linux/mount.h>
36#include <linux/rwsem.h>
37#include <linux/slab.h>
38#include <linux/ctype.h>
39#include <linux/init.h>
40#include <linux/poll.h>
41#include <linux/nmi.h>
42#include <linux/fs.h>
43#include <linux/sched/rt.h>
44
45#include "trace.h"
46#include "trace_output.h"
47
48/*
49 * On boot up, the ring buffer is set to the minimum size, so that
50 * we do not waste memory on systems that are not using tracing.
51 */
52bool ring_buffer_expanded;
53
54/*
55 * We need to change this state when a selftest is running.
56 * A selftest will look into the ring buffer to count the
57 * entries inserted during the selftest, although concurrent
58 * insertions into the ring buffer, such as from trace_printk(),
59 * could occur at the same time, giving false positive or negative results.
60 */
61static bool __read_mostly tracing_selftest_running;
62
63/*
64 * If a tracer is running, we do not want to run SELFTEST.
65 */
66bool __read_mostly tracing_selftest_disabled;
67
68/* Pipe tracepoints to printk */
69struct trace_iterator *tracepoint_print_iter;
70int tracepoint_printk;
71
72/* For tracers that don't implement custom flags */
73static struct tracer_opt dummy_tracer_opt[] = {
74	{ }
75};
76
77static struct tracer_flags dummy_tracer_flags = {
78	.val = 0,
79	.opts = dummy_tracer_opt
80};
81
82static int
83dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
84{
85	return 0;
86}
87
88/*
89 * To prevent the comm cache from being overwritten when no
90 * tracing is active, only save the comm when a trace event
91 * occurs.
92 */
93static DEFINE_PER_CPU(bool, trace_cmdline_save);
94
95/*
96 * Kill all tracing for good (never come back).
97 * It is initialized to 1, and is set to zero when the initialization
98 * of the tracer is successful. That is the only place that sets
99 * this back to zero.
100 */
101static int tracing_disabled = 1;
102
103DEFINE_PER_CPU(int, ftrace_cpu_disabled);
104
105cpumask_var_t __read_mostly	tracing_buffer_mask;
106
107/*
108 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
109 *
110 * If there is an oops (or kernel panic) and ftrace_dump_on_oops
111 * is set, then ftrace_dump is called. This will output the contents
112 * of the ftrace buffers to the console.  This is very useful for
113 * capturing traces that lead to crashes and outputting them to a
114 * serial console.
115 *
116 * It is off by default, but you can enable it either by specifying
117 * "ftrace_dump_on_oops" on the kernel command line, or by setting
118 * /proc/sys/kernel/ftrace_dump_on_oops.
119 * Set it to 1 to dump the buffers of all CPUs, or
120 * set it to 2 to dump only the buffer of the CPU that triggered the oops.
121 */
122
123enum ftrace_dump_mode ftrace_dump_on_oops;
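/*
 * For example (illustrative only), on the kernel command line:
 *
 *	ftrace_dump_on_oops		dump the buffers of all CPUs on an oops
 *	ftrace_dump_on_oops=orig_cpu	dump only the CPU that triggered the oops
 *
 * or at run time:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */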
124
125/* When set, tracing will stop when a WARN*() is hit */
126int __disable_trace_on_warning;
127
128#ifdef CONFIG_TRACE_ENUM_MAP_FILE
129/* Map of enums to their values, for "enum_map" file */
130struct trace_enum_map_head {
131	struct module			*mod;
132	unsigned long			length;
133};
134
135union trace_enum_map_item;
136
137struct trace_enum_map_tail {
138	/*
139	 * "end" is first and points to NULL as it must be different
140	 * from "mod" or "enum_string"
141	 */
142	union trace_enum_map_item	*next;
143	const char			*end;	/* points to NULL */
144};
145
146static DEFINE_MUTEX(trace_enum_mutex);
147
148/*
149 * The trace_enum_maps are saved in an array with two extra elements,
150 * one at the beginning, and one at the end. The beginning item contains
151 * the count of the saved maps (head.length), and the module they
152 * belong to if not built in (head.mod). The ending item contains a
153 * pointer to the next array of saved enum_map items.
154 */
155union trace_enum_map_item {
156	struct trace_enum_map		map;
157	struct trace_enum_map_head	head;
158	struct trace_enum_map_tail	tail;
159};
160
161static union trace_enum_map_item *trace_enum_maps;
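/*
 * Illustrative layout of one saved array (not actual code); as described
 * above, head.length holds the count N of the saved maps:
 *
 *	[ head: .mod, .length = N ][ map_0 ][ map_1 ] ... [ map_N-1 ][ tail: .next, .end = NULL ]
 */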
162#endif /* CONFIG_TRACE_ENUM_MAP_FILE */
163
164static int tracing_set_tracer(struct trace_array *tr, const char *buf);
165
166#define MAX_TRACER_SIZE		100
167static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
168static char *default_bootup_tracer;
169
170static bool allocate_snapshot;
171
172static int __init set_cmdline_ftrace(char *str)
173{
174	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
175	default_bootup_tracer = bootup_tracer_buf;
176	/* We are using ftrace early, expand it */
177	ring_buffer_expanded = true;
178	return 1;
179}
180__setup("ftrace=", set_cmdline_ftrace);
181
182static int __init set_ftrace_dump_on_oops(char *str)
183{
184	if (*str++ != '=' || !*str) {
185		ftrace_dump_on_oops = DUMP_ALL;
186		return 1;
187	}
188
189	if (!strcmp("orig_cpu", str)) {
190		ftrace_dump_on_oops = DUMP_ORIG;
191		return 1;
192	}
193
194	return 0;
195}
196__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
197
198static int __init stop_trace_on_warning(char *str)
199{
200	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
201		__disable_trace_on_warning = 1;
202	return 1;
203}
204__setup("traceoff_on_warning", stop_trace_on_warning);
205
206static int __init boot_alloc_snapshot(char *str)
207{
208	allocate_snapshot = true;
209	/* We also need the main ring buffer expanded */
210	ring_buffer_expanded = true;
211	return 1;
212}
213__setup("alloc_snapshot", boot_alloc_snapshot);
214
215
216static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
217static char *trace_boot_options __initdata;
218
219static int __init set_trace_boot_options(char *str)
220{
221	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
222	trace_boot_options = trace_boot_options_buf;
223	return 0;
224}
225__setup("trace_options=", set_trace_boot_options);
226
227static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
228static char *trace_boot_clock __initdata;
229
230static int __init set_trace_boot_clock(char *str)
231{
232	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
233	trace_boot_clock = trace_boot_clock_buf;
234	return 0;
235}
236__setup("trace_clock=", set_trace_boot_clock);
237
238static int __init set_tracepoint_printk(char *str)
239{
240	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
241		tracepoint_printk = 1;
242	return 1;
243}
244__setup("tp_printk", set_tracepoint_printk);
245
246unsigned long long ns2usecs(cycle_t nsec)
247{
248	nsec += 500;
249	do_div(nsec, 1000);
250	return nsec;
251}
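/*
 * Worked example: 1499ns -> (1499 + 500) / 1000 = 1us, while
 * 1500ns -> (1500 + 500) / 1000 = 2us, i.e. rounded to the nearest usec.
 */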
252
253/*
254 * The global_trace is the descriptor that holds the tracing
255 * buffers for the live tracing. For each CPU, it contains
256 * a linked list of pages that will store trace entries. The
257 * page descriptor of each page in memory is used to hold
258 * the linked list, by linking the lru item in the page descriptor
259 * to each of the pages in the buffer for that CPU.
260 *
261 * For each active CPU there is a data field that holds the
262 * pages for the buffer for that CPU. Each CPU has the same number
263 * of pages allocated for its buffer.
264 */
265static struct trace_array	global_trace;
266
267LIST_HEAD(ftrace_trace_arrays);
268
269int trace_array_get(struct trace_array *this_tr)
270{
271	struct trace_array *tr;
272	int ret = -ENODEV;
273
274	mutex_lock(&trace_types_lock);
275	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
276		if (tr == this_tr) {
277			tr->ref++;
278			ret = 0;
279			break;
280		}
281	}
282	mutex_unlock(&trace_types_lock);
283
284	return ret;
285}
286
287static void __trace_array_put(struct trace_array *this_tr)
288{
289	WARN_ON(!this_tr->ref);
290	this_tr->ref--;
291}
292
293void trace_array_put(struct trace_array *this_tr)
294{
295	mutex_lock(&trace_types_lock);
296	__trace_array_put(this_tr);
297	mutex_unlock(&trace_types_lock);
298}
299
300int filter_check_discard(struct ftrace_event_file *file, void *rec,
301			 struct ring_buffer *buffer,
302			 struct ring_buffer_event *event)
303{
304	if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
305	    !filter_match_preds(file->filter, rec)) {
306		ring_buffer_discard_commit(buffer, event);
307		return 1;
308	}
309
310	return 0;
311}
312EXPORT_SYMBOL_GPL(filter_check_discard);
313
314int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
315			      struct ring_buffer *buffer,
316			      struct ring_buffer_event *event)
317{
318	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
319	    !filter_match_preds(call->filter, rec)) {
320		ring_buffer_discard_commit(buffer, event);
321		return 1;
322	}
323
324	return 0;
325}
326EXPORT_SYMBOL_GPL(call_filter_check_discard);
327
328static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
329{
330	u64 ts;
331
332	/* Early boot up does not have a buffer yet */
333	if (!buf->buffer)
334		return trace_clock_local();
335
336	ts = ring_buffer_time_stamp(buf->buffer, cpu);
337	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
338
339	return ts;
340}
341
342cycle_t ftrace_now(int cpu)
343{
344	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
345}
346
347/**
348 * tracing_is_enabled - Show if global_trace has been disabled
349 *
350 * Shows if the global trace has been enabled or not. It uses the
351 * mirror flag "buffer_disabled", so that it can be used in fast paths
352 * such as the irqsoff tracer. But it may be inaccurate due to races. If you
353 * need to know the accurate state, use tracing_is_on(), which is a little
354 * slower but accurate.
355 */
356int tracing_is_enabled(void)
357{
358	/*
359	 * For quick access (irqsoff uses this in fast path), just
360	 * return the mirror variable of the state of the ring buffer.
361	 * It's a little racy, but we don't really care.
362	 */
363	smp_rmb();
364	return !global_trace.buffer_disabled;
365}
366
367/*
368 * trace_buf_size is the size in bytes that is allocated
369 * for a buffer. Note, the number of bytes is always rounded
370 * to page size.
371 *
372 * This number is purposely set to a low value (16384), so that
373 * if a dump on oops happens we do not have to wait for an
374 * excessive amount of output. Anyway, this is configurable at
375 * both boot time and run time.
376 */
377#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
378
379static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
380
381/* trace_types holds a linked list of available tracers. */
382static struct tracer		*trace_types __read_mostly;
383
384/*
385 * trace_types_lock is used to protect the trace_types list.
386 */
387DEFINE_MUTEX(trace_types_lock);
388
389/*
390 * serialize the access of the ring buffer
391 *
392 * The ring buffer serializes readers, but that is only low level protection.
393 * The validity of the events (which are returned by ring_buffer_peek() etc.)
394 * is not protected by the ring buffer.
395 *
396 * The content of events may become garbage if we allow another process to
397 * consume these events concurrently:
398 *   A) the page of the consumed events may become a normal page
399 *      (not a reader page) in the ring buffer, and this page will be
400 *      rewritten by the event producer.
401 *   B) the page of the consumed events may become a page for splice_read,
402 *      and this page will be returned to the system.
403 *
404 * These primitives allow multiple processes to access different cpu ring
405 * buffers concurrently.
406 *
407 * These primitives don't distinguish read-only and read-consume access.
408 * Multiple read-only accesses are also serialized.
409 */
410
411#ifdef CONFIG_SMP
412static DECLARE_RWSEM(all_cpu_access_lock);
413static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
414
415static inline void trace_access_lock(int cpu)
416{
417	if (cpu == RING_BUFFER_ALL_CPUS) {
418		/* gain it for accessing the whole ring buffer. */
419		down_write(&all_cpu_access_lock);
420	} else {
421		/* gain it for accessing a cpu ring buffer. */
422
423		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
424		down_read(&all_cpu_access_lock);
425
426		/* Secondly block other access to this @cpu ring buffer. */
427		mutex_lock(&per_cpu(cpu_access_lock, cpu));
428	}
429}
430
431static inline void trace_access_unlock(int cpu)
432{
433	if (cpu == RING_BUFFER_ALL_CPUS) {
434		up_write(&all_cpu_access_lock);
435	} else {
436		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
437		up_read(&all_cpu_access_lock);
438	}
439}
440
441static inline void trace_access_lock_init(void)
442{
443	int cpu;
444
445	for_each_possible_cpu(cpu)
446		mutex_init(&per_cpu(cpu_access_lock, cpu));
447}
448
449#else
450
451static DEFINE_MUTEX(access_lock);
452
453static inline void trace_access_lock(int cpu)
454{
455	(void)cpu;
456	mutex_lock(&access_lock);
457}
458
459static inline void trace_access_unlock(int cpu)
460{
461	(void)cpu;
462	mutex_unlock(&access_lock);
463}
464
465static inline void trace_access_lock_init(void)
466{
467}
468
469#endif
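/*
 * A consuming reader would typically bracket its ring buffer access like
 * this (sketch only; the surrounding loop and error handling are omitted):
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(iter->trace_buffer->buffer, cpu, &ts,
 *				    &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 */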
470
471/* trace_flags holds trace_options default values */
472unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
473	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
474	TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
475	TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
476
477static void tracer_tracing_on(struct trace_array *tr)
478{
479	if (tr->trace_buffer.buffer)
480		ring_buffer_record_on(tr->trace_buffer.buffer);
481	/*
482	 * This flag is looked at when buffers haven't been allocated
483	 * yet, or by some tracers (like irqsoff), that just want to
484	 * know if the ring buffer has been disabled, but it can handle
485	 * races of where it gets disabled but we still do a record.
486	 * As the check is in the fast path of the tracers, it is more
487	 * important to be fast than accurate.
488	 */
489	tr->buffer_disabled = 0;
490	/* Make the flag seen by readers */
491	smp_wmb();
492}
493
494/**
495 * tracing_on - enable tracing buffers
496 *
497 * This function enables tracing buffers that may have been
498 * disabled with tracing_off.
499 */
500void tracing_on(void)
501{
502	tracer_tracing_on(&global_trace);
503}
504EXPORT_SYMBOL_GPL(tracing_on);
505
506/**
507 * __trace_puts - write a constant string into the trace buffer.
508 * @ip:	   The address of the caller
509 * @str:   The constant string to write
510 * @size:  The size of the string.
511 */
512int __trace_puts(unsigned long ip, const char *str, int size)
513{
514	struct ring_buffer_event *event;
515	struct ring_buffer *buffer;
516	struct print_entry *entry;
517	unsigned long irq_flags;
518	int alloc;
519	int pc;
520
521	if (!(trace_flags & TRACE_ITER_PRINTK))
522		return 0;
523
524	pc = preempt_count();
525
526	if (unlikely(tracing_selftest_running || tracing_disabled))
527		return 0;
528
529	alloc = sizeof(*entry) + size + 2; /* possible \n added */
530
531	local_save_flags(irq_flags);
532	buffer = global_trace.trace_buffer.buffer;
533	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
534					  irq_flags, pc);
535	if (!event)
536		return 0;
537
538	entry = ring_buffer_event_data(event);
539	entry->ip = ip;
540
541	memcpy(&entry->buf, str, size);
542
543	/* Add a newline if necessary */
544	if (entry->buf[size - 1] != '\n') {
545		entry->buf[size] = '\n';
546		entry->buf[size + 1] = '\0';
547	} else
548		entry->buf[size] = '\0';
549
550	__buffer_unlock_commit(buffer, event);
551	ftrace_trace_stack(buffer, irq_flags, 4, pc);
552
553	return size;
554}
555EXPORT_SYMBOL_GPL(__trace_puts);
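/*
 * Callers normally go through the trace_puts() macro, which fills in the
 * caller's address and the string size, rather than calling this directly,
 * e.g. (sketch):
 *
 *	trace_puts("reached the slow path\n");
 */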
556
557/**
558 * __trace_bputs - write the pointer to a constant string into trace buffer
559 * @ip:	   The address of the caller
560 * @str:   The constant string whose address is written into the buffer
561 */
562int __trace_bputs(unsigned long ip, const char *str)
563{
564	struct ring_buffer_event *event;
565	struct ring_buffer *buffer;
566	struct bputs_entry *entry;
567	unsigned long irq_flags;
568	int size = sizeof(struct bputs_entry);
569	int pc;
570
571	if (!(trace_flags & TRACE_ITER_PRINTK))
572		return 0;
573
574	pc = preempt_count();
575
576	if (unlikely(tracing_selftest_running || tracing_disabled))
577		return 0;
578
579	local_save_flags(irq_flags);
580	buffer = global_trace.trace_buffer.buffer;
581	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
582					  irq_flags, pc);
583	if (!event)
584		return 0;
585
586	entry = ring_buffer_event_data(event);
587	entry->ip			= ip;
588	entry->str			= str;
589
590	__buffer_unlock_commit(buffer, event);
591	ftrace_trace_stack(buffer, irq_flags, 4, pc);
592
593	return 1;
594}
595EXPORT_SYMBOL_GPL(__trace_bputs);
596
597#ifdef CONFIG_TRACER_SNAPSHOT
598/**
599 * tracing_snapshot - take a snapshot of the current buffer.
600 *
601 * This causes a swap between the snapshot buffer and the current live
602 * tracing buffer. You can use this to take snapshots of the live
603 * trace when some condition is triggered, but continue to trace.
604 *
605 * Note, make sure to allocate the snapshot with either
606 * tracing_snapshot_alloc(), or by doing it manually
607 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
608 *
609 * If the snapshot buffer is not allocated, it will stop tracing.
610 * Basically making a permanent snapshot.
611 */
612void tracing_snapshot(void)
613{
614	struct trace_array *tr = &global_trace;
615	struct tracer *tracer = tr->current_trace;
616	unsigned long flags;
617
618	if (in_nmi()) {
619		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
620		internal_trace_puts("*** snapshot is being ignored        ***\n");
621		return;
622	}
623
624	if (!tr->allocated_snapshot) {
625		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
626		internal_trace_puts("*** stopping trace here!   ***\n");
627		tracing_off();
628		return;
629	}
630
631	/* Note, snapshot can not be used when the tracer uses it */
632	if (tracer->use_max_tr) {
633		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
634		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
635		return;
636	}
637
638	local_irq_save(flags);
639	update_max_tr(tr, current, smp_processor_id());
640	local_irq_restore(flags);
641}
642EXPORT_SYMBOL_GPL(tracing_snapshot);
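/*
 * Example (sketch): preserve the trace leading up to a rare event while
 * letting tracing continue afterwards. rare_condition() is hypothetical.
 *
 *	if (unlikely(rare_condition()))
 *		tracing_snapshot();
 */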
643
644static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
645					struct trace_buffer *size_buf, int cpu_id);
646static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
647
648static int alloc_snapshot(struct trace_array *tr)
649{
650	int ret;
651
652	if (!tr->allocated_snapshot) {
653
654		/* allocate spare buffer */
655		ret = resize_buffer_duplicate_size(&tr->max_buffer,
656				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
657		if (ret < 0)
658			return ret;
659
660		tr->allocated_snapshot = true;
661	}
662
663	return 0;
664}
665
666static void free_snapshot(struct trace_array *tr)
667{
668	/*
669	 * We don't free the ring buffer; instead, we resize it because
670	 * the max_tr ring buffer has some state (e.g. ring->clock) and
671	 * we want to preserve it.
672	 */
673	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
674	set_buffer_entries(&tr->max_buffer, 1);
675	tracing_reset_online_cpus(&tr->max_buffer);
676	tr->allocated_snapshot = false;
677}
678
679/**
680 * tracing_alloc_snapshot - allocate snapshot buffer.
681 *
682 * This only allocates the snapshot buffer if it isn't already
683 * allocated - it doesn't also take a snapshot.
684 *
685 * This is meant to be used in cases where the snapshot buffer needs
686 * to be set up for events that can't sleep but need to be able to
687 * trigger a snapshot.
688 */
689int tracing_alloc_snapshot(void)
690{
691	struct trace_array *tr = &global_trace;
692	int ret;
693
694	ret = alloc_snapshot(tr);
695	WARN_ON(ret < 0);
696
697	return ret;
698}
699EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
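/*
 * Typical split usage (sketch): allocate from a context that may sleep,
 * then take snapshots later from a context that cannot:
 *
 *	at setup time (may sleep):
 *		if (tracing_alloc_snapshot() < 0)
 *			return;
 *
 *	later, e.g. from a non-sleeping context:
 *		tracing_snapshot();
 */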
700
701/**
702 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
703 *
704 * This is similar to tracing_snapshot(), but it will allocate the
705 * snapshot buffer if it isn't already allocated. Use this only
706 * where it is safe to sleep, as the allocation may sleep.
707 *
708 * This causes a swap between the snapshot buffer and the current live
709 * tracing buffer. You can use this to take snapshots of the live
710 * trace when some condition is triggered, but continue to trace.
711 */
712void tracing_snapshot_alloc(void)
713{
714	int ret;
715
716	ret = tracing_alloc_snapshot();
717	if (ret < 0)
718		return;
719
720	tracing_snapshot();
721}
722EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
723#else
724void tracing_snapshot(void)
725{
726	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
727}
728EXPORT_SYMBOL_GPL(tracing_snapshot);
729int tracing_alloc_snapshot(void)
730{
731	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
732	return -ENODEV;
733}
734EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
735void tracing_snapshot_alloc(void)
736{
737	/* Give warning */
738	tracing_snapshot();
739}
740EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
741#endif /* CONFIG_TRACER_SNAPSHOT */
742
743static void tracer_tracing_off(struct trace_array *tr)
744{
745	if (tr->trace_buffer.buffer)
746		ring_buffer_record_off(tr->trace_buffer.buffer);
747	/*
748	 * This flag is looked at when buffers haven't been allocated
749	 * yet, or by some tracers (like irqsoff), that just want to
750	 * know if the ring buffer has been disabled, but it can handle
751	 * races of where it gets disabled but we still do a record.
752	 * As the check is in the fast path of the tracers, it is more
753	 * important to be fast than accurate.
754	 */
755	tr->buffer_disabled = 1;
756	/* Make the flag seen by readers */
757	smp_wmb();
758}
759
760/**
761 * tracing_off - turn off tracing buffers
762 *
763 * This function stops the tracing buffers from recording data.
764 * It does not disable any overhead the tracers themselves may
765 * be causing. This function simply causes all recording to
766 * the ring buffers to fail.
767 */
768void tracing_off(void)
769{
770	tracer_tracing_off(&global_trace);
771}
772EXPORT_SYMBOL_GPL(tracing_off);
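/*
 * A common debugging pattern (sketch; bug_was_detected() is hypothetical)
 * is to freeze the buffers when a problem is spotted, so the events leading
 * up to it are preserved, and re-enable them once they have been read:
 *
 *	if (bug_was_detected())
 *		tracing_off();
 *	...
 *	tracing_on();
 */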
773
774void disable_trace_on_warning(void)
775{
776	if (__disable_trace_on_warning)
777		tracing_off();
778}
779
780/**
781 * tracer_tracing_is_on - show real state of ring buffer enabled
782 * @tr : the trace array to know if ring buffer is enabled
783 *
784 * Shows real state of the ring buffer if it is enabled or not.
785 */
786static int tracer_tracing_is_on(struct trace_array *tr)
787{
788	if (tr->trace_buffer.buffer)
789		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
790	return !tr->buffer_disabled;
791}
792
793/**
794 * tracing_is_on - show state of ring buffers enabled
795 */
796int tracing_is_on(void)
797{
798	return tracer_tracing_is_on(&global_trace);
799}
800EXPORT_SYMBOL_GPL(tracing_is_on);
801
802static int __init set_buf_size(char *str)
803{
804	unsigned long buf_size;
805
806	if (!str)
807		return 0;
808	buf_size = memparse(str, &str);
809	/* nr_entries can not be zero */
810	if (buf_size == 0)
811		return 0;
812	trace_buf_size = buf_size;
813	return 1;
814}
815__setup("trace_buf_size=", set_buf_size);
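/*
 * For example (illustrative), on the kernel command line:
 *
 *	trace_buf_size=1M
 *
 * requests a buffer of roughly one megabyte; memparse() accepts the usual
 * K/M/G suffixes.
 */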
816
817static int __init set_tracing_thresh(char *str)
818{
819	unsigned long threshold;
820	int ret;
821
822	if (!str)
823		return 0;
824	ret = kstrtoul(str, 0, &threshold);
825	if (ret < 0)
826		return 0;
827	tracing_thresh = threshold * 1000;
828	return 1;
829}
830__setup("tracing_thresh=", set_tracing_thresh);
831
832unsigned long nsecs_to_usecs(unsigned long nsecs)
833{
834	return nsecs / 1000;
835}
836
837/* These must match the bit positions in trace_iterator_flags */
838static const char *trace_options[] = {
839	"print-parent",
840	"sym-offset",
841	"sym-addr",
842	"verbose",
843	"raw",
844	"hex",
845	"bin",
846	"block",
847	"stacktrace",
848	"trace_printk",
849	"ftrace_preempt",
850	"branch",
851	"annotate",
852	"userstacktrace",
853	"sym-userobj",
854	"printk-msg-only",
855	"context-info",
856	"latency-format",
857	"sleep-time",
858	"graph-time",
859	"record-cmd",
860	"overwrite",
861	"disable_on_free",
862	"irq-info",
863	"markers",
864	"function-trace",
865	NULL
866};
867
868static struct {
869	u64 (*func)(void);
870	const char *name;
871	int in_ns;		/* is this clock in nanoseconds? */
872} trace_clocks[] = {
873	{ trace_clock_local,		"local",	1 },
874	{ trace_clock_global,		"global",	1 },
875	{ trace_clock_counter,		"counter",	0 },
876	{ trace_clock_jiffies,		"uptime",	0 },
877	{ trace_clock,			"perf",		1 },
878	{ ktime_get_mono_fast_ns,	"mono",		1 },
879	ARCH_TRACE_CLOCKS
880};
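/*
 * A clock is selected by name, e.g. (illustrative) "trace_clock=global" on
 * the kernel command line (see set_trace_boot_clock() above), or by writing
 * one of the names above to the trace_clock file in tracefs.
 */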
881
882/*
883 * trace_parser_get_init - gets the buffer for trace parser
884 */
885int trace_parser_get_init(struct trace_parser *parser, int size)
886{
887	memset(parser, 0, sizeof(*parser));
888
889	parser->buffer = kmalloc(size, GFP_KERNEL);
890	if (!parser->buffer)
891		return 1;
892
893	parser->size = size;
894	return 0;
895}
896
897/*
898 * trace_parser_put - frees the buffer for trace parser
899 */
900void trace_parser_put(struct trace_parser *parser)
901{
902	kfree(parser->buffer);
903}
904
905/*
906 * trace_get_user - reads the user input string separated by space
907 * (matched by isspace(ch))
908 *
909 * For each string found, the 'struct trace_parser' is updated,
910 * and the function returns.
911 *
912 * Returns number of bytes read.
913 *
914 * See kernel/trace/trace.h for 'struct trace_parser' details.
915 */
916int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
917	size_t cnt, loff_t *ppos)
918{
919	char ch;
920	size_t read = 0;
921	ssize_t ret;
922
923	if (!*ppos)
924		trace_parser_clear(parser);
925
926	ret = get_user(ch, ubuf++);
927	if (ret)
928		goto out;
929
930	read++;
931	cnt--;
932
933	/*
934	 * The parser is not finished with the last write,
935	 * continue reading the user input without skipping spaces.
936	 */
937	if (!parser->cont) {
938		/* skip white space */
939		while (cnt && isspace(ch)) {
940			ret = get_user(ch, ubuf++);
941			if (ret)
942				goto out;
943			read++;
944			cnt--;
945		}
946
947		/* only spaces were written */
948		if (isspace(ch)) {
949			*ppos += read;
950			ret = read;
951			goto out;
952		}
953
954		parser->idx = 0;
955	}
956
957	/* read the non-space input */
958	while (cnt && !isspace(ch)) {
959		if (parser->idx < parser->size - 1)
960			parser->buffer[parser->idx++] = ch;
961		else {
962			ret = -EINVAL;
963			goto out;
964		}
965		ret = get_user(ch, ubuf++);
966		if (ret)
967			goto out;
968		read++;
969		cnt--;
970	}
971
972	/* We either got finished input or we have to wait for another call. */
973	if (isspace(ch)) {
974		parser->buffer[parser->idx] = 0;
975		parser->cont = false;
976	} else if (parser->idx < parser->size - 1) {
977		parser->cont = true;
978		parser->buffer[parser->idx++] = ch;
979	} else {
980		ret = -EINVAL;
981		goto out;
982	}
983
984	*ppos += read;
985	ret = read;
986
987out:
988	return ret;
989}
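/*
 * A write() handler typically drives the parser like this (sketch only;
 * error handling trimmed):
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		use parser.buffer (one NUL-terminated token);
 *
 *	trace_parser_put(&parser);
 *	return read;
 */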
990
991/* TODO add a seq_buf_to_buffer() */
992static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
993{
994	int len;
995
996	if (trace_seq_used(s) <= s->seq.readpos)
997		return -EBUSY;
998
999	len = trace_seq_used(s) - s->seq.readpos;
1000	if (cnt > len)
1001		cnt = len;
1002	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1003
1004	s->seq.readpos += cnt;
1005	return cnt;
1006}
1007
1008unsigned long __read_mostly	tracing_thresh;
1009
1010#ifdef CONFIG_TRACER_MAX_TRACE
1011/*
1012 * Copy the new maximum trace into the separate maximum-trace
1013 * structure. (this way the maximum trace is permanently saved,
1014 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1015 */
1016static void
1017__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1018{
1019	struct trace_buffer *trace_buf = &tr->trace_buffer;
1020	struct trace_buffer *max_buf = &tr->max_buffer;
1021	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1022	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1023
1024	max_buf->cpu = cpu;
1025	max_buf->time_start = data->preempt_timestamp;
1026
1027	max_data->saved_latency = tr->max_latency;
1028	max_data->critical_start = data->critical_start;
1029	max_data->critical_end = data->critical_end;
1030
1031	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1032	max_data->pid = tsk->pid;
1033	/*
1034	 * If tsk == current, then use current_uid(), as that does not use
1035	 * RCU. The irq tracer can be called out of RCU scope.
1036	 */
1037	if (tsk == current)
1038		max_data->uid = current_uid();
1039	else
1040		max_data->uid = task_uid(tsk);
1041
1042	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1043	max_data->policy = tsk->policy;
1044	max_data->rt_priority = tsk->rt_priority;
1045
1046	/* record this task's comm */
1047	tracing_record_cmdline(tsk);
1048}
1049
1050/**
1051 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1052 * @tr: tracer
1053 * @tsk: the task with the latency
1054 * @cpu: The cpu that initiated the trace.
1055 *
1056 * Flip the buffers between the @tr and the max_tr and record information
1057 * about which task was the cause of this latency.
1058 */
1059void
1060update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1061{
1062	struct ring_buffer *buf;
1063
1064	if (tr->stop_count)
1065		return;
1066
1067	WARN_ON_ONCE(!irqs_disabled());
1068
1069	if (!tr->allocated_snapshot) {
1070		/* Only the nop tracer should hit this when disabling */
1071		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1072		return;
1073	}
1074
1075	arch_spin_lock(&tr->max_lock);
1076
1077	buf = tr->trace_buffer.buffer;
1078	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1079	tr->max_buffer.buffer = buf;
1080
1081	__update_max_tr(tr, tsk, cpu);
1082	arch_spin_unlock(&tr->max_lock);
1083}
1084
1085/**
1086 * update_max_tr_single - only copy one trace over, and reset the rest
1087 * @tr: tracer
1088 * @tsk: task with the latency
1089 * @cpu: the cpu of the buffer to copy.
1090 *
1091 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1092 */
1093void
1094update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1095{
1096	int ret;
1097
1098	if (tr->stop_count)
1099		return;
1100
1101	WARN_ON_ONCE(!irqs_disabled());
1102	if (!tr->allocated_snapshot) {
1103		/* Only the nop tracer should hit this when disabling */
1104		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1105		return;
1106	}
1107
1108	arch_spin_lock(&tr->max_lock);
1109
1110	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1111
1112	if (ret == -EBUSY) {
1113		/*
1114		 * We failed to swap the buffer due to a commit taking
1115		 * place on this CPU. We fail to record, but we reset
1116		 * the max trace buffer (no one writes directly to it)
1117		 * and flag that it failed.
1118		 */
1119		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1120			"Failed to swap buffers due to commit in progress\n");
1121	}
1122
1123	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1124
1125	__update_max_tr(tr, tsk, cpu);
1126	arch_spin_unlock(&tr->max_lock);
1127}
1128#endif /* CONFIG_TRACER_MAX_TRACE */
1129
1130static int wait_on_pipe(struct trace_iterator *iter, bool full)
1131{
1132	/* Iterators are static; they should be either filled or empty */
1133	if (trace_buffer_iter(iter, iter->cpu_file))
1134		return 0;
1135
1136	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1137				full);
1138}
1139
1140#ifdef CONFIG_FTRACE_STARTUP_TEST
1141static int run_tracer_selftest(struct tracer *type)
1142{
1143	struct trace_array *tr = &global_trace;
1144	struct tracer *saved_tracer = tr->current_trace;
1145	int ret;
1146
1147	if (!type->selftest || tracing_selftest_disabled)
1148		return 0;
1149
1150	/*
1151	 * Run a selftest on this tracer.
1152	 * Here we reset the trace buffer, and set the current
1153	 * tracer to be this tracer. The tracer can then run some
1154	 * internal tracing to verify that everything is in order.
1155	 * If we fail, we do not register this tracer.
1156	 */
1157	tracing_reset_online_cpus(&tr->trace_buffer);
1158
1159	tr->current_trace = type;
1160
1161#ifdef CONFIG_TRACER_MAX_TRACE
1162	if (type->use_max_tr) {
1163		/* If we expanded the buffers, make sure the max is expanded too */
1164		if (ring_buffer_expanded)
1165			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1166					   RING_BUFFER_ALL_CPUS);
1167		tr->allocated_snapshot = true;
1168	}
1169#endif
1170
1171	/* the test is responsible for initializing and enabling */
1172	pr_info("Testing tracer %s: ", type->name);
1173	ret = type->selftest(type, tr);
1174	/* the test is responsible for resetting too */
1175	tr->current_trace = saved_tracer;
1176	if (ret) {
1177		printk(KERN_CONT "FAILED!\n");
1178		/* Add the warning after printing 'FAILED' */
1179		WARN_ON(1);
1180		return -1;
1181	}
1182	/* Only reset on passing, to avoid touching corrupted buffers */
1183	tracing_reset_online_cpus(&tr->trace_buffer);
1184
1185#ifdef CONFIG_TRACER_MAX_TRACE
1186	if (type->use_max_tr) {
1187		tr->allocated_snapshot = false;
1188
1189		/* Shrink the max buffer again */
1190		if (ring_buffer_expanded)
1191			ring_buffer_resize(tr->max_buffer.buffer, 1,
1192					   RING_BUFFER_ALL_CPUS);
1193	}
1194#endif
1195
1196	printk(KERN_CONT "PASSED\n");
1197	return 0;
1198}
1199#else
1200static inline int run_tracer_selftest(struct tracer *type)
1201{
1202	return 0;
1203}
1204#endif /* CONFIG_FTRACE_STARTUP_TEST */
1205
1206/**
1207 * register_tracer - register a tracer with the ftrace system.
1208 * @type - the plugin for the tracer
1209 *
1210 * Register a new plugin tracer.
1211 */
1212int register_tracer(struct tracer *type)
1213{
1214	struct tracer *t;
1215	int ret = 0;
1216
1217	if (!type->name) {
1218		pr_info("Tracer must have a name\n");
1219		return -1;
1220	}
1221
1222	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1223		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1224		return -1;
1225	}
1226
1227	mutex_lock(&trace_types_lock);
1228
1229	tracing_selftest_running = true;
1230
1231	for (t = trace_types; t; t = t->next) {
1232		if (strcmp(type->name, t->name) == 0) {
1233			/* already found */
1234			pr_info("Tracer %s already registered\n",
1235				type->name);
1236			ret = -1;
1237			goto out;
1238		}
1239	}
1240
1241	if (!type->set_flag)
1242		type->set_flag = &dummy_set_flag;
1243	if (!type->flags)
1244		type->flags = &dummy_tracer_flags;
1245	else
1246		if (!type->flags->opts)
1247			type->flags->opts = dummy_tracer_opt;
1248
1249	ret = run_tracer_selftest(type);
1250	if (ret < 0)
1251		goto out;
1252
1253	type->next = trace_types;
1254	trace_types = type;
1255
1256 out:
1257	tracing_selftest_running = false;
1258	mutex_unlock(&trace_types_lock);
1259
1260	if (ret || !default_bootup_tracer)
1261		goto out_unlock;
1262
1263	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1264		goto out_unlock;
1265
1266	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1267	/* Do we want this tracer to start on bootup? */
1268	tracing_set_tracer(&global_trace, type->name);
1269	default_bootup_tracer = NULL;
1270	/* disable other selftests, since this will break it. */
1271	tracing_selftest_disabled = true;
1272#ifdef CONFIG_FTRACE_STARTUP_TEST
1273	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1274	       type->name);
1275#endif
1276
1277 out_unlock:
1278	return ret;
1279}
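/*
 * A minimal registration (sketch; all names are illustrative, not an
 * existing tracer):
 *
 *	static int example_trace_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_trace_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *		.reset	= example_trace_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */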
1280
1281void tracing_reset(struct trace_buffer *buf, int cpu)
1282{
1283	struct ring_buffer *buffer = buf->buffer;
1284
1285	if (!buffer)
1286		return;
1287
1288	ring_buffer_record_disable(buffer);
1289
1290	/* Make sure all commits have finished */
1291	synchronize_sched();
1292	ring_buffer_reset_cpu(buffer, cpu);
1293
1294	ring_buffer_record_enable(buffer);
1295}
1296
1297void tracing_reset_online_cpus(struct trace_buffer *buf)
1298{
1299	struct ring_buffer *buffer = buf->buffer;
1300	int cpu;
1301
1302	if (!buffer)
1303		return;
1304
1305	ring_buffer_record_disable(buffer);
1306
1307	/* Make sure all commits have finished */
1308	synchronize_sched();
1309
1310	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1311
1312	for_each_online_cpu(cpu)
1313		ring_buffer_reset_cpu(buffer, cpu);
1314
1315	ring_buffer_record_enable(buffer);
1316}
1317
1318/* Must have trace_types_lock held */
1319void tracing_reset_all_online_cpus(void)
1320{
1321	struct trace_array *tr;
1322
1323	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1324		tracing_reset_online_cpus(&tr->trace_buffer);
1325#ifdef CONFIG_TRACER_MAX_TRACE
1326		tracing_reset_online_cpus(&tr->max_buffer);
1327#endif
1328	}
1329}
1330
1331#define SAVED_CMDLINES_DEFAULT 128
1332#define NO_CMDLINE_MAP UINT_MAX
1333static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1334struct saved_cmdlines_buffer {
1335	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1336	unsigned *map_cmdline_to_pid;
1337	unsigned cmdline_num;
1338	int cmdline_idx;
1339	char *saved_cmdlines;
1340};
1341static struct saved_cmdlines_buffer *savedcmd;
1342
1343/* temporarily disable recording */
1344static atomic_t trace_record_cmdline_disabled __read_mostly;
1345
1346static inline char *get_saved_cmdlines(int idx)
1347{
1348	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1349}
1350
1351static inline void set_cmdline(int idx, const char *cmdline)
1352{
1353	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1354}
1355
1356static int allocate_cmdlines_buffer(unsigned int val,
1357				    struct saved_cmdlines_buffer *s)
1358{
1359	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1360					GFP_KERNEL);
1361	if (!s->map_cmdline_to_pid)
1362		return -ENOMEM;
1363
1364	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1365	if (!s->saved_cmdlines) {
1366		kfree(s->map_cmdline_to_pid);
1367		return -ENOMEM;
1368	}
1369
1370	s->cmdline_idx = 0;
1371	s->cmdline_num = val;
1372	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1373	       sizeof(s->map_pid_to_cmdline));
1374	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1375	       val * sizeof(*s->map_cmdline_to_pid));
1376
1377	return 0;
1378}
1379
1380static int trace_create_savedcmd(void)
1381{
1382	int ret;
1383
1384	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1385	if (!savedcmd)
1386		return -ENOMEM;
1387
1388	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1389	if (ret < 0) {
1390		kfree(savedcmd);
1391		savedcmd = NULL;
1392		return -ENOMEM;
1393	}
1394
1395	return 0;
1396}
1397
1398int is_tracing_stopped(void)
1399{
1400	return global_trace.stop_count;
1401}
1402
1403/**
1404 * tracing_start - quick start of the tracer
1405 *
1406 * If tracing is enabled but was stopped by tracing_stop,
1407 * this will start the tracer back up.
1408 */
1409void tracing_start(void)
1410{
1411	struct ring_buffer *buffer;
1412	unsigned long flags;
1413
1414	if (tracing_disabled)
1415		return;
1416
1417	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1418	if (--global_trace.stop_count) {
1419		if (global_trace.stop_count < 0) {
1420			/* Someone screwed up their debugging */
1421			WARN_ON_ONCE(1);
1422			global_trace.stop_count = 0;
1423		}
1424		goto out;
1425	}
1426
1427	/* Prevent the buffers from switching */
1428	arch_spin_lock(&global_trace.max_lock);
1429
1430	buffer = global_trace.trace_buffer.buffer;
1431	if (buffer)
1432		ring_buffer_record_enable(buffer);
1433
1434#ifdef CONFIG_TRACER_MAX_TRACE
1435	buffer = global_trace.max_buffer.buffer;
1436	if (buffer)
1437		ring_buffer_record_enable(buffer);
1438#endif
1439
1440	arch_spin_unlock(&global_trace.max_lock);
1441
1442 out:
1443	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1444}
1445
1446static void tracing_start_tr(struct trace_array *tr)
1447{
1448	struct ring_buffer *buffer;
1449	unsigned long flags;
1450
1451	if (tracing_disabled)
1452		return;
1453
1454	/* If global, we need to also start the max tracer */
1455	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1456		return tracing_start();
1457
1458	raw_spin_lock_irqsave(&tr->start_lock, flags);
1459
1460	if (--tr->stop_count) {
1461		if (tr->stop_count < 0) {
1462			/* Someone screwed up their debugging */
1463			WARN_ON_ONCE(1);
1464			tr->stop_count = 0;
1465		}
1466		goto out;
1467	}
1468
1469	buffer = tr->trace_buffer.buffer;
1470	if (buffer)
1471		ring_buffer_record_enable(buffer);
1472
1473 out:
1474	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1475}
1476
1477/**
1478 * tracing_stop - quick stop of the tracer
1479 *
1480 * Light weight way to stop tracing. Use in conjunction with
1481 * tracing_start.
1482 */
1483void tracing_stop(void)
1484{
1485	struct ring_buffer *buffer;
1486	unsigned long flags;
1487
1488	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1489	if (global_trace.stop_count++)
1490		goto out;
1491
1492	/* Prevent the buffers from switching */
1493	arch_spin_lock(&global_trace.max_lock);
1494
1495	buffer = global_trace.trace_buffer.buffer;
1496	if (buffer)
1497		ring_buffer_record_disable(buffer);
1498
1499#ifdef CONFIG_TRACER_MAX_TRACE
1500	buffer = global_trace.max_buffer.buffer;
1501	if (buffer)
1502		ring_buffer_record_disable(buffer);
1503#endif
1504
1505	arch_spin_unlock(&global_trace.max_lock);
1506
1507 out:
1508	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1509}
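/*
 * tracing_stop()/tracing_start() nest (stop_count is a counter), so they
 * can bracket a section that should not be recorded (sketch):
 *
 *	tracing_stop();
 *	...work that should not show up in the buffers...
 *	tracing_start();
 */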
1510
1511static void tracing_stop_tr(struct trace_array *tr)
1512{
1513	struct ring_buffer *buffer;
1514	unsigned long flags;
1515
1516	/* If global, we need to also stop the max tracer */
1517	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1518		return tracing_stop();
1519
1520	raw_spin_lock_irqsave(&tr->start_lock, flags);
1521	if (tr->stop_count++)
1522		goto out;
1523
1524	buffer = tr->trace_buffer.buffer;
1525	if (buffer)
1526		ring_buffer_record_disable(buffer);
1527
1528 out:
1529	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1530}
1531
1532void trace_stop_cmdline_recording(void);
1533
1534static int trace_save_cmdline(struct task_struct *tsk)
1535{
1536	unsigned pid, idx;
1537
1538	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1539		return 0;
1540
1541	/*
1542	 * It's not the end of the world if we don't get
1543	 * the lock, but we also don't want to spin
1544	 * nor do we want to disable interrupts,
1545	 * so if we miss here, then better luck next time.
1546	 */
1547	if (!arch_spin_trylock(&trace_cmdline_lock))
1548		return 0;
1549
1550	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1551	if (idx == NO_CMDLINE_MAP) {
1552		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1553
1554		/*
1555		 * Check whether the cmdline buffer at idx has a pid
1556		 * mapped. We are going to overwrite that entry so we
1557		 * need to clear the map_pid_to_cmdline. Otherwise we
1558		 * would read the new comm for the old pid.
1559		 */
1560		pid = savedcmd->map_cmdline_to_pid[idx];
1561		if (pid != NO_CMDLINE_MAP)
1562			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1563
1564		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1565		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1566
1567		savedcmd->cmdline_idx = idx;
1568	}
1569
1570	set_cmdline(idx, tsk->comm);
1571
1572	arch_spin_unlock(&trace_cmdline_lock);
1573
1574	return 1;
1575}
1576
1577static void __trace_find_cmdline(int pid, char comm[])
1578{
1579	unsigned map;
1580
1581	if (!pid) {
1582		strcpy(comm, "<idle>");
1583		return;
1584	}
1585
1586	if (WARN_ON_ONCE(pid < 0)) {
1587		strcpy(comm, "<XXX>");
1588		return;
1589	}
1590
1591	if (pid > PID_MAX_DEFAULT) {
1592		strcpy(comm, "<...>");
1593		return;
1594	}
1595
1596	map = savedcmd->map_pid_to_cmdline[pid];
1597	if (map != NO_CMDLINE_MAP)
1598		strcpy(comm, get_saved_cmdlines(map));
1599	else
1600		strcpy(comm, "<...>");
1601}
1602
1603void trace_find_cmdline(int pid, char comm[])
1604{
1605	preempt_disable();
1606	arch_spin_lock(&trace_cmdline_lock);
1607
1608	__trace_find_cmdline(pid, comm);
1609
1610	arch_spin_unlock(&trace_cmdline_lock);
1611	preempt_enable();
1612}
1613
1614void tracing_record_cmdline(struct task_struct *tsk)
1615{
1616	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1617		return;
1618
1619	if (!__this_cpu_read(trace_cmdline_save))
1620		return;
1621
1622	if (trace_save_cmdline(tsk))
1623		__this_cpu_write(trace_cmdline_save, false);
1624}
1625
1626void
1627tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1628			     int pc)
1629{
1630	struct task_struct *tsk = current;
1631
1632	entry->preempt_count		= pc & 0xff;
1633	entry->pid			= (tsk) ? tsk->pid : 0;
1634	entry->flags =
1635#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1636		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1637#else
1638		TRACE_FLAG_IRQS_NOSUPPORT |
1639#endif
1640		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1641		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1642		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1643		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1644}
1645EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1646
1647struct ring_buffer_event *
1648trace_buffer_lock_reserve(struct ring_buffer *buffer,
1649			  int type,
1650			  unsigned long len,
1651			  unsigned long flags, int pc)
1652{
1653	struct ring_buffer_event *event;
1654
1655	event = ring_buffer_lock_reserve(buffer, len);
1656	if (event != NULL) {
1657		struct trace_entry *ent = ring_buffer_event_data(event);
1658
1659		tracing_generic_entry_update(ent, flags, pc);
1660		ent->type = type;
1661	}
1662
1663	return event;
1664}
1665
1666void
1667__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1668{
1669	__this_cpu_write(trace_cmdline_save, true);
1670	ring_buffer_unlock_commit(buffer, event);
1671}
1672
1673static inline void
1674__trace_buffer_unlock_commit(struct ring_buffer *buffer,
1675			     struct ring_buffer_event *event,
1676			     unsigned long flags, int pc)
1677{
1678	__buffer_unlock_commit(buffer, event);
1679
1680	ftrace_trace_stack(buffer, flags, 6, pc);
1681	ftrace_trace_userstack(buffer, flags, pc);
1682}
1683
1684void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1685				struct ring_buffer_event *event,
1686				unsigned long flags, int pc)
1687{
1688	__trace_buffer_unlock_commit(buffer, event, flags, pc);
1689}
1690EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1691
1692static struct ring_buffer *temp_buffer;
1693
1694struct ring_buffer_event *
1695trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1696			  struct ftrace_event_file *ftrace_file,
1697			  int type, unsigned long len,
1698			  unsigned long flags, int pc)
1699{
1700	struct ring_buffer_event *entry;
1701
1702	*current_rb = ftrace_file->tr->trace_buffer.buffer;
1703	entry = trace_buffer_lock_reserve(*current_rb,
1704					 type, len, flags, pc);
1705	/*
1706	 * If tracing is off, but we have triggers enabled
1707	 * we still need to look at the event data. Use the temp_buffer
1708	 * to store the trace event for the trigger to use. It's recursion
1709	 * safe and will not be recorded anywhere.
1710	 */
1711	if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1712		*current_rb = temp_buffer;
1713		entry = trace_buffer_lock_reserve(*current_rb,
1714						  type, len, flags, pc);
1715	}
1716	return entry;
1717}
1718EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1719
1720struct ring_buffer_event *
1721trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1722				  int type, unsigned long len,
1723				  unsigned long flags, int pc)
1724{
1725	*current_rb = global_trace.trace_buffer.buffer;
1726	return trace_buffer_lock_reserve(*current_rb,
1727					 type, len, flags, pc);
1728}
1729EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1730
1731void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1732					struct ring_buffer_event *event,
1733					unsigned long flags, int pc)
1734{
1735	__trace_buffer_unlock_commit(buffer, event, flags, pc);
1736}
1737EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1738
1739void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1740				     struct ring_buffer_event *event,
1741				     unsigned long flags, int pc,
1742				     struct pt_regs *regs)
1743{
1744	__buffer_unlock_commit(buffer, event);
1745
1746	ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1747	ftrace_trace_userstack(buffer, flags, pc);
1748}
1749EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1750
1751void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1752					 struct ring_buffer_event *event)
1753{
1754	ring_buffer_discard_commit(buffer, event);
1755}
1756EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1757
1758void
1759trace_function(struct trace_array *tr,
1760	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
1761	       int pc)
1762{
1763	struct ftrace_event_call *call = &event_function;
1764	struct ring_buffer *buffer = tr->trace_buffer.buffer;
1765	struct ring_buffer_event *event;
1766	struct ftrace_entry *entry;
1767
1768	/* If we are reading the ring buffer, don't trace */
1769	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1770		return;
1771
1772	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1773					  flags, pc);
1774	if (!event)
1775		return;
1776	entry	= ring_buffer_event_data(event);
1777	entry->ip			= ip;
1778	entry->parent_ip		= parent_ip;
1779
1780	if (!call_filter_check_discard(call, entry, buffer, event))
1781		__buffer_unlock_commit(buffer, event);
1782}
1783
1784#ifdef CONFIG_STACKTRACE
1785
1786#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1787struct ftrace_stack {
1788	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
1789};
1790
1791static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1792static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1793
1794static void __ftrace_trace_stack(struct ring_buffer *buffer,
1795				 unsigned long flags,
1796				 int skip, int pc, struct pt_regs *regs)
1797{
1798	struct ftrace_event_call *call = &event_kernel_stack;
1799	struct ring_buffer_event *event;
1800	struct stack_entry *entry;
1801	struct stack_trace trace;
1802	int use_stack;
1803	int size = FTRACE_STACK_ENTRIES;
1804
1805	trace.nr_entries	= 0;
1806	trace.skip		= skip;
1807
1808	/*
1809	 * Since events can happen in NMIs there's no safe way to
1810	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1811	 * or NMI comes in, it will just have to use the default
1812	 * FTRACE_STACK_SIZE.
1813	 */
1814	preempt_disable_notrace();
1815
1816	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1817	/*
1818	 * We don't need any atomic variables, just a barrier.
1819	 * If an interrupt comes in, we don't care, because it would
1820	 * have exited and put the counter back to what we want.
1821	 * We just need a barrier to keep gcc from moving things
1822	 * around.
1823	 */
1824	barrier();
1825	if (use_stack == 1) {
1826		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
1827		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
1828
1829		if (regs)
1830			save_stack_trace_regs(regs, &trace);
1831		else
1832			save_stack_trace(&trace);
1833
1834		if (trace.nr_entries > size)
1835			size = trace.nr_entries;
1836	} else
1837		/* From now on, use_stack is a boolean */
1838		use_stack = 0;
1839
1840	size *= sizeof(unsigned long);
1841
1842	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1843					  sizeof(*entry) + size, flags, pc);
1844	if (!event)
1845		goto out;
1846	entry = ring_buffer_event_data(event);
1847
1848	memset(&entry->caller, 0, size);
1849
1850	if (use_stack)
1851		memcpy(&entry->caller, trace.entries,
1852		       trace.nr_entries * sizeof(unsigned long));
1853	else {
1854		trace.max_entries	= FTRACE_STACK_ENTRIES;
1855		trace.entries		= entry->caller;
1856		if (regs)
1857			save_stack_trace_regs(regs, &trace);
1858		else
1859			save_stack_trace(&trace);
1860	}
1861
1862	entry->size = trace.nr_entries;
1863
1864	if (!call_filter_check_discard(call, entry, buffer, event))
1865		__buffer_unlock_commit(buffer, event);
1866
1867 out:
1868	/* Again, don't let gcc optimize things here */
1869	barrier();
1870	__this_cpu_dec(ftrace_stack_reserve);
1871	preempt_enable_notrace();
1872
1873}
1874
1875void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1876			     int skip, int pc, struct pt_regs *regs)
1877{
1878	if (!(trace_flags & TRACE_ITER_STACKTRACE))
1879		return;
1880
1881	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
1882}
1883
1884void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1885			int skip, int pc)
1886{
1887	if (!(trace_flags & TRACE_ITER_STACKTRACE))
1888		return;
1889
1890	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1891}
1892
1893void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1894		   int pc)
1895{
1896	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1897}
1898
1899/**
1900 * trace_dump_stack - record a stack back trace in the trace buffer
1901 * @skip: Number of functions to skip (helper handlers)
1902 */
1903void trace_dump_stack(int skip)
1904{
1905	unsigned long flags;
1906
1907	if (tracing_disabled || tracing_selftest_running)
1908		return;
1909
1910	local_save_flags(flags);
1911
1912	/*
1913	 * Skip 3 more, seems to get us at the caller of
1914	 * this function.
1915	 */
1916	skip += 3;
1917	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
1918			     flags, skip, preempt_count(), NULL);
1919}
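/*
 * Example (sketch): record how a suspicious path was reached, without
 * stopping the trace:
 *
 *	trace_dump_stack(0);
 */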
1920
1921static DEFINE_PER_CPU(int, user_stack_count);
1922
1923void
1924ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1925{
1926	struct ftrace_event_call *call = &event_user_stack;
1927	struct ring_buffer_event *event;
1928	struct userstack_entry *entry;
1929	struct stack_trace trace;
1930
1931	if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1932		return;
1933
1934	/*
1935	 * NMIs cannot handle page faults, even with fixups.
1936	 * Saving the user stack can (and often does) fault.
1937	 */
1938	if (unlikely(in_nmi()))
1939		return;
1940
1941	/*
1942	 * prevent recursion, since the user stack tracing may
1943	 * trigger other kernel events.
1944	 */
1945	preempt_disable();
1946	if (__this_cpu_read(user_stack_count))
1947		goto out;
1948
1949	__this_cpu_inc(user_stack_count);
1950
1951	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1952					  sizeof(*entry), flags, pc);
1953	if (!event)
1954		goto out_drop_count;
1955	entry	= ring_buffer_event_data(event);
1956
1957	entry->tgid		= current->tgid;
1958	memset(&entry->caller, 0, sizeof(entry->caller));
1959
1960	trace.nr_entries	= 0;
1961	trace.max_entries	= FTRACE_STACK_ENTRIES;
1962	trace.skip		= 0;
1963	trace.entries		= entry->caller;
1964
1965	save_stack_trace_user(&trace);
1966	if (!call_filter_check_discard(call, entry, buffer, event))
1967		__buffer_unlock_commit(buffer, event);
1968
1969 out_drop_count:
1970	__this_cpu_dec(user_stack_count);
1971 out:
1972	preempt_enable();
1973}
1974
1975#ifdef UNUSED
1976static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1977{
1978	ftrace_trace_userstack(tr, flags, preempt_count());
1979}
1980#endif /* UNUSED */
1981
1982#endif /* CONFIG_STACKTRACE */
1983
1984/* created for use with alloc_percpu */
1985struct trace_buffer_struct {
1986	char buffer[TRACE_BUF_SIZE];
1987};
1988
1989static struct trace_buffer_struct *trace_percpu_buffer;
1990static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1991static struct trace_buffer_struct *trace_percpu_irq_buffer;
1992static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1993
1994/*
1995 * The buffer used depends on the context. There is a per cpu
1996 * buffer for normal context, softirq context, hard irq context and
1997 * NMI context. This allows for lockless recording.
1998 *
1999 * Note, if the buffers failed to be allocated, then this returns NULL.
2000 */
2001static char *get_trace_buf(void)
2002{
2003	struct trace_buffer_struct *percpu_buffer;
2004
2005	/*
2006	 * If we have allocated per cpu buffers, then we do not
2007	 * need to do any locking.
2008	 */
2009	if (in_nmi())
2010		percpu_buffer = trace_percpu_nmi_buffer;
2011	else if (in_irq())
2012		percpu_buffer = trace_percpu_irq_buffer;
2013	else if (in_softirq())
2014		percpu_buffer = trace_percpu_sirq_buffer;
2015	else
2016		percpu_buffer = trace_percpu_buffer;
2017
2018	if (!percpu_buffer)
2019		return NULL;
2020
2021	return this_cpu_ptr(&percpu_buffer->buffer[0]);
2022}
2023
2024static int alloc_percpu_trace_buffer(void)
2025{
2026	struct trace_buffer_struct *buffers;
2027	struct trace_buffer_struct *sirq_buffers;
2028	struct trace_buffer_struct *irq_buffers;
2029	struct trace_buffer_struct *nmi_buffers;
2030
2031	buffers = alloc_percpu(struct trace_buffer_struct);
2032	if (!buffers)
2033		goto err_warn;
2034
2035	sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2036	if (!sirq_buffers)
2037		goto err_sirq;
2038
2039	irq_buffers = alloc_percpu(struct trace_buffer_struct);
2040	if (!irq_buffers)
2041		goto err_irq;
2042
2043	nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2044	if (!nmi_buffers)
2045		goto err_nmi;
2046
2047	trace_percpu_buffer = buffers;
2048	trace_percpu_sirq_buffer = sirq_buffers;
2049	trace_percpu_irq_buffer = irq_buffers;
2050	trace_percpu_nmi_buffer = nmi_buffers;
2051
2052	return 0;
2053
2054 err_nmi:
2055	free_percpu(irq_buffers);
2056 err_irq:
2057	free_percpu(sirq_buffers);
2058 err_sirq:
2059	free_percpu(buffers);
2060 err_warn:
2061	WARN(1, "Could not allocate percpu trace_printk buffer");
2062	return -ENOMEM;
2063}
2064
2065static int buffers_allocated;
2066
2067void trace_printk_init_buffers(void)
2068{
2069	if (buffers_allocated)
2070		return;
2071
2072	if (alloc_percpu_trace_buffer())
2073		return;
2074
2075	/* trace_printk() is for debug use only. Don't use it in production. */
2076
2077	pr_warning("\n");
2078	pr_warning("**********************************************************\n");
2079	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2080	pr_warning("**                                                      **\n");
2081	pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2082	pr_warning("**                                                      **\n");
2083	pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2084	pr_warning("** unsafe for production use.                           **\n");
2085	pr_warning("**                                                      **\n");
2086	pr_warning("** If you see this message and you are not debugging    **\n");
2087	pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2088	pr_warning("**                                                      **\n");
2089	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2090	pr_warning("**********************************************************\n");
2091
2092	/* Expand the buffers to set size */
2093	tracing_update_buffers();
2094
2095	buffers_allocated = 1;
2096
2097	/*
2098	 * trace_printk_init_buffers() can be called by modules.
2099	 * If that happens, then we need to start cmdline recording
2100	 * directly here. If the global_trace.trace_buffer.buffer is
2101	 * already allocated here, then this was called by module code.
2102	 */
2103	if (global_trace.trace_buffer.buffer)
2104		tracing_start_cmdline_record();
2105}
2106
2107void trace_printk_start_comm(void)
2108{
2109	/* Start tracing comms if trace printk is set */
2110	if (!buffers_allocated)
2111		return;
2112	tracing_start_cmdline_record();
2113}
2114
2115static void trace_printk_start_stop_comm(int enabled)
2116{
2117	if (!buffers_allocated)
2118		return;
2119
2120	if (enabled)
2121		tracing_start_cmdline_record();
2122	else
2123		tracing_stop_cmdline_record();
2124}
2125
2126/**
2127 * trace_vbprintk - write binary msg to tracing buffer
2128 * @ip: address of the caller, @fmt: binary printf format, @args: arguments for @fmt
2129 */
2130int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2131{
2132	struct ftrace_event_call *call = &event_bprint;
2133	struct ring_buffer_event *event;
2134	struct ring_buffer *buffer;
2135	struct trace_array *tr = &global_trace;
2136	struct bprint_entry *entry;
2137	unsigned long flags;
2138	char *tbuffer;
2139	int len = 0, size, pc;
2140
2141	if (unlikely(tracing_selftest_running || tracing_disabled))
2142		return 0;
2143
2144	/* Don't pollute graph traces with trace_vprintk internals */
2145	pause_graph_tracing();
2146
2147	pc = preempt_count();
2148	preempt_disable_notrace();
2149
2150	tbuffer = get_trace_buf();
2151	if (!tbuffer) {
2152		len = 0;
2153		goto out;
2154	}
2155
2156	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2157
2158	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2159		goto out;
2160
2161	local_save_flags(flags);
2162	size = sizeof(*entry) + sizeof(u32) * len;
2163	buffer = tr->trace_buffer.buffer;
2164	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2165					  flags, pc);
2166	if (!event)
2167		goto out;
2168	entry = ring_buffer_event_data(event);
2169	entry->ip			= ip;
2170	entry->fmt			= fmt;
2171
2172	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2173	if (!call_filter_check_discard(call, entry, buffer, event)) {
2174		__buffer_unlock_commit(buffer, event);
2175		ftrace_trace_stack(buffer, flags, 6, pc);
2176	}
2177
2178out:
2179	preempt_enable_notrace();
2180	unpause_graph_tracing();
2181
2182	return len;
2183}
2184EXPORT_SYMBOL_GPL(trace_vbprintk);
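
/*
 * Sketch of how a varargs front end is assumed to reach trace_vbprintk()
 * (callers normally go through the trace_printk() machinery; the wrapper
 * below is only illustrative):
 *
 *	static int example_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *
 *		return ret;
 *	}
 */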
2185
2186static int
2187__trace_array_vprintk(struct ring_buffer *buffer,
2188		      unsigned long ip, const char *fmt, va_list args)
2189{
2190	struct ftrace_event_call *call = &event_print;
2191	struct ring_buffer_event *event;
2192	int len = 0, size, pc;
2193	struct print_entry *entry;
2194	unsigned long flags;
2195	char *tbuffer;
2196
2197	if (tracing_disabled || tracing_selftest_running)
2198		return 0;
2199
2200	/* Don't pollute graph traces with trace_vprintk internals */
2201	pause_graph_tracing();
2202
2203	pc = preempt_count();
2204	preempt_disable_notrace();
2205
2207	tbuffer = get_trace_buf();
2208	if (!tbuffer) {
2209		len = 0;
2210		goto out;
2211	}
2212
2213	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2214
2215	local_save_flags(flags);
2216	size = sizeof(*entry) + len + 1;
2217	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2218					  flags, pc);
2219	if (!event)
2220		goto out;
2221	entry = ring_buffer_event_data(event);
2222	entry->ip = ip;
2223
2224	memcpy(&entry->buf, tbuffer, len + 1);
2225	if (!call_filter_check_discard(call, entry, buffer, event)) {
2226		__buffer_unlock_commit(buffer, event);
2227		ftrace_trace_stack(buffer, flags, 6, pc);
2228	}
2229 out:
2230	preempt_enable_notrace();
2231	unpause_graph_tracing();
2232
2233	return len;
2234}
2235
2236int trace_array_vprintk(struct trace_array *tr,
2237			unsigned long ip, const char *fmt, va_list args)
2238{
2239	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2240}
2241
2242int trace_array_printk(struct trace_array *tr,
2243		       unsigned long ip, const char *fmt, ...)
2244{
2245	int ret;
2246	va_list ap;
2247
2248	if (!(trace_flags & TRACE_ITER_PRINTK))
2249		return 0;
2250
2251	va_start(ap, fmt);
2252	ret = trace_array_vprintk(tr, ip, fmt, ap);
2253	va_end(ap);
2254	return ret;
2255}
2256
2257int trace_array_printk_buf(struct ring_buffer *buffer,
2258			   unsigned long ip, const char *fmt, ...)
2259{
2260	int ret;
2261	va_list ap;
2262
2263	if (!(trace_flags & TRACE_ITER_PRINTK))
2264		return 0;
2265
2266	va_start(ap, fmt);
2267	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2268	va_end(ap);
2269	return ret;
2270}
2271
2272int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2273{
2274	return trace_array_vprintk(&global_trace, ip, fmt, args);
2275}
2276EXPORT_SYMBOL_GPL(trace_vprintk);
2277
2278static void trace_iterator_increment(struct trace_iterator *iter)
2279{
2280	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2281
2282	iter->idx++;
2283	if (buf_iter)
2284		ring_buffer_read(buf_iter, NULL);
2285}
2286
2287static struct trace_entry *
2288peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2289		unsigned long *lost_events)
2290{
2291	struct ring_buffer_event *event;
2292	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2293
2294	if (buf_iter)
2295		event = ring_buffer_iter_peek(buf_iter, ts);
2296	else
2297		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2298					 lost_events);
2299
2300	if (event) {
2301		iter->ent_size = ring_buffer_event_length(event);
2302		return ring_buffer_event_data(event);
2303	}
2304	iter->ent_size = 0;
2305	return NULL;
2306}
2307
2308static struct trace_entry *
2309__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2310		  unsigned long *missing_events, u64 *ent_ts)
2311{
2312	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2313	struct trace_entry *ent, *next = NULL;
2314	unsigned long lost_events = 0, next_lost = 0;
2315	int cpu_file = iter->cpu_file;
2316	u64 next_ts = 0, ts;
2317	int next_cpu = -1;
2318	int next_size = 0;
2319	int cpu;
2320
2321	/*
2322	 * If we are in a per_cpu trace file, don't bother iterating
2323	 * over all the CPUs; just peek at that CPU directly.
2324	 */
2325	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2326		if (ring_buffer_empty_cpu(buffer, cpu_file))
2327			return NULL;
2328		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2329		if (ent_cpu)
2330			*ent_cpu = cpu_file;
2331
2332		return ent;
2333	}
2334
2335	for_each_tracing_cpu(cpu) {
2336
2337		if (ring_buffer_empty_cpu(buffer, cpu))
2338			continue;
2339
2340		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2341
2342		/*
2343		 * Pick the entry with the smallest timestamp:
2344		 */
2345		if (ent && (!next || ts < next_ts)) {
2346			next = ent;
2347			next_cpu = cpu;
2348			next_ts = ts;
2349			next_lost = lost_events;
2350			next_size = iter->ent_size;
2351		}
2352	}
2353
2354	iter->ent_size = next_size;
2355
2356	if (ent_cpu)
2357		*ent_cpu = next_cpu;
2358
2359	if (ent_ts)
2360		*ent_ts = next_ts;
2361
2362	if (missing_events)
2363		*missing_events = next_lost;
2364
2365	return next;
2366}
2367
2368/* Find the next real entry, without updating the iterator itself */
2369struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2370					  int *ent_cpu, u64 *ent_ts)
2371{
2372	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2373}
2374
2375/* Find the next real entry, and increment the iterator to the next entry */
2376void *trace_find_next_entry_inc(struct trace_iterator *iter)
2377{
2378	iter->ent = __find_next_entry(iter, &iter->cpu,
2379				      &iter->lost_events, &iter->ts);
2380
2381	if (iter->ent)
2382		trace_iterator_increment(iter);
2383
2384	return iter->ent ? iter : NULL;
2385}
2386
2387static void trace_consume(struct trace_iterator *iter)
2388{
2389	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2390			    &iter->lost_events);
2391}
2392
2393static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2394{
2395	struct trace_iterator *iter = m->private;
2396	int i = (int)*pos;
2397	void *ent;
2398
2399	WARN_ON_ONCE(iter->leftover);
2400
2401	(*pos)++;
2402
2403	/* can't go backwards */
2404	if (iter->idx > i)
2405		return NULL;
2406
2407	if (iter->idx < 0)
2408		ent = trace_find_next_entry_inc(iter);
2409	else
2410		ent = iter;
2411
2412	while (ent && iter->idx < i)
2413		ent = trace_find_next_entry_inc(iter);
2414
2415	iter->pos = *pos;
2416
2417	return ent;
2418}
2419
2420void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2421{
2422	struct ring_buffer_event *event;
2423	struct ring_buffer_iter *buf_iter;
2424	unsigned long entries = 0;
2425	u64 ts;
2426
2427	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2428
2429	buf_iter = trace_buffer_iter(iter, cpu);
2430	if (!buf_iter)
2431		return;
2432
2433	ring_buffer_iter_reset(buf_iter);
2434
2435	/*
2436	 * With the max latency tracers, it is possible that a reset
2437	 * never took place on a CPU. This shows up as timestamps
2438	 * that are earlier than the start of the buffer.
2439	 */
2440	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2441		if (ts >= iter->trace_buffer->time_start)
2442			break;
2443		entries++;
2444		ring_buffer_read(buf_iter, NULL);
2445	}
2446
2447	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2448}
2449
2450/*
2451 * The current tracer is copied to avoid taking a global lock
2452 * all around.
2453 */
2454static void *s_start(struct seq_file *m, loff_t *pos)
2455{
2456	struct trace_iterator *iter = m->private;
2457	struct trace_array *tr = iter->tr;
2458	int cpu_file = iter->cpu_file;
2459	void *p = NULL;
2460	loff_t l = 0;
2461	int cpu;
2462
2463	/*
2464	 * copy the tracer to avoid using a global lock all around.
2465	 * iter->trace is a copy of current_trace, so the name pointer
2466	 * may be compared instead of using strcmp(), as iter->trace->name
2467	 * will point to the same string as current_trace->name.
2468	 */
2469	mutex_lock(&trace_types_lock);
2470	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2471		*iter->trace = *tr->current_trace;
2472	mutex_unlock(&trace_types_lock);
2473
2474#ifdef CONFIG_TRACER_MAX_TRACE
2475	if (iter->snapshot && iter->trace->use_max_tr)
2476		return ERR_PTR(-EBUSY);
2477#endif
2478
2479	if (!iter->snapshot)
2480		atomic_inc(&trace_record_cmdline_disabled);
2481
2482	if (*pos != iter->pos) {
2483		iter->ent = NULL;
2484		iter->cpu = 0;
2485		iter->idx = -1;
2486
2487		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2488			for_each_tracing_cpu(cpu)
2489				tracing_iter_reset(iter, cpu);
2490		} else
2491			tracing_iter_reset(iter, cpu_file);
2492
2493		iter->leftover = 0;
2494		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2495			;
2496
2497	} else {
2498		/*
2499		 * If we overflowed the seq_file before, then we want
2500		 * to just reuse the trace_seq buffer again.
2501		 */
2502		if (iter->leftover)
2503			p = iter;
2504		else {
2505			l = *pos - 1;
2506			p = s_next(m, p, &l);
2507		}
2508	}
2509
2510	trace_event_read_lock();
2511	trace_access_lock(cpu_file);
2512	return p;
2513}
2514
2515static void s_stop(struct seq_file *m, void *p)
2516{
2517	struct trace_iterator *iter = m->private;
2518
2519#ifdef CONFIG_TRACER_MAX_TRACE
2520	if (iter->snapshot && iter->trace->use_max_tr)
2521		return;
2522#endif
2523
2524	if (!iter->snapshot)
2525		atomic_dec(&trace_record_cmdline_disabled);
2526
2527	trace_access_unlock(iter->cpu_file);
2528	trace_event_read_unlock();
2529}
2530
2531static void
2532get_total_entries(struct trace_buffer *buf,
2533		  unsigned long *total, unsigned long *entries)
2534{
2535	unsigned long count;
2536	int cpu;
2537
2538	*total = 0;
2539	*entries = 0;
2540
2541	for_each_tracing_cpu(cpu) {
2542		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2543		/*
2544		 * If this buffer has skipped entries, then we hold all
2545		 * entries for the trace and we need to ignore the
2546		 * ones before the time stamp.
2547		 */
2548		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2549			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2550			/* total is the same as the entries */
2551			*total += count;
2552		} else
2553			*total += count +
2554				ring_buffer_overrun_cpu(buf->buffer, cpu);
2555		*entries += count;
2556	}
2557}
2558
2559static void print_lat_help_header(struct seq_file *m)
2560{
2561	seq_puts(m, "#                  _------=> CPU#            \n"
2562		    "#                 / _-----=> irqs-off        \n"
2563		    "#                | / _----=> need-resched    \n"
2564		    "#                || / _---=> hardirq/softirq \n"
2565		    "#                ||| / _--=> preempt-depth   \n"
2566		    "#                |||| /     delay            \n"
2567		    "#  cmd     pid   ||||| time  |   caller      \n"
2568		    "#     \\   /      |||||  \\    |   /         \n");
2569}
2570
2571static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2572{
2573	unsigned long total;
2574	unsigned long entries;
2575
2576	get_total_entries(buf, &total, &entries);
2577	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2578		   entries, total, num_online_cpus());
2579	seq_puts(m, "#\n");
2580}
2581
2582static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2583{
2584	print_event_info(buf, m);
2585	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2586		    "#              | |       |          |         |\n");
2587}
2588
2589static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2590{
2591	print_event_info(buf, m);
2592	seq_puts(m, "#                              _-----=> irqs-off\n"
2593		    "#                             / _----=> need-resched\n"
2594		    "#                            | / _---=> hardirq/softirq\n"
2595		    "#                            || / _--=> preempt-depth\n"
2596		    "#                            ||| /     delay\n"
2597		    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2598		    "#              | |       |   ||||       |         |\n");
2599}
2600
2601void
2602print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2603{
2604	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2605	struct trace_buffer *buf = iter->trace_buffer;
2606	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2607	struct tracer *type = iter->trace;
2608	unsigned long entries;
2609	unsigned long total;
2610	const char *name = type->name;
2613
2614	get_total_entries(buf, &total, &entries);
2615
2616	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2617		   name, UTS_RELEASE);
2618	seq_puts(m, "# -----------------------------------"
2619		 "---------------------------------\n");
2620	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2621		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2622		   nsecs_to_usecs(data->saved_latency),
2623		   entries,
2624		   total,
2625		   buf->cpu,
2626#if defined(CONFIG_PREEMPT_NONE)
2627		   "server",
2628#elif defined(CONFIG_PREEMPT_VOLUNTARY)
2629		   "desktop",
2630#elif defined(CONFIG_PREEMPT)
2631		   "preempt",
2632#else
2633		   "unknown",
2634#endif
2635		   /* These are reserved for later use */
2636		   0, 0, 0, 0);
2637#ifdef CONFIG_SMP
2638	seq_printf(m, " #P:%d)\n", num_online_cpus());
2639#else
2640	seq_puts(m, ")\n");
2641#endif
2642	seq_puts(m, "#    -----------------\n");
2643	seq_printf(m, "#    | task: %.16s-%d "
2644		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2645		   data->comm, data->pid,
2646		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2647		   data->policy, data->rt_priority);
2648	seq_puts(m, "#    -----------------\n");
2649
2650	if (data->critical_start) {
2651		seq_puts(m, "#  => started at: ");
2652		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2653		trace_print_seq(m, &iter->seq);
2654		seq_puts(m, "\n#  => ended at:   ");
2655		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2656		trace_print_seq(m, &iter->seq);
2657		seq_puts(m, "\n#\n");
2658	}
2659
2660	seq_puts(m, "#\n");
2661}
2662
2663static void test_cpu_buff_start(struct trace_iterator *iter)
2664{
2665	struct trace_seq *s = &iter->seq;
2666
2667	if (!(trace_flags & TRACE_ITER_ANNOTATE))
2668		return;
2669
2670	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2671		return;
2672
2673	if (cpumask_test_cpu(iter->cpu, iter->started))
2674		return;
2675
2676	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2677		return;
2678
2679	cpumask_set_cpu(iter->cpu, iter->started);
2680
2681	/* Don't print started cpu buffer for the first entry of the trace */
2682	if (iter->idx > 1)
2683		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2684				iter->cpu);
2685}
2686
2687static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2688{
2689	struct trace_seq *s = &iter->seq;
2690	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2691	struct trace_entry *entry;
2692	struct trace_event *event;
2693
2694	entry = iter->ent;
2695
2696	test_cpu_buff_start(iter);
2697
2698	event = ftrace_find_event(entry->type);
2699
2700	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2701		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2702			trace_print_lat_context(iter);
2703		else
2704			trace_print_context(iter);
2705	}
2706
2707	if (trace_seq_has_overflowed(s))
2708		return TRACE_TYPE_PARTIAL_LINE;
2709
2710	if (event)
2711		return event->funcs->trace(iter, sym_flags, event);
2712
2713	trace_seq_printf(s, "Unknown type %d\n", entry->type);
2714
2715	return trace_handle_return(s);
2716}
2717
2718static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2719{
2720	struct trace_seq *s = &iter->seq;
2721	struct trace_entry *entry;
2722	struct trace_event *event;
2723
2724	entry = iter->ent;
2725
2726	if (trace_flags & TRACE_ITER_CONTEXT_INFO)
2727		trace_seq_printf(s, "%d %d %llu ",
2728				 entry->pid, iter->cpu, iter->ts);
2729
2730	if (trace_seq_has_overflowed(s))
2731		return TRACE_TYPE_PARTIAL_LINE;
2732
2733	event = ftrace_find_event(entry->type);
2734	if (event)
2735		return event->funcs->raw(iter, 0, event);
2736
2737	trace_seq_printf(s, "%d ?\n", entry->type);
2738
2739	return trace_handle_return(s);
2740}
2741
2742static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2743{
2744	struct trace_seq *s = &iter->seq;
2745	unsigned char newline = '\n';
2746	struct trace_entry *entry;
2747	struct trace_event *event;
2748
2749	entry = iter->ent;
2750
2751	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2752		SEQ_PUT_HEX_FIELD(s, entry->pid);
2753		SEQ_PUT_HEX_FIELD(s, iter->cpu);
2754		SEQ_PUT_HEX_FIELD(s, iter->ts);
2755		if (trace_seq_has_overflowed(s))
2756			return TRACE_TYPE_PARTIAL_LINE;
2757	}
2758
2759	event = ftrace_find_event(entry->type);
2760	if (event) {
2761		enum print_line_t ret = event->funcs->hex(iter, 0, event);
2762		if (ret != TRACE_TYPE_HANDLED)
2763			return ret;
2764	}
2765
2766	SEQ_PUT_FIELD(s, newline);
2767
2768	return trace_handle_return(s);
2769}
2770
2771static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2772{
2773	struct trace_seq *s = &iter->seq;
2774	struct trace_entry *entry;
2775	struct trace_event *event;
2776
2777	entry = iter->ent;
2778
2779	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2780		SEQ_PUT_FIELD(s, entry->pid);
2781		SEQ_PUT_FIELD(s, iter->cpu);
2782		SEQ_PUT_FIELD(s, iter->ts);
2783		if (trace_seq_has_overflowed(s))
2784			return TRACE_TYPE_PARTIAL_LINE;
2785	}
2786
2787	event = ftrace_find_event(entry->type);
2788	return event ? event->funcs->binary(iter, 0, event) :
2789		TRACE_TYPE_HANDLED;
2790}
2791
2792int trace_empty(struct trace_iterator *iter)
2793{
2794	struct ring_buffer_iter *buf_iter;
2795	int cpu;
2796
2797	/* If we are looking at one CPU buffer, only check that one */
2798	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2799		cpu = iter->cpu_file;
2800		buf_iter = trace_buffer_iter(iter, cpu);
2801		if (buf_iter) {
2802			if (!ring_buffer_iter_empty(buf_iter))
2803				return 0;
2804		} else {
2805			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2806				return 0;
2807		}
2808		return 1;
2809	}
2810
2811	for_each_tracing_cpu(cpu) {
2812		buf_iter = trace_buffer_iter(iter, cpu);
2813		if (buf_iter) {
2814			if (!ring_buffer_iter_empty(buf_iter))
2815				return 0;
2816		} else {
2817			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2818				return 0;
2819		}
2820	}
2821
2822	return 1;
2823}
2824
2825/*  Called with trace_event_read_lock() held. */
2826enum print_line_t print_trace_line(struct trace_iterator *iter)
2827{
2828	enum print_line_t ret;
2829
2830	if (iter->lost_events) {
2831		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2832				 iter->cpu, iter->lost_events);
2833		if (trace_seq_has_overflowed(&iter->seq))
2834			return TRACE_TYPE_PARTIAL_LINE;
2835	}
2836
2837	if (iter->trace && iter->trace->print_line) {
2838		ret = iter->trace->print_line(iter);
2839		if (ret != TRACE_TYPE_UNHANDLED)
2840			return ret;
2841	}
2842
2843	if (iter->ent->type == TRACE_BPUTS &&
2844			trace_flags & TRACE_ITER_PRINTK &&
2845			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2846		return trace_print_bputs_msg_only(iter);
2847
2848	if (iter->ent->type == TRACE_BPRINT &&
2849			trace_flags & TRACE_ITER_PRINTK &&
2850			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2851		return trace_print_bprintk_msg_only(iter);
2852
2853	if (iter->ent->type == TRACE_PRINT &&
2854			trace_flags & TRACE_ITER_PRINTK &&
2855			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2856		return trace_print_printk_msg_only(iter);
2857
2858	if (trace_flags & TRACE_ITER_BIN)
2859		return print_bin_fmt(iter);
2860
2861	if (trace_flags & TRACE_ITER_HEX)
2862		return print_hex_fmt(iter);
2863
2864	if (trace_flags & TRACE_ITER_RAW)
2865		return print_raw_fmt(iter);
2866
2867	return print_trace_fmt(iter);
2868}
2869
2870void trace_latency_header(struct seq_file *m)
2871{
2872	struct trace_iterator *iter = m->private;
2873
2874	/* print nothing if the buffers are empty */
2875	if (trace_empty(iter))
2876		return;
2877
2878	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2879		print_trace_header(m, iter);
2880
2881	if (!(trace_flags & TRACE_ITER_VERBOSE))
2882		print_lat_help_header(m);
2883}
2884
2885void trace_default_header(struct seq_file *m)
2886{
2887	struct trace_iterator *iter = m->private;
2888
2889	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2890		return;
2891
2892	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2893		/* print nothing if the buffers are empty */
2894		if (trace_empty(iter))
2895			return;
2896		print_trace_header(m, iter);
2897		if (!(trace_flags & TRACE_ITER_VERBOSE))
2898			print_lat_help_header(m);
2899	} else {
2900		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2901			if (trace_flags & TRACE_ITER_IRQ_INFO)
2902				print_func_help_header_irq(iter->trace_buffer, m);
2903			else
2904				print_func_help_header(iter->trace_buffer, m);
2905		}
2906	}
2907}
2908
2909static void test_ftrace_alive(struct seq_file *m)
2910{
2911	if (!ftrace_is_dead())
2912		return;
2913	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2914		    "#          MAY BE MISSING FUNCTION EVENTS\n");
2915}
2916
2917#ifdef CONFIG_TRACER_MAX_TRACE
2918static void show_snapshot_main_help(struct seq_file *m)
2919{
2920	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2921		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2922		    "#                      Takes a snapshot of the main buffer.\n"
2923		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2924		    "#                      (Doesn't have to be '2'; works with any number that\n"
2925		    "#                       is not a '0' or '1')\n");
2926}
2927
2928static void show_snapshot_percpu_help(struct seq_file *m)
2929{
2930	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2931#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2932	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2933		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
2934#else
2935	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2936		    "#                     Must use main snapshot file to allocate.\n");
2937#endif
2938	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2939		    "#                      (Doesn't have to be '2'; works with any number that\n"
2940		    "#                       is not a '0' or '1')\n");
2941}
2942
2943static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2944{
2945	if (iter->tr->allocated_snapshot)
2946		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2947	else
2948		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2949
2950	seq_puts(m, "# Snapshot commands:\n");
2951	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2952		show_snapshot_main_help(m);
2953	else
2954		show_snapshot_percpu_help(m);
2955}
2956#else
2957/* Should never be called */
2958static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2959#endif
2960
2961static int s_show(struct seq_file *m, void *v)
2962{
2963	struct trace_iterator *iter = v;
2964	int ret;
2965
2966	if (iter->ent == NULL) {
2967		if (iter->tr) {
2968			seq_printf(m, "# tracer: %s\n", iter->trace->name);
2969			seq_puts(m, "#\n");
2970			test_ftrace_alive(m);
2971		}
2972		if (iter->snapshot && trace_empty(iter))
2973			print_snapshot_help(m, iter);
2974		else if (iter->trace && iter->trace->print_header)
2975			iter->trace->print_header(m);
2976		else
2977			trace_default_header(m);
2978
2979	} else if (iter->leftover) {
2980		/*
2981		 * If we filled the seq_file buffer earlier, we
2982		 * want to just show it now.
2983		 */
2984		ret = trace_print_seq(m, &iter->seq);
2985
2986		/* ret should this time be zero, but you never know */
2987		iter->leftover = ret;
2988
2989	} else {
2990		print_trace_line(iter);
2991		ret = trace_print_seq(m, &iter->seq);
2992		/*
2993		 * If we overflow the seq_file buffer, then it will
2994		 * ask us for this data again at start up.
2995		 * Use that instead.
2996		 *  ret is 0 if seq_file write succeeded.
2997		 *        -1 otherwise.
2998		 */
2999		iter->leftover = ret;
3000	}
3001
3002	return 0;
3003}
3004
3005/*
3006 * Should be used after trace_array_get(), trace_types_lock
3007 * ensures that i_cdev was already initialized.
3008 */
3009static inline int tracing_get_cpu(struct inode *inode)
3010{
3011	if (inode->i_cdev) /* See trace_create_cpu_file() */
3012		return (long)inode->i_cdev - 1;
3013	return RING_BUFFER_ALL_CPUS;
3014}
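
/*
 * Write side assumed by the decoding above (see trace_create_cpu_file()
 * later in this file): the per-cpu files store cpu + 1 in i_cdev, so a
 * NULL i_cdev (the top level files) maps to RING_BUFFER_ALL_CPUS:
 *
 *	inode->i_cdev = (void *)(cpu + 1);
 */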
3015
3016static const struct seq_operations tracer_seq_ops = {
3017	.start		= s_start,
3018	.next		= s_next,
3019	.stop		= s_stop,
3020	.show		= s_show,
3021};
3022
3023static struct trace_iterator *
3024__tracing_open(struct inode *inode, struct file *file, bool snapshot)
3025{
3026	struct trace_array *tr = inode->i_private;
3027	struct trace_iterator *iter;
3028	int cpu;
3029
3030	if (tracing_disabled)
3031		return ERR_PTR(-ENODEV);
3032
3033	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3034	if (!iter)
3035		return ERR_PTR(-ENOMEM);
3036
3037	iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
3038				    GFP_KERNEL);
3039	if (!iter->buffer_iter)
3040		goto release;
3041
3042	/*
3043	 * We make a copy of the current tracer to avoid concurrent
3044	 * changes on it while we are reading.
3045	 */
3046	mutex_lock(&trace_types_lock);
3047	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3048	if (!iter->trace)
3049		goto fail;
3050
3051	*iter->trace = *tr->current_trace;
3052
3053	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3054		goto fail;
3055
3056	iter->tr = tr;
3057
3058#ifdef CONFIG_TRACER_MAX_TRACE
3059	/* Currently only the top directory has a snapshot */
3060	if (tr->current_trace->print_max || snapshot)
3061		iter->trace_buffer = &tr->max_buffer;
3062	else
3063#endif
3064		iter->trace_buffer = &tr->trace_buffer;
3065	iter->snapshot = snapshot;
3066	iter->pos = -1;
3067	iter->cpu_file = tracing_get_cpu(inode);
3068	mutex_init(&iter->mutex);
3069
3070	/* Notify the tracer early; before we stop tracing. */
3071	if (iter->trace && iter->trace->open)
3072		iter->trace->open(iter);
3073
3074	/* Annotate start of buffers if we had overruns */
3075	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3076		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3077
3078	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3079	if (trace_clocks[tr->clock_id].in_ns)
3080		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3081
3082	/* stop the trace while dumping if we are not opening "snapshot" */
3083	if (!iter->snapshot)
3084		tracing_stop_tr(tr);
3085
3086	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3087		for_each_tracing_cpu(cpu) {
3088			iter->buffer_iter[cpu] =
3089				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3090		}
3091		ring_buffer_read_prepare_sync();
3092		for_each_tracing_cpu(cpu) {
3093			ring_buffer_read_start(iter->buffer_iter[cpu]);
3094			tracing_iter_reset(iter, cpu);
3095		}
3096	} else {
3097		cpu = iter->cpu_file;
3098		iter->buffer_iter[cpu] =
3099			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3100		ring_buffer_read_prepare_sync();
3101		ring_buffer_read_start(iter->buffer_iter[cpu]);
3102		tracing_iter_reset(iter, cpu);
3103	}
3104
3105	mutex_unlock(&trace_types_lock);
3106
3107	return iter;
3108
3109 fail:
3110	mutex_unlock(&trace_types_lock);
3111	kfree(iter->trace);
3112	kfree(iter->buffer_iter);
3113release:
3114	seq_release_private(inode, file);
3115	return ERR_PTR(-ENOMEM);
3116}
3117
3118int tracing_open_generic(struct inode *inode, struct file *filp)
3119{
3120	if (tracing_disabled)
3121		return -ENODEV;
3122
3123	filp->private_data = inode->i_private;
3124	return 0;
3125}
3126
3127bool tracing_is_disabled(void)
3128{
3129	return tracing_disabled ? true : false;
3130}
3131
3132/*
3133 * Open and update trace_array ref count.
3134 * Must have the current trace_array passed to it.
3135 */
3136static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3137{
3138	struct trace_array *tr = inode->i_private;
3139
3140	if (tracing_disabled)
3141		return -ENODEV;
3142
3143	if (trace_array_get(tr) < 0)
3144		return -ENODEV;
3145
3146	filp->private_data = inode->i_private;
3147
3148	return 0;
3149}
3150
3151static int tracing_release(struct inode *inode, struct file *file)
3152{
3153	struct trace_array *tr = inode->i_private;
3154	struct seq_file *m = file->private_data;
3155	struct trace_iterator *iter;
3156	int cpu;
3157
3158	if (!(file->f_mode & FMODE_READ)) {
3159		trace_array_put(tr);
3160		return 0;
3161	}
3162
3163	/* Writes do not use seq_file */
3164	iter = m->private;
3165	mutex_lock(&trace_types_lock);
3166
3167	for_each_tracing_cpu(cpu) {
3168		if (iter->buffer_iter[cpu])
3169			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3170	}
3171
3172	if (iter->trace && iter->trace->close)
3173		iter->trace->close(iter);
3174
3175	if (!iter->snapshot)
3176		/* reenable tracing if it was previously enabled */
3177		tracing_start_tr(tr);
3178
3179	__trace_array_put(tr);
3180
3181	mutex_unlock(&trace_types_lock);
3182
3183	mutex_destroy(&iter->mutex);
3184	free_cpumask_var(iter->started);
3185	kfree(iter->trace);
3186	kfree(iter->buffer_iter);
3187	seq_release_private(inode, file);
3188
3189	return 0;
3190}
3191
3192static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3193{
3194	struct trace_array *tr = inode->i_private;
3195
3196	trace_array_put(tr);
3197	return 0;
3198}
3199
3200static int tracing_single_release_tr(struct inode *inode, struct file *file)
3201{
3202	struct trace_array *tr = inode->i_private;
3203
3204	trace_array_put(tr);
3205
3206	return single_release(inode, file);
3207}
3208
3209static int tracing_open(struct inode *inode, struct file *file)
3210{
3211	struct trace_array *tr = inode->i_private;
3212	struct trace_iterator *iter;
3213	int ret = 0;
3214
3215	if (trace_array_get(tr) < 0)
3216		return -ENODEV;
3217
3218	/* If this file was open for write, then erase contents */
3219	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3220		int cpu = tracing_get_cpu(inode);
3221
3222		if (cpu == RING_BUFFER_ALL_CPUS)
3223			tracing_reset_online_cpus(&tr->trace_buffer);
3224		else
3225			tracing_reset(&tr->trace_buffer, cpu);
3226	}
3227
3228	if (file->f_mode & FMODE_READ) {
3229		iter = __tracing_open(inode, file, false);
3230		if (IS_ERR(iter))
3231			ret = PTR_ERR(iter);
3232		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3233			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3234	}
3235
3236	if (ret < 0)
3237		trace_array_put(tr);
3238
3239	return ret;
3240}
3241
3242/*
3243 * Some tracers are not suitable for instance buffers.
3244 * A tracer is always available for the global array (toplevel)
3245 * or if it explicitly states that it is.
3246 */
3247static bool
3248trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3249{
3250	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3251}
3252
3253/* Find the next tracer that this trace array may use */
3254static struct tracer *
3255get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3256{
3257	while (t && !trace_ok_for_array(t, tr))
3258		t = t->next;
3259
3260	return t;
3261}
3262
3263static void *
3264t_next(struct seq_file *m, void *v, loff_t *pos)
3265{
3266	struct trace_array *tr = m->private;
3267	struct tracer *t = v;
3268
3269	(*pos)++;
3270
3271	if (t)
3272		t = get_tracer_for_array(tr, t->next);
3273
3274	return t;
3275}
3276
3277static void *t_start(struct seq_file *m, loff_t *pos)
3278{
3279	struct trace_array *tr = m->private;
3280	struct tracer *t;
3281	loff_t l = 0;
3282
3283	mutex_lock(&trace_types_lock);
3284
3285	t = get_tracer_for_array(tr, trace_types);
3286	for (; t && l < *pos; t = t_next(m, t, &l))
3287		;
3288
3289	return t;
3290}
3291
3292static void t_stop(struct seq_file *m, void *p)
3293{
3294	mutex_unlock(&trace_types_lock);
3295}
3296
3297static int t_show(struct seq_file *m, void *v)
3298{
3299	struct tracer *t = v;
3300
3301	if (!t)
3302		return 0;
3303
3304	seq_puts(m, t->name);
3305	if (t->next)
3306		seq_putc(m, ' ');
3307	else
3308		seq_putc(m, '\n');
3309
3310	return 0;
3311}
3312
3313static const struct seq_operations show_traces_seq_ops = {
3314	.start		= t_start,
3315	.next		= t_next,
3316	.stop		= t_stop,
3317	.show		= t_show,
3318};
3319
3320static int show_traces_open(struct inode *inode, struct file *file)
3321{
3322	struct trace_array *tr = inode->i_private;
3323	struct seq_file *m;
3324	int ret;
3325
3326	if (tracing_disabled)
3327		return -ENODEV;
3328
3329	ret = seq_open(file, &show_traces_seq_ops);
3330	if (ret)
3331		return ret;
3332
3333	m = file->private_data;
3334	m->private = tr;
3335
3336	return 0;
3337}
3338
3339static ssize_t
3340tracing_write_stub(struct file *filp, const char __user *ubuf,
3341		   size_t count, loff_t *ppos)
3342{
3343	return count;
3344}
3345
3346loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3347{
3348	int ret;
3349
3350	if (file->f_mode & FMODE_READ)
3351		ret = seq_lseek(file, offset, whence);
3352	else
3353		file->f_pos = ret = 0;
3354
3355	return ret;
3356}
3357
3358static const struct file_operations tracing_fops = {
3359	.open		= tracing_open,
3360	.read		= seq_read,
3361	.write		= tracing_write_stub,
3362	.llseek		= tracing_lseek,
3363	.release	= tracing_release,
3364};
3365
3366static const struct file_operations show_traces_fops = {
3367	.open		= show_traces_open,
3368	.read		= seq_read,
3369	.release	= seq_release,
3370	.llseek		= seq_lseek,
3371};
3372
3373/*
3374 * The tracer itself will not take this lock, but still we want
3375 * to provide a consistent cpumask to user-space:
3376 */
3377static DEFINE_MUTEX(tracing_cpumask_update_lock);
3378
3379/*
3380 * Temporary storage for the character representation of the
3381 * CPU bitmask (and one more byte for the newline):
3382 */
3383static char mask_str[NR_CPUS + 1];
3384
3385static ssize_t
3386tracing_cpumask_read(struct file *filp, char __user *ubuf,
3387		     size_t count, loff_t *ppos)
3388{
3389	struct trace_array *tr = file_inode(filp)->i_private;
3390	int len;
3391
3392	mutex_lock(&tracing_cpumask_update_lock);
3393
3394	len = snprintf(mask_str, count, "%*pb\n",
3395		       cpumask_pr_args(tr->tracing_cpumask));
3396	if (len >= count) {
3397		count = -EINVAL;
3398		goto out_err;
3399	}
3400	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3401
3402out_err:
3403	mutex_unlock(&tracing_cpumask_update_lock);
3404
3405	return count;
3406}
3407
3408static ssize_t
3409tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3410		      size_t count, loff_t *ppos)
3411{
3412	struct trace_array *tr = file_inode(filp)->i_private;
3413	cpumask_var_t tracing_cpumask_new;
3414	int err, cpu;
3415
3416	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3417		return -ENOMEM;
3418
3419	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3420	if (err)
3421		goto err_unlock;
3422
3423	mutex_lock(&tracing_cpumask_update_lock);
3424
3425	local_irq_disable();
3426	arch_spin_lock(&tr->max_lock);
3427	for_each_tracing_cpu(cpu) {
3428		/*
3429		 * Increase/decrease the disabled counter if we are
3430		 * about to flip a bit in the cpumask:
3431		 */
3432		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3433				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3434			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3435			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3436		}
3437		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3438				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3439			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3440			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3441		}
3442	}
3443	arch_spin_unlock(&tr->max_lock);
3444	local_irq_enable();
3445
3446	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3447
3448	mutex_unlock(&tracing_cpumask_update_lock);
3449	free_cpumask_var(tracing_cpumask_new);
3450
3451	return count;
3452
3453err_unlock:
3454	free_cpumask_var(tracing_cpumask_new);
3455
3456	return err;
3457}
3458
3459static const struct file_operations tracing_cpumask_fops = {
3460	.open		= tracing_open_generic_tr,
3461	.read		= tracing_cpumask_read,
3462	.write		= tracing_cpumask_write,
3463	.release	= tracing_release_generic_tr,
3464	.llseek		= generic_file_llseek,
3465};
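
/*
 * Example (user space, assuming tracefs is mounted at
 * /sys/kernel/debug/tracing): restrict tracing to CPUs 0 and 1 by
 * writing a hex mask to the tracing_cpumask file:
 *
 *	# echo 3 > /sys/kernel/debug/tracing/tracing_cpumask
 *
 * The mask is parsed by cpumask_parse_user(), so the usual hex cpumask
 * format applies; reading the file back shows the active mask.
 */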
3466
3467static int tracing_trace_options_show(struct seq_file *m, void *v)
3468{
3469	struct tracer_opt *trace_opts;
3470	struct trace_array *tr = m->private;
3471	u32 tracer_flags;
3472	int i;
3473
3474	mutex_lock(&trace_types_lock);
3475	tracer_flags = tr->current_trace->flags->val;
3476	trace_opts = tr->current_trace->flags->opts;
3477
3478	for (i = 0; trace_options[i]; i++) {
3479		if (trace_flags & (1 << i))
3480			seq_printf(m, "%s\n", trace_options[i]);
3481		else
3482			seq_printf(m, "no%s\n", trace_options[i]);
3483	}
3484
3485	for (i = 0; trace_opts[i].name; i++) {
3486		if (tracer_flags & trace_opts[i].bit)
3487			seq_printf(m, "%s\n", trace_opts[i].name);
3488		else
3489			seq_printf(m, "no%s\n", trace_opts[i].name);
3490	}
3491	mutex_unlock(&trace_types_lock);
3492
3493	return 0;
3494}
3495
3496static int __set_tracer_option(struct trace_array *tr,
3497			       struct tracer_flags *tracer_flags,
3498			       struct tracer_opt *opts, int neg)
3499{
3500	struct tracer *trace = tr->current_trace;
3501	int ret;
3502
3503	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3504	if (ret)
3505		return ret;
3506
3507	if (neg)
3508		tracer_flags->val &= ~opts->bit;
3509	else
3510		tracer_flags->val |= opts->bit;
3511	return 0;
3512}
3513
3514/* Try to assign a tracer specific option */
3515static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3516{
3517	struct tracer *trace = tr->current_trace;
3518	struct tracer_flags *tracer_flags = trace->flags;
3519	struct tracer_opt *opts = NULL;
3520	int i;
3521
3522	for (i = 0; tracer_flags->opts[i].name; i++) {
3523		opts = &tracer_flags->opts[i];
3524
3525		if (strcmp(cmp, opts->name) == 0)
3526			return __set_tracer_option(tr, trace->flags, opts, neg);
3527	}
3528
3529	return -EINVAL;
3530}
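
/*
 * Minimal sketch (hypothetical tracer, not from this file) of the flag
 * arrays walked above: a tracer publishes its private options through a
 * tracer_opt array, terminated by an empty entry, that set_tracer_option()
 * matches by name:
 *
 *	static struct tracer_opt example_opts[] = {
 *		{ TRACER_OPT(example-verbose, 0x1) },
 *		{ }
 *	};
 *
 *	static struct tracer_flags example_flags = {
 *		.val  = 0,
 *		.opts = example_opts,
 *	};
 */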
3531
3532/* Some tracers require overwrite to stay enabled */
3533int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3534{
3535	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3536		return -1;
3537
3538	return 0;
3539}
3540
3541int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3542{
3543	/* do nothing if flag is already set */
3544	if (!!(trace_flags & mask) == !!enabled)
3545		return 0;
3546
3547	/* Give the tracer a chance to approve the change */
3548	if (tr->current_trace->flag_changed)
3549		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3550			return -EINVAL;
3551
3552	if (enabled)
3553		trace_flags |= mask;
3554	else
3555		trace_flags &= ~mask;
3556
3557	if (mask == TRACE_ITER_RECORD_CMD)
3558		trace_event_enable_cmd_record(enabled);
3559
3560	if (mask == TRACE_ITER_OVERWRITE) {
3561		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3562#ifdef CONFIG_TRACER_MAX_TRACE
3563		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3564#endif
3565	}
3566
3567	if (mask == TRACE_ITER_PRINTK)
3568		trace_printk_start_stop_comm(enabled);
3569
3570	return 0;
3571}
3572
3573static int trace_set_options(struct trace_array *tr, char *option)
3574{
3575	char *cmp;
3576	int neg = 0;
3577	int ret = -ENODEV;
3578	int i;
3579
3580	cmp = strstrip(option);
3581
3582	if (strncmp(cmp, "no", 2) == 0) {
3583		neg = 1;
3584		cmp += 2;
3585	}
3586
3587	mutex_lock(&trace_types_lock);
3588
3589	for (i = 0; trace_options[i]; i++) {
3590		if (strcmp(cmp, trace_options[i]) == 0) {
3591			ret = set_tracer_flag(tr, 1 << i, !neg);
3592			break;
3593		}
3594	}
3595
3596	/* If no option could be set, test the specific tracer options */
3597	if (!trace_options[i])
3598		ret = set_tracer_option(tr, cmp, neg);
3599
3600	mutex_unlock(&trace_types_lock);
3601
3602	return ret;
3603}
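
/*
 * Example (user space): core flags and tracer specific flags are both
 * toggled through the trace_options file, with a "no" prefix clearing
 * the flag:
 *
 *	# echo noprint-parent > /sys/kernel/debug/tracing/trace_options
 *	# echo sym-offset > /sys/kernel/debug/tracing/trace_options
 */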
3604
3605static ssize_t
3606tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3607			size_t cnt, loff_t *ppos)
3608{
3609	struct seq_file *m = filp->private_data;
3610	struct trace_array *tr = m->private;
3611	char buf[64];
3612	int ret;
3613
3614	if (cnt >= sizeof(buf))
3615		return -EINVAL;
3616
3617	if (copy_from_user(&buf, ubuf, cnt))
3618		return -EFAULT;
3619
3620	buf[cnt] = 0;
3621
3622	ret = trace_set_options(tr, buf);
3623	if (ret < 0)
3624		return ret;
3625
3626	*ppos += cnt;
3627
3628	return cnt;
3629}
3630
3631static int tracing_trace_options_open(struct inode *inode, struct file *file)
3632{
3633	struct trace_array *tr = inode->i_private;
3634	int ret;
3635
3636	if (tracing_disabled)
3637		return -ENODEV;
3638
3639	if (trace_array_get(tr) < 0)
3640		return -ENODEV;
3641
3642	ret = single_open(file, tracing_trace_options_show, inode->i_private);
3643	if (ret < 0)
3644		trace_array_put(tr);
3645
3646	return ret;
3647}
3648
3649static const struct file_operations tracing_iter_fops = {
3650	.open		= tracing_trace_options_open,
3651	.read		= seq_read,
3652	.llseek		= seq_lseek,
3653	.release	= tracing_single_release_tr,
3654	.write		= tracing_trace_options_write,
3655};
3656
3657static const char readme_msg[] =
3658	"tracing mini-HOWTO:\n\n"
3659	"# echo 0 > tracing_on : quick way to disable tracing\n"
3660	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3661	" Important files:\n"
3662	"  trace\t\t\t- The static contents of the buffer\n"
3663	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
3664	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3665	"  current_tracer\t- function and latency tracers\n"
3666	"  available_tracers\t- list of configured tracers for current_tracer\n"
3667	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3668	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3669	"  trace_clock\t\t- change the clock used to order events\n"
3670	"       local:   Per cpu clock but may not be synced across CPUs\n"
3671	"      global:   Synced across CPUs but slows tracing down.\n"
3672	"     counter:   Not a clock, but just an increment\n"
3673	"      uptime:   Jiffy counter from time of boot\n"
3674	"        perf:   Same clock that perf events use\n"
3675#ifdef CONFIG_X86_64
3676	"     x86-tsc:   TSC cycle counter\n"
3677#endif
3678	"\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3679	"  tracing_cpumask\t- Limit which CPUs to trace\n"
3680	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3681	"\t\t\t  Remove sub-buffer with rmdir\n"
3682	"  trace_options\t\t- Set format or modify how tracing happens\n"
3683	"\t\t\t  Disable an option by adding the prefix 'no' to the\n"
3684	"\t\t\t  option name\n"
3685	"  saved_cmdlines_size\t- echo the number of comm-pid pairs to keep in the list\n"
3686#ifdef CONFIG_DYNAMIC_FTRACE
3687	"\n  available_filter_functions - list of functions that can be filtered on\n"
3688	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
3689	"\t\t\t  functions\n"
3690	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3691	"\t     modules: Can select a group via module\n"
3692	"\t      Format: :mod:<module-name>\n"
3693	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3694	"\t    triggers: a command to perform when function is hit\n"
3695	"\t      Format: <function>:<trigger>[:count]\n"
3696	"\t     trigger: traceon, traceoff\n"
3697	"\t\t      enable_event:<system>:<event>\n"
3698	"\t\t      disable_event:<system>:<event>\n"
3699#ifdef CONFIG_STACKTRACE
3700	"\t\t      stacktrace\n"
3701#endif
3702#ifdef CONFIG_TRACER_SNAPSHOT
3703	"\t\t      snapshot\n"
3704#endif
3705	"\t\t      dump\n"
3706	"\t\t      cpudump\n"
3707	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3708	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3709	"\t     The first one will disable tracing every time do_fault is hit\n"
3710	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3711	"\t       The first time do_trap is hit and it disables tracing, the\n"
3712	"\t       counter will decrement to 2. If tracing is already disabled,\n"
3713	"\t       the counter will not decrement. It only decrements when the\n"
3714	"\t       trigger did work\n"
3715	"\t     To remove trigger without count:\n"
3716	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3717	"\t     To remove trigger with a count:\n"
3718	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3719	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3720	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3721	"\t    modules: Can select a group via module command :mod:\n"
3722	"\t    Does not accept triggers\n"
3723#endif /* CONFIG_DYNAMIC_FTRACE */
3724#ifdef CONFIG_FUNCTION_TRACER
3725	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3726	"\t\t    (function)\n"
3727#endif
3728#ifdef CONFIG_FUNCTION_GRAPH_TRACER
3729	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3730	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3731	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3732#endif
3733#ifdef CONFIG_TRACER_SNAPSHOT
3734	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3735	"\t\t\t  snapshot buffer. Read the contents for more\n"
3736	"\t\t\t  information\n"
3737#endif
3738#ifdef CONFIG_STACK_TRACER
3739	"  stack_trace\t\t- Shows the max stack trace when active\n"
3740	"  stack_max_size\t- Shows current max stack size that was traced\n"
3741	"\t\t\t  Write into this file to reset the max size (trigger a\n"
3742	"\t\t\t  new trace)\n"
3743#ifdef CONFIG_DYNAMIC_FTRACE
3744	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3745	"\t\t\t  traces\n"
3746#endif
3747#endif /* CONFIG_STACK_TRACER */
3748	"  events/\t\t- Directory containing all trace event subsystems:\n"
3749	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3750	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
3751	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3752	"\t\t\t  events\n"
3753	"      filter\t\t- If set, only events passing filter are traced\n"
3754	"  events/<system>/<event>/\t- Directory containing control files for\n"
3755	"\t\t\t  <event>:\n"
3756	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3757	"      filter\t\t- If set, only events passing filter are traced\n"
3758	"      trigger\t\t- If set, a command to perform when event is hit\n"
3759	"\t    Format: <trigger>[:count][if <filter>]\n"
3760	"\t   trigger: traceon, traceoff\n"
3761	"\t            enable_event:<system>:<event>\n"
3762	"\t            disable_event:<system>:<event>\n"
3763#ifdef CONFIG_STACKTRACE
3764	"\t\t    stacktrace\n"
3765#endif
3766#ifdef CONFIG_TRACER_SNAPSHOT
3767	"\t\t    snapshot\n"
3768#endif
3769	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3770	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3771	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3772	"\t                  events/block/block_unplug/trigger\n"
3773	"\t   The first disables tracing every time block_unplug is hit.\n"
3774	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3775	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3776	"\t     is hit and the 'nr_rq' event field has a value greater than 1.\n"
3777	"\t   Like function triggers, the counter is only decremented if it\n"
3778	"\t    enabled or disabled tracing.\n"
3779	"\t   To remove a trigger without a count:\n"
3780	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3781	"\t   To remove a trigger with a count:\n"
3782	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3783	"\t   Filters can be ignored when removing a trigger.\n"
3784;
3785
3786static ssize_t
3787tracing_readme_read(struct file *filp, char __user *ubuf,
3788		       size_t cnt, loff_t *ppos)
3789{
3790	return simple_read_from_buffer(ubuf, cnt, ppos,
3791					readme_msg, strlen(readme_msg));
3792}
3793
3794static const struct file_operations tracing_readme_fops = {
3795	.open		= tracing_open_generic,
3796	.read		= tracing_readme_read,
3797	.llseek		= generic_file_llseek,
3798};
3799
3800static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3801{
3802	unsigned int *ptr = v;
3803
3804	if (*pos || m->count)
3805		ptr++;
3806
3807	(*pos)++;
3808
3809	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3810	     ptr++) {
3811		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3812			continue;
3813
3814		return ptr;
3815	}
3816
3817	return NULL;
3818}
3819
3820static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3821{
3822	void *v;
3823	loff_t l = 0;
3824
3825	preempt_disable();
3826	arch_spin_lock(&trace_cmdline_lock);
3827
3828	v = &savedcmd->map_cmdline_to_pid[0];
3829	while (l <= *pos) {
3830		v = saved_cmdlines_next(m, v, &l);
3831		if (!v)
3832			return NULL;
3833	}
3834
3835	return v;
3836}
3837
3838static void saved_cmdlines_stop(struct seq_file *m, void *v)
3839{
3840	arch_spin_unlock(&trace_cmdline_lock);
3841	preempt_enable();
3842}
3843
3844static int saved_cmdlines_show(struct seq_file *m, void *v)
3845{
3846	char buf[TASK_COMM_LEN];
3847	unsigned int *pid = v;
3848
3849	__trace_find_cmdline(*pid, buf);
3850	seq_printf(m, "%d %s\n", *pid, buf);
3851	return 0;
3852}
3853
3854static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3855	.start		= saved_cmdlines_start,
3856	.next		= saved_cmdlines_next,
3857	.stop		= saved_cmdlines_stop,
3858	.show		= saved_cmdlines_show,
3859};
3860
3861static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3862{
3863	if (tracing_disabled)
3864		return -ENODEV;
3865
3866	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3867}
3868
3869static const struct file_operations tracing_saved_cmdlines_fops = {
3870	.open		= tracing_saved_cmdlines_open,
3871	.read		= seq_read,
3872	.llseek		= seq_lseek,
3873	.release	= seq_release,
3874};
3875
3876static ssize_t
3877tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3878				 size_t cnt, loff_t *ppos)
3879{
3880	char buf[64];
3881	int r;
3882
3883	arch_spin_lock(&trace_cmdline_lock);
3884	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3885	arch_spin_unlock(&trace_cmdline_lock);
3886
3887	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3888}
3889
3890static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3891{
3892	kfree(s->saved_cmdlines);
3893	kfree(s->map_cmdline_to_pid);
3894	kfree(s);
3895}
3896
3897static int tracing_resize_saved_cmdlines(unsigned int val)
3898{
3899	struct saved_cmdlines_buffer *s, *savedcmd_temp;
3900
3901	s = kmalloc(sizeof(*s), GFP_KERNEL);
3902	if (!s)
3903		return -ENOMEM;
3904
3905	if (allocate_cmdlines_buffer(val, s) < 0) {
3906		kfree(s);
3907		return -ENOMEM;
3908	}
3909
3910	arch_spin_lock(&trace_cmdline_lock);
3911	savedcmd_temp = savedcmd;
3912	savedcmd = s;
3913	arch_spin_unlock(&trace_cmdline_lock);
3914	free_saved_cmdlines_buffer(savedcmd_temp);
3915
3916	return 0;
3917}
3918
3919static ssize_t
3920tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3921				  size_t cnt, loff_t *ppos)
3922{
3923	unsigned long val;
3924	int ret;
3925
3926	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3927	if (ret)
3928		return ret;
3929
	/* must have at least 1 entry and at most PID_MAX_DEFAULT */
3931	if (!val || val > PID_MAX_DEFAULT)
3932		return -EINVAL;
3933
3934	ret = tracing_resize_saved_cmdlines((unsigned int)val);
3935	if (ret < 0)
3936		return ret;
3937
3938	*ppos += cnt;
3939
3940	return cnt;
3941}
3942
3943static const struct file_operations tracing_saved_cmdlines_size_fops = {
3944	.open		= tracing_open_generic,
3945	.read		= tracing_saved_cmdlines_size_read,
3946	.write		= tracing_saved_cmdlines_size_write,
3947};
3948
3949#ifdef CONFIG_TRACE_ENUM_MAP_FILE
3950static union trace_enum_map_item *
3951update_enum_map(union trace_enum_map_item *ptr)
3952{
3953	if (!ptr->map.enum_string) {
3954		if (ptr->tail.next) {
3955			ptr = ptr->tail.next;
3956			/* Set ptr to the next real item (skip head) */
3957			ptr++;
3958		} else
3959			return NULL;
3960	}
3961	return ptr;
3962}
3963
3964static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
3965{
3966	union trace_enum_map_item *ptr = v;
3967
3968	/*
3969	 * Paranoid! If ptr points to end, we don't want to increment past it.
3970	 * This really should never happen.
3971	 */
3972	ptr = update_enum_map(ptr);
3973	if (WARN_ON_ONCE(!ptr))
3974		return NULL;
3975
3976	ptr++;
3977
3978	(*pos)++;
3979
3980	ptr = update_enum_map(ptr);
3981
3982	return ptr;
3983}
3984
3985static void *enum_map_start(struct seq_file *m, loff_t *pos)
3986{
3987	union trace_enum_map_item *v;
3988	loff_t l = 0;
3989
3990	mutex_lock(&trace_enum_mutex);
3991
3992	v = trace_enum_maps;
3993	if (v)
3994		v++;
3995
	while (v && l < *pos)
		v = enum_map_next(m, v, &l);
3999
4000	return v;
4001}
4002
4003static void enum_map_stop(struct seq_file *m, void *v)
4004{
4005	mutex_unlock(&trace_enum_mutex);
4006}
4007
4008static int enum_map_show(struct seq_file *m, void *v)
4009{
4010	union trace_enum_map_item *ptr = v;
4011
4012	seq_printf(m, "%s %ld (%s)\n",
4013		   ptr->map.enum_string, ptr->map.enum_value,
4014		   ptr->map.system);
4015
4016	return 0;
4017}
4018
4019static const struct seq_operations tracing_enum_map_seq_ops = {
4020	.start		= enum_map_start,
4021	.next		= enum_map_next,
4022	.stop		= enum_map_stop,
4023	.show		= enum_map_show,
4024};
4025
4026static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4027{
4028	if (tracing_disabled)
4029		return -ENODEV;
4030
4031	return seq_open(filp, &tracing_enum_map_seq_ops);
4032}
4033
4034static const struct file_operations tracing_enum_map_fops = {
4035	.open		= tracing_enum_map_open,
4036	.read		= seq_read,
4037	.llseek		= seq_lseek,
4038	.release	= seq_release,
4039};
4040
4041static inline union trace_enum_map_item *
4042trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4043{
4044	/* Return tail of array given the head */
4045	return ptr + ptr->head.length + 1;
4046}
4047
4048static void
4049trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4050			   int len)
4051{
4052	struct trace_enum_map **stop;
4053	struct trace_enum_map **map;
4054	union trace_enum_map_item *map_array;
4055	union trace_enum_map_item *ptr;
4056
4057	stop = start + len;
4058
4059	/*
4060	 * The trace_enum_maps contains the map plus a head and tail item,
4061	 * where the head holds the module and length of array, and the
4062	 * tail holds a pointer to the next list.
4063	 */
4064	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4065	if (!map_array) {
4066		pr_warning("Unable to allocate trace enum mapping\n");
4067		return;
4068	}
4069
4070	mutex_lock(&trace_enum_mutex);
4071
4072	if (!trace_enum_maps)
4073		trace_enum_maps = map_array;
4074	else {
4075		ptr = trace_enum_maps;
4076		for (;;) {
4077			ptr = trace_enum_jmp_to_tail(ptr);
4078			if (!ptr->tail.next)
4079				break;
4080			ptr = ptr->tail.next;
4081
4082		}
4083		ptr->tail.next = map_array;
4084	}
4085	map_array->head.mod = mod;
4086	map_array->head.length = len;
4087	map_array++;
4088
4089	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4090		map_array->map = **map;
4091		map_array++;
4092	}
4093	memset(map_array, 0, sizeof(*map_array));
4094
4095	mutex_unlock(&trace_enum_mutex);
4096}
4097
4098static void trace_create_enum_file(struct dentry *d_tracer)
4099{
4100	trace_create_file("enum_map", 0444, d_tracer,
4101			  NULL, &tracing_enum_map_fops);
4102}
4103
4104#else /* CONFIG_TRACE_ENUM_MAP_FILE */
4105static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4106static inline void trace_insert_enum_map_file(struct module *mod,
4107			      struct trace_enum_map **start, int len) { }
4108#endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4109
4110static void trace_insert_enum_map(struct module *mod,
4111				  struct trace_enum_map **start, int len)
4112{
4113	struct trace_enum_map **map;
4114
4115	if (len <= 0)
4116		return;
4117
4118	map = start;
4119
4120	trace_event_enum_update(map, len);
4121
4122	trace_insert_enum_map_file(mod, start, len);
4123}
4124
4125static ssize_t
4126tracing_set_trace_read(struct file *filp, char __user *ubuf,
4127		       size_t cnt, loff_t *ppos)
4128{
4129	struct trace_array *tr = filp->private_data;
4130	char buf[MAX_TRACER_SIZE+2];
4131	int r;
4132
4133	mutex_lock(&trace_types_lock);
4134	r = sprintf(buf, "%s\n", tr->current_trace->name);
4135	mutex_unlock(&trace_types_lock);
4136
4137	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4138}
4139
4140int tracer_init(struct tracer *t, struct trace_array *tr)
4141{
4142	tracing_reset_online_cpus(&tr->trace_buffer);
4143	return t->init(tr);
4144}
4145
4146static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4147{
4148	int cpu;
4149
4150	for_each_tracing_cpu(cpu)
4151		per_cpu_ptr(buf->data, cpu)->entries = val;
4152}
4153
4154#ifdef CONFIG_TRACER_MAX_TRACE
/* resize @trace_buf's buffer to the size of @size_buf's entries */
4156static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4157					struct trace_buffer *size_buf, int cpu_id)
4158{
4159	int cpu, ret = 0;
4160
4161	if (cpu_id == RING_BUFFER_ALL_CPUS) {
4162		for_each_tracing_cpu(cpu) {
4163			ret = ring_buffer_resize(trace_buf->buffer,
4164				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4165			if (ret < 0)
4166				break;
4167			per_cpu_ptr(trace_buf->data, cpu)->entries =
4168				per_cpu_ptr(size_buf->data, cpu)->entries;
4169		}
4170	} else {
4171		ret = ring_buffer_resize(trace_buf->buffer,
4172				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4173		if (ret == 0)
4174			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4175				per_cpu_ptr(size_buf->data, cpu_id)->entries;
4176	}
4177
4178	return ret;
4179}
4180#endif /* CONFIG_TRACER_MAX_TRACE */
4181
4182static int __tracing_resize_ring_buffer(struct trace_array *tr,
4183					unsigned long size, int cpu)
4184{
4185	int ret;
4186
4187	/*
4188	 * If kernel or user changes the size of the ring buffer
4189	 * we use the size that was given, and we can forget about
4190	 * expanding it later.
4191	 */
4192	ring_buffer_expanded = true;
4193
4194	/* May be called before buffers are initialized */
4195	if (!tr->trace_buffer.buffer)
4196		return 0;
4197
4198	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4199	if (ret < 0)
4200		return ret;
4201
4202#ifdef CONFIG_TRACER_MAX_TRACE
4203	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4204	    !tr->current_trace->use_max_tr)
4205		goto out;
4206
4207	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4208	if (ret < 0) {
4209		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4210						     &tr->trace_buffer, cpu);
4211		if (r < 0) {
4212			/*
			 * AARGH! We are left with differently
			 * sized buffers!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snapshot. We succeeded in
			 * updating the size of the main buffer, but
			 * failed to update the size of the max buffer.
			 * Then, when we tried to reset the main buffer
			 * back to its original size, that failed too.
			 * This is very unlikely to happen, but if it
			 * does, warn and kill all tracing.
4225			 */
4226			WARN_ON(1);
4227			tracing_disabled = 1;
4228		}
4229		return ret;
4230	}
4231
4232	if (cpu == RING_BUFFER_ALL_CPUS)
4233		set_buffer_entries(&tr->max_buffer, size);
4234	else
4235		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4236
4237 out:
4238#endif /* CONFIG_TRACER_MAX_TRACE */
4239
4240	if (cpu == RING_BUFFER_ALL_CPUS)
4241		set_buffer_entries(&tr->trace_buffer, size);
4242	else
4243		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4244
4245	return ret;
4246}
4247
4248static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4249					  unsigned long size, int cpu_id)
4250{
4251	int ret = size;
4252
4253	mutex_lock(&trace_types_lock);
4254
4255	if (cpu_id != RING_BUFFER_ALL_CPUS) {
4256		/* make sure, this cpu is enabled in the mask */
4257		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4258			ret = -EINVAL;
4259			goto out;
4260		}
4261	}
4262
4263	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4264	if (ret < 0)
4265		ret = -ENOMEM;
4266
4267out:
4268	mutex_unlock(&trace_types_lock);
4269
4270	return ret;
4271}
4272
4273
4274/**
4275 * tracing_update_buffers - used by tracing facility to expand ring buffers
4276 *
 * To save memory on systems where tracing is compiled in but never used,
 * the ring buffers are initially set to a minimum size. Once a user
 * starts to use the tracing facility, the buffers need to grow to their
 * default size.
4281 *
4282 * This function is to be called when a tracer is about to be used.
4283 */
4284int tracing_update_buffers(void)
4285{
4286	int ret = 0;
4287
4288	mutex_lock(&trace_types_lock);
4289	if (!ring_buffer_expanded)
4290		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4291						RING_BUFFER_ALL_CPUS);
4292	mutex_unlock(&trace_types_lock);
4293
4294	return ret;
4295}
4296
4297struct trace_option_dentry;
4298
4299static struct trace_option_dentry *
4300create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4301
4302static void
4303destroy_trace_option_files(struct trace_option_dentry *topts);
4304
4305/*
4306 * Used to clear out the tracer before deletion of an instance.
4307 * Must have trace_types_lock held.
4308 */
4309static void tracing_set_nop(struct trace_array *tr)
4310{
4311	if (tr->current_trace == &nop_trace)
4312		return;
4313
4314	tr->current_trace->enabled--;
4315
4316	if (tr->current_trace->reset)
4317		tr->current_trace->reset(tr);
4318
4319	tr->current_trace = &nop_trace;
4320}
4321
4322static void update_tracer_options(struct trace_array *tr, struct tracer *t)
4323{
4324	static struct trace_option_dentry *topts;
4325
4326	/* Only enable if the directory has been created already. */
4327	if (!tr->dir)
4328		return;
4329
4330	/* Currently, only the top instance has options */
4331	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL))
4332		return;
4333
4334	destroy_trace_option_files(topts);
4335	topts = create_trace_option_files(tr, t);
4336}
4337
4338static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4339{
4340	struct tracer *t;
4341#ifdef CONFIG_TRACER_MAX_TRACE
4342	bool had_max_tr;
4343#endif
4344	int ret = 0;
4345
4346	mutex_lock(&trace_types_lock);
4347
4348	if (!ring_buffer_expanded) {
4349		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4350						RING_BUFFER_ALL_CPUS);
4351		if (ret < 0)
4352			goto out;
4353		ret = 0;
4354	}
4355
4356	for (t = trace_types; t; t = t->next) {
4357		if (strcmp(t->name, buf) == 0)
4358			break;
4359	}
4360	if (!t) {
4361		ret = -EINVAL;
4362		goto out;
4363	}
4364	if (t == tr->current_trace)
4365		goto out;
4366
4367	/* Some tracers are only allowed for the top level buffer */
4368	if (!trace_ok_for_array(t, tr)) {
4369		ret = -EINVAL;
4370		goto out;
4371	}
4372
4373	/* If trace pipe files are being read, we can't change the tracer */
4374	if (tr->current_trace->ref) {
4375		ret = -EBUSY;
4376		goto out;
4377	}
4378
4379	trace_branch_disable();
4380
4381	tr->current_trace->enabled--;
4382
4383	if (tr->current_trace->reset)
4384		tr->current_trace->reset(tr);
4385
4386	/* Current trace needs to be nop_trace before synchronize_sched */
4387	tr->current_trace = &nop_trace;
4388
4389#ifdef CONFIG_TRACER_MAX_TRACE
4390	had_max_tr = tr->allocated_snapshot;
4391
4392	if (had_max_tr && !t->use_max_tr) {
4393		/*
4394		 * We need to make sure that the update_max_tr sees that
4395		 * current_trace changed to nop_trace to keep it from
4396		 * swapping the buffers after we resize it.
		 * The update_max_tr is called with interrupts disabled
		 * so a synchronize_sched() is sufficient.
4399		 */
4400		synchronize_sched();
4401		free_snapshot(tr);
4402	}
4403#endif
4404	update_tracer_options(tr, t);
4405
4406#ifdef CONFIG_TRACER_MAX_TRACE
4407	if (t->use_max_tr && !had_max_tr) {
4408		ret = alloc_snapshot(tr);
4409		if (ret < 0)
4410			goto out;
4411	}
4412#endif
4413
4414	if (t->init) {
4415		ret = tracer_init(t, tr);
4416		if (ret)
4417			goto out;
4418	}
4419
4420	tr->current_trace = t;
4421	tr->current_trace->enabled++;
4422	trace_branch_enable(tr);
4423 out:
4424	mutex_unlock(&trace_types_lock);
4425
4426	return ret;
4427}
4428
4429static ssize_t
4430tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4431			size_t cnt, loff_t *ppos)
4432{
4433	struct trace_array *tr = filp->private_data;
4434	char buf[MAX_TRACER_SIZE+1];
4435	int i;
4436	size_t ret;
4437	int err;
4438
4439	ret = cnt;
4440
4441	if (cnt > MAX_TRACER_SIZE)
4442		cnt = MAX_TRACER_SIZE;
4443
4444	if (copy_from_user(&buf, ubuf, cnt))
4445		return -EFAULT;
4446
4447	buf[cnt] = 0;
4448
	/* strip trailing whitespace */
4450	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4451		buf[i] = 0;
4452
4453	err = tracing_set_tracer(tr, buf);
4454	if (err)
4455		return err;
4456
4457	*ppos += ret;
4458
4459	return ret;
4460}
4461
4462static ssize_t
4463tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4464		   size_t cnt, loff_t *ppos)
4465{
4466	char buf[64];
4467	int r;
4468
4469	r = snprintf(buf, sizeof(buf), "%ld\n",
4470		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4471	if (r > sizeof(buf))
4472		r = sizeof(buf);
4473	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4474}
4475
4476static ssize_t
4477tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4478		    size_t cnt, loff_t *ppos)
4479{
4480	unsigned long val;
4481	int ret;
4482
4483	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4484	if (ret)
4485		return ret;
4486
4487	*ptr = val * 1000;
4488
4489	return cnt;
4490}
4491
4492static ssize_t
4493tracing_thresh_read(struct file *filp, char __user *ubuf,
4494		    size_t cnt, loff_t *ppos)
4495{
4496	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4497}
4498
4499static ssize_t
4500tracing_thresh_write(struct file *filp, const char __user *ubuf,
4501		     size_t cnt, loff_t *ppos)
4502{
4503	struct trace_array *tr = filp->private_data;
4504	int ret;
4505
4506	mutex_lock(&trace_types_lock);
4507	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4508	if (ret < 0)
4509		goto out;
4510
4511	if (tr->current_trace->update_thresh) {
4512		ret = tr->current_trace->update_thresh(tr);
4513		if (ret < 0)
4514			goto out;
4515	}
4516
4517	ret = cnt;
4518out:
4519	mutex_unlock(&trace_types_lock);
4520
4521	return ret;
4522}
4523
4524static ssize_t
4525tracing_max_lat_read(struct file *filp, char __user *ubuf,
4526		     size_t cnt, loff_t *ppos)
4527{
4528	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4529}
4530
4531static ssize_t
4532tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4533		      size_t cnt, loff_t *ppos)
4534{
4535	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4536}
4537
4538static int tracing_open_pipe(struct inode *inode, struct file *filp)
4539{
4540	struct trace_array *tr = inode->i_private;
4541	struct trace_iterator *iter;
4542	int ret = 0;
4543
4544	if (tracing_disabled)
4545		return -ENODEV;
4546
4547	if (trace_array_get(tr) < 0)
4548		return -ENODEV;
4549
4550	mutex_lock(&trace_types_lock);
4551
4552	/* create a buffer to store the information to pass to userspace */
4553	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4554	if (!iter) {
4555		ret = -ENOMEM;
4556		__trace_array_put(tr);
4557		goto out;
4558	}
4559
4560	trace_seq_init(&iter->seq);
4561	iter->trace = tr->current_trace;
4562
4563	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4564		ret = -ENOMEM;
4565		goto fail;
4566	}
4567
4568	/* trace pipe does not show start of buffer */
4569	cpumask_setall(iter->started);
4570
4571	if (trace_flags & TRACE_ITER_LATENCY_FMT)
4572		iter->iter_flags |= TRACE_FILE_LAT_FMT;
4573
4574	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4575	if (trace_clocks[tr->clock_id].in_ns)
4576		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4577
4578	iter->tr = tr;
4579	iter->trace_buffer = &tr->trace_buffer;
4580	iter->cpu_file = tracing_get_cpu(inode);
4581	mutex_init(&iter->mutex);
4582	filp->private_data = iter;
4583
4584	if (iter->trace->pipe_open)
4585		iter->trace->pipe_open(iter);
4586
4587	nonseekable_open(inode, filp);
4588
4589	tr->current_trace->ref++;
4590out:
4591	mutex_unlock(&trace_types_lock);
4592	return ret;
4593
fail:
	/* iter->trace only points at the current tracer; don't free it */
	kfree(iter);
4597	__trace_array_put(tr);
4598	mutex_unlock(&trace_types_lock);
4599	return ret;
4600}
4601
4602static int tracing_release_pipe(struct inode *inode, struct file *file)
4603{
4604	struct trace_iterator *iter = file->private_data;
4605	struct trace_array *tr = inode->i_private;
4606
4607	mutex_lock(&trace_types_lock);
4608
4609	tr->current_trace->ref--;
4610
4611	if (iter->trace->pipe_close)
4612		iter->trace->pipe_close(iter);
4613
4614	mutex_unlock(&trace_types_lock);
4615
4616	free_cpumask_var(iter->started);
4617	mutex_destroy(&iter->mutex);
4618	kfree(iter);
4619
4620	trace_array_put(tr);
4621
4622	return 0;
4623}
4624
4625static unsigned int
4626trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4627{
4628	/* Iterators are static, they should be filled or empty */
4629	if (trace_buffer_iter(iter, iter->cpu_file))
4630		return POLLIN | POLLRDNORM;
4631
4632	if (trace_flags & TRACE_ITER_BLOCK)
4633		/*
4634		 * Always select as readable when in blocking mode
4635		 */
4636		return POLLIN | POLLRDNORM;
4637	else
4638		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4639					     filp, poll_table);
4640}
4641
4642static unsigned int
4643tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4644{
4645	struct trace_iterator *iter = filp->private_data;
4646
4647	return trace_poll(iter, filp, poll_table);
4648}
4649
4650/* Must be called with iter->mutex held. */
4651static int tracing_wait_pipe(struct file *filp)
4652{
4653	struct trace_iterator *iter = filp->private_data;
4654	int ret;
4655
4656	while (trace_empty(iter)) {
4657
4658		if ((filp->f_flags & O_NONBLOCK)) {
4659			return -EAGAIN;
4660		}
4661
4662		/*
		 * We block until we have read something and tracing has
		 * been disabled. We still block if tracing is disabled but
		 * we have never read anything. This allows a user to cat
		 * this file, and then enable tracing. But after we have
		 * read something, we give an EOF when tracing is disabled
		 * again.
4668		 *
4669		 * iter->pos will be 0 if we haven't read anything.
4670		 */
4671		if (!tracing_is_on() && iter->pos)
4672			break;
4673
4674		mutex_unlock(&iter->mutex);
4675
4676		ret = wait_on_pipe(iter, false);
4677
4678		mutex_lock(&iter->mutex);
4679
4680		if (ret)
4681			return ret;
4682	}
4683
4684	return 1;
4685}
4686
4687/*
4688 * Consumer reader.
4689 */
4690static ssize_t
4691tracing_read_pipe(struct file *filp, char __user *ubuf,
4692		  size_t cnt, loff_t *ppos)
4693{
4694	struct trace_iterator *iter = filp->private_data;
4695	ssize_t sret;
4696
4697	/* return any leftover data */
4698	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4699	if (sret != -EBUSY)
4700		return sret;
4701
4702	trace_seq_init(&iter->seq);
4703
4704	/*
	 * Avoid more than one consumer on a single file descriptor.
	 * This is just a matter of trace coherency; the ring buffer itself
	 * is protected.
4708	 */
4709	mutex_lock(&iter->mutex);
4710	if (iter->trace->read) {
4711		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4712		if (sret)
4713			goto out;
4714	}
4715
4716waitagain:
4717	sret = tracing_wait_pipe(filp);
4718	if (sret <= 0)
4719		goto out;
4720
4721	/* stop when tracing is finished */
4722	if (trace_empty(iter)) {
4723		sret = 0;
4724		goto out;
4725	}
4726
4727	if (cnt >= PAGE_SIZE)
4728		cnt = PAGE_SIZE - 1;
4729
4730	/* reset all but tr, trace, and overruns */
4731	memset(&iter->seq, 0,
4732	       sizeof(struct trace_iterator) -
4733	       offsetof(struct trace_iterator, seq));
4734	cpumask_clear(iter->started);
4735	iter->pos = -1;
4736
4737	trace_event_read_lock();
4738	trace_access_lock(iter->cpu_file);
4739	while (trace_find_next_entry_inc(iter) != NULL) {
4740		enum print_line_t ret;
4741		int save_len = iter->seq.seq.len;
4742
4743		ret = print_trace_line(iter);
4744		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4745			/* don't print partial lines */
4746			iter->seq.seq.len = save_len;
4747			break;
4748		}
4749		if (ret != TRACE_TYPE_NO_CONSUME)
4750			trace_consume(iter);
4751
4752		if (trace_seq_used(&iter->seq) >= cnt)
4753			break;
4754
4755		/*
		 * Setting the full flag means we reached the trace_seq
		 * buffer size, and we should have left via the partial-line
		 * condition above. If the flag is set here, one of the
		 * trace_seq_* functions is not being used properly.
4759		 */
4760		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4761			  iter->ent->type);
4762	}
4763	trace_access_unlock(iter->cpu_file);
4764	trace_event_read_unlock();
4765
4766	/* Now copy what we have to the user */
4767	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4768	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4769		trace_seq_init(&iter->seq);
4770
4771	/*
4772	 * If there was nothing to send to user, in spite of consuming trace
4773	 * entries, go back to wait for more entries.
4774	 */
4775	if (sret == -EBUSY)
4776		goto waitagain;
4777
4778out:
4779	mutex_unlock(&iter->mutex);
4780
4781	return sret;
4782}
4783
4784static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4785				     unsigned int idx)
4786{
4787	__free_page(spd->pages[idx]);
4788}
4789
4790static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4791	.can_merge		= 0,
4792	.confirm		= generic_pipe_buf_confirm,
4793	.release		= generic_pipe_buf_release,
4794	.steal			= generic_pipe_buf_steal,
4795	.get			= generic_pipe_buf_get,
4796};
4797
4798static size_t
4799tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4800{
4801	size_t count;
4802	int save_len;
4803	int ret;
4804
4805	/* Seq buffer is page-sized, exactly what we need. */
4806	for (;;) {
4807		save_len = iter->seq.seq.len;
4808		ret = print_trace_line(iter);
4809
4810		if (trace_seq_has_overflowed(&iter->seq)) {
4811			iter->seq.seq.len = save_len;
4812			break;
4813		}
4814
4815		/*
		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
		 * should only be returned when iter->seq has overflowed,
		 * which was handled above. But check it anyway to be safe.
4819		 */
4820		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4821			iter->seq.seq.len = save_len;
4822			break;
4823		}
4824
4825		count = trace_seq_used(&iter->seq) - save_len;
4826		if (rem < count) {
4827			rem = 0;
4828			iter->seq.seq.len = save_len;
4829			break;
4830		}
4831
4832		if (ret != TRACE_TYPE_NO_CONSUME)
4833			trace_consume(iter);
4834		rem -= count;
4835		if (!trace_find_next_entry_inc(iter))	{
4836			rem = 0;
4837			iter->ent = NULL;
4838			break;
4839		}
4840	}
4841
4842	return rem;
4843}
4844
4845static ssize_t tracing_splice_read_pipe(struct file *filp,
4846					loff_t *ppos,
4847					struct pipe_inode_info *pipe,
4848					size_t len,
4849					unsigned int flags)
4850{
4851	struct page *pages_def[PIPE_DEF_BUFFERS];
4852	struct partial_page partial_def[PIPE_DEF_BUFFERS];
4853	struct trace_iterator *iter = filp->private_data;
4854	struct splice_pipe_desc spd = {
4855		.pages		= pages_def,
4856		.partial	= partial_def,
4857		.nr_pages	= 0, /* This gets updated below. */
4858		.nr_pages_max	= PIPE_DEF_BUFFERS,
4859		.flags		= flags,
4860		.ops		= &tracing_pipe_buf_ops,
4861		.spd_release	= tracing_spd_release_pipe,
4862	};
4863	ssize_t ret;
4864	size_t rem;
4865	unsigned int i;
4866
4867	if (splice_grow_spd(pipe, &spd))
4868		return -ENOMEM;
4869
4870	mutex_lock(&iter->mutex);
4871
4872	if (iter->trace->splice_read) {
4873		ret = iter->trace->splice_read(iter, filp,
4874					       ppos, pipe, len, flags);
4875		if (ret)
4876			goto out_err;
4877	}
4878
4879	ret = tracing_wait_pipe(filp);
4880	if (ret <= 0)
4881		goto out_err;
4882
4883	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4884		ret = -EFAULT;
4885		goto out_err;
4886	}
4887
4888	trace_event_read_lock();
4889	trace_access_lock(iter->cpu_file);
4890
4891	/* Fill as many pages as possible. */
4892	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4893		spd.pages[i] = alloc_page(GFP_KERNEL);
4894		if (!spd.pages[i])
4895			break;
4896
4897		rem = tracing_fill_pipe_page(rem, iter);
4898
4899		/* Copy the data into the page, so we can start over. */
4900		ret = trace_seq_to_buffer(&iter->seq,
4901					  page_address(spd.pages[i]),
4902					  trace_seq_used(&iter->seq));
4903		if (ret < 0) {
4904			__free_page(spd.pages[i]);
4905			break;
4906		}
4907		spd.partial[i].offset = 0;
4908		spd.partial[i].len = trace_seq_used(&iter->seq);
4909
4910		trace_seq_init(&iter->seq);
4911	}
4912
4913	trace_access_unlock(iter->cpu_file);
4914	trace_event_read_unlock();
4915	mutex_unlock(&iter->mutex);
4916
4917	spd.nr_pages = i;
4918
4919	if (i)
4920		ret = splice_to_pipe(pipe, &spd);
4921	else
4922		ret = 0;
4923out:
4924	splice_shrink_spd(&spd);
4925	return ret;
4926
4927out_err:
4928	mutex_unlock(&iter->mutex);
4929	goto out;
4930}
4931
4932static ssize_t
4933tracing_entries_read(struct file *filp, char __user *ubuf,
4934		     size_t cnt, loff_t *ppos)
4935{
4936	struct inode *inode = file_inode(filp);
4937	struct trace_array *tr = inode->i_private;
4938	int cpu = tracing_get_cpu(inode);
4939	char buf[64];
4940	int r = 0;
4941	ssize_t ret;
4942
4943	mutex_lock(&trace_types_lock);
4944
4945	if (cpu == RING_BUFFER_ALL_CPUS) {
4946		int cpu, buf_size_same;
4947		unsigned long size;
4948
4949		size = 0;
4950		buf_size_same = 1;
4951		/* check if all cpu sizes are same */
4952		for_each_tracing_cpu(cpu) {
4953			/* fill in the size from first enabled cpu */
4954			if (size == 0)
4955				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4956			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4957				buf_size_same = 0;
4958				break;
4959			}
4960		}
4961
4962		if (buf_size_same) {
4963			if (!ring_buffer_expanded)
4964				r = sprintf(buf, "%lu (expanded: %lu)\n",
4965					    size >> 10,
4966					    trace_buf_size >> 10);
4967			else
4968				r = sprintf(buf, "%lu\n", size >> 10);
4969		} else
4970			r = sprintf(buf, "X\n");
4971	} else
4972		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4973
4974	mutex_unlock(&trace_types_lock);
4975
4976	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4977	return ret;
4978}
4979
4980static ssize_t
4981tracing_entries_write(struct file *filp, const char __user *ubuf,
4982		      size_t cnt, loff_t *ppos)
4983{
4984	struct inode *inode = file_inode(filp);
4985	struct trace_array *tr = inode->i_private;
4986	unsigned long val;
4987	int ret;
4988
4989	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4990	if (ret)
4991		return ret;
4992
4993	/* must have at least 1 entry */
4994	if (!val)
4995		return -EINVAL;
4996
4997	/* value is in KB */
4998	val <<= 10;
4999	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5000	if (ret < 0)
5001		return ret;
5002
5003	*ppos += cnt;
5004
5005	return cnt;
5006}
5007
5008static ssize_t
5009tracing_total_entries_read(struct file *filp, char __user *ubuf,
5010				size_t cnt, loff_t *ppos)
5011{
5012	struct trace_array *tr = filp->private_data;
5013	char buf[64];
5014	int r, cpu;
5015	unsigned long size = 0, expanded_size = 0;
5016
5017	mutex_lock(&trace_types_lock);
5018	for_each_tracing_cpu(cpu) {
5019		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5020		if (!ring_buffer_expanded)
5021			expanded_size += trace_buf_size >> 10;
5022	}
5023	if (ring_buffer_expanded)
5024		r = sprintf(buf, "%lu\n", size);
5025	else
5026		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5027	mutex_unlock(&trace_types_lock);
5028
5029	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5030}
5031
5032static ssize_t
5033tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5034			  size_t cnt, loff_t *ppos)
5035{
5036	/*
	 * There is no need to read what the user has written; this function
	 * only exists so that using "echo" on the file does not return an
	 * error.
5039	 */
5040
5041	*ppos += cnt;
5042
5043	return cnt;
5044}
5045
5046static int
5047tracing_free_buffer_release(struct inode *inode, struct file *filp)
5048{
5049	struct trace_array *tr = inode->i_private;
5050
	/* disable tracing? */
5052	if (trace_flags & TRACE_ITER_STOP_ON_FREE)
5053		tracer_tracing_off(tr);
5054	/* resize the ring buffer to 0 */
5055	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5056
5057	trace_array_put(tr);
5058
5059	return 0;
5060}
5061
5062static ssize_t
5063tracing_mark_write(struct file *filp, const char __user *ubuf,
5064					size_t cnt, loff_t *fpos)
5065{
5066	unsigned long addr = (unsigned long)ubuf;
5067	struct trace_array *tr = filp->private_data;
5068	struct ring_buffer_event *event;
5069	struct ring_buffer *buffer;
5070	struct print_entry *entry;
5071	unsigned long irq_flags;
5072	struct page *pages[2];
5073	void *map_page[2];
5074	int nr_pages = 1;
5075	ssize_t written;
5076	int offset;
5077	int size;
5078	int len;
5079	int ret;
5080	int i;
5081
5082	if (tracing_disabled)
5083		return -EINVAL;
5084
5085	if (!(trace_flags & TRACE_ITER_MARKERS))
5086		return -EINVAL;
5087
5088	if (cnt > TRACE_BUF_SIZE)
5089		cnt = TRACE_BUF_SIZE;
5090
5091	/*
5092	 * Userspace is injecting traces into the kernel trace buffer.
5093	 * We want to be as non intrusive as possible.
5094	 * To do so, we do not want to allocate any special buffers
5095	 * or take any locks, but instead write the userspace data
5096	 * straight into the ring buffer.
5097	 *
	 * First we need to pin the userspace buffer into memory.
	 * It most likely already is, because userspace just referenced it,
	 * but there is no guarantee. By using get_user_pages_fast()
5101	 * and kmap_atomic/kunmap_atomic() we can get access to the
5102	 * pages directly. We then write the data directly into the
5103	 * ring buffer.
5104	 */
5105	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5106
5107	/* check if we cross pages */
5108	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5109		nr_pages = 2;
5110
5111	offset = addr & (PAGE_SIZE - 1);
5112	addr &= PAGE_MASK;
5113
5114	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5115	if (ret < nr_pages) {
5116		while (--ret >= 0)
5117			put_page(pages[ret]);
5118		written = -EFAULT;
5119		goto out;
5120	}
5121
5122	for (i = 0; i < nr_pages; i++)
5123		map_page[i] = kmap_atomic(pages[i]);
5124
5125	local_save_flags(irq_flags);
5126	size = sizeof(*entry) + cnt + 2; /* possible \n added */
5127	buffer = tr->trace_buffer.buffer;
5128	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5129					  irq_flags, preempt_count());
5130	if (!event) {
5131		/* Ring buffer disabled, return as if not open for write */
5132		written = -EBADF;
5133		goto out_unlock;
5134	}
5135
5136	entry = ring_buffer_event_data(event);
5137	entry->ip = _THIS_IP_;
5138
5139	if (nr_pages == 2) {
5140		len = PAGE_SIZE - offset;
5141		memcpy(&entry->buf, map_page[0] + offset, len);
5142		memcpy(&entry->buf[len], map_page[1], cnt - len);
5143	} else
5144		memcpy(&entry->buf, map_page[0] + offset, cnt);
5145
5146	if (entry->buf[cnt - 1] != '\n') {
5147		entry->buf[cnt] = '\n';
5148		entry->buf[cnt + 1] = '\0';
5149	} else
5150		entry->buf[cnt] = '\0';
5151
5152	__buffer_unlock_commit(buffer, event);
5153
5154	written = cnt;
5155
5156	*fpos += written;
5157
5158 out_unlock:
5159	for (i = nr_pages - 1; i >= 0; i--) {
5160		kunmap_atomic(map_page[i]);
5161		put_page(pages[i]);
5162	}
5163 out:
5164	return written;
5165}
5166
5167static int tracing_clock_show(struct seq_file *m, void *v)
5168{
5169	struct trace_array *tr = m->private;
5170	int i;
5171
5172	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5173		seq_printf(m,
5174			"%s%s%s%s", i ? " " : "",
5175			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5176			i == tr->clock_id ? "]" : "");
5177	seq_putc(m, '\n');
5178
5179	return 0;
5180}
5181
5182static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5183{
5184	int i;
5185
5186	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5187		if (strcmp(trace_clocks[i].name, clockstr) == 0)
5188			break;
5189	}
5190	if (i == ARRAY_SIZE(trace_clocks))
5191		return -EINVAL;
5192
5193	mutex_lock(&trace_types_lock);
5194
5195	tr->clock_id = i;
5196
5197	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5198
5199	/*
5200	 * New clock may not be consistent with the previous clock.
5201	 * Reset the buffer so that it doesn't have incomparable timestamps.
5202	 */
5203	tracing_reset_online_cpus(&tr->trace_buffer);
5204
5205#ifdef CONFIG_TRACER_MAX_TRACE
5206	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5207		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5208	tracing_reset_online_cpus(&tr->max_buffer);
5209#endif
5210
5211	mutex_unlock(&trace_types_lock);
5212
5213	return 0;
5214}
5215
5216static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5217				   size_t cnt, loff_t *fpos)
5218{
5219	struct seq_file *m = filp->private_data;
5220	struct trace_array *tr = m->private;
5221	char buf[64];
5222	const char *clockstr;
5223	int ret;
5224
5225	if (cnt >= sizeof(buf))
5226		return -EINVAL;
5227
5228	if (copy_from_user(&buf, ubuf, cnt))
5229		return -EFAULT;
5230
5231	buf[cnt] = 0;
5232
5233	clockstr = strstrip(buf);
5234
5235	ret = tracing_set_clock(tr, clockstr);
5236	if (ret)
5237		return ret;
5238
5239	*fpos += cnt;
5240
5241	return cnt;
5242}
5243
5244static int tracing_clock_open(struct inode *inode, struct file *file)
5245{
5246	struct trace_array *tr = inode->i_private;
5247	int ret;
5248
5249	if (tracing_disabled)
5250		return -ENODEV;
5251
5252	if (trace_array_get(tr))
5253		return -ENODEV;
5254
5255	ret = single_open(file, tracing_clock_show, inode->i_private);
5256	if (ret < 0)
5257		trace_array_put(tr);
5258
5259	return ret;
5260}
5261
5262struct ftrace_buffer_info {
5263	struct trace_iterator	iter;
5264	void			*spare;
5265	unsigned int		read;
5266};
5267
5268#ifdef CONFIG_TRACER_SNAPSHOT
5269static int tracing_snapshot_open(struct inode *inode, struct file *file)
5270{
5271	struct trace_array *tr = inode->i_private;
5272	struct trace_iterator *iter;
5273	struct seq_file *m;
5274	int ret = 0;
5275
5276	if (trace_array_get(tr) < 0)
5277		return -ENODEV;
5278
5279	if (file->f_mode & FMODE_READ) {
5280		iter = __tracing_open(inode, file, true);
5281		if (IS_ERR(iter))
5282			ret = PTR_ERR(iter);
5283	} else {
5284		/* Writes still need the seq_file to hold the private data */
5285		ret = -ENOMEM;
5286		m = kzalloc(sizeof(*m), GFP_KERNEL);
5287		if (!m)
5288			goto out;
5289		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5290		if (!iter) {
5291			kfree(m);
5292			goto out;
5293		}
5294		ret = 0;
5295
5296		iter->tr = tr;
5297		iter->trace_buffer = &tr->max_buffer;
5298		iter->cpu_file = tracing_get_cpu(inode);
5299		m->private = iter;
5300		file->private_data = m;
5301	}
5302out:
5303	if (ret < 0)
5304		trace_array_put(tr);
5305
5306	return ret;
5307}
5308
5309static ssize_t
5310tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5311		       loff_t *ppos)
5312{
5313	struct seq_file *m = filp->private_data;
5314	struct trace_iterator *iter = m->private;
5315	struct trace_array *tr = iter->tr;
5316	unsigned long val;
5317	int ret;
5318
5319	ret = tracing_update_buffers();
5320	if (ret < 0)
5321		return ret;
5322
5323	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5324	if (ret)
5325		return ret;
5326
5327	mutex_lock(&trace_types_lock);
5328
5329	if (tr->current_trace->use_max_tr) {
5330		ret = -EBUSY;
5331		goto out;
5332	}
5333
5334	switch (val) {
5335	case 0:
5336		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5337			ret = -EINVAL;
5338			break;
5339		}
5340		if (tr->allocated_snapshot)
5341			free_snapshot(tr);
5342		break;
5343	case 1:
5344/* Only allow per-cpu swap if the ring buffer supports it */
5345#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5346		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5347			ret = -EINVAL;
5348			break;
5349		}
5350#endif
5351		if (!tr->allocated_snapshot) {
5352			ret = alloc_snapshot(tr);
5353			if (ret < 0)
5354				break;
5355		}
5356		local_irq_disable();
5357		/* Now, we're going to swap */
5358		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5359			update_max_tr(tr, current, smp_processor_id());
5360		else
5361			update_max_tr_single(tr, current, iter->cpu_file);
5362		local_irq_enable();
5363		break;
5364	default:
5365		if (tr->allocated_snapshot) {
5366			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5367				tracing_reset_online_cpus(&tr->max_buffer);
5368			else
5369				tracing_reset(&tr->max_buffer, iter->cpu_file);
5370		}
5371		break;
5372	}
5373
5374	if (ret >= 0) {
5375		*ppos += cnt;
5376		ret = cnt;
5377	}
5378out:
5379	mutex_unlock(&trace_types_lock);
5380	return ret;
5381}
5382
5383static int tracing_snapshot_release(struct inode *inode, struct file *file)
5384{
5385	struct seq_file *m = file->private_data;
5386	int ret;
5387
5388	ret = tracing_release(inode, file);
5389
5390	if (file->f_mode & FMODE_READ)
5391		return ret;
5392
5393	/* If write only, the seq_file is just a stub */
5394	if (m)
5395		kfree(m->private);
5396	kfree(m);
5397
5398	return 0;
5399}
5400
5401static int tracing_buffers_open(struct inode *inode, struct file *filp);
5402static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5403				    size_t count, loff_t *ppos);
5404static int tracing_buffers_release(struct inode *inode, struct file *file);
5405static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5406		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5407
5408static int snapshot_raw_open(struct inode *inode, struct file *filp)
5409{
5410	struct ftrace_buffer_info *info;
5411	int ret;
5412
5413	ret = tracing_buffers_open(inode, filp);
5414	if (ret < 0)
5415		return ret;
5416
5417	info = filp->private_data;
5418
5419	if (info->iter.trace->use_max_tr) {
5420		tracing_buffers_release(inode, filp);
5421		return -EBUSY;
5422	}
5423
5424	info->iter.snapshot = true;
5425	info->iter.trace_buffer = &info->iter.tr->max_buffer;
5426
5427	return ret;
5428}
5429
5430#endif /* CONFIG_TRACER_SNAPSHOT */
5431
5432
5433static const struct file_operations tracing_thresh_fops = {
5434	.open		= tracing_open_generic,
5435	.read		= tracing_thresh_read,
5436	.write		= tracing_thresh_write,
5437	.llseek		= generic_file_llseek,
5438};
5439
5440static const struct file_operations tracing_max_lat_fops = {
5441	.open		= tracing_open_generic,
5442	.read		= tracing_max_lat_read,
5443	.write		= tracing_max_lat_write,
5444	.llseek		= generic_file_llseek,
5445};
5446
5447static const struct file_operations set_tracer_fops = {
5448	.open		= tracing_open_generic,
5449	.read		= tracing_set_trace_read,
5450	.write		= tracing_set_trace_write,
5451	.llseek		= generic_file_llseek,
5452};
5453
5454static const struct file_operations tracing_pipe_fops = {
5455	.open		= tracing_open_pipe,
5456	.poll		= tracing_poll_pipe,
5457	.read		= tracing_read_pipe,
5458	.splice_read	= tracing_splice_read_pipe,
5459	.release	= tracing_release_pipe,
5460	.llseek		= no_llseek,
5461};
5462
5463static const struct file_operations tracing_entries_fops = {
5464	.open		= tracing_open_generic_tr,
5465	.read		= tracing_entries_read,
5466	.write		= tracing_entries_write,
5467	.llseek		= generic_file_llseek,
5468	.release	= tracing_release_generic_tr,
5469};
5470
5471static const struct file_operations tracing_total_entries_fops = {
5472	.open		= tracing_open_generic_tr,
5473	.read		= tracing_total_entries_read,
5474	.llseek		= generic_file_llseek,
5475	.release	= tracing_release_generic_tr,
5476};
5477
5478static const struct file_operations tracing_free_buffer_fops = {
5479	.open		= tracing_open_generic_tr,
5480	.write		= tracing_free_buffer_write,
5481	.release	= tracing_free_buffer_release,
5482};
5483
5484static const struct file_operations tracing_mark_fops = {
5485	.open		= tracing_open_generic_tr,
5486	.write		= tracing_mark_write,
5487	.llseek		= generic_file_llseek,
5488	.release	= tracing_release_generic_tr,
5489};
5490
5491static const struct file_operations trace_clock_fops = {
5492	.open		= tracing_clock_open,
5493	.read		= seq_read,
5494	.llseek		= seq_lseek,
5495	.release	= tracing_single_release_tr,
5496	.write		= tracing_clock_write,
5497};
5498
5499#ifdef CONFIG_TRACER_SNAPSHOT
5500static const struct file_operations snapshot_fops = {
5501	.open		= tracing_snapshot_open,
5502	.read		= seq_read,
5503	.write		= tracing_snapshot_write,
5504	.llseek		= tracing_lseek,
5505	.release	= tracing_snapshot_release,
5506};
5507
5508static const struct file_operations snapshot_raw_fops = {
5509	.open		= snapshot_raw_open,
5510	.read		= tracing_buffers_read,
5511	.release	= tracing_buffers_release,
5512	.splice_read	= tracing_buffers_splice_read,
5513	.llseek		= no_llseek,
5514};
5515
5516#endif /* CONFIG_TRACER_SNAPSHOT */
5517
5518static int tracing_buffers_open(struct inode *inode, struct file *filp)
5519{
5520	struct trace_array *tr = inode->i_private;
5521	struct ftrace_buffer_info *info;
5522	int ret;
5523
5524	if (tracing_disabled)
5525		return -ENODEV;
5526
5527	if (trace_array_get(tr) < 0)
5528		return -ENODEV;
5529
5530	info = kzalloc(sizeof(*info), GFP_KERNEL);
5531	if (!info) {
5532		trace_array_put(tr);
5533		return -ENOMEM;
5534	}
5535
5536	mutex_lock(&trace_types_lock);
5537
5538	info->iter.tr		= tr;
5539	info->iter.cpu_file	= tracing_get_cpu(inode);
5540	info->iter.trace	= tr->current_trace;
5541	info->iter.trace_buffer = &tr->trace_buffer;
5542	info->spare		= NULL;
5543	/* Force reading ring buffer for first read */
5544	info->read		= (unsigned int)-1;
5545
5546	filp->private_data = info;
5547
5548	tr->current_trace->ref++;
5549
5550	mutex_unlock(&trace_types_lock);
5551
5552	ret = nonseekable_open(inode, filp);
5553	if (ret < 0)
5554		trace_array_put(tr);
5555
5556	return ret;
5557}
5558
5559static unsigned int
5560tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5561{
5562	struct ftrace_buffer_info *info = filp->private_data;
5563	struct trace_iterator *iter = &info->iter;
5564
5565	return trace_poll(iter, filp, poll_table);
5566}
5567
5568static ssize_t
5569tracing_buffers_read(struct file *filp, char __user *ubuf,
5570		     size_t count, loff_t *ppos)
5571{
5572	struct ftrace_buffer_info *info = filp->private_data;
5573	struct trace_iterator *iter = &info->iter;
5574	ssize_t ret;
5575	ssize_t size;
5576
5577	if (!count)
5578		return 0;
5579
5580#ifdef CONFIG_TRACER_MAX_TRACE
5581	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5582		return -EBUSY;
5583#endif
5584
5585	if (!info->spare)
5586		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5587							  iter->cpu_file);
5588	if (!info->spare)
5589		return -ENOMEM;
5590
5591	/* Do we have previous read data to read? */
5592	if (info->read < PAGE_SIZE)
5593		goto read;
5594
5595 again:
5596	trace_access_lock(iter->cpu_file);
5597	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5598				    &info->spare,
5599				    count,
5600				    iter->cpu_file, 0);
5601	trace_access_unlock(iter->cpu_file);
5602
5603	if (ret < 0) {
5604		if (trace_empty(iter)) {
5605			if ((filp->f_flags & O_NONBLOCK))
5606				return -EAGAIN;
5607
5608			ret = wait_on_pipe(iter, false);
5609			if (ret)
5610				return ret;
5611
5612			goto again;
5613		}
5614		return 0;
5615	}
5616
5617	info->read = 0;
5618 read:
5619	size = PAGE_SIZE - info->read;
5620	if (size > count)
5621		size = count;
5622
5623	ret = copy_to_user(ubuf, info->spare + info->read, size);
5624	if (ret == size)
5625		return -EFAULT;
5626
5627	size -= ret;
5628
5629	*ppos += size;
5630	info->read += size;
5631
5632	return size;
5633}
5634
5635static int tracing_buffers_release(struct inode *inode, struct file *file)
5636{
5637	struct ftrace_buffer_info *info = file->private_data;
5638	struct trace_iterator *iter = &info->iter;
5639
5640	mutex_lock(&trace_types_lock);
5641
5642	iter->tr->current_trace->ref--;
5643
5644	__trace_array_put(iter->tr);
5645
5646	if (info->spare)
5647		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5648	kfree(info);
5649
5650	mutex_unlock(&trace_types_lock);
5651
5652	return 0;
5653}
5654
5655struct buffer_ref {
5656	struct ring_buffer	*buffer;
5657	void			*page;
5658	int			ref;
5659};
5660
5661static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5662				    struct pipe_buffer *buf)
5663{
5664	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5665
5666	if (--ref->ref)
5667		return;
5668
5669	ring_buffer_free_read_page(ref->buffer, ref->page);
5670	kfree(ref);
5671	buf->private = 0;
5672}
5673
5674static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5675				struct pipe_buffer *buf)
5676{
5677	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5678
5679	ref->ref++;
5680}
5681
5682/* Pipe buffer operations for a buffer. */
5683static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5684	.can_merge		= 0,
5685	.confirm		= generic_pipe_buf_confirm,
5686	.release		= buffer_pipe_buf_release,
5687	.steal			= generic_pipe_buf_steal,
5688	.get			= buffer_pipe_buf_get,
5689};
5690
5691/*
5692 * Callback from splice_to_pipe(), if we need to release some pages
5693 * at the end of the spd in case we error'ed out in filling the pipe.
5694 */
5695static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5696{
5697	struct buffer_ref *ref =
5698		(struct buffer_ref *)spd->partial[i].private;
5699
5700	if (--ref->ref)
5701		return;
5702
5703	ring_buffer_free_read_page(ref->buffer, ref->page);
5704	kfree(ref);
5705	spd->partial[i].private = 0;
5706}
5707
5708static ssize_t
5709tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5710			    struct pipe_inode_info *pipe, size_t len,
5711			    unsigned int flags)
5712{
5713	struct ftrace_buffer_info *info = file->private_data;
5714	struct trace_iterator *iter = &info->iter;
5715	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5716	struct page *pages_def[PIPE_DEF_BUFFERS];
5717	struct splice_pipe_desc spd = {
5718		.pages		= pages_def,
5719		.partial	= partial_def,
5720		.nr_pages_max	= PIPE_DEF_BUFFERS,
5721		.flags		= flags,
5722		.ops		= &buffer_pipe_buf_ops,
5723		.spd_release	= buffer_spd_release,
5724	};
5725	struct buffer_ref *ref;
5726	int entries, size, i;
5727	ssize_t ret = 0;
5728
5729#ifdef CONFIG_TRACER_MAX_TRACE
5730	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5731		return -EBUSY;
5732#endif
5733
	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;

	if (len & (PAGE_SIZE - 1)) {
		if (len < PAGE_SIZE)
			return -EINVAL;
		len &= PAGE_MASK;
	}

	/*
	 * Do the cheap validation first; splice_grow_spd() allocates
	 * memory that would otherwise be leaked by the early returns.
	 */
	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;
5745
5746 again:
5747	trace_access_lock(iter->cpu_file);
5748	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5749
5750	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5751		struct page *page;
5752		int r;
5753
5754		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5755		if (!ref) {
5756			ret = -ENOMEM;
5757			break;
5758		}
5759
5760		ref->ref = 1;
5761		ref->buffer = iter->trace_buffer->buffer;
5762		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5763		if (!ref->page) {
5764			ret = -ENOMEM;
5765			kfree(ref);
5766			break;
5767		}
5768
5769		r = ring_buffer_read_page(ref->buffer, &ref->page,
5770					  len, iter->cpu_file, 1);
5771		if (r < 0) {
5772			ring_buffer_free_read_page(ref->buffer, ref->page);
5773			kfree(ref);
5774			break;
5775		}
5776
5777		/*
		 * Zero out any leftover data; this page is going to
		 * user land.
5780		 */
5781		size = ring_buffer_page_len(ref->page);
5782		if (size < PAGE_SIZE)
5783			memset(ref->page + size, 0, PAGE_SIZE - size);
5784
5785		page = virt_to_page(ref->page);
5786
5787		spd.pages[i] = page;
5788		spd.partial[i].len = PAGE_SIZE;
5789		spd.partial[i].offset = 0;
5790		spd.partial[i].private = (unsigned long)ref;
5791		spd.nr_pages++;
5792		*ppos += PAGE_SIZE;
5793
5794		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5795	}
5796
5797	trace_access_unlock(iter->cpu_file);
5798	spd.nr_pages = i;
5799
	/* did we read anything? */
	if (!spd.nr_pages) {
		if (ret)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, true);
		if (ret)
			goto out;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	/* Release the pages that splice_grow_spd() may have added */
	splice_shrink_spd(&spd);

	return ret;
5819}
5820
5821static const struct file_operations tracing_buffers_fops = {
5822	.open		= tracing_buffers_open,
5823	.read		= tracing_buffers_read,
5824	.poll		= tracing_buffers_poll,
5825	.release	= tracing_buffers_release,
5826	.splice_read	= tracing_buffers_splice_read,
5827	.llseek		= no_llseek,
5828};
5829
5830static ssize_t
5831tracing_stats_read(struct file *filp, char __user *ubuf,
5832		   size_t count, loff_t *ppos)
5833{
5834	struct inode *inode = file_inode(filp);
5835	struct trace_array *tr = inode->i_private;
5836	struct trace_buffer *trace_buf = &tr->trace_buffer;
5837	int cpu = tracing_get_cpu(inode);
5838	struct trace_seq *s;
5839	unsigned long cnt;
5840	unsigned long long t;
5841	unsigned long usec_rem;
5842
5843	s = kmalloc(sizeof(*s), GFP_KERNEL);
5844	if (!s)
5845		return -ENOMEM;
5846
5847	trace_seq_init(s);
5848
5849	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5850	trace_seq_printf(s, "entries: %ld\n", cnt);
5851
5852	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5853	trace_seq_printf(s, "overrun: %ld\n", cnt);
5854
5855	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5856	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5857
5858	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5859	trace_seq_printf(s, "bytes: %ld\n", cnt);
5860
5861	if (trace_clocks[tr->clock_id].in_ns) {
5862		/* local or global for trace_clock */
5863		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5864		usec_rem = do_div(t, USEC_PER_SEC);
5865		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5866								t, usec_rem);
5867
5868		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5869		usec_rem = do_div(t, USEC_PER_SEC);
5870		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5871	} else {
5872		/* counter or tsc mode for trace_clock */
5873		trace_seq_printf(s, "oldest event ts: %llu\n",
5874				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5875
5876		trace_seq_printf(s, "now ts: %llu\n",
5877				ring_buffer_time_stamp(trace_buf->buffer, cpu));
5878	}
5879
5880	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5881	trace_seq_printf(s, "dropped events: %ld\n", cnt);
5882
5883	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5884	trace_seq_printf(s, "read events: %ld\n", cnt);
5885
5886	count = simple_read_from_buffer(ubuf, count, ppos,
5887					s->buffer, trace_seq_used(s));
5888
5889	kfree(s);
5890
5891	return count;
5892}
5893
5894static const struct file_operations tracing_stats_fops = {
5895	.open		= tracing_open_generic_tr,
5896	.read		= tracing_stats_read,
5897	.llseek		= generic_file_llseek,
5898	.release	= tracing_release_generic_tr,
5899};
5900
5901#ifdef CONFIG_DYNAMIC_FTRACE
5902
5903int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5904{
5905	return 0;
5906}
5907
5908static ssize_t
5909tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5910		  size_t cnt, loff_t *ppos)
5911{
5912	static char ftrace_dyn_info_buffer[1024];
5913	static DEFINE_MUTEX(dyn_info_mutex);
5914	unsigned long *p = filp->private_data;
5915	char *buf = ftrace_dyn_info_buffer;
5916	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5917	int r;
5918
5919	mutex_lock(&dyn_info_mutex);
5920	r = sprintf(buf, "%ld ", *p);
5921
5922	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5923	buf[r++] = '\n';
5924
5925	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5926
5927	mutex_unlock(&dyn_info_mutex);
5928
5929	return r;
5930}
5931
5932static const struct file_operations tracing_dyn_info_fops = {
5933	.open		= tracing_open_generic,
5934	.read		= tracing_read_dyn_info,
5935	.llseek		= generic_file_llseek,
5936};
5937#endif /* CONFIG_DYNAMIC_FTRACE */
5938
5939#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5940static void
5941ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5942{
5943	tracing_snapshot();
5944}
5945
5946static void
5947ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5948{
	unsigned long *count = (unsigned long *)data;
5950
5951	if (!*count)
5952		return;
5953
5954	if (*count != -1)
5955		(*count)--;
5956
5957	tracing_snapshot();
5958}
5959
5960static int
5961ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5962		      struct ftrace_probe_ops *ops, void *data)
5963{
5964	long count = (long)data;
5965
5966	seq_printf(m, "%ps:", (void *)ip);
5967
5968	seq_puts(m, "snapshot");
5969
5970	if (count == -1)
5971		seq_puts(m, ":unlimited\n");
5972	else
5973		seq_printf(m, ":count=%ld\n", count);
5974
5975	return 0;
5976}
5977
5978static struct ftrace_probe_ops snapshot_probe_ops = {
5979	.func			= ftrace_snapshot,
5980	.print			= ftrace_snapshot_print,
5981};
5982
5983static struct ftrace_probe_ops snapshot_count_probe_ops = {
5984	.func			= ftrace_count_snapshot,
5985	.print			= ftrace_snapshot_print,
5986};
5987
5988static int
5989ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5990			       char *glob, char *cmd, char *param, int enable)
5991{
5992	struct ftrace_probe_ops *ops;
5993	void *count = (void *)-1;
5994	char *number;
5995	int ret;
5996
5997	/* hash funcs only work with set_ftrace_filter */
5998	if (!enable)
5999		return -EINVAL;
6000
	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6002
6003	if (glob[0] == '!') {
6004		unregister_ftrace_function_probe_func(glob+1, ops);
6005		return 0;
6006	}
6007
6008	if (!param)
6009		goto out_reg;
6010
6011	number = strsep(&param, ":");
6012
6013	if (!strlen(number))
6014		goto out_reg;
6015
6016	/*
6017	 * We use the callback data field (which is a pointer)
6018	 * as our counter.
6019	 */
6020	ret = kstrtoul(number, 0, (unsigned long *)&count);
6021	if (ret)
6022		return ret;
6023
6024 out_reg:
6025	ret = register_ftrace_function_probe(glob, ops, count);
6026
6027	if (ret >= 0)
6028		alloc_snapshot(&global_trace);
6029
6030	return ret < 0 ? ret : 0;
6031}
6032
6033static struct ftrace_func_command ftrace_snapshot_cmd = {
6034	.name			= "snapshot",
6035	.func			= ftrace_trace_snapshot_callback,
6036};
6037
6038static __init int register_snapshot_cmd(void)
6039{
6040	return register_ftrace_command(&ftrace_snapshot_cmd);
6041}
6042#else
6043static inline __init int register_snapshot_cmd(void) { return 0; }
6044#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6045
6046static struct dentry *tracing_get_dentry(struct trace_array *tr)
6047{
6048	if (WARN_ON(!tr->dir))
6049		return ERR_PTR(-ENODEV);
6050
6051	/* Top directory uses NULL as the parent */
6052	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6053		return NULL;
6054
6055	/* All sub buffers have a descriptor */
6056	return tr->dir;
6057}
6058
6059static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6060{
6061	struct dentry *d_tracer;
6062
6063	if (tr->percpu_dir)
6064		return tr->percpu_dir;
6065
6066	d_tracer = tracing_get_dentry(tr);
6067	if (IS_ERR(d_tracer))
6068		return NULL;
6069
6070	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6071
6072	WARN_ONCE(!tr->percpu_dir,
6073		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6074
6075	return tr->percpu_dir;
6076}
6077
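/*
 * Helper for the per-cpu files below: the CPU number is stashed in the new
 * inode's i_cdev field as (cpu + 1), so that tracing_get_cpu() can tell a
 * per-cpu file (non-zero i_cdev) apart from a top level file and recover
 * the CPU it belongs to.
 */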
6078static struct dentry *
6079trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6080		      void *data, long cpu, const struct file_operations *fops)
6081{
6082	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6083
6084	if (ret) /* See tracing_get_cpu() */
6085		d_inode(ret)->i_cdev = (void *)(cpu + 1);
6086	return ret;
6087}
6088
6089static void
6090tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6091{
6092	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6093	struct dentry *d_cpu;
6094	char cpu_dir[30]; /* 30 characters should be more than enough */
6095
6096	if (!d_percpu)
6097		return;
6098
6099	snprintf(cpu_dir, 30, "cpu%ld", cpu);
6100	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6101	if (!d_cpu) {
6102		pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
6103		return;
6104	}
6105
6106	/* per cpu trace_pipe */
6107	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6108				tr, cpu, &tracing_pipe_fops);
6109
6110	/* per cpu trace */
6111	trace_create_cpu_file("trace", 0644, d_cpu,
6112				tr, cpu, &tracing_fops);
6113
6114	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6115				tr, cpu, &tracing_buffers_fops);
6116
6117	trace_create_cpu_file("stats", 0444, d_cpu,
6118				tr, cpu, &tracing_stats_fops);
6119
6120	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6121				tr, cpu, &tracing_entries_fops);
6122
6123#ifdef CONFIG_TRACER_SNAPSHOT
6124	trace_create_cpu_file("snapshot", 0644, d_cpu,
6125				tr, cpu, &snapshot_fops);
6126
6127	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6128				tr, cpu, &snapshot_raw_fops);
6129#endif
6130}
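
/*
 * The above gives each instance a layout along these lines (assuming the
 * usual tracefs mount point, /sys/kernel/tracing):
 *
 *   per_cpu/cpu0/trace
 *   per_cpu/cpu0/trace_pipe
 *   per_cpu/cpu0/trace_pipe_raw
 *   per_cpu/cpu0/stats
 *   per_cpu/cpu0/buffer_size_kb
 *   per_cpu/cpu0/snapshot        (CONFIG_TRACER_SNAPSHOT only)
 *   per_cpu/cpu0/snapshot_raw    (CONFIG_TRACER_SNAPSHOT only)
 *   per_cpu/cpu1/...
 */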
6131
6132#ifdef CONFIG_FTRACE_SELFTEST
6133/* Let selftest have access to static functions in this file */
6134#include "trace_selftest.c"
6135#endif
6136
6137struct trace_option_dentry {
6138	struct tracer_opt		*opt;
6139	struct tracer_flags		*flags;
6140	struct trace_array		*tr;
6141	struct dentry			*entry;
6142};
6143
6144static ssize_t
6145trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6146			loff_t *ppos)
6147{
6148	struct trace_option_dentry *topt = filp->private_data;
6149	char *buf;
6150
6151	if (topt->flags->val & topt->opt->bit)
6152		buf = "1\n";
6153	else
6154		buf = "0\n";
6155
6156	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6157}
6158
6159static ssize_t
6160trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6161			 loff_t *ppos)
6162{
6163	struct trace_option_dentry *topt = filp->private_data;
6164	unsigned long val;
6165	int ret;
6166
6167	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6168	if (ret)
6169		return ret;
6170
6171	if (val != 0 && val != 1)
6172		return -EINVAL;
6173
6174	if (!!(topt->flags->val & topt->opt->bit) != val) {
6175		mutex_lock(&trace_types_lock);
6176		ret = __set_tracer_option(topt->tr, topt->flags,
6177					  topt->opt, !val);
6178		mutex_unlock(&trace_types_lock);
6179		if (ret)
6180			return ret;
6181	}
6182
6183	*ppos += cnt;
6184
6185	return cnt;
6186}
6187
6188
6189static const struct file_operations trace_options_fops = {
6190	.open = tracing_open_generic,
6191	.read = trace_options_read,
6192	.write = trace_options_write,
6193	.llseek	= generic_file_llseek,
6194};
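
/*
 * Each tracer-specific option becomes a boolean file in the instance's
 * options/ directory (see create_trace_option_file() below). For example,
 * with a hypothetical option name <opt>:
 *
 *   cat options/<opt>        # prints "0" or "1"
 *   echo 1 > options/<opt>   # toggles the flag via __set_tracer_option()
 */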
6195
6196static ssize_t
6197trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6198			loff_t *ppos)
6199{
6200	long index = (long)filp->private_data;
6201	char *buf;
6202
6203	if (trace_flags & (1 << index))
6204		buf = "1\n";
6205	else
6206		buf = "0\n";
6207
6208	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6209}
6210
6211static ssize_t
6212trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6213			 loff_t *ppos)
6214{
6215	struct trace_array *tr = &global_trace;
6216	long index = (long)filp->private_data;
6217	unsigned long val;
6218	int ret;
6219
6220	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6221	if (ret)
6222		return ret;
6223
6224	if (val != 0 && val != 1)
6225		return -EINVAL;
6226
6227	mutex_lock(&trace_types_lock);
6228	ret = set_tracer_flag(tr, 1 << index, val);
6229	mutex_unlock(&trace_types_lock);
6230
6231	if (ret < 0)
6232		return ret;
6233
6234	*ppos += cnt;
6235
6236	return cnt;
6237}
6238
6239static const struct file_operations trace_options_core_fops = {
6240	.open = tracing_open_generic,
6241	.read = trace_options_core_read,
6242	.write = trace_options_core_write,
6243	.llseek = generic_file_llseek,
6244};
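
/*
 * The core trace_flags bits work the same way: trace_options[i] names bit i,
 * and create_trace_option_core_file() below gives each one a 0/1 file under
 * options/ whose write path ends up in set_tracer_flag() on the global
 * trace array.
 */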
6245
6246struct dentry *trace_create_file(const char *name,
6247				 umode_t mode,
6248				 struct dentry *parent,
6249				 void *data,
6250				 const struct file_operations *fops)
6251{
6252	struct dentry *ret;
6253
6254	ret = tracefs_create_file(name, mode, parent, data, fops);
6255	if (!ret)
6256		pr_warning("Could not create tracefs '%s' entry\n", name);
6257
6258	return ret;
6259}
6260
6261
6262static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6263{
6264	struct dentry *d_tracer;
6265
6266	if (tr->options)
6267		return tr->options;
6268
6269	d_tracer = tracing_get_dentry(tr);
6270	if (IS_ERR(d_tracer))
6271		return NULL;
6272
6273	tr->options = tracefs_create_dir("options", d_tracer);
6274	if (!tr->options) {
6275		pr_warning("Could not create tracefs directory 'options'\n");
6276		return NULL;
6277	}
6278
6279	return tr->options;
6280}
6281
6282static void
6283create_trace_option_file(struct trace_array *tr,
6284			 struct trace_option_dentry *topt,
6285			 struct tracer_flags *flags,
6286			 struct tracer_opt *opt)
6287{
6288	struct dentry *t_options;
6289
6290	t_options = trace_options_init_dentry(tr);
6291	if (!t_options)
6292		return;
6293
6294	topt->flags = flags;
6295	topt->opt = opt;
6296	topt->tr = tr;
6297
6298	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6299				    &trace_options_fops);
6300
6301}
6302
6303static struct trace_option_dentry *
6304create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6305{
6306	struct trace_option_dentry *topts;
6307	struct tracer_flags *flags;
6308	struct tracer_opt *opts;
6309	int cnt;
6310
6311	if (!tracer)
6312		return NULL;
6313
6314	flags = tracer->flags;
6315
6316	if (!flags || !flags->opts)
6317		return NULL;
6318
6319	opts = flags->opts;
6320
6321	for (cnt = 0; opts[cnt].name; cnt++)
6322		;
6323
6324	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6325	if (!topts)
6326		return NULL;
6327
6328	for (cnt = 0; opts[cnt].name; cnt++)
6329		create_trace_option_file(tr, &topts[cnt], flags,
6330					 &opts[cnt]);
6331
6332	return topts;
6333}
6334
6335static void
6336destroy_trace_option_files(struct trace_option_dentry *topts)
6337{
6338	int cnt;
6339
6340	if (!topts)
6341		return;
6342
6343	for (cnt = 0; topts[cnt].opt; cnt++)
6344		tracefs_remove(topts[cnt].entry);
6345
6346	kfree(topts);
6347}
6348
6349static struct dentry *
6350create_trace_option_core_file(struct trace_array *tr,
6351			      const char *option, long index)
6352{
6353	struct dentry *t_options;
6354
6355	t_options = trace_options_init_dentry(tr);
6356	if (!t_options)
6357		return NULL;
6358
6359	return trace_create_file(option, 0644, t_options, (void *)index,
6360				    &trace_options_core_fops);
6361}
6362
6363static __init void create_trace_options_dir(struct trace_array *tr)
6364{
6365	struct dentry *t_options;
6366	int i;
6367
6368	t_options = trace_options_init_dentry(tr);
6369	if (!t_options)
6370		return;
6371
6372	for (i = 0; trace_options[i]; i++)
6373		create_trace_option_core_file(tr, trace_options[i], i);
6374}
6375
6376static ssize_t
6377rb_simple_read(struct file *filp, char __user *ubuf,
6378	       size_t cnt, loff_t *ppos)
6379{
6380	struct trace_array *tr = filp->private_data;
6381	char buf[64];
6382	int r;
6383
6384	r = tracer_tracing_is_on(tr);
6385	r = sprintf(buf, "%d\n", r);
6386
6387	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6388}
6389
6390static ssize_t
6391rb_simple_write(struct file *filp, const char __user *ubuf,
6392		size_t cnt, loff_t *ppos)
6393{
6394	struct trace_array *tr = filp->private_data;
6395	struct ring_buffer *buffer = tr->trace_buffer.buffer;
6396	unsigned long val;
6397	int ret;
6398
6399	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6400	if (ret)
6401		return ret;
6402
6403	if (buffer) {
6404		mutex_lock(&trace_types_lock);
6405		if (val) {
6406			tracer_tracing_on(tr);
6407			if (tr->current_trace->start)
6408				tr->current_trace->start(tr);
6409		} else {
6410			tracer_tracing_off(tr);
6411			if (tr->current_trace->stop)
6412				tr->current_trace->stop(tr);
6413		}
6414		mutex_unlock(&trace_types_lock);
6415	}
6416
6417	(*ppos)++;
6418
6419	return cnt;
6420}
6421
6422static const struct file_operations rb_simple_fops = {
6423	.open		= tracing_open_generic_tr,
6424	.read		= rb_simple_read,
6425	.write		= rb_simple_write,
6426	.release	= tracing_release_generic_tr,
6427	.llseek		= default_llseek,
6428};
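
/*
 * These fops back the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below. For example:
 *
 *   echo 0 > tracing_on    # stop writing into the ring buffer
 *   echo 1 > tracing_on    # resume tracing
 *   cat tracing_on         # shows 0 or 1
 */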
6429
6430struct dentry *trace_instance_dir;
6431
6432static void
6433init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6434
6435static int
6436allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6437{
6438	enum ring_buffer_flags rb_flags;
6439
6440	rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6441
6442	buf->tr = tr;
6443
6444	buf->buffer = ring_buffer_alloc(size, rb_flags);
6445	if (!buf->buffer)
6446		return -ENOMEM;
6447
6448	buf->data = alloc_percpu(struct trace_array_cpu);
6449	if (!buf->data) {
6450		ring_buffer_free(buf->buffer);
6451		return -ENOMEM;
6452	}
6453
6454	/* Allocate the first page for all buffers */
6455	set_buffer_entries(&tr->trace_buffer,
6456			   ring_buffer_size(tr->trace_buffer.buffer, 0));
6457
6458	return 0;
6459}
6460
6461static int allocate_trace_buffers(struct trace_array *tr, int size)
6462{
6463	int ret;
6464
6465	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6466	if (ret)
6467		return ret;
6468
6469#ifdef CONFIG_TRACER_MAX_TRACE
6470	ret = allocate_trace_buffer(tr, &tr->max_buffer,
6471				    allocate_snapshot ? size : 1);
6472	if (WARN_ON(ret)) {
6473		ring_buffer_free(tr->trace_buffer.buffer);
6474		free_percpu(tr->trace_buffer.data);
6475		return -ENOMEM;
6476	}
6477	tr->allocated_snapshot = allocate_snapshot;
6478
6479	/*
6480	 * Only the top level trace array gets its snapshot allocated
6481	 * from the kernel command line.
6482	 */
6483	allocate_snapshot = false;
6484#endif
6485	return 0;
6486}
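
/*
 * Note: with CONFIG_TRACER_MAX_TRACE, the max/snapshot buffer is only
 * allocated at full size here if snapshot allocation was requested on the
 * kernel command line (the option that sets allocate_snapshot, normally
 * "alloc_snapshot"); otherwise it starts out one page big and is resized
 * when a snapshot is first requested.
 */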
6487
6488static void free_trace_buffer(struct trace_buffer *buf)
6489{
6490	if (buf->buffer) {
6491		ring_buffer_free(buf->buffer);
6492		buf->buffer = NULL;
6493		free_percpu(buf->data);
6494		buf->data = NULL;
6495	}
6496}
6497
6498static void free_trace_buffers(struct trace_array *tr)
6499{
6500	if (!tr)
6501		return;
6502
6503	free_trace_buffer(&tr->trace_buffer);
6504
6505#ifdef CONFIG_TRACER_MAX_TRACE
6506	free_trace_buffer(&tr->max_buffer);
6507#endif
6508}
6509
6510static int instance_mkdir(const char *name)
6511{
6512	struct trace_array *tr;
6513	int ret;
6514
6515	mutex_lock(&trace_types_lock);
6516
6517	ret = -EEXIST;
6518	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6519		if (tr->name && strcmp(tr->name, name) == 0)
6520			goto out_unlock;
6521	}
6522
6523	ret = -ENOMEM;
6524	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6525	if (!tr)
6526		goto out_unlock;
6527
6528	tr->name = kstrdup(name, GFP_KERNEL);
6529	if (!tr->name)
6530		goto out_free_tr;
6531
6532	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6533		goto out_free_tr;
6534
6535	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6536
6537	raw_spin_lock_init(&tr->start_lock);
6538
6539	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6540
6541	tr->current_trace = &nop_trace;
6542
6543	INIT_LIST_HEAD(&tr->systems);
6544	INIT_LIST_HEAD(&tr->events);
6545
6546	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6547		goto out_free_tr;
6548
6549	tr->dir = tracefs_create_dir(name, trace_instance_dir);
6550	if (!tr->dir)
6551		goto out_free_tr;
6552
6553	ret = event_trace_add_tracer(tr->dir, tr);
6554	if (ret) {
6555		tracefs_remove_recursive(tr->dir);
6556		goto out_free_tr;
6557	}
6558
6559	init_tracer_tracefs(tr, tr->dir);
6560
6561	list_add(&tr->list, &ftrace_trace_arrays);
6562
6563	mutex_unlock(&trace_types_lock);
6564
6565	return 0;
6566
6567 out_free_tr:
6568	free_trace_buffers(tr);
6569	free_cpumask_var(tr->tracing_cpumask);
6570	kfree(tr->name);
6571	kfree(tr);
6572
6573 out_unlock:
6574	mutex_unlock(&trace_types_lock);
6575
6576	return ret;
6577
6578}
6579
6580static int instance_rmdir(const char *name)
6581{
6582	struct trace_array *tr;
6583	int found = 0;
6584	int ret;
6585
6586	mutex_lock(&trace_types_lock);
6587
6588	ret = -ENODEV;
6589	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6590		if (tr->name && strcmp(tr->name, name) == 0) {
6591			found = 1;
6592			break;
6593		}
6594	}
6595	if (!found)
6596		goto out_unlock;
6597
6598	ret = -EBUSY;
6599	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6600		goto out_unlock;
6601
6602	list_del(&tr->list);
6603
6604	tracing_set_nop(tr);
6605	event_trace_del_tracer(tr);
6606	ftrace_destroy_function_files(tr);
6607	tracefs_remove_recursive(tr->dir);
6608	free_trace_buffers(tr);
6609
6610	kfree(tr->name);
6611	kfree(tr);
6612
6613	ret = 0;
6614
6615 out_unlock:
6616	mutex_unlock(&trace_types_lock);
6617
6618	return ret;
6619}
6620
6621static __init void create_trace_instances(struct dentry *d_tracer)
6622{
6623	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6624							 instance_mkdir,
6625							 instance_rmdir);
6626	if (WARN_ON(!trace_instance_dir))
6627		return;
6628}
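
/*
 * Instances are created and removed with plain mkdir/rmdir inside the
 * instances directory (path assumes the usual tracefs mount point):
 *
 *   mkdir /sys/kernel/tracing/instances/foo    # calls instance_mkdir()
 *   rmdir /sys/kernel/tracing/instances/foo    # calls instance_rmdir()
 *
 * Each instance gets its own ring buffer and its own copies of the control
 * files created by init_tracer_tracefs() below.
 */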
6629
6630static void
6631init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6632{
6633	int cpu;
6634
6635	trace_create_file("available_tracers", 0444, d_tracer,
6636			tr, &show_traces_fops);
6637
6638	trace_create_file("current_tracer", 0644, d_tracer,
6639			tr, &set_tracer_fops);
6640
6641	trace_create_file("tracing_cpumask", 0644, d_tracer,
6642			  tr, &tracing_cpumask_fops);
6643
6644	trace_create_file("trace_options", 0644, d_tracer,
6645			  tr, &tracing_iter_fops);
6646
6647	trace_create_file("trace", 0644, d_tracer,
6648			  tr, &tracing_fops);
6649
6650	trace_create_file("trace_pipe", 0444, d_tracer,
6651			  tr, &tracing_pipe_fops);
6652
6653	trace_create_file("buffer_size_kb", 0644, d_tracer,
6654			  tr, &tracing_entries_fops);
6655
6656	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6657			  tr, &tracing_total_entries_fops);
6658
6659	trace_create_file("free_buffer", 0200, d_tracer,
6660			  tr, &tracing_free_buffer_fops);
6661
6662	trace_create_file("trace_marker", 0220, d_tracer,
6663			  tr, &tracing_mark_fops);
6664
6665	trace_create_file("trace_clock", 0644, d_tracer, tr,
6666			  &trace_clock_fops);
6667
6668	trace_create_file("tracing_on", 0644, d_tracer,
6669			  tr, &rb_simple_fops);
6670
6671#ifdef CONFIG_TRACER_MAX_TRACE
6672	trace_create_file("tracing_max_latency", 0644, d_tracer,
6673			&tr->max_latency, &tracing_max_lat_fops);
6674#endif
6675
6676	if (ftrace_create_function_files(tr, d_tracer))
6677		WARN(1, "Could not allocate function filter files");
6678
6679#ifdef CONFIG_TRACER_SNAPSHOT
6680	trace_create_file("snapshot", 0644, d_tracer,
6681			  tr, &snapshot_fops);
6682#endif
6683
6684	for_each_tracing_cpu(cpu)
6685		tracing_init_tracefs_percpu(tr, cpu);
6686
6687}
6688
6689static struct vfsmount *trace_automount(void *ignore)
6690{
6691	struct vfsmount *mnt;
6692	struct file_system_type *type;
6693
6694	/*
6695	 * To maintain backward compatibility for tools that mount
6696	 * debugfs to get to the tracing facility, tracefs is automatically
6697	 * mounted to the debugfs/tracing directory.
6698	 */
6699	type = get_fs_type("tracefs");
6700	if (!type)
6701		return NULL;
6702	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6703	put_filesystem(type);
6704	if (IS_ERR(mnt))
6705		return NULL;
6706	mntget(mnt);
6707
6708	return mnt;
6709}
6710
6711/**
6712 * tracing_init_dentry - initialize top level trace array
6713 *
6714 * This is called when creating files or directories in the tracing
6715 * directory. It is called by the boot-up code via fs_initcall() and
6716 * returns the dentry to use as the parent of the top level tracing files.
6717 */
6718struct dentry *tracing_init_dentry(void)
6719{
6720	struct trace_array *tr = &global_trace;
6721
6722	/* The top level trace array uses NULL as parent */
6723	if (tr->dir)
6724		return NULL;
6725
6726	if (WARN_ON(!debugfs_initialized()))
6727		return ERR_PTR(-ENODEV);
6728
6729	/*
6730	 * As there may still be users that expect the tracing
6731	 * files to exist in debugfs/tracing, we must automount
6732	 * the tracefs file system there, so older tools still
6733	 * work with the newer kernel.
6734	 */
6735	tr->dir = debugfs_create_automount("tracing", NULL,
6736					   trace_automount, NULL);
6737	if (!tr->dir) {
6738		pr_warn_once("Could not create debugfs directory 'tracing'\n");
6739		return ERR_PTR(-ENOMEM);
6740	}
6741
6742	return NULL;
6743}
6744
6745extern struct trace_enum_map *__start_ftrace_enum_maps[];
6746extern struct trace_enum_map *__stop_ftrace_enum_maps[];
6747
6748static void __init trace_enum_init(void)
6749{
6750	int len;
6751
6752	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
6753	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
6754}
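
/*
 * The enum maps collected here (and from modules below) let the tracing
 * code replace enum symbols used in event print formats with their numeric
 * values, so that user space tools parsing the format files see real
 * numbers instead of unknown enum names.
 */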
6755
6756#ifdef CONFIG_MODULES
6757static void trace_module_add_enums(struct module *mod)
6758{
6759	if (!mod->num_trace_enums)
6760		return;
6761
6762	/*
6763	 * Modules with bad taint do not have events created; do
6764	 * not bother with their enums either.
6765	 */
6766	if (trace_module_has_bad_taint(mod))
6767		return;
6768
6769	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
6770}
6771
6772#ifdef CONFIG_TRACE_ENUM_MAP_FILE
6773static void trace_module_remove_enums(struct module *mod)
6774{
6775	union trace_enum_map_item *map;
6776	union trace_enum_map_item **last = &trace_enum_maps;
6777
6778	if (!mod->num_trace_enums)
6779		return;
6780
6781	mutex_lock(&trace_enum_mutex);
6782
6783	map = trace_enum_maps;
6784
6785	while (map) {
6786		if (map->head.mod == mod)
6787			break;
6788		map = trace_enum_jmp_to_tail(map);
6789		last = &map->tail.next;
6790		map = map->tail.next;
6791	}
6792	if (!map)
6793		goto out;
6794
6795	*last = trace_enum_jmp_to_tail(map)->tail.next;
6796	kfree(map);
6797 out:
6798	mutex_unlock(&trace_enum_mutex);
6799}
6800#else
6801static inline void trace_module_remove_enums(struct module *mod) { }
6802#endif /* CONFIG_TRACE_ENUM_MAP_FILE */
6803
6804static int trace_module_notify(struct notifier_block *self,
6805			       unsigned long val, void *data)
6806{
6807	struct module *mod = data;
6808
6809	switch (val) {
6810	case MODULE_STATE_COMING:
6811		trace_module_add_enums(mod);
6812		break;
6813	case MODULE_STATE_GOING:
6814		trace_module_remove_enums(mod);
6815		break;
6816	}
6817
6818	return 0;
6819}
6820
6821static struct notifier_block trace_module_nb = {
6822	.notifier_call = trace_module_notify,
6823	.priority = 0,
6824};
6825#endif /* CONFIG_MODULES */
6826
6827static __init int tracer_init_tracefs(void)
6828{
6829	struct dentry *d_tracer;
6830
6831	trace_access_lock_init();
6832
6833	d_tracer = tracing_init_dentry();
6834	if (IS_ERR(d_tracer))
6835		return 0;
6836
6837	init_tracer_tracefs(&global_trace, d_tracer);
6838
6839	trace_create_file("tracing_thresh", 0644, d_tracer,
6840			&global_trace, &tracing_thresh_fops);
6841
6842	trace_create_file("README", 0444, d_tracer,
6843			NULL, &tracing_readme_fops);
6844
6845	trace_create_file("saved_cmdlines", 0444, d_tracer,
6846			NULL, &tracing_saved_cmdlines_fops);
6847
6848	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6849			  NULL, &tracing_saved_cmdlines_size_fops);
6850
6851	trace_enum_init();
6852
6853	trace_create_enum_file(d_tracer);
6854
6855#ifdef CONFIG_MODULES
6856	register_module_notifier(&trace_module_nb);
6857#endif
6858
6859#ifdef CONFIG_DYNAMIC_FTRACE
6860	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6861			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6862#endif
6863
6864	create_trace_instances(d_tracer);
6865
6866	create_trace_options_dir(&global_trace);
6867
6868	/* If the tracer was started via cmdline, create options for it here */
6869	if (global_trace.current_trace != &nop_trace)
6870		update_tracer_options(&global_trace, global_trace.current_trace);
6871
6872	return 0;
6873}
6874
6875static int trace_panic_handler(struct notifier_block *this,
6876			       unsigned long event, void *unused)
6877{
6878	if (ftrace_dump_on_oops)
6879		ftrace_dump(ftrace_dump_on_oops);
6880	return NOTIFY_OK;
6881}
6882
6883static struct notifier_block trace_panic_notifier = {
6884	.notifier_call  = trace_panic_handler,
6885	.next           = NULL,
6886	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
6887};
6888
6889static int trace_die_handler(struct notifier_block *self,
6890			     unsigned long val,
6891			     void *data)
6892{
6893	switch (val) {
6894	case DIE_OOPS:
6895		if (ftrace_dump_on_oops)
6896			ftrace_dump(ftrace_dump_on_oops);
6897		break;
6898	default:
6899		break;
6900	}
6901	return NOTIFY_OK;
6902}
6903
6904static struct notifier_block trace_die_notifier = {
6905	.notifier_call = trace_die_handler,
6906	.priority = 200
6907};
6908
6909/*
6910 * printk is limited to a maximum of 1024 characters; we really don't need it that big.
6911 * Nothing should be printing 1000 characters anyway.
6912 */
6913#define TRACE_MAX_PRINT		1000
6914
6915/*
6916 * Define here KERN_TRACE so that we have one place to modify
6917 * it if we decide to change what log level the ftrace dump
6918 * should be at.
6919 */
6920#define KERN_TRACE		KERN_EMERG
6921
6922void
6923trace_printk_seq(struct trace_seq *s)
6924{
6925	/* Probably should print a warning here. */
6926	if (s->seq.len >= TRACE_MAX_PRINT)
6927		s->seq.len = TRACE_MAX_PRINT;
6928
6929	/*
6930	 * More paranoia: although the buffer size is set to
6931	 * PAGE_SIZE and TRACE_MAX_PRINT is only 1000, this is
6932	 * just an extra layer of protection.
6933	 */
6934	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
6935		s->seq.len = s->seq.size - 1;
6936
6937	/* Should already be nul-terminated, but we are paranoid. */
6938	s->buffer[s->seq.len] = 0;
6939
6940	printk(KERN_TRACE "%s", s->buffer);
6941
6942	trace_seq_init(s);
6943}
6944
6945void trace_init_global_iter(struct trace_iterator *iter)
6946{
6947	iter->tr = &global_trace;
6948	iter->trace = iter->tr->current_trace;
6949	iter->cpu_file = RING_BUFFER_ALL_CPUS;
6950	iter->trace_buffer = &global_trace.trace_buffer;
6951
6952	if (iter->trace && iter->trace->open)
6953		iter->trace->open(iter);
6954
6955	/* Annotate start of buffers if we had overruns */
6956	if (ring_buffer_overruns(iter->trace_buffer->buffer))
6957		iter->iter_flags |= TRACE_FILE_ANNOTATE;
6958
6959	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6960	if (trace_clocks[iter->tr->clock_id].in_ns)
6961		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6962}
6963
6964void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6965{
6966	/* use static because iter can be a bit big for the stack */
6967	static struct trace_iterator iter;
6968	static atomic_t dump_running;
6969	unsigned int old_userobj;
6970	unsigned long flags;
6971	int cnt = 0, cpu;
6972
6973	/* Only allow one dump user at a time. */
6974	if (atomic_inc_return(&dump_running) != 1) {
6975		atomic_dec(&dump_running);
6976		return;
6977	}
6978
6979	/*
6980	 * Always turn off tracing when we dump.
6981	 * We don't need to show trace output of what happens
6982	 * between multiple crashes.
6983	 *
6984	 * If the user does a sysrq-z, then they can re-enable
6985	 * tracing with echo 1 > tracing_on.
6986	 */
6987	tracing_off();
6988
6989	local_irq_save(flags);
6990
6991	/* Simulate the iterator */
6992	trace_init_global_iter(&iter);
6993
6994	for_each_tracing_cpu(cpu) {
6995		atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6996	}
6997
6998	old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6999
7000	/* don't look at user memory in panic mode */
7001	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7002
7003	switch (oops_dump_mode) {
7004	case DUMP_ALL:
7005		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7006		break;
7007	case DUMP_ORIG:
7008		iter.cpu_file = raw_smp_processor_id();
7009		break;
7010	case DUMP_NONE:
7011		goto out_enable;
7012	default:
7013		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7014		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7015	}
7016
7017	printk(KERN_TRACE "Dumping ftrace buffer:\n");
7018
7019	/* Did function tracer already get disabled? */
7020	if (ftrace_is_dead()) {
7021		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7022		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7023	}
7024
7025	/*
7026	 * We need to stop all tracing on all CPUs to read
7027	 * the next buffer. This is a bit expensive, but it is
7028	 * not done often. We read in all that we can,
7029	 * and then release the locks again.
7030	 */
7031
7032	while (!trace_empty(&iter)) {
7033
7034		if (!cnt)
7035			printk(KERN_TRACE "---------------------------------\n");
7036
7037		cnt++;
7038
7039		/* reset all but tr, trace, and overruns */
7040		memset(&iter.seq, 0,
7041		       sizeof(struct trace_iterator) -
7042		       offsetof(struct trace_iterator, seq));
7043		iter.iter_flags |= TRACE_FILE_LAT_FMT;
7044		iter.pos = -1;
7045
7046		if (trace_find_next_entry_inc(&iter) != NULL) {
7047			int ret;
7048
7049			ret = print_trace_line(&iter);
7050			if (ret != TRACE_TYPE_NO_CONSUME)
7051				trace_consume(&iter);
7052		}
7053		touch_nmi_watchdog();
7054
7055		trace_printk_seq(&iter.seq);
7056	}
7057
7058	if (!cnt)
7059		printk(KERN_TRACE "   (ftrace buffer empty)\n");
7060	else
7061		printk(KERN_TRACE "---------------------------------\n");
7062
7063 out_enable:
7064	trace_flags |= old_userobj;
7065
7066	for_each_tracing_cpu(cpu) {
7067		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7068	}
7069	atomic_dec(&dump_running);
7070	local_irq_restore(flags);
7071}
7072EXPORT_SYMBOL_GPL(ftrace_dump);
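
/*
 * ftrace_dump() is normally reached from the panic/die notifiers above when
 * ftrace_dump_on_oops is set, or directly via sysrq-z. Assuming the usual
 * sysrq setup, a quick way to exercise it is:
 *
 *   echo z > /proc/sysrq-trigger    # dump the ftrace ring buffers
 *
 * while ftrace_dump_on_oops (boot parameter or the
 * /proc/sys/kernel/ftrace_dump_on_oops sysctl) makes it run automatically
 * on an oops or panic.
 */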
7073
7074__init static int tracer_alloc_buffers(void)
7075{
7076	int ring_buf_size;
7077	int ret = -ENOMEM;
7078
7079	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7080		goto out;
7081
7082	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7083		goto out_free_buffer_mask;
7084
7085	/* Only allocate trace_printk buffers if a trace_printk exists */
7086	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7087		/* Must be called before global_trace.buffer is allocated */
7088		trace_printk_init_buffers();
7089
7090	/* To save memory, keep the ring buffer size to its minimum */
7091	if (ring_buffer_expanded)
7092		ring_buf_size = trace_buf_size;
7093	else
7094		ring_buf_size = 1;
7095
7096	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7097	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7098
7099	raw_spin_lock_init(&global_trace.start_lock);
7100
7101	/* Used for event triggers */
7102	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7103	if (!temp_buffer)
7104		goto out_free_cpumask;
7105
7106	if (trace_create_savedcmd() < 0)
7107		goto out_free_temp_buffer;
7108
7109	/* TODO: make the number of buffers hotpluggable with CPUs */
7110	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7111		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7112		WARN_ON(1);
7113		goto out_free_savedcmd;
7114	}
7115
7116	if (global_trace.buffer_disabled)
7117		tracing_off();
7118
7119	if (trace_boot_clock) {
7120		ret = tracing_set_clock(&global_trace, trace_boot_clock);
7121		if (ret < 0)
7122			pr_warning("Trace clock %s not defined, going back to default\n",
7123				   trace_boot_clock);
7124	}
7125
7126	/*
7127	 * register_tracer() might reference current_trace, so it
7128	 * needs to be set before we register anything. This is
7129	 * just a bootstrap of current_trace anyway.
7130	 */
7131	global_trace.current_trace = &nop_trace;
7132
7133	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7134
7135	ftrace_init_global_array_ops(&global_trace);
7136
7137	register_tracer(&nop_trace);
7138
7139	/* All seems OK, enable tracing */
7140	tracing_disabled = 0;
7141
7142	atomic_notifier_chain_register(&panic_notifier_list,
7143				       &trace_panic_notifier);
7144
7145	register_die_notifier(&trace_die_notifier);
7146
7147	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7148
7149	INIT_LIST_HEAD(&global_trace.systems);
7150	INIT_LIST_HEAD(&global_trace.events);
7151	list_add(&global_trace.list, &ftrace_trace_arrays);
7152
7153	while (trace_boot_options) {
7154		char *option;
7155
7156		option = strsep(&trace_boot_options, ",");
7157		trace_set_options(&global_trace, option);
7158	}
7159
7160	register_snapshot_cmd();
7161
7162	return 0;
7163
7164out_free_savedcmd:
7165	free_saved_cmdlines_buffer(savedcmd);
7166out_free_temp_buffer:
7167	ring_buffer_free(temp_buffer);
7168out_free_cpumask:
7169	free_cpumask_var(global_trace.tracing_cpumask);
7170out_free_buffer_mask:
7171	free_cpumask_var(tracing_buffer_mask);
7172out:
7173	return ret;
7174}
7175
7176void __init trace_init(void)
7177{
7178	if (tracepoint_printk) {
7179		tracepoint_print_iter =
7180			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7181		if (WARN_ON(!tracepoint_print_iter))
7182			tracepoint_printk = 0;
7183	}
7184	tracer_alloc_buffers();
7185	trace_event_init();
7186}
7187
7188__init static int clear_boot_tracer(void)
7189{
7190	/*
7191	 * The default bootup tracer name resides in an init section.
7192	 * This function is called from a late_initcall. If the boot
7193	 * tracer was never found and registered, clear the pointer to
7194	 * prevent a later registration from accessing the buffer that
7195	 * is about to be freed.
7196	 */
7197	if (!default_bootup_tracer)
7198		return 0;
7199
7200	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7201	       default_bootup_tracer);
7202	default_bootup_tracer = NULL;
7203
7204	return 0;
7205}
7206
7207fs_initcall(tracer_init_tracefs);
7208late_initcall(clear_boot_tracer);
7209