1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM sched
3
4#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_SCHED_H
6
7#include <linux/sched.h>
8#include <linux/tracepoint.h>
9#include <linux/binfmts.h>
10
11/*
12 * Tracepoint for calling kthread_stop, performed to end a kthread:
13 */
14TRACE_EVENT(sched_kthread_stop,
15
16	TP_PROTO(struct task_struct *t),
17
18	TP_ARGS(t),
19
20	TP_STRUCT__entry(
21		__array(	char,	comm,	TASK_COMM_LEN	)
22		__field(	pid_t,	pid			)
23	),
24
25	TP_fast_assign(
26		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
27		__entry->pid	= t->pid;
28	),
29
30	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
31);
32
33/*
34 * Tracepoint for the return value of the kthread stopping:
35 */
36TRACE_EVENT(sched_kthread_stop_ret,
37
38	TP_PROTO(int ret),
39
40	TP_ARGS(ret),
41
42	TP_STRUCT__entry(
43		__field(	int,	ret	)
44	),
45
46	TP_fast_assign(
47		__entry->ret	= ret;
48	),
49
50	TP_printk("ret=%d", __entry->ret)
51);
52
53/*
54 * Tracepoint for waking up a task:
55 */
56DECLARE_EVENT_CLASS(sched_wakeup_template,
57
58	TP_PROTO(struct task_struct *p, int success),
59
60	TP_ARGS(__perf_task(p), success),
61
62	TP_STRUCT__entry(
63		__array(	char,	comm,	TASK_COMM_LEN	)
64		__field(	pid_t,	pid			)
65		__field(	int,	prio			)
66		__field(	int,	success			)
67		__field(	int,	target_cpu		)
68	),
69
70	TP_fast_assign(
71		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
72		__entry->pid		= p->pid;
73		__entry->prio		= p->prio;
74		__entry->success	= success;
75		__entry->target_cpu	= task_cpu(p);
76	),
77
78	TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
79		  __entry->comm, __entry->pid, __entry->prio,
80		  __entry->success, __entry->target_cpu)
81);
82
83DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
84	     TP_PROTO(struct task_struct *p, int success),
85	     TP_ARGS(p, success));
86
87/*
88 * Tracepoint for waking up a new task:
89 */
90DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
91	     TP_PROTO(struct task_struct *p, int success),
92	     TP_ARGS(p, success));
93
#ifdef CREATE_TRACE_POINTS
/*
 * __trace_sched_switch_state - state value to record for a task in the
 * sched_switch event.
 *
 * @p: task whose state is sampled.
 *
 * Returns p->state, except on CONFIG_PREEMPT kernels when preempt_count()
 * has PREEMPT_ACTIVE set: then TASK_RUNNING | TASK_STATE_MAX is returned
 * instead.  With CONFIG_SCHED_DEBUG additionally enabled, a @p that is not
 * 'current' triggers BUG_ON().
 */
static inline long __trace_sched_switch_state(struct task_struct *p)
{
	/* Sample the state up front, before any of the checks below. */
	long sampled = p->state;

#ifdef CONFIG_PREEMPT
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
	/*
	 * For all intents and purposes a preempted task is a running task,
	 * so report it as such and tag it with the TASK_STATE_MAX bit.
	 */
	if (preempt_count() & PREEMPT_ACTIVE)
		return TASK_RUNNING | TASK_STATE_MAX;
#endif /* CONFIG_PREEMPT */

	return sampled;
}
#endif /* CREATE_TRACE_POINTS */
113
114/*
115 * Tracepoint for task switches, performed by the scheduler:
116 */
117TRACE_EVENT(sched_switch,
118
119	TP_PROTO(struct task_struct *prev,
120		 struct task_struct *next),
121
122	TP_ARGS(prev, next),
123
124	TP_STRUCT__entry(
125		__array(	char,	prev_comm,	TASK_COMM_LEN	)
126		__field(	pid_t,	prev_pid			)
127		__field(	int,	prev_prio			)
128		__field(	long,	prev_state			)
129		__array(	char,	next_comm,	TASK_COMM_LEN	)
130		__field(	pid_t,	next_pid			)
131		__field(	int,	next_prio			)
132	),
133
134	TP_fast_assign(
135		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
136		__entry->prev_pid	= prev->pid;
137		__entry->prev_prio	= prev->prio;
138		__entry->prev_state	= __trace_sched_switch_state(prev);
139		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
140		__entry->next_pid	= next->pid;
141		__entry->next_prio	= next->prio;
142	),
143
144	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
145		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
146		__entry->prev_state & (TASK_STATE_MAX-1) ?
147		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
148				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
149				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
150				{ 128, "K" }, { 256, "W" }, { 512, "P" }) : "R",
151		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
152		__entry->next_comm, __entry->next_pid, __entry->next_prio)
153);
154
155/*
156 * Tracepoint for a task being migrated:
157 */
158TRACE_EVENT(sched_migrate_task,
159
160	TP_PROTO(struct task_struct *p, int dest_cpu),
161
162	TP_ARGS(p, dest_cpu),
163
164	TP_STRUCT__entry(
165		__array(	char,	comm,	TASK_COMM_LEN	)
166		__field(	pid_t,	pid			)
167		__field(	int,	prio			)
168		__field(	int,	orig_cpu		)
169		__field(	int,	dest_cpu		)
170	),
171
172	TP_fast_assign(
173		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
174		__entry->pid		= p->pid;
175		__entry->prio		= p->prio;
176		__entry->orig_cpu	= task_cpu(p);
177		__entry->dest_cpu	= dest_cpu;
178	),
179
180	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
181		  __entry->comm, __entry->pid, __entry->prio,
182		  __entry->orig_cpu, __entry->dest_cpu)
183);
184
185DECLARE_EVENT_CLASS(sched_process_template,
186
187	TP_PROTO(struct task_struct *p),
188
189	TP_ARGS(p),
190
191	TP_STRUCT__entry(
192		__array(	char,	comm,	TASK_COMM_LEN	)
193		__field(	pid_t,	pid			)
194		__field(	int,	prio			)
195	),
196
197	TP_fast_assign(
198		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
199		__entry->pid		= p->pid;
200		__entry->prio		= p->prio;
201	),
202
203	TP_printk("comm=%s pid=%d prio=%d",
204		  __entry->comm, __entry->pid, __entry->prio)
205);
206
207/*
208 * Tracepoint for freeing a task:
209 */
210DEFINE_EVENT(sched_process_template, sched_process_free,
211	     TP_PROTO(struct task_struct *p),
212	     TP_ARGS(p));
213
214
215/*
216 * Tracepoint for a task exiting:
217 */
218DEFINE_EVENT(sched_process_template, sched_process_exit,
219	     TP_PROTO(struct task_struct *p),
220	     TP_ARGS(p));
221
222/*
223 * Tracepoint for waiting on task to unschedule:
224 */
225DEFINE_EVENT(sched_process_template, sched_wait_task,
226	TP_PROTO(struct task_struct *p),
227	TP_ARGS(p));
228
229/*
230 * Tracepoint for a waiting task:
231 */
232TRACE_EVENT(sched_process_wait,
233
234	TP_PROTO(struct pid *pid),
235
236	TP_ARGS(pid),
237
238	TP_STRUCT__entry(
239		__array(	char,	comm,	TASK_COMM_LEN	)
240		__field(	pid_t,	pid			)
241		__field(	int,	prio			)
242	),
243
244	TP_fast_assign(
245		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
246		__entry->pid		= pid_nr(pid);
247		__entry->prio		= current->prio;
248	),
249
250	TP_printk("comm=%s pid=%d prio=%d",
251		  __entry->comm, __entry->pid, __entry->prio)
252);
253
254/*
255 * Tracepoint for do_fork:
256 */
257TRACE_EVENT(sched_process_fork,
258
259	TP_PROTO(struct task_struct *parent, struct task_struct *child),
260
261	TP_ARGS(parent, child),
262
263	TP_STRUCT__entry(
264		__array(	char,	parent_comm,	TASK_COMM_LEN	)
265		__field(	pid_t,	parent_pid			)
266		__array(	char,	child_comm,	TASK_COMM_LEN	)
267		__field(	pid_t,	child_pid			)
268	),
269
270	TP_fast_assign(
271		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
272		__entry->parent_pid	= parent->pid;
273		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
274		__entry->child_pid	= child->pid;
275	),
276
277	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
278		__entry->parent_comm, __entry->parent_pid,
279		__entry->child_comm, __entry->child_pid)
280);
281
282/*
283 * Tracepoint for exec:
284 */
285TRACE_EVENT(sched_process_exec,
286
287	TP_PROTO(struct task_struct *p, pid_t old_pid,
288		 struct linux_binprm *bprm),
289
290	TP_ARGS(p, old_pid, bprm),
291
292	TP_STRUCT__entry(
293		__string(	filename,	bprm->filename	)
294		__field(	pid_t,		pid		)
295		__field(	pid_t,		old_pid		)
296	),
297
298	TP_fast_assign(
299		__assign_str(filename, bprm->filename);
300		__entry->pid		= p->pid;
301		__entry->old_pid	= old_pid;
302	),
303
304	TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
305		  __entry->pid, __entry->old_pid)
306);
307
308/*
309 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
310 *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
311 */
312DECLARE_EVENT_CLASS(sched_stat_template,
313
314	TP_PROTO(struct task_struct *tsk, u64 delay),
315
316	TP_ARGS(__perf_task(tsk), __perf_count(delay)),
317
318	TP_STRUCT__entry(
319		__array( char,	comm,	TASK_COMM_LEN	)
320		__field( pid_t,	pid			)
321		__field( u64,	delay			)
322	),
323
324	TP_fast_assign(
325		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
326		__entry->pid	= tsk->pid;
327		__entry->delay	= delay;
328	),
329
330	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
331			__entry->comm, __entry->pid,
332			(unsigned long long)__entry->delay)
333);
334
335
336/*
337 * Tracepoint for accounting wait time (time the task is runnable
338 * but not actually running due to scheduler contention).
339 */
340DEFINE_EVENT(sched_stat_template, sched_stat_wait,
341	     TP_PROTO(struct task_struct *tsk, u64 delay),
342	     TP_ARGS(tsk, delay));
343
344/*
345 * Tracepoint for accounting sleep time (time the task is not runnable,
346 * including iowait, see below).
347 */
348DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
349	     TP_PROTO(struct task_struct *tsk, u64 delay),
350	     TP_ARGS(tsk, delay));
351
352/*
353 * Tracepoint for accounting iowait time (time the task is not runnable
354 * due to waiting on IO to complete).
355 */
356DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
357	     TP_PROTO(struct task_struct *tsk, u64 delay),
358	     TP_ARGS(tsk, delay));
359
360/*
361 * Tracepoint for accounting blocked time (time the task is in uninterruptible).
362 */
363DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
364	     TP_PROTO(struct task_struct *tsk, u64 delay),
365	     TP_ARGS(tsk, delay));
366
367/*
368 * Tracepoint for accounting runtime (time the task is executing
369 * on a CPU).
370 */
371DECLARE_EVENT_CLASS(sched_stat_runtime,
372
373	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
374
375	TP_ARGS(tsk, __perf_count(runtime), vruntime),
376
377	TP_STRUCT__entry(
378		__array( char,	comm,	TASK_COMM_LEN	)
379		__field( pid_t,	pid			)
380		__field( u64,	runtime			)
381		__field( u64,	vruntime			)
382	),
383
384	TP_fast_assign(
385		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
386		__entry->pid		= tsk->pid;
387		__entry->runtime	= runtime;
388		__entry->vruntime	= vruntime;
389	),
390
391	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
392			__entry->comm, __entry->pid,
393			(unsigned long long)__entry->runtime,
394			(unsigned long long)__entry->vruntime)
395);
396
397DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
398	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
399	     TP_ARGS(tsk, runtime, vruntime));
400
401/*
402 * Tracepoint for showing priority inheritance modifying a tasks
403 * priority.
404 */
405TRACE_EVENT(sched_pi_setprio,
406
407	TP_PROTO(struct task_struct *tsk, int newprio),
408
409	TP_ARGS(tsk, newprio),
410
411	TP_STRUCT__entry(
412		__array( char,	comm,	TASK_COMM_LEN	)
413		__field( pid_t,	pid			)
414		__field( int,	oldprio			)
415		__field( int,	newprio			)
416	),
417
418	TP_fast_assign(
419		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
420		__entry->pid		= tsk->pid;
421		__entry->oldprio	= tsk->prio;
422		__entry->newprio	= newprio;
423	),
424
425	TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
426			__entry->comm, __entry->pid,
427			__entry->oldprio, __entry->newprio)
428);
429
#ifdef CONFIG_DETECT_HUNG_TASK
/*
 * sched_process_hang - event recording the comm and pid of a task.
 * Only defined when CONFIG_DETECT_HUNG_TASK is enabled.
 *
 * @tsk: task to record.
 *       NOTE(review): presumably the task reported by the hung-task
 *       detector -- confirm against the caller.
 */
TRACE_EVENT(sched_process_hang,

	TP_PROTO(struct task_struct *tsk),

	TP_ARGS(tsk),

	TP_STRUCT__entry(
		__array(char, comm, TASK_COMM_LEN)
		__field(pid_t, pid)
	),

	TP_fast_assign(
		__entry->pid = tsk->pid;
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);
#endif /* CONFIG_DETECT_HUNG_TASK */
448
449DECLARE_EVENT_CLASS(sched_move_task_template,
450
451	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
452
453	TP_ARGS(tsk, src_cpu, dst_cpu),
454
455	TP_STRUCT__entry(
456		__field( pid_t,	pid			)
457		__field( pid_t,	tgid			)
458		__field( pid_t,	ngid			)
459		__field( int,	src_cpu			)
460		__field( int,	src_nid			)
461		__field( int,	dst_cpu			)
462		__field( int,	dst_nid			)
463	),
464
465	TP_fast_assign(
466		__entry->pid		= task_pid_nr(tsk);
467		__entry->tgid		= task_tgid_nr(tsk);
468		__entry->ngid		= task_numa_group_id(tsk);
469		__entry->src_cpu	= src_cpu;
470		__entry->src_nid	= cpu_to_node(src_cpu);
471		__entry->dst_cpu	= dst_cpu;
472		__entry->dst_nid	= cpu_to_node(dst_cpu);
473	),
474
475	TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
476			__entry->pid, __entry->tgid, __entry->ngid,
477			__entry->src_cpu, __entry->src_nid,
478			__entry->dst_cpu, __entry->dst_nid)
479);
480
481/*
482 * Tracks migration of tasks from one runqueue to another. Can be used to
483 * detect if automatic NUMA balancing is bouncing between nodes
484 */
485DEFINE_EVENT(sched_move_task_template, sched_move_numa,
486	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
487
488	TP_ARGS(tsk, src_cpu, dst_cpu)
489);
490
491DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
492	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
493
494	TP_ARGS(tsk, src_cpu, dst_cpu)
495);
496
497TRACE_EVENT(sched_swap_numa,
498
499	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
500		 struct task_struct *dst_tsk, int dst_cpu),
501
502	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),
503
504	TP_STRUCT__entry(
505		__field( pid_t,	src_pid			)
506		__field( pid_t,	src_tgid		)
507		__field( pid_t,	src_ngid		)
508		__field( int,	src_cpu			)
509		__field( int,	src_nid			)
510		__field( pid_t,	dst_pid			)
511		__field( pid_t,	dst_tgid		)
512		__field( pid_t,	dst_ngid		)
513		__field( int,	dst_cpu			)
514		__field( int,	dst_nid			)
515	),
516
517	TP_fast_assign(
518		__entry->src_pid	= task_pid_nr(src_tsk);
519		__entry->src_tgid	= task_tgid_nr(src_tsk);
520		__entry->src_ngid	= task_numa_group_id(src_tsk);
521		__entry->src_cpu	= src_cpu;
522		__entry->src_nid	= cpu_to_node(src_cpu);
523		__entry->dst_pid	= task_pid_nr(dst_tsk);
524		__entry->dst_tgid	= task_tgid_nr(dst_tsk);
525		__entry->dst_ngid	= task_numa_group_id(dst_tsk);
526		__entry->dst_cpu	= dst_cpu;
527		__entry->dst_nid	= cpu_to_node(dst_cpu);
528	),
529
530	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
531			__entry->src_pid, __entry->src_tgid, __entry->src_ngid,
532			__entry->src_cpu, __entry->src_nid,
533			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
534			__entry->dst_cpu, __entry->dst_nid)
535);
536
537/*
538 * Tracepoint for waking a polling cpu without an IPI.
539 */
540TRACE_EVENT(sched_wake_idle_without_ipi,
541
542	TP_PROTO(int cpu),
543
544	TP_ARGS(cpu),
545
546	TP_STRUCT__entry(
547		__field(	int,	cpu	)
548	),
549
550	TP_fast_assign(
551		__entry->cpu	= cpu;
552	),
553
554	TP_printk("cpu=%d", __entry->cpu)
555);
556#endif /* _TRACE_SCHED_H */
557
558/* This part must be outside protection */
559#include <trace/define_trace.h>
560