/*
 * Performance events:
 *
 *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
 *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
 *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
 *
 * Data type definitions, declarations, prototypes.
 *
 *    Started by: Thomas Gleixner and Ingo Molnar
 *
 * For licensing details see kernel-base/COPYING
 */
#ifndef _LINUX_PERF_EVENT_H
#define _LINUX_PERF_EVENT_H

#include <uapi/linux/perf_event.h>

/*
 * Kernel-internal data types and definitions:
 */

#ifdef CONFIG_PERF_EVENTS
# include <asm/perf_event.h>
# include <asm/local64.h>
#endif

struct perf_guest_info_callbacks {
	int				(*is_in_guest)(void);
	int				(*is_user_mode)(void);
	unsigned long			(*get_guest_ip)(void);
};

#ifdef CONFIG_HAVE_HW_BREAKPOINT
#include <asm/hw_breakpoint.h>
#endif

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/hrtimer.h>
#include <linux/fs.h>
#include <linux/pid_namespace.h>
#include <linux/workqueue.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/irq_work.h>
#include <linux/static_key.h>
#include <linux/jump_label_ratelimit.h>
#include <linux/atomic.h>
#include <linux/sysfs.h>
#include <linux/perf_regs.h>
#include <linux/cgroup.h>
#include <asm/local.h>

struct perf_callchain_entry {
	__u64				nr;
	__u64				ip[PERF_MAX_STACK_DEPTH];
};

struct perf_raw_record {
	u32				size;
	void				*data;
};

/*
 * branch stack layout:
 *  nr: number of taken branches stored in entries[]
 *
 * Note that nr can vary from sample to sample.
 * Branches (to, from) are stored from most recent
 * to least recent, i.e., entries[0] contains the most
 * recent branch.
 */
struct perf_branch_stack {
	__u64				nr;
	struct perf_branch_entry	entries[0];
};

struct task_struct;

/*
 * extra PMU register associated with an event
 */
struct hw_perf_event_extra {
	u64		config;	/* register value */
	unsigned int	reg;	/* register address or index */
	int		alloc;	/* extra register already allocated */
	int		idx;	/* index in shared_regs->regs[] */
};

/**
 * struct hw_perf_event - performance event hardware details:
 */
struct hw_perf_event {
#ifdef CONFIG_PERF_EVENTS
	union {
		struct { /* hardware */
			u64		config;
			u64		last_tag;
			unsigned long	config_base;
			unsigned long	event_base;
			int		event_base_rdpmc;
			int		idx;
			int		last_cpu;
			int		flags;

			struct hw_perf_event_extra extra_reg;
			struct hw_perf_event_extra branch_reg;
		};
		struct { /* software */
			struct hrtimer	hrtimer;
		};
		struct { /* tracepoint */
			/* for tp_event->class */
			struct list_head	tp_list;
		};
		struct { /* intel_cqm */
			int			cqm_state;
			int			cqm_rmid;
			struct list_head	cqm_events_entry;
			struct list_head	cqm_groups_entry;
			struct list_head	cqm_group_entry;
		};
		struct { /* itrace */
			int			itrace_started;
		};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
		struct { /* breakpoint */
			/*
			 * Crufty hack to avoid the chicken and egg
			 * problem hw_breakpoint has with context
			 * creation and event initialization.
			 */
			struct arch_hw_breakpoint	info;
			struct list_head		bp_list;
		};
#endif
	};
	struct task_struct		*target;
	int				state;
	local64_t			prev_count;
	u64				sample_period;
	u64				last_period;
	local64_t			period_left;
	u64				interrupts_seq;
	u64				interrupts;

	u64				freq_time_stamp;
	u64				freq_count_stamp;
#endif
};

/*
 * hw_perf_event::state flags
 */
#define PERF_HES_STOPPED	0x01 /* the counter is stopped */
#define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
#define PERF_HES_ARCH		0x04

struct perf_event;

/*
 * Common implementation detail of pmu::{start,commit,cancel}_txn
 */
#define PERF_EVENT_TXN 0x1

/**
 * pmu::capabilities flags
 */
#define PERF_PMU_CAP_NO_INTERRUPT		0x01
#define PERF_PMU_CAP_NO_NMI			0x02
#define PERF_PMU_CAP_AUX_NO_SG			0x04
#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF		0x08
#define PERF_PMU_CAP_EXCLUSIVE			0x10
#define PERF_PMU_CAP_ITRACE			0x20
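
/*
 * Rough summary of the capability bits above (the authoritative checks
 * live in kernel/events/): NO_INTERRUPT means the PMU has no overflow
 * interrupt, so sampling events cannot be supported; NO_NMI means the
 * overflow interrupt is not delivered as an NMI; AUX_NO_SG means the
 * AUX buffer must be physically contiguous; AUX_SW_DOUBLEBUF requests
 * software double-buffering of the AUX area; EXCLUSIVE means per-task
 * and per-CPU events cannot use this PMU at the same time (see
 * pmu::exclusive_cnt); ITRACE marks PMUs that produce instruction-trace
 * data in the AUX area.
 */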

/**
 * struct pmu - generic performance monitoring unit
 */
struct pmu {
	struct list_head		entry;

	struct module			*module;
	struct device			*dev;
	const struct attribute_group	**attr_groups;
	const char			*name;
	int				type;

	/*
	 * various common per-pmu feature flags
	 */
	int				capabilities;

	int * __percpu			pmu_disable_count;
	struct perf_cpu_context * __percpu pmu_cpu_context;
	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
	int				task_ctx_nr;
	int				hrtimer_interval_ms;

	/*
	 * Fully disable/enable this PMU, can be used to protect from the PMI
	 * as well as for lazy/batch writing of the MSRs.
	 */
	void (*pmu_enable)		(struct pmu *pmu); /* optional */
	void (*pmu_disable)		(struct pmu *pmu); /* optional */

	/*
	 * Try and initialize the event for this PMU.
	 * Should return -ENOENT when the @event doesn't match this PMU.
	 */
	int (*event_init)		(struct perf_event *event);

	/*
	 * Notification that the event was mapped or unmapped.  Called
	 * in the context of the mapping task.
	 */
	void (*event_mapped)		(struct perf_event *event); /* optional */
	void (*event_unmapped)		(struct perf_event *event); /* optional */

#define PERF_EF_START	0x01		/* start the counter when adding    */
#define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
#define PERF_EF_UPDATE	0x04		/* update the counter when stopping */

	/*
	 * Adds/Removes a counter to/from the PMU, can be done inside
	 * a transaction, see the ->*_txn() methods.
	 */
	int  (*add)			(struct perf_event *event, int flags);
	void (*del)			(struct perf_event *event, int flags);

	/*
	 * Starts/Stops a counter present on the PMU. The PMI handler
	 * should stop the counter when perf_event_overflow() returns
	 * !0. ->start() will be used to continue.
	 */
	void (*start)			(struct perf_event *event, int flags);
	void (*stop)			(struct perf_event *event, int flags);

	/*
	 * Updates the counter value of the event.
	 */
	void (*read)			(struct perf_event *event);

	/*
	 * Group event scheduling is treated as a transaction: add the
	 * group's events as a whole and perform one schedulability test.
	 * If the test fails, roll back the whole group.
	 *
	 * Start the transaction; after this, ->add() doesn't need to
	 * do schedulability tests.
	 */
	void (*start_txn)		(struct pmu *pmu); /* optional */
	/*
	 * If ->start_txn() disabled the ->add() schedulability test
	 * then ->commit_txn() is required to perform one. On success
	 * the transaction is closed. On error the transaction is kept
	 * open until ->cancel_txn() is called.
	 */
	int  (*commit_txn)		(struct pmu *pmu); /* optional */
	/*
	 * Will cancel the transaction, assumes ->del() is called
	 * for each successful ->add() during the transaction.
	 */
	void (*cancel_txn)		(struct pmu *pmu); /* optional */
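
	/*
	 * Caller-side shape of a transaction (illustrative sketch of how
	 * the core's group scheduling uses these hooks, not literal
	 * kernel code):
	 *
	 *	pmu->start_txn(pmu);
	 *	for each event in the group:
	 *		if (pmu->add(event, PERF_EF_START) fails)
	 *			goto rollback;
	 *	if (pmu->commit_txn(pmu) == 0)
	 *		return success;
	 * rollback:
	 *	pmu->del() each event already added;
	 *	pmu->cancel_txn(pmu);
	 */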

	/*
	 * Will return the value for perf_event_mmap_page::index for this event,
	 * if no implementation is provided it will default to: event->hw.idx + 1.
	 */
	int (*event_idx)		(struct perf_event *event); /* optional */

	/*
	 * context-switches callback
	 */
	void (*sched_task)		(struct perf_event_context *ctx,
					bool sched_in);
	/*
	 * PMU specific data size
	 */
	size_t				task_ctx_size;

	/*
	 * Return the count value for a counter.
	 */
	u64 (*count)			(struct perf_event *event); /* optional */

	/*
	 * Set up pmu-private data structures for an AUX area
	 */
	void *(*setup_aux)		(int cpu, void **pages,
					 int nr_pages, bool overwrite);
					/* optional */

	/*
	 * Free pmu-private AUX data structures
	 */
	void (*free_aux)		(void *aux); /* optional */
};

/**
 * enum perf_event_active_state - the states of an event
 */
enum perf_event_active_state {
	PERF_EVENT_STATE_EXIT		= -3,
	PERF_EVENT_STATE_ERROR		= -2,
	PERF_EVENT_STATE_OFF		= -1,
	PERF_EVENT_STATE_INACTIVE	=  0,
	PERF_EVENT_STATE_ACTIVE		=  1,
};

struct file;
struct perf_sample_data;

typedef void (*perf_overflow_handler_t)(struct perf_event *,
					struct perf_sample_data *,
					struct pt_regs *regs);

enum perf_group_flag {
	PERF_GROUP_SOFTWARE		= 0x1,
};

#define SWEVENT_HLIST_BITS		8
#define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)

struct swevent_hlist {
	struct hlist_head		heads[SWEVENT_HLIST_SIZE];
	struct rcu_head			rcu_head;
};

#define PERF_ATTACH_CONTEXT	0x01
#define PERF_ATTACH_GROUP	0x02
#define PERF_ATTACH_TASK	0x04
#define PERF_ATTACH_TASK_DATA	0x08
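
/*
 * These PERF_ATTACH_* bits live in perf_event::attach_state and record
 * what the event is currently attached to: a context, a group, a task,
 * and whether it needs PMU-specific task context data
 * (perf_event_context::task_ctx_data).
 */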

struct perf_cgroup;
struct ring_buffer;

/**
 * struct perf_event - performance event kernel representation:
 */
struct perf_event {
#ifdef CONFIG_PERF_EVENTS
	/*
	 * entry onto perf_event_context::event_list;
	 *   modifications require ctx->lock
	 *   RCU safe iterations.
	 */
	struct list_head		event_entry;

	/*
	 * XXX: group_entry and sibling_list should be mutually exclusive;
	 * either you're a sibling in a group, or you're the group leader.
	 * Rework the code to always use the same list element.
	 *
	 * Locked for modification by both ctx->mutex and ctx->lock; holding
	 * either suffices for read.
	 */
	struct list_head		group_entry;
	struct list_head		sibling_list;

	/*
	 * We need storage to track the entries in perf_pmu_migrate_context; we
	 * cannot use the event_entry because of RCU and we want to keep the
	 * group intact, which avoids us using the other two entries.
	 */
	struct list_head		migrate_entry;

	struct hlist_node		hlist_entry;
	struct list_head		active_entry;
	int				nr_siblings;
	int				group_flags;
	struct perf_event		*group_leader;
	struct pmu			*pmu;

	enum perf_event_active_state	state;
	unsigned int			attach_state;
	local64_t			count;
	atomic64_t			child_count;

	/*
	 * These are the total time in nanoseconds that the event
	 * has been enabled (i.e. eligible to run, and the task has
	 * been scheduled in, if this is a per-task event)
	 * and running (scheduled onto the CPU), respectively.
	 *
	 * They are computed from tstamp_enabled, tstamp_running and
	 * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
	 */
	u64				total_time_enabled;
	u64				total_time_running;

	/*
	 * These are timestamps used for computing total_time_enabled
	 * and total_time_running when the event is in INACTIVE or
	 * ACTIVE state, measured in nanoseconds from an arbitrary point
	 * in time.
	 * tstamp_enabled: the notional time when the event was enabled
	 * tstamp_running: the notional time when the event was scheduled on
	 * tstamp_stopped: in INACTIVE state, the notional time when the
	 *	event was scheduled off.
	 */
	u64				tstamp_enabled;
	u64				tstamp_running;
	u64				tstamp_stopped;

	/*
	 * timestamp shadows the actual context timing but it can
	 * be safely used in NMI interrupt context. It reflects the
	 * context time as it was when the event was last scheduled in.
	 *
	 * ctx_time already accounts for ctx->timestamp. Therefore to
	 * compute ctx_time for a sample, simply add perf_clock().
	 */
	u64				shadow_ctx_time;

	struct perf_event_attr		attr;
	u16				header_size;
	u16				id_header_size;
	u16				read_size;
	struct hw_perf_event		hw;

	struct perf_event_context	*ctx;
	atomic_long_t			refcount;

	/*
	 * These accumulate total time (in nanoseconds) that children
	 * events have been enabled and running, respectively.
	 */
	atomic64_t			child_total_time_enabled;
	atomic64_t			child_total_time_running;

	/*
	 * Protect attach/detach and child_list:
	 */
	struct mutex			child_mutex;
	struct list_head		child_list;
	struct perf_event		*parent;

	int				oncpu;
	int				cpu;

	struct list_head		owner_entry;
	struct task_struct		*owner;

	/* mmap bits */
	struct mutex			mmap_mutex;
	atomic_t			mmap_count;

	struct ring_buffer		*rb;
	struct list_head		rb_entry;
	unsigned long			rcu_batches;
	int				rcu_pending;

	/* poll related */
	wait_queue_head_t		waitq;
	struct fasync_struct		*fasync;

	/* delayed work for NMIs and such */
	int				pending_wakeup;
	int				pending_kill;
	int				pending_disable;
	struct irq_work			pending;

	atomic_t			event_limit;

	void (*destroy)(struct perf_event *);
	struct rcu_head			rcu_head;

	struct pid_namespace		*ns;
	u64				id;

	u64				(*clock)(void);
	perf_overflow_handler_t		overflow_handler;
	void				*overflow_handler_context;

#ifdef CONFIG_EVENT_TRACING
	struct ftrace_event_call	*tp_event;
	struct event_filter		*filter;
#ifdef CONFIG_FUNCTION_TRACER
	struct ftrace_ops		ftrace_ops;
#endif
#endif

#ifdef CONFIG_CGROUP_PERF
	struct perf_cgroup		*cgrp; /* cgroup this event is attached to */
	int				cgrp_defer_enabled;
#endif

#endif /* CONFIG_PERF_EVENTS */
};

/**
 * struct perf_event_context - event context structure
 *
 * Used as a container for task events and CPU events as well:
 */
struct perf_event_context {
	struct pmu			*pmu;
	/*
	 * Protect the states of the events in the list,
	 * nr_active, and the list:
	 */
	raw_spinlock_t			lock;
	/*
	 * Protect the list of events.  Locking either mutex or lock
	 * is sufficient to ensure the list doesn't change; to change
	 * the list you need to lock both the mutex and the spinlock.
	 */
	struct mutex			mutex;

	struct list_head		active_ctx_list;
	struct list_head		pinned_groups;
	struct list_head		flexible_groups;
	struct list_head		event_list;
	int				nr_events;
	int				nr_active;
	int				is_active;
	int				nr_stat;
	int				nr_freq;
	int				rotate_disable;
	atomic_t			refcount;
	struct task_struct		*task;

	/*
	 * Context clock, runs when context enabled.
	 */
	u64				time;
	u64				timestamp;

	/*
	 * These fields let us detect when two contexts have both
	 * been cloned (inherited) from a common ancestor.
	 */
	struct perf_event_context	*parent_ctx;
	u64				parent_gen;
	u64				generation;
	int				pin_count;
	int				nr_cgroups;	 /* cgroup evts */
	void				*task_ctx_data; /* pmu specific data */
	struct rcu_head			rcu_head;

	struct delayed_work		orphans_remove;
	bool				orphans_remove_sched;
};

/*
 * Number of contexts where an event can trigger:
 *	task, softirq, hardirq, nmi.
 */
#define PERF_NR_CONTEXTS	4

/**
 * struct perf_cpu_context - per CPU event context structure
 */
struct perf_cpu_context {
	struct perf_event_context	ctx;
	struct perf_event_context	*task_ctx;
	int				active_oncpu;
	int				exclusive;
	struct hrtimer			hrtimer;
	ktime_t				hrtimer_interval;
	struct pmu			*unique_pmu;
	struct perf_cgroup		*cgrp;
};

struct perf_output_handle {
	struct perf_event		*event;
	struct ring_buffer		*rb;
	unsigned long			wakeup;
	unsigned long			size;
	union {
		void			*addr;
		unsigned long		head;
	};
	int				page;
};

#ifdef CONFIG_CGROUP_PERF

/*
 * perf_cgroup_info keeps track of time_enabled for a cgroup.
 * This is a per-cpu dynamically allocated data structure.
 */
struct perf_cgroup_info {
	u64				time;
	u64				timestamp;
};

struct perf_cgroup {
	struct cgroup_subsys_state	css;
	struct perf_cgroup_info	__percpu *info;
};

/*
 * Must ensure cgroup is pinned (css_get) before calling
 * this function. In other words, we cannot call this function
 * if there is no cgroup event for the current CPU context.
 */
static inline struct perf_cgroup *
perf_cgroup_from_task(struct task_struct *task)
{
	return container_of(task_css(task, perf_event_cgrp_id),
			    struct perf_cgroup, css);
}
#endif /* CONFIG_CGROUP_PERF */

#ifdef CONFIG_PERF_EVENTS

extern void *perf_aux_output_begin(struct perf_output_handle *handle,
				   struct perf_event *event);
extern void perf_aux_output_end(struct perf_output_handle *handle,
				unsigned long size, bool truncated);
extern int perf_aux_output_skip(struct perf_output_handle *handle,
				unsigned long size);
extern void *perf_get_aux(struct perf_output_handle *handle);

extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
extern void perf_pmu_unregister(struct pmu *pmu);
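
/*
 * Minimal registration sketch (illustrative; "my_pmu" and its callbacks
 * are placeholders, not part of this header):
 *
 *	static struct pmu my_pmu = {
 *		.task_ctx_nr	= perf_invalid_context,
 *		.event_init	= my_event_init,
 *		.add		= my_add,
 *		.del		= my_del,
 *		.start		= my_start,
 *		.stop		= my_stop,
 *		.read		= my_read,
 *	};
 *
 *	ret = perf_pmu_register(&my_pmu, "my_pmu", -1);
 *
 * A negative type asks the core to allocate a dynamic PMU type id;
 * perf_pmu_unregister() undoes the registration.
 */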

extern int perf_num_counters(void);
extern const char *perf_pmu_name(void);
extern void __perf_event_task_sched_in(struct task_struct *prev,
				       struct task_struct *task);
extern void __perf_event_task_sched_out(struct task_struct *prev,
					struct task_struct *next);
extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void perf_event_delayed_put(struct task_struct *task);
extern void perf_event_print_debug(void);
extern void perf_pmu_disable(struct pmu *pmu);
extern void perf_pmu_enable(struct pmu *pmu);
extern void perf_sched_cb_dec(struct pmu *pmu);
extern void perf_sched_cb_inc(struct pmu *pmu);
extern int perf_event_task_disable(void);
extern int perf_event_task_enable(void);
extern int perf_event_refresh(struct perf_event *event, int refresh);
extern void perf_event_update_userpage(struct perf_event *event);
extern int perf_event_release_kernel(struct perf_event *event);
extern struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr,
				int cpu,
				struct task_struct *task,
				perf_overflow_handler_t callback,
				void *context);
extern void perf_pmu_migrate_context(struct pmu *pmu,
				int src_cpu, int dst_cpu);
extern u64 perf_event_read_value(struct perf_event *event,
				 u64 *enabled, u64 *running);
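
/*
 * In-kernel counter sketch (illustrative; my_overflow_handler is a
 * placeholder): count CPU cycles on one CPU and get a callback on
 * overflow.  Pass a task instead of NULL for a per-task counter, and
 * check the return value with IS_ERR().
 *
 *	struct perf_event_attr attr = {
 *		.type		= PERF_TYPE_HARDWARE,
 *		.config		= PERF_COUNT_HW_CPU_CYCLES,
 *		.size		= sizeof(attr),
 *		.sample_period	= 1000000,
 *	};
 *	struct perf_event *event;
 *
 *	event = perf_event_create_kernel_counter(&attr, cpu, NULL,
 *						 my_overflow_handler, NULL);
 *	if (IS_ERR(event))
 *		return PTR_ERR(event);
 *	...
 *	perf_event_release_kernel(event);
 */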

struct perf_sample_data {
	/*
	 * Fields set by perf_sample_data_init(), group so as to
	 * minimize the cachelines touched.
	 */
	u64				addr;
	struct perf_raw_record		*raw;
	struct perf_branch_stack	*br_stack;
	u64				period;
	u64				weight;
	u64				txn;
	union  perf_mem_data_src	data_src;

	/*
	 * The other fields, optionally {set,used} by
	 * perf_{prepare,output}_sample().
	 */
	u64				type;
	u64				ip;
	struct {
		u32	pid;
		u32	tid;
	}				tid_entry;
	u64				time;
	u64				id;
	u64				stream_id;
	struct {
		u32	cpu;
		u32	reserved;
	}				cpu_entry;
	struct perf_callchain_entry	*callchain;

	/*
	 * regs_user may point to task_pt_regs or to regs_user_copy, depending
	 * on arch details.
	 */
	struct perf_regs		regs_user;
	struct pt_regs			regs_user_copy;

	struct perf_regs		regs_intr;
	u64				stack_user_size;
} ____cacheline_aligned;

/* default value for data source */
#define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
		    PERF_MEM_S(LVL, NA)   |\
		    PERF_MEM_S(SNOOP, NA) |\
		    PERF_MEM_S(LOCK, NA)  |\
		    PERF_MEM_S(TLB, NA))

static inline void perf_sample_data_init(struct perf_sample_data *data,
					 u64 addr, u64 period)
{
	/* remaining struct members initialized in perf_prepare_sample() */
	data->addr = addr;
	data->raw  = NULL;
	data->br_stack = NULL;
	data->period = period;
	data->weight = 0;
	data->data_src.val = PERF_MEM_NA;
	data->txn = 0;
}
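
/*
 * Typical overflow-path usage (sketch of the common PMI-handler
 * pattern; details vary per driver):
 *
 *	struct perf_sample_data data;
 *
 *	perf_sample_data_init(&data, 0, event->hw.last_period);
 *	if (perf_event_overflow(event, &data, regs))
 *		... stop the counter via pmu->stop() ...
 */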

extern void perf_output_sample(struct perf_output_handle *handle,
			       struct perf_event_header *header,
			       struct perf_sample_data *data,
			       struct perf_event *event);
extern void perf_prepare_sample(struct perf_event_header *header,
				struct perf_sample_data *data,
				struct perf_event *event,
				struct pt_regs *regs);

extern int perf_event_overflow(struct perf_event *event,
				 struct perf_sample_data *data,
				 struct pt_regs *regs);

static inline bool is_sampling_event(struct perf_event *event)
{
	return event->attr.sample_period != 0;
}

/*
 * Return 1 for a software event, 0 for a hardware event
 */
static inline int is_software_event(struct perf_event *event)
{
	return event->pmu->task_ctx_nr == perf_sw_context;
}

extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];

extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);

#ifndef perf_arch_fetch_caller_regs
static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
#endif

/*
 * Take a snapshot of the regs. Skip ip and frame pointer to
 * the nth caller. We only need a few of the regs:
 * - ip for PERF_SAMPLE_IP
 * - cs for user_mode() tests
 * - bp for callchains
 * - eflags, for future purposes, just in case
 */
static inline void perf_fetch_caller_regs(struct pt_regs *regs)
{
	memset(regs, 0, sizeof(*regs));

	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
}

static __always_inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
{
	if (static_key_false(&perf_swevent_enabled[event_id]))
		__perf_sw_event(event_id, nr, regs, addr);
}
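
/*
 * Example call site (the page-fault path counts faults this way):
 *
 *	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 */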

DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);

/*
 * 'Special' version for the scheduler; it hard-assumes no recursion,
 * which is guaranteed because we never actually schedule inside other
 * swevents (those disable preemption).
 */
static __always_inline void
perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
{
	if (static_key_false(&perf_swevent_enabled[event_id])) {
		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);

		perf_fetch_caller_regs(regs);
		___perf_sw_event(event_id, nr, regs, addr);
	}
}

extern struct static_key_deferred perf_sched_events;

static inline void perf_event_task_sched_in(struct task_struct *prev,
					    struct task_struct *task)
{
	if (static_key_false(&perf_sched_events.key))
		__perf_event_task_sched_in(prev, task);
}

static inline void perf_event_task_sched_out(struct task_struct *prev,
					     struct task_struct *next)
{
	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);

	if (static_key_false(&perf_sched_events.key))
		__perf_event_task_sched_out(prev, next);
}

static inline u64 __perf_event_count(struct perf_event *event)
{
	return local64_read(&event->count) + atomic64_read(&event->child_count);
}

extern void perf_event_mmap(struct vm_area_struct *vma);
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);

extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_fork(struct task_struct *tsk);

/* Callchains */
DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);

extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs);
extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs);

static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
	if (entry->nr < PERF_MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}

extern int sysctl_perf_event_paranoid;
extern int sysctl_perf_event_mlock;
extern int sysctl_perf_event_sample_rate;
extern int sysctl_perf_cpu_time_max_percent;

extern void perf_sample_event_took(u64 sample_len_ns);

extern int perf_proc_update_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos);
extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos);

static inline bool perf_paranoid_tracepoint_raw(void)
{
	return sysctl_perf_event_paranoid > -1;
}

static inline bool perf_paranoid_cpu(void)
{
	return sysctl_perf_event_paranoid > 0;
}

static inline bool perf_paranoid_kernel(void)
{
	return sysctl_perf_event_paranoid > 1;
}

extern void perf_event_init(void);
extern void perf_tp_event(u64 addr, u64 count, void *record,
			  int entry_size, struct pt_regs *regs,
			  struct hlist_head *head, int rctx,
			  struct task_struct *task);
extern void perf_bp_event(struct perf_event *event, void *data);

#ifndef perf_misc_flags
# define perf_misc_flags(regs) \
		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
# define perf_instruction_pointer(regs)	instruction_pointer(regs)
#endif

static inline bool has_branch_stack(struct perf_event *event)
{
	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
}

static inline bool needs_branch_stack(struct perf_event *event)
{
	return event->attr.branch_sample_type != 0;
}

static inline bool has_aux(struct perf_event *event)
{
	return event->pmu->setup_aux;
}

extern int perf_output_begin(struct perf_output_handle *handle,
			     struct perf_event *event, unsigned int size);
extern void perf_output_end(struct perf_output_handle *handle);
extern unsigned int perf_output_copy(struct perf_output_handle *handle,
			     const void *buf, unsigned int len);
extern unsigned int perf_output_skip(struct perf_output_handle *handle,
				     unsigned int len);
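
/*
 * Output-handle pattern (illustrative sketch; perf_event_output() in
 * kernel/events/core.c is the main user): reserve space, emit the
 * record, then commit.  perf_output_begin() returns non-zero on
 * failure (e.g. no space could be reserved).
 *
 *	struct perf_output_handle handle;
 *
 *	if (perf_output_begin(&handle, event, size))
 *		return;
 *	perf_output_put(&handle, header);
 *	perf_output_copy(&handle, payload, payload_len);
 *	perf_output_end(&handle);
 */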
extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);
extern u64 perf_swevent_set_period(struct perf_event *event);
extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
extern int __perf_event_disable(void *info);
extern void perf_event_task_tick(void);
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
		      struct perf_event *event)				{ return NULL; }
static inline void
perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
		    bool truncated)					{ }
static inline int
perf_aux_output_skip(struct perf_output_handle *handle,
		     unsigned long size)				{ return -EINVAL; }
static inline void *
perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
static inline void
perf_event_task_sched_in(struct task_struct *prev,
			 struct task_struct *task)			{ }
static inline void
perf_event_task_sched_out(struct task_struct *prev,
			  struct task_struct *next)			{ }
static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
static inline void perf_event_exit_task(struct task_struct *child)	{ }
static inline void perf_event_free_task(struct task_struct *task)	{ }
static inline void perf_event_delayed_put(struct task_struct *task)	{ }
static inline void perf_event_print_debug(void)				{ }
static inline int perf_event_task_disable(void)				{ return -EINVAL; }
static inline int perf_event_task_enable(void)				{ return -EINVAL; }
static inline int perf_event_refresh(struct perf_event *event, int refresh)
{
	return -EINVAL;
}

static inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
static inline void
perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)			{ }
static inline void
perf_bp_event(struct perf_event *event, void *data)			{ }

static inline int perf_register_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks)				{ return 0; }
static inline int perf_unregister_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks)				{ return 0; }

static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
static inline void perf_event_exec(void)				{ }
static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
static inline void perf_event_fork(struct task_struct *tsk)		{ }
static inline void perf_event_init(void)				{ }
static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
static inline void perf_swevent_put_recursion_context(int rctx)		{ }
static inline u64 perf_swevent_set_period(struct perf_event *event)	{ return 0; }
static inline void perf_event_enable(struct perf_event *event)		{ }
static inline void perf_event_disable(struct perf_event *event)		{ }
static inline int __perf_event_disable(void *info)			{ return -1; }
static inline void perf_event_task_tick(void)				{ }
#endif

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
extern bool perf_event_can_stop_tick(void);
#else
static inline bool perf_event_can_stop_tick(void)			{ return true; }
#endif

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern void perf_restore_debug_store(void);
#else
static inline void perf_restore_debug_store(void)			{ }
#endif

#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))

/*
 * This has to have a higher priority than migration_notifier in sched/core.c.
 */
#define perf_cpu_notifier(fn)						\
do {									\
	static struct notifier_block fn##_nb =				\
		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
	unsigned long cpu = smp_processor_id();				\
	unsigned long flags;						\
									\
	cpu_notifier_register_begin();					\
	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,			\
		(void *)(unsigned long)cpu);				\
	local_irq_save(flags);						\
	fn(&fn##_nb, (unsigned long)CPU_STARTING,			\
		(void *)(unsigned long)cpu);				\
	local_irq_restore(flags);					\
	fn(&fn##_nb, (unsigned long)CPU_ONLINE,				\
		(void *)(unsigned long)cpu);				\
	__register_cpu_notifier(&fn##_nb);				\
	cpu_notifier_register_done();					\
} while (0)

/*
 * Bare-bones version of perf_cpu_notifier(), which doesn't invoke the
 * callback for already online CPUs.
 */
#define __perf_cpu_notifier(fn)						\
do {									\
	static struct notifier_block fn##_nb =				\
		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
									\
	__register_cpu_notifier(&fn##_nb);				\
} while (0)

struct perf_pmu_events_attr {
	struct device_attribute attr;
	u64 id;
	const char *event_str;
};

ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
			      char *page);

#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
static struct perf_pmu_events_attr _var = {				\
	.attr = __ATTR(_name, 0444, _show, NULL),			\
	.id   =  _id,							\
};

#define PMU_EVENT_ATTR_STRING(_name, _var, _str)			    \
static struct perf_pmu_events_attr _var = {				    \
	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
	.id		= 0,						    \
	.event_str	= _str,						    \
};
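
/*
 * Example (hypothetical names and encoding, for illustration only):
 *
 *	PMU_EVENT_ATTR_STRING(my_event, evattr_my_event, "event=0x3c");
 *
 * defines evattr_my_event, whose sysfs show routine is
 * perf_event_sysfs_show() and which prints "event=0x3c".
 */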

#define PMU_FORMAT_ATTR(_name, _format)					\
static ssize_t								\
_name##_show(struct device *dev,					\
			       struct device_attribute *attr,		\
			       char *page)				\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
									\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
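
/*
 * Example: expose a "config:0-7" field description in a PMU's sysfs
 * "format" group.  This expands to an event_show() routine and a
 * format_attr_event device attribute:
 *
 *	PMU_FORMAT_ATTR(event, "config:0-7");
 */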

#endif /* _LINUX_PERF_EVENT_H */